#!/usr/bin/perl use strict; use subs; use Getopt::Std; require XML::Simple; use Data::Dumper; my %args = getParams(); my $xs = new XML::Simple(); my %species = ( 'baboon' => 0, 'cat' => 1, 'chicken' => 2, 'chimp' => 3, 'cow' => 4, 'dog' => 5, 'fugu' => 6, 'mouse' => 7, 'pig' => 8, 'rat' => 9, 'zfish' => 10 ); ###################################### # Create and Initialize Result Arrays ###################################### my @pass; my @fail; my @longest_pass; my @longest_fail; my @type_grouping; my @single_pass; my @single_fail; for (my $i=0; $iXMLin($args{file},keyattr => {}); my $ctr = 0; for (my $i=0; $i<@{$hybridize->{probe}}; $i++) { for (my $j=0; $j<@{$hybridize->{probe}->[$i]->{results}}; $j++) { if ((ref($hybridize->{probe}->[$i]->{results}->[$j]->{species1_align}) ne "HASH") && (ref($hybridize->{probe}->[$i]->{results}->[$j]->{result}) ne "HASH")) { my $hybrid_species = $hybridize->{probe}->[$i]->{results}->[$j]->{species}; my $species_idx = $species{$hybrid_species}; my $alignment_mask = $hybridize->{probe}->[$i]->{results}->[$j]->{alignment_mask}; my $temp= $hybridize->{probe}->[$i]->{results}->[$j]->{species2_align}; my $num_mismatches = count_mismatches($alignment_mask); my $hybridize_result = $hybridize->{probe}->[$i]->{results}->[$j]->{result}; if ($hybridize_result eq "PASS") { $pass[$species_idx][$num_mismatches] = $pass[$species_idx][$num_mismatches] + 1; } else { $fail[$species_idx][$num_mismatches] = $fail[$species_idx][$num_mismatches] + 1; } if (($num_mismatches > 0) && ($num_mismatches < 4)) { my ($ts, $tv, $gaps) = identify_mismatches($alignment_mask); $type_grouping[$ctr][0] = $num_mismatches; $type_grouping[$ctr][1] = $ts; $type_grouping[$ctr][2] = $tv; $type_grouping[$ctr][3] = $gaps; $type_grouping[$ctr][4] = $hybridize_result; $ctr++; my $longest_identical = longest_sequence($alignment_mask); if ($hybridize_result eq "PASS") { $longest_pass[$num_mismatches][$longest_identical] = $longest_pass[$num_mismatches][$longest_identical] + 1; } else { $longest_fail[$num_mismatches][$longest_identical] = $longest_fail[$num_mismatches][$longest_identical] + 1; } if ($num_mismatches == 1) { my $location = mismatch_location($alignment_mask) + 1; if ($hybridize_result eq "PASS") { $single_pass[$location] = $single_pass[$location] + 1; } else { $single_fail[$location] = $single_fail[$location] + 1; } } } } } } printTable("PASSING PROBES",@pass) if ($args{tables}); printTable("FAILED PROBES",@fail) if ($args{tables}); printLongest(\@longest_pass,\@longest_fail) if ($args{max}); printTypes(@type_grouping) if ($args{category}); printSingles(\@single_pass,\@single_fail) if ($args{single}); printIdentity($hybridize) if ($args{identity}); sub printIdentity { my ($data) = @_; for (my $i=0; $i<@{$data->{probe}}; $i++) { my $source_mismatches = int(36 - ($data->{probe}->[$i]->{source_identity} / 100 * 36)); for (my $j=0; $j<@{$data->{probe}->[$i]->{results}}; $j++) { if ((ref($hybridize->{probe}->[$i]->{results}->[$j]->{species1_align}) ne "HASH") && (ref($hybridize->{probe}->[$i]->{results}->[$j]->{result}) ne "HASH")) { my $hybrid_mismatches = countMismatches($data->{probe}->[$i]->{results}->[$j]->{alignment_mask}); print $source_mismatches. ","; print $data->{probe}->[$i]->{results}->[$j]->{species}.","; print $hybrid_mismatches."\n"; } } #print "$source_mismatches\n"; } #print countMismatches($data->{probe}->[1]->{results}->[2]->{alignment_mask}) ."\n"; } sub printTypes { my (@data) = @_; @data = sort {$a->[0] <=> $b->[0]} @data; printTitle("TYPE OF MISMATCH"); print " Number Number Number Number \n"; print "Mismatches Transitions Transversion Gaps Result\n"; print "-----------------------------------------------------\n"; for (my $i=0; $i<@data; $i++) { my $total = padd($data[$i][0],6); my $ts = padd($data[$i][1],11); my $tv = padd($data[$i][2],12); my $gap = padd($data[$i][3],10); my $result = padd($data[$i][4],9); print "$total $ts $tv $gap $result\n"; } } sub printSingles { my $line; my $header; my $results; my ($pass, $fail) = @_; printTitle("1 MISMATCH LOCATION - PASSING"); for (my $i=1; $i<=$args{length}; $i++) { $header .= padd($i,4); $results .= padd($pass->[$i],4); $line.= "----"; } print "$header\n"; print "$line\n"; print "$results\n\n"; $header =""; $line = ""; $results = ""; printTitle("1 MISMATCH LOCATION - FAILED"); for (my $i=1; $i<=$args{length}; $i++) { $header .= padd($i,4); $results .= padd($fail->[$i],4); $line.= "----"; } print "$header\n"; print "$line\n"; print "$results\n\n"; } sub printLongest { my $line; my $header; my ($pass, $fail) = @_; printArray("LONGEST SEQUENCE (1 mismatches) - PASSING",$pass->[1]); printArray("LONGEST SEQUENCE (1 mismatches) - FAILED",$fail->[1]); printArray("LONGEST SEQUENCE (2 mismatches) - PASSING",$pass->[2]); printArray("LONGEST SEQUENCE (2 mismatches) - FAILED",$fail->[2]); printArray("LONGEST SEQUENCE (3 mismatches) - PASSING",$pass->[3]); printArray("LONGEST SEQUENCE (3 mismatches) - FAILED",$fail->[3]); } sub printArray { my $title; my @data; my $line; my $header; my $info; ($title, @data) = @_; printTitle($title); for (my $i=0; $i<@{$data[0]}; $i++) { $header .= padd($i, 4); $line .= "----"; $info .= padd($data[0][$i],4); } print "$header\n"; print "$line\n"; print "$info\n\n\n"; } sub printTable { my $title; my @data; ($title, @data) = @_; my @keys = keys(%species); my $line; print "$title\n"; for (my $i=0; $i -length [-tables] [-max] [-category] [-identity] Probe Sources: -file XML Probe file -length Length of the probes in the file OUTPUT Options -tables Print pass/fail tables grouped by species and mismatch count -max Print pass/fail results group by longest identical sequence -category Print Transition, Tranversion, Gap counts plus pass/fail, grouped by mismatch count -single Print out number of Pass/Fails by probe location with only 1 mismatch -identity Print out probe identity, species and hybrid identity END_USAGE print "\n$errstr" if ($errstr); die("\n"); } return ( file => $opts{file}, tables => $opts{tables} || 0, max => $opts{max} || 0, category => $opts{category} || 0, single => $opts{single} || 0, identity => $opts{identity}|| 0, length => $opts{length}); } sub identify_mismatches { my $sequence = shift; my $ts = ($sequence =~ tr/://); my $tv = ($sequence =~ tr/ //); my $gaps = ($sequence =~ tr/-//); return ($ts, $tv, $gaps); } sub countMismatches { my $sequence = shift; return 36 - ($sequence =~ tr/|//); } sub longest_sequence { my $sequence = shift; my @snips = split /[: -]/,$sequence; my $longest = 0; for (my $i=0; $i<@snips; $i++) { $longest = length($snips[$i]) if ($longest < length($snips[$i])); } return $longest; } sub count_mismatches { my ($sequence) = shift; my $count = length($sequence) - ($sequence =~ tr/|//); return $count; }