#!/usr/bin/perl use strict; use constant BABOON => 0; use constant CHIMP => 1; use constant CAT => 2; use constant DOG => 3; use constant COW => 4; use constant PIG => 5; my @species = ("Baboon","Chimp","Cat","Dog","Cow","Pig"); getParams(); sub debug_print { my $logfile; my $msg = shift; $logfile = $0; $logfile =~ s/\.pl//g; $logfile =~ s/\.\///g; $logfile = "debug_".$logfile.".txt"; open(LOGFILE, ">>".$logfile); print LOGFILE $msg; close (LOGFILE); } sub compute_melting_temp { my $seq1; my $seq2; my $bp; my $temp = 0; my $matches = ""; my $at_cnt =0; my $gc_cnt = 0; my $mismatches = 0; ($seq1, $seq2) = (@_); for (my $i=0; $i33)) #{ #print "$rtn_value\n"; #print "$seq\n"; #} return $rtn_value; } } $i++; } return 0; } sub count_mismatch_types { my $seq1; my $seq2; my $bp1; my $bp2; my $ts=0; my $tv=0; my $gap=0; my $ignore=0; my @results; ($seq1, $seq2) = (@_); if (length($seq1) != length($seq2)) { print "DNA Sequences not all the same length\n"; return -1; } for (my $i=0; $i) { next until (/^Name/); ($probe_name) = ($_ =~ m/Probe (.+) *$/); debug_print "Found a Probe : $probe_name\n"; # Result Line @results = (0,0,0,0,0,0); debug_print "Initializing Result Flags for probe @results\n"; $_ = ; ($result_line) = ($_ =~ m/([BMKDCP_]{6})/); debug_print "Result for probe $result_line\n"; if ($result_line ne "______") { $results[BABOON] = 1 if ($result_line =~ m/B/); $results[CHIMP] = 1 if ($result_line =~ m/M/); $results[CAT] = 1 if ($result_line =~ m/K/); $results[DOG] = 1 if ($result_line =~ m/D/); $results[COW] = 1 if ($result_line =~ m/C/); $results[PIG] = 1 if ($result_line =~ m/P/); debug_print "Probe Results : @results\n"; # Human Sequence $_ = ; ($human_sequence) = ($_ =~ m/\[([ATGCatgc ]{36})]/); debug_print "Human : [$human_sequence]\n"; # Skip Mouse Sequence $_ = ; $mouse_sequence = ""; ($mouse_sequence) = ($_ =~ m/\[([ATGCatgc ]{36})]/); debug_print "Mouse : [$human_sequence]\n"; # Baboon Sequence $_ = ; ($species_sequence[BABOON]) = ($_ =~ m/\[([ATGCatgc ]{36})]/); debug_print "Baboon : [$species_sequence[BABOON]]\n"; # Cat Sequence $_ = ; ($species_sequence[CAT]) = ($_ =~ m/\[([ATGCatgc ]{36})]/); debug_print "Cat : [$species_sequence[CAT]]\n"; # Skip Chicken Sequence $_ = ; # Chimp Sequence $_ = ; ($species_sequence[CHIMP]) = ($_ =~ m/\[([ATGCatgc ]{36})]/); debug_print "Chimp : [$species_sequence[CHIMP]]\n"; # Cow Sequence $_ = ; ($species_sequence[COW]) = ($_ =~ m/\[([ATGCatgc ]{36})]/); debug_print "Cow : [$species_sequence[COW]]\n"; # Dog Sequence $_ = ; ($species_sequence[DOG]) = ($_ =~ m/\[([ATGCatgc ]{36})]/); debug_print "Dog : [$species_sequence[DOG]]\n"; # Skip Fugu Sequence $_ = ; # Pig Sequence $_ = ; ($species_sequence[PIG]) = ($_ =~ m/\[([ATGCatgc ]{36})]/); debug_print "Pig : [$species_sequence[PIG]]\n\n"; # Extract the Rat Sequence $_ = ; $rat_sequence = ""; ($rat_sequence) = ($_ =~ m/\[([ATGCatgc ]{36})]/); debug_print "Rat : [$rat_sequence]\n\n"; # Do mutiple species analysis if ((length($rat_sequence) == 36) && (length($mouse_sequence) == 36)) { $probe_passes = ($result_line =~ tr/[BMKDCP]//); my $hm_mask = mask_bases $human_sequence, $mouse_sequence; my $hr_mask = mask_bases $human_sequence, $rat_sequence; my $hm_tv_cnt = ($hm_mask =~ tr/#//); my $hm_ts_cnt = ($hm_mask =~ tr/\|//); my $hm_gap_cnt = ($hm_mask =~ tr/O//); my $hr_tv_cnt = ($hr_mask =~ tr/#//); my $hr_ts_cnt = ($hr_mask =~ tr/\|//); my $hr_gap_cnt = ($hr_mask =~ tr/O//); push @mismatch_type_count, ([$probe_passes, $hm_tv_cnt, $hm_ts_cnt, $hm_gap_cnt, $hr_tv_cnt, $hr_ts_cnt, $hr_gap_cnt]); $hmr_mismatches = count_mismatches($human_sequence, $mouse_sequence, $rat_sequence); $hm_mismatches = count_mismatches($human_sequence, $mouse_sequence); $hr_mismatches = count_mismatches($human_sequence, $rat_sequence); if ($hmr_mismatches == 36) { print "Results $result_line\n"; print "Human : $human_sequence\t$hmr_mismatches\n"; print "Mouse : $mouse_sequence\t$hm_mismatches\n"; print "Rat : $rat_sequence\t$hr_mismatches\n"; exit; } push @hmr_results, ([$probe_passes,$hmr_mismatches, $hm_mismatches, $hr_mismatches]); } debug_print "Tesing Mismatches\n"; for (my $i=0; $i<@species; $i++) { debug_print "[$human_sequence]\n"; debug_print "[$species_sequence[$i]]\n"; if (length($species_sequence[$i]) == 36) { $cnt = count_mismatches($human_sequence, $species_sequence[$i]); my $melting_temp = compute_melting_temp($human_sequence, $species_sequence[$i]); $temp = mask_bases $human_sequence, $species_sequence[$i]; debug_print "[$temp]\n"; debug_print "Number of mismatches [$cnt]\n"; debug_print "Probe Passes (0/1) : [$results[$i]]\n\n\n"; if ($results[$i] == 0) { $mismatches_failed[$i][$cnt]++; $fail_sum[$cnt]++; $probe_result = "FAIL"; } else { $mismatches_passed[$i][$cnt]++; $pass_sum[$cnt]++; $probe_result = "----"; } push @{$visual_results[$cnt]},[($temp, $species[$i], $probe_result, $melting_temp)]; } else { debug_print "Species sequence not 36bp in length\n\n\n"; } } } else { for (my $i=0; $i<9; $i++) { $_ = } } } close (PROBES); # Mismatches over multiple species if (0 == 1) { @hmr_results = sort {$a->[0] <=> $b->[0]} @hmr_results; print "Order by Number of number of passes\n"; print "Species\tHMR\tHM\tHR\n"; for (my $i=0; $i<@hmr_results; $i++) { print "$hmr_results[$i][0]\t$hmr_results[$i][1]\t$hmr_results[$i][2]\t$hmr_results[$i][3]\n"; } print "\n\n\n"; @hmr_results = sort {$a->[1] <=> $b->[1]} @hmr_results; print "Order by Number of HMR Mismatches\n"; print "Species\tHMR\tHM\tHR\n"; for (my $i=0; $i<@hmr_results; $i++) { print "$hmr_results[$i][0]\t$hmr_results[$i][1]\t$hmr_results[$i][2]\t$hmr_results[$i][3]\n"; } print "\n\n\n"; @hmr_results = sort {$a->[2] <=> $b->[2]} @hmr_results; print "Order by Number of HM Mismatches\n"; print "Species\tHMR\tHM\tHR\n"; for (my $i=0; $i<@hmr_results; $i++) { print "$hmr_results[$i][0]\t$hmr_results[$i][1]\t$hmr_results[$i][2]\t$hmr_results[$i][3]\n"; } print "\n\n\n"; @hmr_results = sort {$a->[3] <=> $b->[3]} @hmr_results; print "Order by Number of HR Mismatches\n"; print "Species\tHMR\tHM\tHR\n"; for (my $i=0; $i<@hmr_results; $i++) { print "$hmr_results[$i][0]\t$hmr_results[$i][1]\t$hmr_results[$i][2]\t$hmr_results[$i][3]\n"; } print "\n\n\n"; } # 2 Tables, one pass grid, one fail grid. compaing number of mismatches vs. species if (0 == 1) { print "\n\nPASSING PROBES\n\n\t"; for (my $i=0; $i<37; $i++) { print $i; if ($i < 10) { print " "; } else { print " "; } } print "\n"; print "\t"; for (my $i=0; $i<4*37; $i++) { print "-"; } print "\n"; for (my $species_idx=0; $species_idx<=$#species; $species_idx++) { print "$species[$species_idx]\t"; for (my $mismatch_idx=0; $mismatch_idx<37; $mismatch_idx++) { print "$mismatches_passed[$species_idx][$mismatch_idx]"; for (my $i=0; $i<4-(length($mismatches_passed[$species_idx][$mismatch_idx])); $i++) { print " "; } } print "\n"; } print "\t"; for (my $i=0; $i<4*37; $i++) { print "-"; } print "\n"; print "Total\t"; for (my $i=0; $i<37; $i++) { print $pass_sum[$i]; if ($pass_sum[$i] < 10) { print " "; } elsif ($pass_sum[$i] < 100) { print " "; } else { print " "; } } print "\n\n\n"; print "\n\nFAILED PROBES\n\n\t"; for (my $i=0; $i<37; $i++) { print $i; if ($i < 10) { print " "; } else { print " "; } } print "\n"; print "\t"; for (my $i=0; $i<4*37; $i++) { print "-"; } print "\n"; for (my $species_idx=0; $species_idx<=$#species; $species_idx++) { print "$species[$species_idx]\t"; for (my $mismatch_idx=0; $mismatch_idx<37; $mismatch_idx++) { print "$mismatches_failed[$species_idx][$mismatch_idx]"; for (my $i=0; $i<4-(length($mismatches_failed[$species_idx][$mismatch_idx])); $i++) { print " "; } } print "\n"; } print "\t"; for (my $i=0; $i<4*37; $i++) { print "-"; } print "\n"; print "Total\t"; for (my $i=0; $i<37; $i++) { print $fail_sum[$i]; if ($fail_sum[$i] < 10) { print " "; } elsif ($fail_sum[$i] < 100) { print " "; } else { print " "; } } print "\n\n\n"; } if (0 == 1) { my $verify_counter=0; my $verify_fail = 0; my $verify_pass = 0; # Length Analysis for (my $i=0; $i<@{$visual_results[2]}; $i++) { $verify_counter++; my $bp_idx1 = locate_mismatches $visual_results[2][$i][0], 1; my $bp_idx2 = locate_mismatches $visual_results[2][$i][0], 2; my @longest; push @longest, 36-$bp_idx2; push @longest, $bp_idx2-$bp_idx1-1; push @longest, $bp_idx1-1; @longest = sort {$a <=> $b} @longest; my $longest_seq = $longest[2]; if ($visual_results[2][$i][2] eq "FAIL") { $verify_fail++; $fail_idx[$longest_seq]++; } else { $verify_pass++; $pass_idx[$longest_seq]++; } } print "Total 2 Mismatches : $verify_counter\n"; print "Total pass : $verify_pass\n"; print "Total fail : $verify_fail\n"; my $tmp_ctr = 0; print "Passing Probes, Longest Sequence\n"; for (my $i=1; $i<37; $i++) { print "$i\t$pass_idx[$i]\n"; $tmp_ctr = $tmp_ctr + $pass_idx[$i]; } print "Total Pass : $tmp_ctr\n"; print "\n"; $tmp_ctr =0; print "Failing Probes, Shortest Seq\n"; for (my $i=1; $i<37; $i++) { print "$i\t$fail_idx[$i]\n"; $tmp_ctr = $tmp_ctr + $fail_idx[$i]; } print "Total Fail: $tmp_ctr\n"; print "\n"; } if (0 == 1) { my $verify_counter=0; my $verify_fail = 0; my $verify_pass = 0; # Length Analysis for (my $i=0; $i<@{$visual_results[2]}; $i++) { $verify_counter++; my $bp_idx1 = locate_mismatches $visual_results[2][$i][0], 1; my $bp_idx2 = locate_mismatches $visual_results[2][$i][0], 2; #my $bp_idx3 = locate_mismatches $visual_results[3][$i][0], 3; my @shortest; # Don't use the end sequences #push @shortest, 36-$bp_idx3; #push @shortest, $bp_idx3 -$bp_idx2 - 1; push @shortest, $bp_idx2-$bp_idx1-1; #push @shortest, $bp_idx1-1; @shortest = sort {$a <=> $b} @shortest; my $shortest_seq = $shortest[0]; if ($visual_results[2][$i][2] eq "FAIL") { $verify_fail++; $fail_idx[$shortest_seq]++; } else { $verify_pass++; $pass_idx[$shortest_seq]++; } } print "Total 2 Mismatches : $verify_counter\n"; print "Total pass : $verify_pass\n"; print "Total fail : $verify_fail\n"; my $tmp_ctr = 0; print "Passing Probes, Shortet Seq\n"; for (my $i=0; $i<37; $i++) { print "$i\t$pass_idx[$i]\n"; $tmp_ctr = $tmp_ctr + $pass_idx[$i]; } print "Total Pass : $tmp_ctr\n"; print "\n"; $tmp_ctr =0; print "Failing Probes, Shortest Seq\n"; for (my $i=0; $i<37; $i++) { print "$i\t$fail_idx[$i]\n"; $tmp_ctr = $tmp_ctr + $fail_idx[$i]; } print "Total Fail: $tmp_ctr\n"; print "\n"; } if (0 == 1) { # Index Analysis for (my $i=0; $i<@{$visual_results[1]}; $i++) { my $bp_idx1 = locate_mismatches $visual_results[1][$i][0], 1; if ($visual_results[1][$i][2] eq "FAIL") { $fail_idx[$bp_idx1]++; } else { $pass_idx[$bp_idx1]++; } } my $tmp_ctr = 0; print "Passing Probes, Mismatch Index\n"; for (my $i=1; $i<37; $i++) { print "$i\t$pass_idx[$i]\n"; $tmp_ctr = $tmp_ctr + $pass_idx[$i]; } print "Total Pass : $tmp_ctr\n"; print "\n"; $tmp_ctr =0; print "Failing Probes, Mismatch Index\n"; for (my $i=1; $i<37; $i++) { print "$i\t$fail_idx[$i]\n"; $tmp_ctr = $tmp_ctr + $fail_idx[$i]; } print "Total Fail: $tmp_ctr\n"; print "\n"; } if ( 0 == 1) { for (my $i=0; $i<@visual_results; $i++) { my $last_id = ""; if (@{$visual_results[$i]} > 0) { @{$visual_results[$i]} = sort {$a->[3] <=> $b->[3] or $a->[1] cmp $b->[1]} @{$visual_results[$i]}; for (my $j=0; $j<@{$visual_results[$i]}; $j++) { if ($last_id ne $visual_results[$i][$j][1]) { print "\n"; $last_id = $visual_results[$i][$j][1]; } print "$i\t$visual_results[$i][$j][2]\t$visual_results[$i][$j][3]\n"; } } print "\n"; } } if (1 == 1) { print "Order by Number of HMR Mismatches\n"; print "Species\tHM-tv\tHM-ts\tHM-gap\tHR-tv\tHR-ts\tHR-gap\n"; for (my $i=0; $i<@mismatch_type_count; $i++) { print "$mismatch_type_count[$i][0]"; print "\t$mismatch_type_count[$i][1]"; print "\t$mismatch_type_count[$i][2]"; print "\t$mismatch_type_count[$i][3]"; print "\t$mismatch_type_count[$i][4]"; print "\t$mismatch_type_count[$i][5]"; print "\t$mismatch_type_count[$i][6]\n"; } }