#!/usr/bin/perl
#########################################################################
# rank_probes - rank hybridization probes by per-category success rate.
#
# Reads probe XML files (parsed with XML::Simple), averages the positive
# success rates of each probe over three species categories, and prints
# the resulting rankings.  See getParams() for the command line options.
#########################################################################
use strict;
use warnings;

use constant SUCCESS => 0;
use constant ERROR   => -1;

# Directory that holds the TargetN sub-directories.
use constant DATA_DIR => "/home/bcarlson/data";

require XML::Simple;
use subs ;
use Benchmark;
use Cwd qw(getcwd);
use Data::Dumper;
use File::Basename;
use Getopt::Long;

# Species category lookup.  A species that is not listed here (for
# example "fugu") is not counted in any category.
my @CATEGORIES  = (1, 2, 3);
my %CATEGORY_OF = (
    dog       => 1,
    cat       => 1,
    cow       => 1,
    pig       => 1,
    mouse     => 2,
    rat       => 2,
    zfish     => 3,
    tetraodon => 3,
    chicken   => 3,
);

# Result files ranked by the -file option, in processing order.
my @HYBRIDIZE_RESULT_FILES = (
    # "hybridize_results.xml.zfish",
    # "hybridize_results.xml.tetraodon",
    "hybridize_results.xml.fugu",
    "hybridize_results.xml.chicken",
    "hybridize_results.xml.mouse",
);

# Parsed command line options; also read by debug_print().
my %parameters = getParams();

Process_File($parameters{file})             if ($parameters{file} ne "");
Process_Target("Target$parameters{target}") if ($parameters{target} > 0);
Process_All()                               if ($parameters{all} == 1);

#########################################################################
#
# Process_File
#
# Ranks the probes found in the hybridize_results.xml.* files that live
# in the same directory as $probe_path and prints one record per unique
# probe sequence, best category-1 success rate first.
#
# Only the directory part of $probe_path is used; the file names come
# from @HYBRIDIZE_RESULT_FILES.
#
#########################################################################
sub Process_File {
    my ($probe_path) = @_;

    my (undef, $location) = fileparse($probe_path);

    # fileparse() yields "./" for a bare file name; use the absolute
    # working directory instead (previously obtained by running `pwd`).
    $location = getcwd() if ($location eq "./");

    my @rankings;
    for my $results_file (@HYBRIDIZE_RESULT_FILES) {
        @rankings = Rank_Probes($location, $results_file, @rankings);
    }

    # Best category-1 success rate first.
    @rankings = sort { $b->[2] <=> $a->[2] } @rankings;

    # Keep only the best-ranked row for every probe sequence.  Comparing
    # each row with just the previous one is not enough here: the list is
    # ordered by rate, so equal sequences are not necessarily adjacent.
    my %seen;
    my @final_ranking;
    for my $probe (@rankings) {
        my $sequence = defined $probe->[0] ? $probe->[0] : "";
        next if ($seen{$sequence}++);
        push @final_ranking, $probe;
    }

    # NOTE(review): the literal strings printed below look as if they
    # once contained markup (tags) that has been lost - confirm against
    # the consumer of this output before changing them.
    print "\n";
    print "\n";
    for my $probe (@final_ranking) {
        print " \n";
        print " $probe->[2]\n";
        print " $probe->[3]\n";
        print " $probe->[4]\n";
        print " $probe->[5]\n";
        print " $probe->[6]\n";
        print " $probe->[7]\n";
        print " $probe->[1]\n";
        print " $probe->[0]\n";
        print " \n";
    }
    print "\n";

    return;
}

#########################################################################
#
# Process_All
#
# Calls Process_Target() for every "TargetN" (single digit) entry found
# in DATA_DIR.
#
#########################################################################
sub Process_All {
    debug_print("Start Processing all targets\n");

    for my $target (grep { /^Target\d$/ } _list_directory(DATA_DIR)) {
        debug_print("\tCall Target [$target]\n");
        Process_Target($target);
    }

    debug_print("Stop Processing all targets\n");

    return;
}

#########################################################################
#
# Process_Target
#
# Ranks every human_*verbose.probes.xml file of one target directory
# (given relative to DATA_DIR) and prints, for each probe, the
# category-1 success rate and the probe sequence separated by a tab.
#
#########################################################################
sub Process_Target {
    my $target_dir = shift;
    my @rankings;

    $target_dir = DATA_DIR . "/$target_dir";
    debug_print("\tProcessing Target [$target_dir]\n");

    # NOTE(review): the dots in this pattern are unescaped and therefore
    # match any character; left as is so no currently matched file is lost.
    for my $file (grep { /^human_.+verbose.probes.xml$/ }
                  _list_directory($target_dir)) {
        debug_print("\t\tFile [$file]\n");
        @rankings = Rank_Probes($target_dir, $file, @rankings);
    }

    # Print every ranking (the former "$i < $#rankings" bound dropped
    # the last one).
    for my $ranking (@rankings) {
        print "$ranking->[2]\t$ranking->[0]\n";
    }

    return;
}

#########################################################################
#
# _list_directory
#
# Returns the sorted entry names of $dir.  Warns and returns an empty
# list when the directory cannot be opened, so callers simply find
# nothing to process.
#
#########################################################################
sub _list_directory {
    my ($dir) = @_;

    my $handle;
    if (!opendir($handle, $dir)) {
        warn "WARNING : cannot read directory [$dir]: $!\n";
        return;
    }

    # Sorted so that the processing order does not depend on the
    # file system.
    my @entries = sort readdir($handle);
    closedir($handle);

    return @entries;
}

#########################################################################
#
# Rank_Probes
#
# Parses $location/$probe_file and appends one row per hybridization to
# @rankings.
#
# Arguments : $location   - directory containing the probe file
#             $probe_file - name of the XML file to parse
#             @rankings   - rows collected so far
# Returns   : @rankings followed by the new rows.  Each row is
#             [ sequence, species,
#               cat1_rate, cat1_count,
#               cat2_rate, cat2_count,
#               cat3_rate, cat3_count ]
#             where count is the number of results of that category with
#             a success rate above zero and rate is their mean, formatted
#             with one decimal (0 when the count is zero).
# Dies      : when XML::Simple cannot read or parse the file.
#
#########################################################################
sub Rank_Probes {
    my ($location, $probe_file, @rankings) = @_;

    debug_print("\t\t\tProcessing [$probe_file] at location [$location]\n");

    my $parser = XML::Simple->new();

    # ForceArray: XML::Simple returns a hash instead of an array when an
    # element occurs only once, which broke the loops below.
    # KeyAttr => []: keep the lists as arrays instead of folding them
    # into hashes keyed on a name/key/id attribute.
    my $probes = $parser->XMLin(
        "$location/$probe_file",
        ForceArray => [ 'hybridization', 'result' ],
        KeyAttr    => [],
    );

    for my $hybridization (@{ $probes->{hybridization} || [] }) {
        my $probe = $hybridization->{probe};
        debug_print("\t\t\t\t[" . $probe->{sequence} . "]\n");

        my %count = map { $_ => 0 } @CATEGORIES;
        my %sum   = map { $_ => 0 } @CATEGORIES;

        for my $result (@{ $hybridization->{result} || [] }) {
            my $success_rate = $result->{success_rate};
            next unless (defined $success_rate && $success_rate > 0);

            my $species = $result->{species};
            next unless (defined $species);

            my $category = $CATEGORY_OF{$species};
            next unless (defined $category);

            $count{$category}++;
            $sum{$category} += $success_rate;
        }

        my @row = ($probe->{sequence}, $probe->{species});
        for my $category (@CATEGORIES) {
            my $rate = 0;
            if ($count{$category} > 0) {
                $rate = sprintf("%.1f", $sum{$category} / $count{$category});
            }
            push @row, $rate, $count{$category};
        }

        push @rankings, \@row;
    }

    return @rankings;
}

#########################################################################
#
# Calculate_Score
#
# Not implemented: accepts ($probe, $target, $source_species), walks the
# species list without doing any work and returns nothing.
#
#########################################################################
sub Calculate_Score {
    my ($probe, $target, $source_species) = @_;

    my @species = ("mouse", "baboon", "cat", "chicken", "chimp", "cow",
                   "dog", "fugu", "pig", "rat", "tetra", "zfish");

    for my $name (@species) {
        # TODO: scoring has not been written yet.
    }

    return;
}

#########################################################################
#
# debug_print
#
# Appends $msg to the log file named by the -debug option.  Does nothing
# when debugging is off; warns when the log file cannot be opened.
#
#########################################################################
sub debug_print {
    my ($msg) = @_;

    return if ($parameters{debug} eq "");

    my $log;
    if (!open($log, '>>', $parameters{debug})) {
        warn "WARNING : cannot append to log file [$parameters{debug}]: $!\n";
        return;
    }
    print {$log} $msg;
    close($log);

    return;
}

#########################################################################
# getParams - reads the options from the command line (@ARGV) using
#             Getopt::Long; prints the usage text and dies when no probe
#             source is given or the target number is invalid
#
# returns hash of options and values:
#   file   - path of a probe file; its directory is ranked ("" if unset)
#   target - target number, a single digit 1-5 (0 if unset)
#   all    - 1 to rank every target, otherwise 0
#   debug  - name of the debug log file ("" if debugging is off;
#            "rank_probes.log" when -debug is given without a value)
#########################################################################
sub getParams {
    my %opts;
    my $errstr = "";

    GetOptions(\%opts, 'debug:s', 'all', 'target=i', 'file=s');

    if (defined($opts{debug})) {
        $opts{debug} = "rank_probes.log" if ($opts{debug} eq "");
    }

    if ((!defined($opts{target})) && (!defined($opts{all}))
        && (!defined($opts{file}))) {
        $errstr .= "ERROR : No probes sources defined.\n";
    }

    # Exactly one digit in 1-5; the former /[^1-5]/ test let values such
    # as 12 or 55 through.
    if ((defined($opts{target})) && ($opts{target} !~ m/\A[1-5]\z/)) {
        $errstr .= "Target number invalid, only 1-5\n";
    }

    if ($errstr) {
        print <<"END_USAGE";
Description:
  This script will count the number of probes for each species category.

Usage: rank_probes [options]

Probe Sources:
  -file             Rank probes from a single probe file.
  -target           Rank probes from one target (1-5).
  -all              Rank probes from all targets.

  -debug [logfile]  Print debug messages to log file
                    (Default: rank_probes.log)
END_USAGE
        print "\n$errstr" if ($errstr);
        die("\n");
    }

    return ( all    => $opts{all}    || 0,
             file   => $opts{file}   || "",
             target => $opts{target} || 0,
             debug  => $opts{debug}  || "");
}