#!/usr/bin/perl
use strict;
use constant SUCCESS => 0;
use constant ERROR => -1;
require XML::Simple;
use subs ;
use Data::Dumper;
use File::Basename;
use Benchmark;
# Parse the command line once.  %parameters is a file-scoped lexical, so the
# subroutines defined further down in this file can read it as well.
my %parameters = getParams();
my $location;
my $file;
my $ext;

# -file mode: rank the probes from the hybridization result files that live
# in the directory of the given file and print one de-duplicated list.
if ($parameters{file} ne "")
{
    my @rankings;
    my @final_ranking;

    # Only the directory part is used below; the result file names are fixed.
    ($file,$location) = fileparse($parameters{file});
    chop($location = `pwd`) if ($location eq "./");

    # @rankings = Rank_Probes($location,"hybridize_results.xml.zfish",@rankings);
    # @rankings = Rank_Probes($location,"hybridize_results.xml.tetraodon",@rankings);
    for my $result_file ("hybridize_results.xml.fugu",
                         "hybridize_results.xml.chicken",
                         "hybridize_results.xml.mouse")
    {
        @rankings = Rank_Probes($location,$result_file,@rankings);
    }

    # Best category 1 success rate first.
    @rankings = sort {$b->[2] <=> $a->[2]} @rankings;

    # Keep only the best-ranked record of every probe sequence.  The list is
    # ordered by rate, not by sequence, so duplicates are not guaranteed to be
    # neighbours; a lookup hash catches them wherever they appear.
    my %seen_sequence;
    for my $probe (@rankings)
    {
        next if $seen_sequence{$probe->[0]}++;
        push @final_ranking, [ @{$probe}[0 .. 7] ];
    }

    # Report: per probe the three category rate/count pairs, then the probe
    # species and the probe sequence.
    print "\n";
    print "\n";
    for my $probe (@final_ranking)
    {
        print " \n";
        print " $probe->[2]\n";
        print " $probe->[3]\n";
        print " $probe->[4]\n";
        print " $probe->[5]\n";
        print " $probe->[6]\n";
        print " $probe->[7]\n";
        print " $probe->[1]\n";
        print " $probe->[0]\n";
        print " \n";
    }
    print "\n";
}

# -target N processes a single target directory, -all processes every one.
Process_Target("Target$parameters{target}") if ($parameters{target} > 0);
Process_All() if ($parameters{all} == 1);
#########################################################################
#
# Process_All
#
# Runs Process_Target on every entry of /home/bcarlson/data whose name is
# "Target" followed by a single digit.  Takes no arguments.
#
#########################################################################
sub Process_All
{
    my $data_dir = "/home/bcarlson/data";
    my @targets;

    debug_print("Start Processing all targets\n");

    # Collect the names first and close the handle before descending, so no
    # directory handle stays open while Process_Target does its own scan.
    if (opendir(my $data_dh, $data_dir))
    {
        @targets = grep /^Target\d$/, readdir($data_dh);
        closedir($data_dh);
    }
    else
    {
        # Report the problem but carry on with an empty target list.
        warn "Process_All: cannot open directory [$data_dir]: $!\n";
    }

    foreach my $target (@targets)
    {
        debug_print("\tCall Target [$target]\n");
        Process_Target($target);
    }
    debug_print("Stop Processing all targets\n");
    return;
}
#########################################################################
#
# Process_Target
#
# Ranks the probes of every human_*verbose.probes.xml file found in one
# target directory and prints "<category 1 rate><TAB><sequence>" for each
# probe, in the order the probes were collected.
#
# Parameters:
#   $target_dir - directory name relative to /home/bcarlson/data
#
#########################################################################
sub Process_Target
{
    my $target_dir = shift;
    my @rankings;
    my @probe_files;

    $target_dir = "/home/bcarlson/data/$target_dir";
    debug_print("\tProcessing Target [$target_dir]\n");

    if (opendir(my $target_dh, $target_dir))
    {
        @probe_files = grep /^human_.+verbose.probes.xml$/, readdir($target_dh);
        closedir($target_dh);
    }
    else
    {
        # Report the problem but carry on with an empty file list.
        warn "Process_Target: cannot open directory [$target_dir]: $!\n";
    }

    foreach my $file (@probe_files)
    {
        debug_print("\t\tFile [$file]\n");
        @rankings = Rank_Probes($target_dir,$file,@rankings);
    }

    # Iterate over the elements themselves so the last ranking is printed
    # too (an index loop bounded by "< $#rankings" stops one short).
    for my $ranking (@rankings)
    {
        print "$ranking->[2]\t$ranking->[0]\n";
    }
    return;
}
#########################################################################
#
# Rank_Probes
#
# Reads one hybridization result XML file and appends one ranking record
# per hybridization entry to the records handed in.
#
# Parameters:
#   $location   - directory that holds the file
#   $probe_file - file name inside $location
#   @rankings   - records collected so far (may be empty)
#
# Returns the incoming records followed by the new ones.  Every record
# is an array ref:
#   [0] probe sequence         [1] probe species
#   [2] category 1 mean rate   [3] category 1 species count
#   [4] category 2 mean rate   [5] category 2 species count
#   [6] category 3 mean rate   [7] category 3 species count
#
# Only results with a success_rate above 0 are counted.  A mean rate is
# formatted with one decimal place; a category without any counted result
# reports the rate 0.
#
#########################################################################
sub Rank_Probes
{
    my ($location, $probe_file, @rankings) = @_;

    # Result species -> category number.  Species not listed here are
    # ignored.
    # NOTE(review): fugu, baboon and chimp are in no category - confirm
    # that this is intended.
    my %category_of = (
        dog   => 1, cat       => 1, cow     => 1, pig => 1,
        mouse => 2, rat       => 2,
        zfish => 3, tetraodon => 3, chicken => 3,
    );

    debug_print("\t\t\tProcessing [$probe_file] at location [$location]\n");
    my $probes = XML::Simple->new()->XMLin("$location/$probe_file");
    my $hybridizations = ref($probes) eq 'HASH' ? $probes->{hybridization} : undef;

    for my $hybridization (_xml_element_list($hybridizations))
    {
        my $probe = $hybridization->{probe};
        debug_print("\t\t\t\t[".$probe->{sequence}."]\n");

        # Slots 1..3 hold the per-category totals; slot 0 is unused.
        my @species_count = (0, 0, 0, 0);
        my @rate_sum      = (0, 0, 0, 0);

        for my $result (_xml_element_list($hybridization->{result}))
        {
            next unless $result->{success_rate} > 0;

            my $category = $category_of{ $result->{species} };
            next unless defined $category;

            $species_count[$category]++;
            $rate_sum[$category] += $result->{success_rate};
        }

        my @record = ($probe->{sequence}, $probe->{species});
        for my $category (1 .. 3)
        {
            my $mean_rate = $species_count[$category] > 0
                ? sprintf("%.1f", $rate_sum[$category] / $species_count[$category])
                : 0;
            push @record, $mean_rate, $species_count[$category];
        }
        push @rankings, \@record;
    } # End Loop for probe

    return @rankings;
}

# Returns the entries behind an XML::Simple value as a plain list: nothing
# for undef, the elements of an array ref, otherwise the value itself.
# XML::Simple hands back a lone repeated element without the surrounding
# array ref unless ForceArray is set, so both shapes have to be accepted.
sub _xml_element_list
{
    my ($node) = @_;

    return ()       unless defined $node;
    return @{$node} if ref($node) eq 'ARRAY';
    return ($node);
}
#use Data::Dumper;
#print Dumper(\%parameters);
# Calculate_Score - unimplemented stub.
#
# Accepts ($probe, $target, $source_species) and walks over a fixed list of
# species names, but the loop body is empty: nothing is computed, printed or
# explicitly returned.
sub Calculate_Score
{
my $target_file;
my @species = ("mouse","baboon","cat","chicken","chimp","cow","dog","fugu","pig","rat","tetra","zfish");
my $species_dna = "";
my ($probe, $target, $source_species) = (@_);
# Placeholder loop: visits every index of @species without doing any work.
for (my $i=0; $i<=$#species ; $i++)
{
} # End Loop for Species
}
# debug_print - appends a message to the debug log file.
#
# Parameters:
#   $msg - text to write; it is written as is, no newline is added
#
# Does nothing when $parameters{debug} is empty.  When the log file cannot
# be opened a warning is issued and the message is dropped.
sub debug_print
{
    # Keep "use vars" here.  It only allows the unqualified name under
    # strict, so a file-scoped "my %parameters" declared above still takes
    # precedence; "our %parameters" would hide that lexical.
    use vars qw(%parameters);

    my $msg     = shift;
    my $logfile = $parameters{debug};
    return unless defined($logfile) && $logfile ne "";

    # Three-argument open: the file name can never be read as part of the mode.
    open(my $log_fh, '>>', $logfile) or do
    {
        warn "debug_print: cannot append to [$logfile]: $!\n";
        return;
    };
    print {$log_fh} $msg;
    close($log_fh);
    return;
}
#########################################################################
# getParams - reads the options from the command line
#             prints the usage text and dies when no probe source is
#             given or the target number is not valid
#
# uses Getopt::Long and reads @ARGV
# returns hash of options and values:
#
# all    - 1 when -all was given, otherwise 0
# file   - value of -file, otherwise ""
# target - value of -target (a single number from 1 to 5), otherwise 0
# debug  - log file name given with -debug, "rank_probes.log" when -debug
#          was given without a name, otherwise ""
#########################################################################
sub getParams {
    my %opts;
    my $errstr = "";
    use Getopt::Long;
    use Data::Dumper;

    GetOptions(\%opts, 'debug:s','all','target=i','file=s');
    #debug_print Dumper(\%opts);

    # -debug without a file name selects the default log file.
    if (defined($opts{debug}) && $opts{debug} eq "")
    {
        $opts{debug} = "rank_probes.log";
    }

    if ((!defined($opts{target})) && (!defined($opts{all})) && (!defined($opts{file})))
    {
        $errstr .= "ERROR : No probes sources defined.\n";
    }

    # The value has to be exactly one digit from 1 to 5.  Only looking for
    # a character outside 1-5 would let values such as 12 or 55 through.
    if ((defined($opts{target})) && ($opts{target} !~ m/\A[1-5]\z/))
    {
        $errstr .= "Target number invalid, only 1-5\n";
    }

    if ($errstr) {
        print <<"END_USAGE";
Description:
This script will count the number of probes for each species category.
Usage: rank_probes [options]
Probe Sources:
-file Rank probes from a single probe file.
-target Rank probes from one target (1-5).
-all Rank probes from all targets.
-debug [logfile] Print debug messages to log file (Default: rank_probes.log)
END_USAGE
        print "\n$errstr";
        die("\n");
    }

    return (
        all    => $opts{all}    || 0,
        file   => $opts{file}   || "",
        target => $opts{target} || 0,
        debug  => $opts{debug}  || "");
}