#!/usr/bin/perl
use strict;
use constant SUCCESS => 0;
use constant ERROR => -1;
require XML::Simple;
use subs ;
use Data::Dumper;
use File::Basename;
use Benchmark;
# This script takes an XML file of probes and, for each probe, looks up its
# location in the human sequence using the matching
# human_<species>_T5.verbose.probes.xml hybridization file.
# Main script: read the probe list named by -file, find each probe's location
# in the per-species hybridization results, and write one record per probe to
# three output files (CSV, XML and BLAST/FASTA-style) in the current directory.
my $probe_file = getParams();
my $xml_parser = XML::Simple->new();
my $probes     = $xml_parser->XMLin($probe_file);

# XML::Simple folds a lone repeated element into a hash ref instead of a
# one-element array ref.  Normalise both shapes (and a missing element) to a
# plain list so a file with exactly one <probe> or one <parent_alignment>
# does not die on an array dereference.
my $as_list = sub {
    my ($node) = @_;
    return ()       if !defined $node;
    return @{$node} if ref($node) eq 'ARRAY';
    return ($node);
};

my @probe_list = $as_list->($probes->{probe});
warn "WARNING : no probe entries found in $probe_file\n" if !@probe_list;

#open (CSV,">/home/bcarlson/data/Target3/all_probes.cvs");
#open (XML,">/home/bcarlson/data/Target3/all_probes.xml");
# NOTE(review): ".cvs" looks like a typo for ".csv"; the name is kept as-is
# because other tools may already expect it - confirm before renaming.
open(my $csv_fh, '>', 'all_probes.cvs')
    or die "Cannot open all_probes.cvs for writing: $!\n";
open(my $xml_fh, '>', 'all_probes.xml')
    or die "Cannot open all_probes.xml for writing: $!\n";
open(my $blast_fh, '>', 'all_probes.blast')
    or die "Cannot open all_probes.blast for writing: $!\n";

# NOTE(review): the strings written to the XML handle contain no element tags
# as they stand; they are reproduced unchanged - confirm the intended markup.
print {$xml_fh} "\n";
print {$xml_fh} "\n";

# Parsed hybridization alignments, keyed by species.  The file only depends on
# the species, so each one is parsed once instead of once per probe.
my %alignments_for;

for my $i (0 .. $#probe_list)
{
    my $probe        = $probe_list[$i];
    my $species      = $probe->{species};
    my $target_probe = $probe->{sequence};

    if (!exists $alignments_for{$species})
    {
        my $hybridize_file = "human_".$species."_T5.verbose.probes.xml";
        my $results        = $xml_parser->XMLin($hybridize_file);
        $alignments_for{$species} = [ $as_list->($results->{parent_alignment}) ];
    }

    # Location = start of species1 in the alignment plus the probe's offset.
    # A location of 0 means "not found"; as before, the search continues past
    # a match whose computed location is 0.
    my $location = 0;
    for my $alignment (@{ $alignments_for{$species} })
    {
        next if $target_probe ne $alignment->{probe}->{sequence};
        $location = $alignment->{species}->{species1}->{start};
        $location = $location + $alignment->{probe}->{offset};
        last if $location != 0;
    }

    my $cat1_count = $probe->{cat1_count};
    my $cat1_score = $probe->{cat1_score};
    my $cat2_count = $probe->{cat2_count};
    my $cat2_score = $probe->{cat2_score};
    my $cat3_count = $probe->{cat3_count};
    my $cat3_score = $probe->{cat3_score};

    print {$blast_fh} ">$i $species $location $cat1_count $cat1_score $cat2_count $cat2_score $cat3_count $cat3_score\n";
    print {$blast_fh} "$target_probe\n\n";

    print {$csv_fh} "$i,$cat1_score,$cat1_count,$cat2_score,$cat2_count,$cat3_score,$cat3_count,$location,$species,$target_probe\n";

    print {$xml_fh} " \n";
    print {$xml_fh} " $i\n";
    print {$xml_fh} " $target_probe\n";
    print {$xml_fh} " $species\n";
    print {$xml_fh} " $location\n";
    print {$xml_fh} " $cat1_count\n";
    print {$xml_fh} " $cat1_score\n";
    print {$xml_fh} " $cat2_count\n";
    print {$xml_fh} " $cat2_score\n";
    print {$xml_fh} " $cat3_count\n";
    print {$xml_fh} " $cat3_score\n";
    print {$xml_fh} " \n\n";
}
print {$xml_fh} "\n";

# Buffered write errors (e.g. a full disk) only surface at close time.
close($csv_fh)   or die "Error closing all_probes.cvs: $!\n";
close($xml_fh)   or die "Error closing all_probes.xml: $!\n";
close($blast_fh) or die "Error closing all_probes.blast: $!\n";
#########################################################################
# getParams - reads the command-line options and returns the probe file name
# dies (after printing usage) if the required -file option is missing
#
# uses Getopt::Long and reads @ARGV
# no option currently has a default value
# returns the value given for -file (a scalar, not a hash)
#
# THIS LIST IS OUT OF DATE... JUST LOOK BELOW
# pick_from_query - 0 to make overgo from reference sequence
# 1 to make from query
# identity_cutoff - minimum proportion identical bases for pip
# ref_offset - offset from begining of reference sequence at which to
# begin looking for pips
# gap_length - optimal spacing between overgos
# overgo_length - the length of the overgo probe to design
# overgo_overlap - the overlap between primers for the probe
# overgo_target_gc- the target GC percentage to go for
# overgo_gc_wiggle- the maximum difference from the overgo_target_gc
# for an overgo
# debug - write debugging messages to stdout and logfile
# overgo_filename - file to write picked overgos to
# all_overgo_filename - file to write all possible overgos to (defaults
# to none)
#########################################################################
sub getParams {
    # Parse the command line (@ARGV) and return the probe file name.
    #
    # Options:
    #   -file <name>   required; path of the XML probe file to process
    #
    # Returns: the string given for -file.
    # Dies:    after printing the usage text and the collected error messages
    #          to STDOUT, when -file is missing or the options cannot be parsed.
    use Getopt::Long;

    my %opts;
    my $errstr = "";

    # GetOptions returns false on an unknown or malformed option (it has
    # already warned about it); treat that as a usage error instead of
    # carrying on with a half-understood command line.
    if (!GetOptions(\%opts, 'file=s'))
    {
        $errstr .= "ERROR : Invalid command line options.\n";
    }
    if (!defined($opts{file}))
    {
        $errstr .= "ERROR : No probes sources defined.\n";
    }

    if ($errstr) {
        print <<"END_USAGE";
Description:
This script will take a XML file of probes, and look up the location in the human
the location of the probes.
Usage: locate_bp [options]
Probe Sources:
-file Hybridization file resultant file.
END_USAGE
        print "\n$errstr";
        # The trailing newline keeps Perl from appending "at FILE line N".
        die("\n");
    }
    return $opts{file};
}