#!/usr/bin/perl
use strict;
use warnings;

use constant SUCCESS => 0;
use constant ERROR   => -1;

require XML::Simple;
use subs ;
use Data::Dumper;
use File::Basename;
use Benchmark;
use Getopt::Long;

# This script takes an XML file of probes and, for each probe, looks up the
# probe's location in the matching "human_<species>_T5.verbose.probes.xml"
# hybridization file.  The combined results are written to three files in the
# current directory: all_probes.cvs, all_probes.xml and all_probes.blast.

my $probe_file = getParams();

my $probes     = XML::Simple->new()->XMLin("$probe_file");
my $probe_list = as_array_ref($probes->{probe});

#print Dumper \$probes;
#exit;

# NOTE(review): the ".cvs" extension looks like a typo for ".csv"; it is kept
# as-is because downstream consumers may depend on the existing file name.
open(my $csv_fh, '>', 'all_probes.cvs')
    or die "Cannot open all_probes.cvs for writing: $!";
open(my $xml_fh, '>', 'all_probes.xml')
    or die "Cannot open all_probes.xml for writing: $!";
open(my $blast_fh, '>', 'all_probes.blast')
    or die "Cannot open all_probes.blast for writing: $!";

# NOTE(review): the strings written to the XML file contain no element tags.
# Presumably the markup was stripped from this copy of the script -- confirm
# against the original before relying on all_probes.xml being well-formed.
print {$xml_fh} "\n";
print {$xml_fh} "\n";

# Parsed hybridization data, keyed by species.  The hybridization file name
# depends only on the species, so each file is parsed once instead of once
# per probe.
my %alignments_for;

for my $i (0 .. $#{$probe_list}) {
    my $probe        = $probe_list->[$i];
    my $species      = $probe->{species};
    my $target_probe = $probe->{sequence};

    if (!exists $alignments_for{$species}) {
        my $hybridize_file = "human_" . $species . "_T5.verbose.probes.xml";
        my $results        = XML::Simple->new()->XMLin("$hybridize_file");
        $alignments_for{$species} = as_array_ref($results->{parent_alignment});
    }

    my $location = find_location($alignments_for{$species}, $target_probe);

    my ($cat1_count, $cat1_score, $cat2_count, $cat2_score, $cat3_count, $cat3_score)
        = @{$probe}{qw(cat1_count cat1_score cat2_count cat2_score cat3_count cat3_score)};

    print {$blast_fh} ">$i $species $location $cat1_count $cat1_score $cat2_count $cat2_score $cat3_count $cat3_score\n";
    print {$blast_fh} "$target_probe\n\n";

    print {$csv_fh} "$i,$cat1_score,$cat1_count,$cat2_score,$cat2_count,$cat3_score,$cat3_count,$location,$species,$target_probe\n";

    print {$xml_fh} " \n";
    print {$xml_fh} " $i\n";
    print {$xml_fh} " $target_probe\n";
    print {$xml_fh} " $species\n";
    print {$xml_fh} " $location\n";
    print {$xml_fh} " $cat1_count\n";
    print {$xml_fh} " $cat1_score\n";
    print {$xml_fh} " $cat2_count\n";
    print {$xml_fh} " $cat2_score\n";
    print {$xml_fh} " $cat3_count\n";
    print {$xml_fh} " $cat3_score\n";
    print {$xml_fh} " \n\n";
}

print {$xml_fh} "\n";

# Buffered write errors only surface at close, so check every handle.
close($csv_fh)   or die "Error closing all_probes.cvs: $!";
close($xml_fh)   or die "Error closing all_probes.xml: $!";
close($blast_fh) or die "Error closing all_probes.blast: $!";

#########################################################################
# as_array_ref - normalise a value returned by XML::Simple to an array ref
#
# XML::Simple yields an array ref when an element is repeated, but a plain
# hash ref (or scalar) when it occurs exactly once, and nothing at all when
# it is absent.  This lets callers iterate uniformly in all three cases.
#
# $node   - value taken from the parsed XML structure (may be undef)
# returns - $node itself if it is already an array ref, [] if it is undef,
#           otherwise a one-element array ref wrapping $node
#########################################################################
sub as_array_ref {
    my ($node) = @_;

    return []    if !defined $node;
    return $node if ref($node) eq 'ARRAY';
    return [$node];
}

#########################################################################
# find_location - look up the location of a probe sequence
#
# Scans the alignments in order for one whose probe sequence equals
# $sequence.  The location of a match is the alignment's species1 start
# plus the probe offset.  Scanning stops at the first match whose location
# is non-zero; a match that yields 0 does not stop the scan.
#
# $alignments - array ref of parent_alignment records
# $sequence   - probe sequence to look for
# returns     - the location, or 0 if no match produced a non-zero location
#########################################################################
sub find_location {
    my ($alignments, $sequence) = @_;

    my $location = 0;
    for my $alignment (@{$alignments}) {
        #print "$sequence $alignment->{probe}->{sequence}\n";
        next if $sequence ne $alignment->{probe}->{sequence};

        $location = $alignment->{species}->{species1}->{start}
                  + $alignment->{probe}->{offset};
        last if $location != 0;
    }

    return $location;
}

#########################################################################
# getParams - reads the command line options
#             prints usage and dies if a required option is missing
#
# uses Getopt::Long and reads @ARGV
# returns the value of the -file option (the probes XML file)
#
# Options:
#   file - XML file of probes to process (required)
#########################################################################
sub getParams {
    my %opts;
    my $errstr = "";

    GetOptions(\%opts, 'file=s');

    if (!defined($opts{file})) {
        $errstr .= "ERROR : No probes sources defined.\n";
    }

    if ($errstr) {
        # The original line layout of this usage text was not recoverable;
        # the wording is unchanged.
        print <<"END_USAGE";

Description:
    This script will take a XML file of probes, and look up the location in the human
    the location of the probes.

Usage:
    locate_bp [options]

Probe Sources:
    -file    Hybridization file resultant file.

END_USAGE
        print "\n$errstr" if ($errstr);
        die("\n");
    }

    return $opts{file};
}