#!/usr/bin/perl use strict; use Data::Dumper; use constant SUCCESS => 0; use constant ERROR => -1; use XML::Simple; my $results; my $sequence; my $target; # Hard coding input file my %args = getParams(); my $probe_file = new XML::Simple(); my $probes = $probe_file->XMLin($args{hybrid_file}, keyattr => {}); open(INFILE, $args{file}); open(OUTFILE, ">".$args{outfile}); while () { if ($_ =~ /Comment/) { last; } } for (my $i=0; $i<2; $i++) { $_ = ; } printXMLheader (*OUTFILE); PROBE_NAME: while () { if ($_ =~ /; } ($results) = ($_ =~ m/(.+)<\/FONT>/); $results = uc($results); for (my $i=0; $i<11; $i++) { $_ = ; } ($sequence) = ($_ =~ m/([ATGC]{0,40})<\/FONT>/); addResults( *OUTFILE, $probes, $target, $results, $sequence); for (my $i=0; $i < 4; $i++) { $_ = ; } } } close (INFILE); printXMLfooter (*OUTFILE); sub addResults { local *OUTFILE = shift; my $probes = shift; my $target = shift; my $all_results = shift; my $sequence = shift; if (($target == $args{target}) && ($args{probesize} == length($sequence))) { for (my $i=0; $i<@{$probes->{probe}}; $i++) { if ($sequence eq $probes->{probe}->[$i]->{sequence}) { printProbeHeader ( *OUTFILE, $probes->{probe}->[$i] ); for (my $j=0; $j<@{$probes->{probe}->[$i]->{results}}; $j++) { my $species_name = $probes->{probe}->[$i]->{results}->[$j]->{species}; my $species_sequence = $probes->{probe}->[$i]->{results}->[$j]->{species2_align}; my $species_result = determineResult($all_results, $species_name, $species_sequence); printProbeResults ( *OUTFILE, $probes->{probe}->[$i]->{results}->[$j], $species_result); } printProbeFooter ( *OUTFILE, $probes->{probe}->[$i] ); last; } } } return; } sub determineResult { my $all_results = shift; my $species = shift; my $sequence = shift; my $result = ""; # Species Lookup to get Character code. my %species_lookup = ( "HUMAN" => "H", "BABOON" => "B", "CHIMP" => "M", "CAT" => "K", "DOG" => "D", "COW" => "C", "PIG" => "P", "MOUSE" => "F" ); # If the species doesn't have a sequence, then the XML read will set the sequence to # be a hash. If no sequence, then the result will be "". $species = uc($species); if (ref($sequence) ne "HASH") { $result = "FAIL"; my $species_code = $species_lookup{$species}; $result = "PASS" if ($all_results =~ m/$species_code/); $result = "" if ($species_code eq ""); } return $result; } sub getParams { my %opts; my $errstr = ""; use Getopt::Long; use Data::Dumper; GetOptions(\%opts, 'file=s', 'target=i', 'probesize=i', 'inputfile=s', 'output=s', 'help'); if (!defined($opts{help})) { if (!defined($opts{file})) { $errstr .= "ERROR : No probes sources defined.\n"; } if (!defined($opts{target})) { $errstr .= "ERROR : No target specified\n"; } } else { $errstr = " "; } if ($errstr) { print <<"END_USAGE"; Description: This script will take a html formated file containing probe results and add the results to the hybridization file. Usage: add_results_to_hybridization.pl -file -target -inputfile -probesize [-output ] -file Name of file that contains lab results -target Number of target to process -inputfile Name of file that contains XML computational hyrbidizaiton results -probesize Size of probes in hybridizefile -output Output XML, that contains the lab results (default : add_results.out) END_USAGE print "\n$errstr" if ($errstr); die("\n"); } return ( file => $opts{file}, target => $opts{target}, hybrid_file => $opts{inputfile}, probesize => $opts{probesize} || 36, outfile => $opts{output} || "add_results.out" ); } sub printXMLheader { local *OUTFILE = shift; print OUTFILE "\n"; print OUTFILE "\n"; return; } sub printXMLfooter { local *OUTFILE = shift; print OUTFILE "\n\n"; print OUTFILE "\n"; return; } sub printProbeHeader { local *OUTFILE = shift; my $probe = shift; print OUTFILE "\n\n"; print OUTFILE " \n"; print OUTFILE " ".$probe->{source}."\n"; print OUTFILE " ".$probe->{sequence}."\n"; print OUTFILE " ".$probe->{start}."\n"; print OUTFILE " ".$probe->{stop}."\n"; return; } sub printProbeFooter { local *OUTFILE = shift; my $probe = shift; print OUTFILE " \n"; return; } sub printProbeResults { local *OUTFILE = shift; my $probe = shift; my $result = shift; my $species1_align = ""; my $species2_align = ""; my $alignment_mask = ""; $species1_align = $probe->{species1_align} if (!ref($probe->{species1_align})); $species2_align = $probe->{species2_align} if (!ref($probe->{species2_align})); $alignment_mask = $probe->{alignment_mask} if (!ref($probe->{alignment_mask})); print OUTFILE " \n"; print OUTFILE " ".$probe->{species}."\n"; print OUTFILE " ".$probe->{start}."\n"; print OUTFILE " ".$probe->{stop}."\n"; print OUTFILE " ".$species1_align."\n"; print OUTFILE " ".$alignment_mask."\n"; print OUTFILE " ".$species2_align."\n"; print OUTFILE " $result\n"; print OUTFILE " \n"; return; }