#!/usr/bin/perl
#
# create_probe_summary.pl
#
# Reads the probe table out of probes.html, then, for every species, looks up
# each human probe sequence in the per-target alignment files (through the
# ProbeSearch module) and prints a per-probe summary to STDOUT.  Progress
# messages are appended to ./debug.log.

use strict;
use warnings;

use Getopt::Long;
use Data::Dumper;
use ProbeSearch;

use constant SUCCESS => 0;
use constant ERROR   => -1;

# Column layout of one row of @data.  The species columns follow the four
# fixed columns, in the same order as @species.
use constant COL_TARGET        => 0;
use constant COL_NAME          => 1;
use constant COL_RESULTS       => 2;
use constant COL_SEQUENCE      => 3;
use constant FIRST_SPECIES_COL => 4;

# Fixed line offsets inside probes.html.
use constant HEADER_SKIP_LINES => 26;    # lines skipped after the section header
use constant NAME_OFFSET       => 1;     # marker line -> probe name line
use constant RESULTS_OFFSET    => 7;     # name line   -> results line
use constant SEQUENCE_OFFSET   => 11;    # results line -> sequence line

# Hard coding input file
my $location = "/home/bcarlson/data";
my $file     = "probes.html";
my @species  = (
    "mouse", "baboon", "cat", "chicken", "chimp", "cow",
    "dog",   "fugu",   "pig", "rat",     "tetra", "zfish",
);

# NOTE(review): the pattern that marks the start of a probe row is EMPTY in
# the source as received -- presumably an HTML tag literal was stripped when
# the file was mangled.  Until the real marker is restored, this matches every
# line.  Written as (?:) rather than a bare // because a bare empty pattern
# re-uses the last successfully matched regex.
my $PROBE_ROW_RE = qr/(?:)/;

# Parse the command line.  Only --help is recognised: it prints the usage text
# and terminates the script via die.
sub getParams {
    my %opts;
    my $errstr = "";

    GetOptions(\%opts, 'help');
    $errstr = " " if (defined($opts{help}));

    if ($errstr) {
        print <<"END_USAGE";
Description:
    This file takes an html file and creates a probe_summary.txt file
Usage:
    create_probe_summary.pl requires probes.html to run
END_USAGE
        print "\n$errstr" if ($errstr);
        die("\n");
    }
    return;
}

# Append $msg to ./debug.log.  Logging is best effort: if the log cannot be
# opened a warning is issued and the message is dropped.
sub debug_print {
    my $msg     = shift;
    my $logfile = "./debug.log";

    open(my $log, '>>', $logfile) or do {
        warn "debug_print: cannot append to $logfile: $!\n";
        return;
    };
    print {$log} $msg;
    close($log);
    return;
}

# Return the reverse complement of a DNA string.  Only A/T/G/C (either case)
# are complemented; every other character is merely reversed in place.
sub reverse_compliment {
    my $dna = shift;
    $dna = reverse($dna);
    $dna =~ tr/ATGCatgc/TACGtacg/;
    return $dna;
}

# Read $count lines from $fh and return the last one read, or '' when the end
# of the file is reached first.
sub read_nth_line {
    my ($fh, $count) = @_;
    my $line;
    for (1 .. $count) {
        $line = <$fh>;
        last unless defined $line;
    }
    return defined $line ? $line : '';
}

# Return the first capture group of $re applied to $text, or '' on no match.
sub first_capture {
    my ($text, $re) = @_;
    my ($hit) = $text =~ $re;
    return defined $hit ? $hit : '';
}

# Parse the probe table in $path.  Returns a list of array refs laid out as
# [target, name, results, sequence, '' x number-of-species].  Dies if the file
# cannot be opened.
sub parse_probes {
    my ($path) = @_;

    open(my $in, '<', $path) or die "Cannot open $path: $!\n";

    # Skip everything up to and including the section header ...
    my $found_header = 0;
    while (my $line = <$in>) {
        if ($line =~ /Overgo Characteristics/) {
            $found_header = 1;
            last;
        }
    }
    warn "No 'Overgo Characteristics' section found in $path\n"
        unless $found_header;

    # ... and the fixed-size preamble that follows it.
    read_nth_line($in, HEADER_SKIP_LINES);

    my @rows;
    PROBE_NAME:
    while (my $line = <$in>) {
        next PROBE_NAME unless $line =~ $PROBE_ROW_RE;

        # NOTE(review): in the source as received this pattern contained a
        # line break before <\/td>.  A pattern with an embedded newline cannot
        # match a single line returned by readline, so the break is treated as
        # an extraction artefact and the pattern follows the other two.
        # Confirm against a real probes.html.
        my $probe_name =
            first_capture(read_nth_line($in, NAME_OFFSET), qr/(.+?) <\/td>/);
        $probe_name =~ s/^\s+//;
        $probe_name =~ s/\s+$//;

        # The target id is the run of digits immediately before an 'h' in the
        # probe name.
        my $target = first_capture($probe_name, qr/(\d+)h.*/);

        my $probe_results =
            first_capture(read_nth_line($in, RESULTS_OFFSET), qr/(.+) <\/td>/);

        my $probe_sequence =
            first_capture(read_nth_line($in, SEQUENCE_OFFSET),
                          qr/([ATGC]{36}) <\/td>/);

        push @rows,
            [ $target, $probe_name, $probe_results, $probe_sequence,
              ('') x scalar(@species) ];
    }

    close($in);
    return @rows;
}

# Search the currently open alignment for $human_probe, first as given and
# then as its reverse complement.  A hit found through the reverse complement
# is reverse-complemented again before it is returned.  Returns '' when
# neither form is found.
sub find_species_dna {
    my ($alignment, $human_probe) = @_;

    $alignment->set_probe($human_probe);
    debug_print("Looking for regular [$human_probe]\n");
    my $species_dna = $alignment->find_probe_match;
    $species_dna = '' unless defined $species_dna;

    if ($species_dna eq "") {
        my $reverse_compliment = reverse_compliment($human_probe);
        debug_print("Looking for reverse compliment [$reverse_compliment]\n");
        $alignment->set_probe($reverse_compliment);
        $species_dna = $alignment->find_probe_match;
        $species_dna = '' unless defined $species_dna;

        if ($species_dna ne "") {
            debug_print("Species dna found, but in reverse compliment form [$species_dna]\n");
            $species_dna = reverse_compliment($species_dna);
        }
        debug_print("Result of probe search [$species_dna]\n");
    }

    return $species_dna;
}

# ---------------------------------------------------------------------------
# Main
# ---------------------------------------------------------------------------

getParams();

my $input_file = "$location/$file";
my $alignment  = ProbeSearch->new;

# String ordering is enough here: the search loop below only needs probes of
# the same target to be adjacent.
my @data = sort { $a->[COL_TARGET] cmp $b->[COL_TARGET] } parse_probes($input_file);

for my $species_idx (0 .. $#species) {
    my $species_name   = $species[$species_idx];
    my $species_col    = FIRST_SPECIES_COL + $species_idx;
    my $last_target;          # undef until the first probe has been seen
    my $process_target = 0;
    my $target_file    = '';

    debug_print("Processing species [$species_name]\n");

    for my $probe (@data) {
        my $target = $probe->[COL_TARGET];

        # A new target group starts: switch to its alignment file, if any.
        if (!defined $last_target || $target ne $last_target) {
            $last_target = $target;
            $target_file =
                "$location/Target$target/human_${species_name}_T$target.verbose";
            debug_print("checking $target_file\n");

            if (-e $target_file) {
                # if file handle is open, close it.
                $alignment->close_file;
                debug_print("Processing $target_file\n");
                debug_print("=================================================================\n\n");

                # Open new target file
                $alignment->open_file($target_file);
                $process_target = 1;
            }
            else {
                $process_target = 0;
            }
        }

        next unless $process_target;

        my $species_dna = find_species_dna($alignment, $probe->[COL_SEQUENCE]);

        debug_print("Saving species dna [$species_dna]\n");
        $probe->[$species_col] = $species_dna;

        my ($name, $results, $human) =
            @{$probe}[ COL_NAME, COL_RESULTS, COL_SEQUENCE ];
        debug_print("Target : [$target]\n");
        debug_print("Target file : [$target_file]\n");
        debug_print("Probe : [$name]\n");
        debug_print("Results : [$results]\n");
        debug_print("Human Probe : [$human]\n");
        debug_print("Found species DNA : [$species_dna]\n\n\n");
    }
}

# Report.  Sequence labels are padded to the width of the longest one
# ("Chicken") so that the bracketed sequences line up.
for my $i (0 .. $#data) {
    my ($target, $name, $results, $human, @species_dna) = @{ $data[$i] };

    print "Probe $i\n";
    print "Target: $target\n";
    print "Name: $name\n";
    print "Results: $results\n";
    printf "%-7s: [%s]\n", 'Human', $human;
    for my $species_idx (0 .. $#species) {
        printf "%-7s: [%s]\n",
            ucfirst($species[$species_idx]), $species_dna[$species_idx];
    }
    print "\n\n";
}