#!/usr/bin/perl # Brian Osborne # script to run genscan on all nucleotide sequences in a fasta file # and save results as the fasta files <file>.gs.pept and <file>.gs.cds, # and <file>.gs.exons use Bio::SeqIO; use Bio::Seq; use Getopt::Long; use Bio::Tools::Genscan; use strict; # GENSCAN matrix my $matrix = "/home/bosborne/src/genscan/HumanIso.smat"; my ($file,$i); GetOptions( "f|file=s" => \$file ); usage() if ( !$file ); my $pept_out = Bio::SeqIO->new(-file => ">$file.gs.pept", -format => "fasta"); my $cds_out = Bio::SeqIO->new(-file => ">$file.gs.cds", -format => "fasta"); my $exons_out = Bio::SeqIO->new(-file => ">$file.gs.exons", -format => "fasta"); my $in = Bio::SeqIO->new(-file => $file , -format => 'Fasta'); while ( my $seq = $in->next_seq() ) { die "Input sequence is protein\n" if ( $seq->alphabet eq 'protein' ); # create temp file, input to GENSCAN my $temp_out = Bio::SeqIO->new(-file => ">temp.fa", -format => "fasta"); $temp_out->write_seq($seq); my $file_id = $seq->display_id; $file_id =~ s/\|/-/g; system "genscan $matrix temp.fa -cds > $file_id.gs.raw"; unlink "temp.fa"; my $genscan = Bio::Tools::Genscan->new( -file => "$file_id.gs.raw"); while ( my $gene = $genscan->next_prediction() ) { $i++; my $pept = $gene->predicted_protein; my $cds = $gene->predicted_cds; my @exon_arr = $gene->exons; if ( defined $cds ) { my $cds_seq = Bio::Seq->new(-seq => $cds->seq, -display_id => $cds->display_id); $cds_out->write_seq($cds_seq); } if ( defined $pept ) { my $pept_seq = Bio::Seq->new(-seq => $pept->seq, -display_id => $pept->display_id); $pept_out->write_seq($pept_seq); } for my $exon (@exon_arr) { my $desc = $exon->strand . " " . $exon->start . "-" . $exon->end . " " . $exon->primary_tag . " " . "GENSCAN_predicted_$i"; my $exon_seq = Bio::Seq->new(-seq => $seq->subseq($exon->start, $exon->end), -display_id => $seq->display_id, -desc => $desc ); $exons_out->write_seq($exon_seq); } } $genscan->close(); unlink "$file_id.gs.raw"; } sub usage { print " Usage : $0 -f <file> Function : run genscan on all nucleotide sequences in a multiple fasta file Output : <file>.gs.pept, <file>.gs.cds, <file>.gs.exons "; exit; }