Imagine you have a file containing thousands of FASTA sequences and want to shorten or edit each FASTA header so that it keeps only the sequence identifier instead of the whole long, unnecessary description.
headerparser-1.pl DOWNLOAD
#!/usr/bin/env perl
# Shorten FASTA headers: everything from the first whitespace character to
# the end of a header line is dropped (">id description" becomes ">id").
# Input is read from the files named on the command line, or from STDIN
# when none are given; the result is written to STDOUT.
use strict;
use warnings;

while (<>) {
    # Only header lines (starting with ">") are trimmed; sequence lines are
    # printed unchanged. The /s modifier lets .* consume the line's own
    # terminator too, so the "\n" inserted here is the only newline left on
    # the header line.
    s/\s.*/\n/s if /^>/;
    print;
}
Usage
headerparser-1.pl input.txt
headerparser-2.pl DOWNLOAD
#!/usr/bin/env perl
# Shorten FASTA headers: a header line is reduced to ">" plus the run of
# non-whitespace characters that follows it; every other line is printed
# unchanged. Input is read from the files named on the command line, or
# from STDIN when none are given; the result is written to STDOUT.
use strict;
use warnings;

while (my $line = <>) {
    if ($line =~ /^(>\S+)/) {
        # $1 holds ">identifier"; the rest of the header is discarded.
        print "$1\n";
    } else {
        # Sequence lines (and a bare ">" with no identifier) pass through.
        print $line;
    }
}
Usage
headerparser-2.pl input.txt
headerparser-3.pl DOWNLOAD
#!/usr/bin/perl
use strict;
use warnings;
use Bio::SeqIO;

=head1 Synopsis

Input header >gi|120419786|gb|EH270482.2|EH270482 Gp_mxAA_21G01_M13R mxA Gammarus pulex cDNA clone Gp_mxAA_21G01 5', mRNA sequence

Output header >gi120419786

=cut

# Exactly one argument is expected: the FASTA file to process. $0 is used
# so the usage line shows the name the script was actually invoked under.
die "Usage: $0 fastaFileName\n" unless @ARGV == 1;

my $origFile = shift;
my $newFile  = $origFile . ".txt";    # output name: input name plus ".txt"

my $seq_in = Bio::SeqIO->new(
    -format => 'fasta',
    -file   => $origFile,
);
my $seq_out = Bio::SeqIO->new(
    '-file'   => ">$newFile",         # leading ">" opens the file for writing
    '-format' => 'fasta',
);

while ( my $seq = $seq_in->next_seq() ) {
    my $seqName = $seq->id;

    $seqName =~ s/\|/\./g;            # replace pipe with dot

    # "gi.NNN.anything" collapses to "giNNN". If that form is not present,
    # an ID that is exactly "gi.NNN" (from a bare "gi|NNN" header) is
    # collapsed the same way so both header shapes give the same output.
    $seqName =~ s/(gi)\.(\w*)\..*/$1$2/
        or $seqName =~ s/\Agi\.(\w+)\z/gi$1/;

    $seq->id($seqName);
    $seq->description("");            # drop the free-text description
    $seq_out->write_seq($seq);
}

print "Your sequences have been renamed and are in the file $newFile\n\n";
Usage
headerparser-3.pl input.txt
Dependencies
The headerparser-3.pl script requires the Bio::SeqIO module (part of BioPerl) to run.
Post a Comment