-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathgff_unique_for_flybase.pl
More file actions
69 lines (64 loc) · 1.37 KB
/
Copy pathgff_unique_for_flybase.pl
File metadata and controls
69 lines (64 loc) · 1.37 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
#! perl -w
#
# Keep one protein per parent id from a FlyBase-style translation FASTA.
#
# Inputs (hard-coded below):
#   $pep_infile - protein FASTA whose header lines carry "type=polypeptide",
#                 "length=N;", "parent=<gid>,<tid>;" and "name=<name>;" fields.
#   $cds_infile - nucleotide FASTA keyed by the first token of each header;
#                 looked up with the <tid> taken from the protein header.
#
# Outputs:
#   $pep_outfile        - the longest protein of every <gid>, header
#                         rewritten to ">gid name", sequence lines copied as-is.
#   $cds_outfile        - matching nucleotide record, sequence on one line.
#   $annotation_outfile - "gid<TAB>name", one line per selected protein.
#
# NOTE(review): $cds_infile is the "transcript" FASTA, so whatever that file
# stores per id is what ends up in the ".cds.fa" output - confirm that this
# is the intended sequence set.
use strict;
use warnings;

my $pep_infile         = 'dmel-all-translation-r6.63.fasta';
my $pep_outfile        = 'dmel.pep.fa';
my $cds_infile         = 'dmel-all-transcript-r6.63.fasta';
my $cds_outfile        = 'dmel.cds.fa';
my $annotation_outfile = 'fly.annotations';

# ---- Pass 1: load every nucleotide sequence, joined onto a single line ----
my %cds;
{
    open my $in, '<', $cds_infile
        or die "Cannot open $cds_infile for reading: $!\n";
    my $id;
    while (my $line = <$in>) {
        if ($line =~ /^>(\S+)/) {
            $id = $1;
        }
        elsif (defined $id) {
            # Sequence text seen before the first header has no owner and
            # is ignored; everything else is appended with whitespace removed.
            $line =~ s/\s+//g;
            $cds{$id} .= $line;
        }
    }
    close $in;
}

# ---- Pass 2: collect length / parent / name for every polypeptide ----
my (%len, %name, %gid, %tid);
{
    open my $in, '<', $pep_infile
        or die "Cannot open $pep_infile for reading: $!\n";
    while (my $line = <$in>) {
        next unless $line =~ /^>(\S+)/;
        my $id = $1;
        next unless $line =~ /type\=polypeptide/;

        # Capture in list context so a failed match yields undef instead of
        # silently reusing $1/$2 left over from an earlier successful match.
        my ($len)       = $line =~ /length\=(\d+)/;
        my ($gid, $tid) = $line =~ /parent\=(\w+)\,(\w+);/;
        my ($name)      = $line =~ /name\=(\S+);/;

        my @missing;
        push @missing, 'length' unless defined $len;
        push @missing, 'parent' unless defined $gid;
        push @missing, 'name'   unless defined $name;
        if (@missing) {
            warn "$pep_infile line $.: $id lacks "
                . join(', ', @missing) . "; record skipped\n";
            next;
        }

        $len{$gid}{$id} = $len;
        $name{$id}      = $name;
        $gid{$id}       = $gid;
        $tid{$id}       = $tid;
    }
    close $in;
}

# ---- Choose the longest protein for each parent id ----
# Equal lengths are resolved by protein id so that repeated runs select the
# same record (hash key order alone is not stable between runs).
my %is_selected;
for my $gid (keys %len) {
    my $len_of = $len{$gid};
    my ($longest) =
        sort { $len_of->{$b} <=> $len_of->{$a} || $a cmp $b } keys %$len_of;
    $is_selected{$longest} = 1;
}

# ---- Pass 3: re-read the protein FASTA and write the three outputs ----
{
    open my $in, '<', $pep_infile
        or die "Cannot open $pep_infile for reading: $!\n";
    open my $pep_out, '>', $pep_outfile
        or die "Cannot open $pep_outfile for writing: $!\n";
    open my $cds_out, '>', $cds_outfile
        or die "Cannot open $cds_outfile for writing: $!\n";
    open my $ann_out, '>', $annotation_outfile
        or die "Cannot open $annotation_outfile for writing: $!\n";

    # True while the sequence lines being read belong to a selected protein.
    my $copying = 0;
    while (my $line = <$in>) {
        if ($line =~ /^>(\S+)/) {
            my $id = $1;
            $copying = $is_selected{$id} ? 1 : 0;
            next unless $copying;

            my $seq = $cds{ $tid{$id} };
            unless (defined $seq) {
                # Keep the record so both FASTA outputs list the same ids,
                # but make the gap visible instead of interpolating undef.
                warn "No sequence for $tid{$id} in $cds_infile "
                    . "(protein $id); writing an empty record\n";
                $seq = '';
            }

            print {$pep_out} ">$gid{$id} $name{$id}\n";
            print {$cds_out} ">$gid{$id} $name{$id}\n$seq\n";
            print {$ann_out} "$gid{$id}\t$name{$id}\n";
        }
        elsif ($copying) {
            print {$pep_out} $line;
        }
    }

    close $in;
    # Buffered write errors only surface at close, so check them.
    close $pep_out or die "Error closing $pep_outfile: $!\n";
    close $cds_out or die "Error closing $cds_outfile: $!\n";
    close $ann_out or die "Error closing $annotation_outfile: $!\n";
}