-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathgetidgff.pl
71 lines (55 loc) · 1.24 KB
/
getidgff.pl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
#!/usr/bin/perl
use FindBin qw ($Bin);
use Getopt::Long;
GetOptions(\%opt,"list:s","gff:s","output:s","help:s");
my $help=<<USAGE;
Get gff for ids in a list file
perl getidseq.pl -l id -g gene.gff -o id.gff > log
USAGE
if (exists $opt{help} or keys %opt < 1){
print $help;
exit;
}
my $refgff=parseGFF($opt{gff});
### store id into hash %id
my %id;
open OUT, ">$opt{output}" or die "$!";
open IN, "$opt{list}" or die "$!";
while(<IN>){
chomp $_;
next if ($_ eq "");
my @unit=split("\t",$_);
my $id=$unit[0];
#my $id=$1 if ($unit[0]=~/(.*)\_\w+/);
if (exists $refgff->{$id}){
print OUT "$refgff->{$id}";
}
}
close IN;
close OUT;
sub parseGFF
{
my ($gff)=@_;
my %hash; ##hash to store every record by key of Seq_id
my $seq;
my $id; ##ID for element
my $record;##all line for this record
open IN, "$gff" or die "$!";
while (<IN>){
chomp $_;
next if ($_=~/^#/);
my @unit=split("\t",$_);
if ($unit[2]=~/mRNA/){
$seq=$unit[0];
if ($unit[8]=~/ID=(.*?);/ or $unit[8] =~/ID=(.*)/){
$id=$1;
}
$record="$_\n";
$hash{$id}=$record;
}elsif($unit[0] eq $seq and $unit[8] =~ /Parent=$id/){
$hash{$id}.="$_\n";
}
}
close IN;
return \%hash;
}