-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathgetidpos.pl
66 lines (52 loc) · 1.21 KB
/
getidpos.pl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
#!/usr/bin/perl
use FindBin qw ($Bin);
use Getopt::Long;
GetOptions(\%opt,"list:s","gff:s","output:s","help:s");
my $help=<<USAGE;
Get position for ids in a list file
perl getidseq.pl -l id -g gene.gff -o nohit.fa > log
USAGE
if (exists $opt{help} or keys %opt < 1){
print $help;
exit;
}
my $refgff=parseGFF($opt{gff});
### store id into hash %id
my %id;
open OUT, ">$opt{output}" or die "$!";
open IN, "$opt{list}" or die "$!";
while(<IN>){
chomp $_;
next if ($_ eq "");
$id{$_}=1;
if (exists $refgff->{$_}){
print OUT "$_\t$refgff->{$_}->[0]\t$refgff->{$_}->[1]\t$refgff->{$_}->[2]\t$refgff->{$_}->[3]\n";
}
}
close IN;
close OUT;
#######################
sub parseGFF
{
my ($gff)=@_;
my %hash; ##hash to store every record by key of Seq_id
my $seq; ##Scaffold
my $id; ##ID for element
open IN, "$gff" or die "$!";
while (<IN>){
chomp $_;
next if ($_=~/^#/);
my @unit=split("\t",$_);
if ($unit[2]=~/mRNA/){
$unit[0]=~s/chr0//;
$unit[0]=~s/chr//;
$seq=$unit[0];
if ($unit[8]=~/ID=(.*);/ or $unit[8] =~/ID=(.*)/){
$id=$1;
}
$hash{$id}=[$seq,$unit[3],$unit[4],$unit[6]];
}
}
close IN;
return \%hash;
}