Skip to content

Commit 6211a03

Browse files
authored
Add files via upload
1 parent ffa9cf9 commit 6211a03

File tree

2 files changed

+925
-0
lines changed

2 files changed

+925
-0
lines changed

ped2fasta.pl

+183
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,183 @@
1+
#!/usr/bin/env perl
2+
use strict;
3+
use warnings;
4+
use Getopt::Long;
5+
use File::Spec;
6+
use File::Basename qw(basename dirname);
7+
use FindBin qw($Bin $Script);
8+
use Data::Dumper;
9+
############# GetOptions #################
10+
my ($infile, $outfile, $format);
11+
GetOptions(
12+
"h|?"=>\&help,
13+
"p:s"=>\$infile,
14+
"o:s"=>\$outfile,
15+
"f:s"=>\$format,
16+
) || &help;
17+
&help unless ($infile && $outfile);
18+
19+
sub help
20+
{
21+
print"
22+
Description: convert plink ped to fasta
23+
24+
-p <file> input ped file [force]
25+
-o <file> output fasta file [force]
26+
-f <str> output format(p/f) [p]
27+
28+
-h Help document
29+
";
30+
exit;
31+
}
32+
############# start time #################
33+
my $current_T = &date_format(localtime());
34+
print "Programe start: $current_T\n\n";
35+
my $begin_time = time();
36+
##########################################
37+
$infile = &abs_dir($infile);
38+
$format ||= "p";
39+
my %iupac = ('GG' => 'G', 'CC' => 'C', 'TT' => 'T', 'AA' => 'A', 'GT' => 'K',
40+
'TG' => 'K', 'AC' => 'M', 'CA' => 'M', 'CG' => 'S', 'GC' => 'S', 'AG' => 'R',
41+
'GA' => 'R', 'AT' => 'W', 'TA' => 'W', 'CT' => 'Y', 'TC' => 'Y', '00' => 'N',
42+
);
43+
my $i = 1;
44+
45+
my $inum = 0;
46+
if($format eq "p"){
47+
open(IN, $infile);
48+
while (<IN>) {
49+
next if(/^$/);
50+
$inum++;
51+
}
52+
close(IN);
53+
}
54+
open(IN, $infile);
55+
open(OUT, ($outfile =~ /\.gz$/) ? "|gzip > $outfile" : ">$outfile");
56+
while (<IN>) {
57+
chomp;
58+
my ($fam, $ind, undef, undef, undef, undef, @seq) = split(/\s+/, $_);
59+
my $new = "";
60+
for (my $j = 0; $j < @seq - 1; $j+=2) {
61+
my $k = $j + 1;
62+
my $tmp = $seq[$j] . $seq[$k];
63+
if(!exists $iupac{$tmp}){
64+
print "$tmp\n";
65+
die;
66+
}
67+
$new .= $iupac{$tmp};
68+
}
69+
if($format eq "p"){
70+
if($i == 1){
71+
my $len = length($new);
72+
print OUT "$inum $len\n";
73+
}
74+
print OUT "$ind $new\n";
75+
}else{
76+
print OUT "$ind\n$new\n";
77+
}
78+
$i++;
79+
}
80+
close(IN);
81+
close(OUT);
82+
# AQ-1 AQ-1 0 0 0 -9 G G
83+
############# end time ###################
84+
$current_T = &date_format(localtime());
85+
print "Programe end: $current_T\n\n";
86+
&Runtime($begin_time);
87+
##########################################
88+
# sub date format
89+
sub date_format()
90+
{
91+
my ($sec, $min, $hour, $day, $mon, $year, $wday, $yday, $isdst) = @_;
92+
sprintf("%4d-%02d-%02d %02d:%02d:%02d", $year + 1900, $mon + 1, $day, $hour, $min, $sec);
93+
}
94+
95+
# sub Runtime
96+
sub Runtime()
97+
{
98+
my ($begin_time) = @_;
99+
my $now_time = time();
100+
my $total_time = $now_time - $begin_time;
101+
my $sec = 0; my $minu = 0; my $hour = 0;
102+
if($total_time >= 3600){
103+
$hour = int($total_time/3600);
104+
my $left = $total_time % 3600;
105+
if($left >= 60){
106+
$minu = int($left/60);
107+
$sec = $left % 60;
108+
$total_time = $hour."h\-".$minu."m\-".$sec."s";
109+
} else {
110+
$minu = 0;
111+
$sec = $left;
112+
$total_time = $hour."h\-".$minu."m\-".$sec."s";
113+
}
114+
} else {
115+
if($total_time >= 60){
116+
$minu = int($total_time/60);
117+
$sec = $total_time % 60;
118+
$total_time = $minu."m\-".$sec."s";
119+
} else {
120+
$sec = $total_time;
121+
$total_time = $sec."s";
122+
}
123+
}
124+
print "Total elapsed time [$total_time]\n\n";
125+
}
126+
127+
# sub absolutely directory
128+
sub abs_dir()
129+
{
130+
my ($in) = @_;
131+
my $current_dir = `pwd`;
132+
chomp($current_dir);
133+
my $return_dir = "";
134+
if(-f $in){
135+
my $in_dir = dirname($in);
136+
my $in_file = basename($in);
137+
chdir $in_dir;
138+
$in_dir = `pwd`;
139+
chomp($in_dir);
140+
$return_dir = "$in_dir/$in_file";
141+
} elsif(-d $in) {
142+
chdir $in;
143+
my $in_dir = `pwd`;
144+
chomp($in_dir);
145+
$return_dir = $in_dir;
146+
} else {
147+
die("ERROR: there is no file or dir called [$in], please check!\n");
148+
}
149+
chdir $current_dir;
150+
return $return_dir;
151+
}
152+
153+
# show log
154+
sub show_log()
155+
{
156+
my ($text) = @_;
157+
my $current_time = &date_format(localtime());
158+
print "$current_time: $text\n";
159+
}
160+
161+
# run or die
162+
sub run_or_die()
163+
{
164+
my ($cmd) = @_;
165+
&show_log($cmd);
166+
my $flag = system($cmd);
167+
if($flag != 0){
168+
&show_log("ERROR: command $cmd");
169+
exit(1);
170+
}
171+
&show_log("done\n");
172+
}
173+
174+
# qsub
175+
sub qsub()
176+
{
177+
my ($sh, $vf, $maxjob, $queue) = @_;
178+
$vf ||= "2G";
179+
$maxjob ||= 50;
180+
$queue ||= "all.q";
181+
my $cmd_qsub = "/Bio/bin/qsub-sge.pl --convert no --queue $queue --maxjob $maxjob --resource vf=$vf $sh";
182+
&run_or_die($cmd_qsub);
183+
}

0 commit comments

Comments
 (0)