|
| 1 | +#!/usr/bin/env perl |
| 2 | +use strict; |
| 3 | +use warnings; |
| 4 | +use Getopt::Long; |
| 5 | +use File::Spec; |
| 6 | +use File::Basename qw(basename dirname); |
| 7 | +use FindBin qw($Bin $Script); |
| 8 | +use Data::Dumper; |
| 9 | +############# GetOptions ################# |
| 10 | +my ($infile, $outfile, $format); |
| 11 | +GetOptions( |
| 12 | + "h|?"=>\&help, |
| 13 | + "p:s"=>\$infile, |
| 14 | + "o:s"=>\$outfile, |
| 15 | + "f:s"=>\$format, |
| 16 | +) || &help; |
| 17 | +&help unless ($infile && $outfile); |
| 18 | + |
| 19 | +sub help |
| 20 | +{ |
| 21 | + print" |
| 22 | + Description: convert plink ped to fasta |
| 23 | +
|
| 24 | + -p <file> input ped file [force] |
| 25 | + -o <file> output fasta file [force] |
| 26 | + -f <str> output format(p/f) [p] |
| 27 | +
|
| 28 | + -h Help document |
| 29 | +"; |
| 30 | + exit; |
| 31 | +} |
| 32 | +############# start time ################# |
| 33 | +my $current_T = &date_format(localtime()); |
| 34 | +print "Programe start: $current_T\n\n"; |
| 35 | +my $begin_time = time(); |
| 36 | +########################################## |
| 37 | +$infile = &abs_dir($infile); |
| 38 | +$format ||= "p"; |
| 39 | +my %iupac = ('GG' => 'G', 'CC' => 'C', 'TT' => 'T', 'AA' => 'A', 'GT' => 'K', |
| 40 | + 'TG' => 'K', 'AC' => 'M', 'CA' => 'M', 'CG' => 'S', 'GC' => 'S', 'AG' => 'R', |
| 41 | + 'GA' => 'R', 'AT' => 'W', 'TA' => 'W', 'CT' => 'Y', 'TC' => 'Y', '00' => 'N', |
| 42 | +); |
| 43 | +my $i = 1; |
| 44 | + |
| 45 | +my $inum = 0; |
| 46 | +if($format eq "p"){ |
| 47 | + open(IN, $infile); |
| 48 | + while (<IN>) { |
| 49 | + next if(/^$/); |
| 50 | + $inum++; |
| 51 | + } |
| 52 | + close(IN); |
| 53 | +} |
| 54 | +open(IN, $infile); |
| 55 | +open(OUT, ($outfile =~ /\.gz$/) ? "|gzip > $outfile" : ">$outfile"); |
| 56 | +while (<IN>) { |
| 57 | + chomp; |
| 58 | + my ($fam, $ind, undef, undef, undef, undef, @seq) = split(/\s+/, $_); |
| 59 | + my $new = ""; |
| 60 | + for (my $j = 0; $j < @seq - 1; $j+=2) { |
| 61 | + my $k = $j + 1; |
| 62 | + my $tmp = $seq[$j] . $seq[$k]; |
| 63 | + if(!exists $iupac{$tmp}){ |
| 64 | + print "$tmp\n"; |
| 65 | + die; |
| 66 | + } |
| 67 | + $new .= $iupac{$tmp}; |
| 68 | + } |
| 69 | + if($format eq "p"){ |
| 70 | + if($i == 1){ |
| 71 | + my $len = length($new); |
| 72 | + print OUT "$inum $len\n"; |
| 73 | + } |
| 74 | + print OUT "$ind $new\n"; |
| 75 | + }else{ |
| 76 | + print OUT "$ind\n$new\n"; |
| 77 | + } |
| 78 | + $i++; |
| 79 | +} |
| 80 | +close(IN); |
| 81 | +close(OUT); |
| 82 | +# AQ-1 AQ-1 0 0 0 -9 G G |
| 83 | +############# end time ################### |
| 84 | +$current_T = &date_format(localtime()); |
| 85 | +print "Programe end: $current_T\n\n"; |
| 86 | +&Runtime($begin_time); |
| 87 | +########################################## |
| 88 | +# sub date format |
| 89 | +sub date_format() |
| 90 | +{ |
| 91 | + my ($sec, $min, $hour, $day, $mon, $year, $wday, $yday, $isdst) = @_; |
| 92 | + sprintf("%4d-%02d-%02d %02d:%02d:%02d", $year + 1900, $mon + 1, $day, $hour, $min, $sec); |
| 93 | +} |
| 94 | + |
| 95 | +# sub Runtime |
| 96 | +sub Runtime() |
| 97 | +{ |
| 98 | + my ($begin_time) = @_; |
| 99 | + my $now_time = time(); |
| 100 | + my $total_time = $now_time - $begin_time; |
| 101 | + my $sec = 0; my $minu = 0; my $hour = 0; |
| 102 | + if($total_time >= 3600){ |
| 103 | + $hour = int($total_time/3600); |
| 104 | + my $left = $total_time % 3600; |
| 105 | + if($left >= 60){ |
| 106 | + $minu = int($left/60); |
| 107 | + $sec = $left % 60; |
| 108 | + $total_time = $hour."h\-".$minu."m\-".$sec."s"; |
| 109 | + } else { |
| 110 | + $minu = 0; |
| 111 | + $sec = $left; |
| 112 | + $total_time = $hour."h\-".$minu."m\-".$sec."s"; |
| 113 | + } |
| 114 | + } else { |
| 115 | + if($total_time >= 60){ |
| 116 | + $minu = int($total_time/60); |
| 117 | + $sec = $total_time % 60; |
| 118 | + $total_time = $minu."m\-".$sec."s"; |
| 119 | + } else { |
| 120 | + $sec = $total_time; |
| 121 | + $total_time = $sec."s"; |
| 122 | + } |
| 123 | + } |
| 124 | + print "Total elapsed time [$total_time]\n\n"; |
| 125 | +} |
| 126 | + |
| 127 | +# sub absolutely directory |
| 128 | +sub abs_dir() |
| 129 | +{ |
| 130 | + my ($in) = @_; |
| 131 | + my $current_dir = `pwd`; |
| 132 | + chomp($current_dir); |
| 133 | + my $return_dir = ""; |
| 134 | + if(-f $in){ |
| 135 | + my $in_dir = dirname($in); |
| 136 | + my $in_file = basename($in); |
| 137 | + chdir $in_dir; |
| 138 | + $in_dir = `pwd`; |
| 139 | + chomp($in_dir); |
| 140 | + $return_dir = "$in_dir/$in_file"; |
| 141 | + } elsif(-d $in) { |
| 142 | + chdir $in; |
| 143 | + my $in_dir = `pwd`; |
| 144 | + chomp($in_dir); |
| 145 | + $return_dir = $in_dir; |
| 146 | + } else { |
| 147 | + die("ERROR: there is no file or dir called [$in], please check!\n"); |
| 148 | + } |
| 149 | + chdir $current_dir; |
| 150 | + return $return_dir; |
| 151 | +} |
| 152 | + |
| 153 | +# show log |
| 154 | +sub show_log() |
| 155 | +{ |
| 156 | + my ($text) = @_; |
| 157 | + my $current_time = &date_format(localtime()); |
| 158 | + print "$current_time: $text\n"; |
| 159 | +} |
| 160 | + |
| 161 | +# run or die |
| 162 | +sub run_or_die() |
| 163 | +{ |
| 164 | + my ($cmd) = @_; |
| 165 | + &show_log($cmd); |
| 166 | + my $flag = system($cmd); |
| 167 | + if($flag != 0){ |
| 168 | + &show_log("ERROR: command $cmd"); |
| 169 | + exit(1); |
| 170 | + } |
| 171 | + &show_log("done\n"); |
| 172 | +} |
| 173 | + |
| 174 | +# qsub |
| 175 | +sub qsub() |
| 176 | +{ |
| 177 | + my ($sh, $vf, $maxjob, $queue) = @_; |
| 178 | + $vf ||= "2G"; |
| 179 | + $maxjob ||= 50; |
| 180 | + $queue ||= "all.q"; |
| 181 | + my $cmd_qsub = "/Bio/bin/qsub-sge.pl --convert no --queue $queue --maxjob $maxjob --resource vf=$vf $sh"; |
| 182 | + &run_or_die($cmd_qsub); |
| 183 | +} |
0 commit comments