Skip to content

Commit dc1f258

Browse files
author
huangl07
committed
2018-8-11
1 parent 0793a15 commit dc1f258

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

50 files changed

+4532
-194
lines changed

NGSTool/md5sum.pl

+66
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,66 @@
1+
#!/usr/bin/env perl
2+
use strict;
3+
use warnings;
4+
my $BEGIN_TIME=time();
5+
use Getopt::Long;
6+
use Data::Dumper;
7+
use FindBin qw($Bin $Script);
8+
use File::Basename qw(basename dirname);
9+
my ($input,$output);
10+
my $version="1.0.0";
11+
GetOptions(
12+
"help|?" =>\&USAGE,
13+
"input:s"=>\$input,
14+
"output:s"=>\$output,
15+
) or &USAGE;
16+
&USAGE unless ($input and $output);
17+
my @file=`find $input -type f`;
18+
my @files=`find $input -type l`;
19+
push @file,@files;
20+
open Out,">$output";
21+
foreach my $file (@file) {
22+
my $md5sum=`md5sum $file`;
23+
print Out $md5sum;
24+
}
25+
close Out;
26+
#######################################################################################
27+
print STDOUT "\nDone. Total elapsed time : ",time()-$BEGIN_TIME,"s\n";
28+
#######################################################################################
29+
30+
sub ABSOLUTE_DIR #$pavfile=&ABSOLUTE_DIR($pavfile);
31+
{
32+
my $cur_dir=`pwd`;chomp($cur_dir);
33+
my ($in)=@_;
34+
my $return="";
35+
if(-f $in){
36+
my $dir=dirname($in);
37+
my $file=basename($in);
38+
chdir $dir;$dir=`pwd`;chomp $dir;
39+
$return="$dir/$file";
40+
}elsif(-d $in){
41+
chdir $in;$return=`pwd`;chomp $return;
42+
}else{
43+
warn "Warning just for file and dir \n$in";
44+
exit;
45+
}
46+
chdir $cur_dir;
47+
return $return;
48+
}
49+
50+
sub USAGE {#
51+
my $usage=<<"USAGE";
52+
Contact: long.huang\@majorbio.com;
53+
Script: $Script
54+
55+
Description:
56+
57+
Usage:
58+
Options:
59+
-input <file> input dir
60+
-output <file> output file name
61+
-h Help
62+
63+
USAGE
64+
print $usage;
65+
exit;
66+
}

Pipeline/02.RefAnno/bin/bin/getGeneFasta.pl

+6-1
Original file line numberDiff line numberDiff line change
@@ -34,11 +34,16 @@
3434
my @line;
3535
my $n=0;
3636
my $m=0;
37+
my $oldchr="";
3738
while (<In>) {
3839
chomp;
3940
next if ($_ eq ""||/^$/||/^#/);
40-
my (undef,undef,$types,undef,undef,undef,undef,undef,undef)=split(/\t/,$_);
41+
my ($chr,undef,$types,undef,undef,undef,undef,undef,undef)=split(/\t/,$_);
4142
next if ($types eq "region");
43+
if ($chr ne $oldchr) {
44+
$n =0;
45+
$oldchr=$chr;
46+
}
4247
$n++;
4348
if ($n == 1 && $types ne "gene") {
4449
$flag="mRNA";

Pipeline/02.RefAnno/bin/step01.new-ref.pl

+1-1
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@
2222
mkdir $dsh if (!-d $dsh);
2323
$dsh=ABSOLUTE_DIR($dsh);
2424
open SH,">$dsh/step01.new-ref.sh";
25-
print SH "perl $Bin/bin/GRename.pl -i $ref -g $gff -o $out/ref ";
25+
print SH "gffread -o $out/ref.packaged.gff $gff && perl $Bin/bin/GRename.pl -i $ref -g $out/ref.packaged.gff -o $out/ref ";
2626
if ($chr) {
2727
print SH "-f $chr ";
2828
}

Pipeline/03.Reseq/bin/bin/cnv_anno.pl

+1-1
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@
2121
chomp;
2222
next if ($_ eq "" ||/^$/ ||/^#/ );
2323
my ($chr,$soft,$type,$pos1,$pos2,undef,undef,undef,$info)=split(/\s+/,$_);
24-
next if ($type ne "gene" && $type ne "mRNA");
24+
next if ($type ne "gene");
2525
my $id;
2626
if ( $info =~ /Name=([^;]*);/ || $info =~ /ID=([^;]*);/ ) {
2727
$id=$1;

Pipeline/03.Reseq/bin/bin/eff-enrich.R

+1-1
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@ if ( is.null(opt$input)) { print_usage(spec)}
3030
if ( is.null(opt$output)){ print_usage(spec) }
3131
times<-Sys.time()
3232

33-
data<-read.table(opt$input,sep="\t",comment.char="^",head=TRUE)
33+
data<-read.table(opt$input,sep="\t",comment.char="^",head=TRUE,quote="")
3434
names(data)<-c("id","description","k","M","n","N");
3535
data<-na.omit(data)
3636
pvalue<-phyper(data$k,data$M,data$N-data$M,data$n,lower.tail=FALSE);

Pipeline/03.Reseq/bin/bin/sv.stat.pl

+1-1
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@
2121
while (<In>) {
2222
chomp;
2323
next if ($_ eq "" ||/^$/ ||/^#/);
24-
my ($chr1,$pos1,$chr2,$pos2,$length,$type,$pvalue,$genenum,$geneinfo)=split(/\t/,$_);
24+
my ($chr1,$pos1,$chr2,$pos2,$length,$type,$pvalue,$depth,$genenum,$geneinfo)=split(/\t/,$_);
2525
$stat{$type}{total}++;
2626
$stat{$type}{gene}++ if ($genenum !=0);
2727
$length{$length}{$type}++;

Pipeline/03.Reseq/bin/bin/sv_anno.pl

+1-1
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@
2121
chomp;
2222
next if ($_ eq "" ||/^$/ ||/^#/ );
2323
my ($chr,$soft,$type,$pos1,$pos2,undef,undef,undef,$info)=split(/\t/,$_);
24-
next if ($type ne "gene" && $type ne "mRNA");
24+
next if ($type ne "gene");
2525
my $id;
2626
if ( $info =~ /Name=([^;]*)\;/ || $info =~ /ID=([^;]*)\;/ ) {
2727
$id=$1;
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
#!/usr/bin/env Rscript
2+
times<-Sys.time();
3+
library('getopt')
4+
library(pheatmap)
5+
options(bitmapType='cairo')
6+
spec = matrix(c(
7+
'i','a',0,'character',
8+
'o','b',0,'character',
9+
'help','c', 0, 'logical'
10+
), byrow=TRUE, ncol=4);
11+
opt = getopt(spec);
12+
print_usage <- function(spec=NULL){
13+
cat(getopt(spec, usage=TRUE));
14+
cat("Usage example: \n")
15+
cat("
16+
Usage example:
17+
18+
Usage:
19+
--i different matrix between samples
20+
--o figure name
21+
--help usage
22+
\n")
23+
q(status=1);
24+
}
25+
26+
if ( !is.null(opt$help) ) { print_usage(spec) }
27+
if ( is.null(opt$i) ) { print_usage(spec) }
28+
if ( is.null(opt$o) ) { print_usage(spec) }
29+
30+
31+
32+
dat_raw <- read.table(opt$i, sep = "\t", header = T,check.names=F)
33+
rownames(dat_raw)<-dat_raw[,1]
34+
dat_raw<-dat_raw[,-1]
35+
dat_raw<-t(dat_raw)
36+
outpdf<-paste(opt$o,".pdf",sep="")
37+
pheatmap(dat_raw,cluster_rows=T,cluster_cols=T,
38+
fontsize_number = 8,
39+
filename=outpdf,
40+
)
41+
outpng<-paste(opt$o,".png",sep="")
42+
pheatmap(dat_raw,cluster_rows=T,cluster_cols=T,
43+
fontsize_number = 8,
44+
filename=outpng,
45+
)
46+
escaptime=Sys.time()-times;
47+
print("Done!")
48+
print(escaptime)
+75
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,75 @@
1+
#!/usr/bin/perl -w
2+
use strict;
3+
use warnings;
4+
my $BEGIN_TIME=time();
5+
use Getopt::Long;
6+
my ($tags,$fOut,$sampleID);
7+
use Data::Dumper;
8+
use FindBin qw($Bin $Script);
9+
use File::Basename qw(basename dirname);
10+
my $version="1.0.0";
11+
GetOptions(
12+
"help|?" =>\&USAGE,
13+
"i:s"=>\$tags,
14+
"o:s"=>\$fOut,
15+
) or &USAGE;
16+
&USAGE unless ($tags and $fOut );
17+
open In,"$tags";
18+
open Out,">$fOut";
19+
print Out "#sampleID\ttotal tags\ttotal depth\taverage depth\n";
20+
while (<In>) {
21+
chomp;
22+
next if ($_ eq "" || /^$/ || /^#/);
23+
my ($sample,$ustacks)=split(/\s+/,$_);
24+
open STAT,"$ustacks.tags.stat";
25+
while (<STAT>) {
26+
next if ($_ eq "" || /^$/ || /^#/);
27+
print Out $_;
28+
}
29+
close STAT;
30+
}
31+
close In;
32+
close Out;
33+
34+
#######################################################################################
35+
print STDOUT "\nDone. Total elapsed time : ",time()-$BEGIN_TIME,"s\n";
36+
#######################################################################################
37+
sub ABSOLUTE_DIR #$pavfile=&ABSOLUTE_DIR($pavfile);
38+
{
39+
my $cur_dir=`pwd`;chomp($cur_dir);
40+
my ($in)=@_;
41+
my $return="";
42+
if(-f $in){
43+
my $dir=dirname($in);
44+
my $file=basename($in);
45+
chdir $dir;$dir=`pwd`;chomp $dir;
46+
$return="$dir/$file";
47+
}elsif(-d $in){
48+
chdir $in;$return=`pwd`;chomp $return;
49+
}else{
50+
warn "Warning just for file and dir \n$in";
51+
exit;
52+
}
53+
chdir $cur_dir;
54+
return $return;
55+
}
56+
57+
sub USAGE {#
58+
my $usage=<<"USAGE";
59+
Contact: long.huang\@majorbio.com;
60+
Script: $Script
61+
Description:
62+
fq thanslate to fa format
63+
eg:
64+
perl $Script -i -o -k -c
65+
66+
Usage:
67+
Options:
68+
-i <file> input file name
69+
-o <file> output file
70+
-h Help
71+
72+
USAGE
73+
print $usage;
74+
exit;
75+
}
+94
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,94 @@
1+
#!/usr/bin/env Rscript
2+
times<-Sys.time();
3+
library('getopt');
4+
options(bitmapType='cairo');
5+
spec = matrix(c(
6+
'base','b',0,'character',
7+
'qual','q',0,'character',
8+
'out','o',0,'character',
9+
'help' , 'e', 0, 'logical'
10+
), byrow=TRUE, ncol=4);
11+
opt = getopt(spec);
12+
print_usage <- function(spec=NULL){
13+
cat(getopt(spec, usage=TRUE));
14+
cat("Usage example: \n")
15+
cat("
16+
Usage example:
17+
Rscript QC.r --base --qual --key --od
18+
19+
Usage:
20+
--base base distribution file
21+
--qual base quality file
22+
--key sample ID
23+
--od output dir
24+
--help usage
25+
\n")
26+
q(status=1);
27+
}
28+
29+
if ( !is.null(opt$help) ) { print_usage(spec) }
30+
if ( is.null(opt$base) ) { print_usage(spec) }
31+
if ( is.null(opt$qual) ) { print_usage(spec) }
32+
if ( is.null(opt$out) ) { print_usage(spec) }
33+
34+
read1<-read.table(opt$base,header=FALSE)
35+
read2<-read.table(opt$qual,header=FALSE)
36+
37+
read1<-read1[complete.cases(read1),]
38+
read2<-read2[complete.cases(read2),]
39+
40+
num<-(length(read2[,1])/2)
41+
for (i in 1:(length(read2[,1])-1)) {if ((as.numeric(read2[i+1,1])-as.numeric(read2[i,1]))!=1) {num<-i}}
42+
43+
quan<-read2[,ncol(read2)]
44+
quan<-10^(quan/10*-1)
45+
max_quan=ceiling(max(quan)/0.001)*0.001
46+
Aper<-read1[,2]
47+
Cper<-read1[,5]
48+
Gper<-read1[,4]
49+
Tper<-read1[,3]
50+
Nper<-read1[,6]
51+
max_per=ceiling(mean(c(Aper,Cper,Gper,Tper,Nper))/0.25)*0.25*2
52+
axix_at<-c(1,50,100,num,num+50,num+100,length(read2[,1]))
53+
axix_txt<-c(1,50,100,num,50,100,length(read2[,1])/2)
54+
55+
56+
pdf(file=paste(opt$od,"/",opt$out,".qual.pdf",sep=""))
57+
barplot(quan*100,col='springgreen2',space=0,ylab='Error rate(%)',border= NA,ylim=c(0,max_quan*100),xlab='Reads position(bp)',main='Quality distribution')
58+
axis(1,labels=axix_txt,at=axix_at)
59+
abline(v=num+0.5, lty=2,col='darkgray')
60+
box()
61+
dev.off()
62+
png(file=paste(opt$od,"/",opt$out,".qual.png",sep=""))
63+
barplot(quan*100,col='springgreen2',space=0,ylab='Error rate(%)',border= NA,ylim=c(0,max_quan*100),xlab='Reads position(bp)',main='Quality distribution')
64+
axis(1,labels=axix_txt,at=axix_at)
65+
abline(v=num+0.5, lty=2,col='darkgray')
66+
box()
67+
dev.off()
68+
69+
pdf(file=paste(opt$od,"/",opt$out,".base.pdf",sep=""))
70+
plot(Aper,col='red',type = 'l',xlab='Reads position(bp)',ylab='Percent(%)',ylim=c(0,max_per),xaxt="n",lty=1,lwd=1.5,main="Base distribution")
71+
axis(1,labels=axix_txt,at=axix_at)
72+
abline(v=num+0.5, lty=2,col='darkgray')
73+
lines(Tper,col='orange',lty=1,lwd=1.5)
74+
lines(Gper,col='green',lty=1,lwd=1.5)
75+
lines(Cper,col='blue',lty=1,lwd=1.5)
76+
lines(Nper,col='black',lty=1,lwd=1.5)
77+
legend("topright",c("A","T","G","C","N"),lty=c(1,1,1,1,1),col=c("red","orange","green","blue","black"))
78+
dev.off()
79+
80+
png(file=paste(opt$od,"/",opt$out,".base.png",sep=""))
81+
plot(Aper,col='red',type = 'l',xlab='Reads position(bp)',ylab='Percent(%)',ylim=c(0,max_per),xaxt="n",lty=1,lwd=1.5,main="Base distribution")
82+
axis(1,labels=axix_txt,at=axix_at)
83+
abline(v=num+0.5, lty=2,col='darkgray')
84+
lines(Tper,col='orange',lty=1,lwd=1.5)
85+
lines(Gper,col='green',lty=1,lwd=1.5)
86+
lines(Cper,col='blue',lty=1,lwd=1.5)
87+
lines(Nper,col='black',lty=1,lwd=1.5)
88+
legend("topright",c("A","T","G","C","N"),lty=c(1,1,1,1,1),col=c("red","orange","green","blue","black"))
89+
dev.off()
90+
91+
escaptime=Sys.time()-times;
92+
print("Done!")
93+
print(escaptime)
94+
q()
12.7 KB
Binary file not shown.

0 commit comments

Comments
 (0)