# NEBtest
# FASTQ_Me and CpG_Me with placenta samples that used the NEB kit.
#
# This section works inside the unaligned project directory: it sets the run
# arguments and writes task_samples.txt, one unique sample ID per line.

# Stop if the project directory is unavailable; otherwise every later
# ls/cat/mv would act on whatever directory the script was started from.
cd /Unaligned/Project_L3_Nova062P || {
  echo "ERROR: cannot cd to /Unaligned/Project_L3_Nova062P" >&2
  exit 1
}

# Arguments
project=Pilot
#https://bioinformatics.ucdavis.edu/research-computing/documentation/archiving-slims-data/
# NOTE(review): SLIMSstring and SLIMSdir are not read by any command in the
# visible part of this file; kept in case something else relies on them.
SLIMSstring=604up50qc5
SLIMSdir=Unaligned/Project_L3_Nova062P
genome=hg38
programsPATH=/share/lasallelab/programs

echo "Creating a file of unique IDs based on first string from underscore delimiter"

# Let the shell expand the glob instead of parsing `ls` output, and refuse to
# continue when nothing matches (an empty sample list would make every later
# step a silent no-op).
shopt -s nullglob
fastq_files=(*fastq.gz)
shopt -u nullglob
if (( ${#fastq_files[@]} == 0 )); then
  echo "ERROR: no *fastq.gz files found in $(pwd)" >&2
  exit 1
fi

# The sample ID is everything before the first underscore of the file name.
printf '%s\n' "${fastq_files[@]}" \
  | awk -F '_' '{print $1}' \
  | sort -u > task_samples.txt

echo "Printing merge commands"
# Dry run of the lane merge: print, without executing them, the two cat
# commands that would build the merged R1 and R2 files for one sample.
# Arguments: $1 - sample ID (the file-name prefix before the first underscore)
# Outputs:   two lines on stdout, one per read direction
mergeLanesTest(){
  # local, so a caller's own $i is not overwritten when run in the same shell
  local i=$1
  # The ID is quoted so it is never word-split or globbed. The * stays
  # unquoted on purpose: when lane files exist, the printed command lists them.
  echo cat "${i}"_*_R1_001.fastq.gz \> "${i}_1.fq.gz"
  echo cat "${i}"_*_R2_001.fastq.gz \> "${i}_2.fq.gz"
}
# Make the dry-run helper visible to the shells GNU parallel starts, then print
# the merge commands for every sample ID, running up to 6 jobs at a time.
export -f mergeLanesTest
parallel --jobs 6 --will-cite mergeLanesTest < task_samples.txt

# R1 and lanes are not assigned anywhere in the visible script, so the message
# below printed blanks. Values already present in the environment are kept;
# otherwise they are derived from the files in the current directory.
# R1: number of samples, i.e. the number of IDs in task_samples.txt.
: "${R1:=$(grep -c '' task_samples.txt)}"
# lanes: R1 lane files per sample.
# NOTE(review): this is an integer average and assumes every sample has the
# same number of lanes - confirm for this run.
if [[ -z "${lanes:-}" ]]; then
  shopt -s nullglob
  r1_lane_files=(*_R1_001.fastq.gz)
  shopt -u nullglob
  if (( R1 > 0 )); then
    lanes=$(( ${#r1_lane_files[@]} / R1 ))
  else
    lanes=0
  fi
fi
echo "Merging ${lanes} lanes for ${R1} samples"
# Merge every lane of one sample into a single file per read direction.
# Arguments: $1 - sample ID (the file-name prefix before the first underscore)
# Outputs:   writes <ID>_1.fq.gz (all R1 lane files) and <ID>_2.fq.gz (all R2
#            lane files) in the current directory
# Returns:   0 when both merges succeed, 1 as soon as either cat fails
mergeLanes(){
  # local, so a caller's own $i is not overwritten when run in the same shell
  local i=$1
  # Quote the ID but leave the * unquoted so the lane files are globbed.
  # Each merge is checked on its own: without this, a failed R1 merge was
  # hidden whenever the R2 merge that followed succeeded.
  cat "${i}"_*_R1_001.fastq.gz > "${i}_1.fq.gz" || return 1
  cat "${i}"_*_R2_001.fastq.gz > "${i}_2.fq.gz" || return 1
}
# Merge the lanes for real, stage the merged reads for CpG_Me, verify the
# number of read pairs, and submit the alignment array job.

# Expected number of read pairs: one per sample ID in task_samples.txt.
# R1 is not assigned anywhere in the visible script, so without this default
# the pair check below could never pass. A value already set in the
# environment is kept.
: "${R1:=$(grep -c '' task_samples.txt)}"

# Make mergeLanes visible to the shells GNU parallel starts and run it once per
# sample ID, up to 6 at a time. Stop here if any merge reports a failure.
export -f mergeLanes
if ! parallel --jobs 6 --verbose --will-cite mergeLanes < task_samples.txt; then
  echo "ERROR: merging lanes failed for at least one sample" >&2
  exit 1
fi
# Concise merging calls but doesn't make it easy for those who don't use GNU parallel
#parallel --jobs 6 "echo cat {}_*_R1_001.fastq.gz \> {}\_1.fq.gz" ::: `cat task_samples.txt`
#parallel --jobs 6 "echo cat {}_*_R2_001.fastq.gz \> {}\_2.fq.gz" ::: `cat task_samples.txt`
#parallel --jobs 6 --verbose "cat {}_"\*"_R1_001.fastq.gz > {}_1.fq.gz" ::: `cat task_samples.txt`
#parallel --jobs 6 --verbose "cat {}_"\*"_R2_001.fastq.gz > {}_2.fq.gz" ::: `cat task_samples.txt`

echo "Creating directories for CpG_Me"
# -p so that a re-run does not fail on an already existing directory
mkdir -p ../../raw_sequences || exit 1
mv task_samples.txt ../.. || exit 1
mv -- *.fq.gz ../../raw_sequences || exit 1

echo "Checking read pairs"
cd ../../raw_sequences || exit 1
# Count the merged read files through a glob rather than `ls | wc -l`, so
# unrelated entries in raw_sequences cannot skew the count. Two files per pair.
shopt -s nullglob
merged_reads=(*.fq.gz)
shopt -u nullglob
pairedReads=$(( ${#merged_reads[@]} / 2 ))
# Numeric comparison; also reject zero pairs so an empty array job is never submitted.
if (( pairedReads > 0 && pairedReads == R1 )); then
  echo "${pairedReads} will be aligned using CpG_Me"
else
  echo "ERROR: Incorrect number of merged read pairs in raw_sequences"
  echo "There are ${pairedReads} but there should be ${R1}"
  exit 1
fi

echo "Submitting $genome alignment command to cluster for ${project}"
cd .. || exit 1
# Build the command as an array and run it directly, so no eval is needed and
# the arguments are passed exactly as written. One array task per read pair.
submit=(
  sbatch
  "--array=1-${pairedReads}"
  "${programsPATH}/CpG_Me/Paired-end/CpG_Me_PE_controller.sh"
  "${genome}"
)
# call keeps its original single-string form for the log line.
call="${submit[*]}"
echo "${call}"
if ! "${submit[@]}"; then
  echo "ERROR: sbatch submission failed" >&2
  exit 1
fi

echo "Done"
exit 0
# Follow-up commands for the same run: a QC submission and the submission of a
# single array task of the alignment controller.
# NOTE(review): the `exit 0` that ends the preceding section means these lines
# are not reached when the file is executed top to bottom; they look like
# commands kept for running by hand - confirm.
cd /Unaligned/Project_L3_Nova062P || exit 1
# NOTE(review): this path has no ${programsPATH} prefix, unlike the controller
# script submitted below - confirm that is intended.
sbatch /CpG_Me/Paired-end/CpG_Me_PE_QC.sh
cd NEBtest/604up50qc5 || exit 1

# Arguments
project=Pilot
#https://bioinformatics.ucdavis.edu/research-computing/documentation/archiving-slims-data/
SLIMSstring=604up50qc5
SLIMSdir=Unaligned/Project_L3_Nova062P
genome=hg38
programsPATH=/share/lasallelab/programs

echo "Submitting $genome alignment command to cluster for ${project}"
# Only array task 2 is submitted. The command is built from the arguments above
# and stored in call before it is logged and run: previously call was never
# assigned in this section, so the log line was empty and eval ran either
# nothing or a stale command left over from an earlier section.
submit=(
  sbatch
  --array=2
  "${programsPATH}/CpG_Me/Paired-end/CpG_Me_PE_controller.sh"
  "${genome}"
)
call="${submit[*]}"
echo "${call}"
"${submit[@]}"

echo "Done"
exit 0