Skip to content

Commit ee48a88

Browse files
author
Nicolas Servant
committed
support for SGE scheduler
1 parent edc73b3 commit ee48a88

9 files changed

+193
-24
lines changed

LOGBOOK

+6
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,12 @@ o Think about a docker version of HiC-Pro
2424
##
2525
################################################################################
2626

27+
##--------------------
28+
## 08-01-16
29+
##--------------------
30+
31+
o Extend HiC-Pro to work with other schedulers such as SGE or SLURM
32+
2733
##--------------------
2834
## 21-10-15
2935
##--------------------

NEWS

+15-1
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,19 @@
1+
2+
***********************************
3+
CHANGES IN VERSION 2.7.2b
4+
5+
NEW FEATURES
6+
7+
o Add support for the SGE scheduler, thanks to Guipeng Li!
8+
9+
SIGNIFICANT USER-VISIBLE CHANGES
10+
11+
o Be careful - the configuration files for installing and running HiC-Pro have been updated to support multiple schedulers!
12+
13+
14+
115
***********************************
2-
CHANGES IN VERSION 2.7.1b
16+
CHANGES IN VERSION 2.7.1
317

418
NEW FEATURES
519

config-hicpro.txt

+6-6
Original file line numberDiff line numberDiff line change
@@ -11,16 +11,16 @@ MAPC_OUTPUT = hic_results
1111
RAW_DIR = rawdata
1212

1313
#######################################################################
14-
## SYSTEM - PBS - Start Editing Here !!
14+
## SYSTEM AND SCHEDULER - Start Editing Here !!
1515
#######################################################################
1616
N_CPU = 2
1717
LOGFILE = hicpro.log
1818

19-
PBS_SUFFIX =
20-
PBS_MEM =
21-
PBS_WALLTIME =
22-
PBS_QUEUE =
23-
PBS_MAIL =
19+
JOB_NAME =
20+
JOB_MEM =
21+
JOB_WALLTIME =
22+
JOB_QUEUE =
23+
JOB_MAIL =
2424

2525
#########################################################################
2626
## Data

config-install.txt

+1
Original file line numberDiff line numberDiff line change
@@ -8,3 +8,4 @@ BOWTIE2_PATH =
88
SAMTOOLS_PATH =
99
R_PATH =
1010
PYTHON_PATH =
11+
CLUSTER_SYS =

scripts/Makefile

+3-3
Original file line numberDiff line numberDiff line change
@@ -48,11 +48,11 @@ else
4848
include $(CONFIG_SYS)
4949
endif
5050

51-
make_torque_script: config_check init
51+
make_cluster_script: config_check init
5252
ifdef MAKE_OPTS
53-
@$(SCRIPTS)/make_torque_script.sh -c $(CONFIG_FILE) -s $(MAKE_OPTS)
53+
@$(CLUSTER_SCRIPT) -c $(CONFIG_FILE) -s $(MAKE_OPTS)
5454
else
55-
@$(SCRIPTS)/make_torque_script.sh -c $(CONFIG_FILE)
55+
@$(CLUSTER_SCRIPT) -c $(CONFIG_FILE)
5656
endif
5757

5858
clean:

scripts/hic.inc.sh

+4-2
Original file line numberDiff line numberDiff line change
@@ -193,10 +193,12 @@ get_hic_files()
193193
{
194194
local idir=$1
195195
local ext=$2
196+
if [ ! -z "$PBS_ARRAYID" ]; then TASKID=$PBS_ARRAYID; fi
197+
if [ ! -z "$SGE_TASK_ID" ]; then TASKID=$SGE_TASK_ID; fi
196198
if [ ! -z "$FASTQFILE" ]; then
197-
if [ ! -z "$PBS_ARRAYID" ]; then
199+
if [ ! -z "$TASKID" ]; then
198200
local input_data_type=$(get_data_type)
199-
cat $FASTQFILE | filter_rawdir | filter_pairs | awk "NR == $PBS_ARRAYID {printf(\"%s/%s${ext}\n\", \"$idir\", gensub(\".${input_data_type}(.gz)*\", \"\", \$1));}"
201+
cat $FASTQFILE | filter_rawdir | filter_pairs | awk "NR == $TASKID {printf(\"%s/%s${ext}\n\", \"$idir\", gensub(\".${input_data_type}(.gz)*\", \"\", \$1));}"
200202
return
201203
fi
202204
local list=

scripts/install/install_dependencies.sh

+17
Original file line numberDiff line numberDiff line change
@@ -370,6 +370,23 @@ echo "SCRIPTS = ${install_dir}/scripts" >> config-system.txt
370370
echo "SOURCES = ${install_dir}/scripts/src" >> config-system.txt
371371
echo "ANNOT_DIR = ${install_dir}/annotation" >> config-system.txt
372372

373+
## deal with scheduler system
374+
if [ -z "$CLUSTER_SYS" ]; then
375+
echo -e "$RED""Warning : Scheduler system not defined - Default is Torque/PBS""$NORMAL"
376+
CLUSTER_SYS="TORQUE";
377+
fi
378+
if [ $CLUSTER_SYS == "TORQUE" ]; then
379+
#ln -s scripts/make_torque_scripts.sh scripts/make_cluster_scripts.sh
380+
echo "CLUSTER_SCRIPT = ${install_dir}/scripts/make_torque_script.sh" >> config-system.txt
381+
echo -e "$BLUE""Configuration for TORQUE/PBS system.""$NORMAL"
382+
elif [ $CLUSTER_SYS == "SGE" ]; then
383+
#ln -s scripts/make_sge_scripts.sh scripts/make_cluster_scripts.sh
384+
echo "CLUSTER_SCRIPT = ${install_dir}/scripts/make_sge_script.sh" >> config-system.txt
385+
echo -e "$BLUE""Configuration for SGE system.""$NORMAL"
386+
else
387+
die "$CLUSTER_SYS unknown. Only 'TORQUE' and 'SGE' system are supported for now. Please change the CLUSTER_SYS variable and re-run the installation process. Exit."
388+
fi
389+
373390
## check rights in PREFIX folder
374391
if [[ -z $PREFIX ]]; then PREFIX=/local/bin; fi
375392
if [ ! -w $PREFIX ]; then

scripts/make_sge_script.sh

+129
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,129 @@
1+
#!/bin/bash
2+
3+
## HiC-Pro
4+
## Copyright (c) 2015 Institut Curie
5+
## Author(s): Guipeng Li, Nicolas Servant
6+
## Contact: [email protected]
7+
## This software is distributed without any guarantee under the terms of the BSD-3 licence.
8+
## See the LICENCE file for details
9+
10+
##
11+
## Create SGE files
12+
##
13+
14+
dir=$(dirname $0)
15+
16+
usage()
17+
{
18+
echo "usage: $0 -c CONFIG [-s STEP]"
19+
}
20+
21+
MAKE_OPTS=""
22+
23+
while [ $# -gt 0 ]
24+
do
25+
case "$1" in
26+
(-c) conf_file=$2; shift;;
27+
(-s) MAKE_OPTS=$2; shift;;
28+
(--) shift; break;;
29+
(-*) echo "$0: error - unrecognized option $1" 1>&2; exit 1;;
30+
(*) suffix=$1; break;;
31+
esac
32+
shift
33+
done
34+
35+
if [ -z "$conf_file" ]; then usage; exit 1; fi
36+
37+
CONF=$conf_file . $dir/hic.inc.sh
38+
unset FASTQFILE
39+
40+
## Define input files
41+
if [[ $MAKE_OPTS == "" || $MAKE_OPTS == *"mapping"* ]]
42+
then
43+
inputfile=inputfiles_${JOB_NAME}.txt
44+
get_hic_files $RAW_DIR .fastq | grep $PAIR1_EXT | sed -e "s|$RAW_DIR||" -e "s|^/||" > $inputfile
45+
count=$(cat $inputfile | wc -l)
46+
elif [[ $MAKE_OPTS == *"proc_hic"* ]]
47+
then
48+
inputfile=inputfiles_${JOB_NAME}.txt
49+
get_hic_files $RAW_DIR .bam | grep $PAIR1_EXT | sed -e "s|$RAW_DIR||" -e "s|^/||" > $inputfile
50+
count=$(cat $inputfile | wc -l)
51+
fi
52+
53+
## Parallel implementation
54+
if [[ $MAKE_OPTS == "" || $MAKE_OPTS == *"mapping"* || $MAKE_OPTS == *"proc_hic"* ]]
55+
then
56+
make_target="all_qsub"
57+
## Remove per sample steps
58+
if [[ $MAKE_OPTS != "" ]]; then
59+
make_target=$(echo $MAKE_OPTS | sed -e 's/,/ /g');
60+
make_target=$(echo $make_target | sed -e 's/merge_persample//g');
61+
make_target=$(echo $make_target | sed -e 's/build_contact_maps//g');
62+
make_target=$(echo $make_target | sed -e 's/ice_norm//g');
63+
make_target=$(echo $make_target | sed -e 's/quality_checks//g');
64+
fi
65+
66+
## step 1 - parallel
67+
sge_script=HiCPro_step1_${JOB_NAME}.sh
68+
PPN=$(( ${N_CPU} * 2))
69+
cat > ${sge_script} <<EOF
70+
#!/bin/bash
71+
#$ -l h_vmem=${JOB_MEM}
72+
#$ -l h_rt=${JOB_WALLTIME}
73+
#$ -M ${JOB_MAIL}
74+
#$ -m ae
75+
#$ -j y
76+
#$ -N HiCpro_s1_${JOB_NAME}
77+
##$ -q ${JOB_QUEUE}
78+
#$ -V
79+
#$ -t 1-$count
80+
#$ -pe shm ${PPN}
81+
#$ -cwd
82+
83+
FASTQFILE=$inputfile; export FASTQFILE
84+
make --file ${SCRIPTS}/Makefile CONFIG_FILE=${conf_file} CONFIG_SYS=${INSTALL_PATH}/config-system.txt $make_target 2>&1
85+
EOF
86+
87+
chmod +x ${sge_script}
88+
89+
## User message
90+
echo "The following command will launch the parallel workflow through $count sge jobs:"
91+
echo qsub ${sge_script}
92+
fi
93+
94+
95+
## Per sample Implementation
96+
if [[ $MAKE_OPTS == "" || $MAKE_OPTS == *"build_contact_maps"* || $MAKE_OPTS == *"ice_norm"* || $MAKE_OPTS == *"quality_checks"* ]]
97+
then
98+
make_target="all_persample"
99+
## Remove parallel-mode steps
100+
if [[ $MAKE_OPTS != "" ]];
101+
then
102+
make_target=$(echo $MAKE_OPTS | sed -e 's/,/ /g');
103+
make_target=$(echo $make_target | sed -e 's/mapping//g');
104+
make_target=$(echo $make_target | sed -e 's/proc_hic//g');
105+
fi
106+
107+
sge_script_s2=HiCPro_step2_${JOB_NAME}.sh
108+
cat > ${sge_script_s2} <<EOF
109+
#!/bin/bash
110+
#$ -l h_vmem=${JOB_MEM}
111+
#$ -l h_rt=${JOB_WALLTIME}
112+
#$ -M ${JOB_MAIL}
113+
#$ -m ae
114+
#$ -j y
115+
#$ -N HiCpro_s2_${JOB_NAME}
116+
##$ -q ${JOB_QUEUE}
117+
#$ -V
118+
#$ -cwd
119+
120+
make --file ${SCRIPTS}/Makefile CONFIG_FILE=${conf_file} CONFIG_SYS=${INSTALL_PATH}/config-system.txt $make_target 2>&1
121+
EOF
122+
123+
chmod +x ${sge_script_s2}
124+
125+
## User message
126+
echo "The following command will merge the processed data and run the remaining steps per sample:"
127+
echo qsub ${sge_script_s2}
128+
fi
129+

scripts/make_torque_script.sh

+12-12
Original file line numberDiff line numberDiff line change
@@ -40,12 +40,12 @@ unset FASTQFILE
4040
## Define input files
4141
if [[ $MAKE_OPTS == "" || $MAKE_OPTS == *"mapping"* ]]
4242
then
43-
inputfile=inputfiles_${PBS_SUFFIX}.txt
43+
inputfile=inputfiles_${JOB_NAME}.txt
4444
get_hic_files $RAW_DIR .fastq | grep $PAIR1_EXT | sed -e "s|$RAW_DIR||" -e "s|^/||" > $inputfile
4545
count=$(cat $inputfile | wc -l)
4646
elif [[ $MAKE_OPTS == *"proc_hic"* ]]
4747
then
48-
inputfile=inputfiles_${PBS_SUFFIX}.txt
48+
inputfile=inputfiles_${JOB_NAME}.txt
4949
get_hic_files $RAW_DIR .bam | grep $PAIR1_EXT | sed -e "s|$RAW_DIR||" -e "s|^/||" > $inputfile
5050
count=$(cat $inputfile | wc -l)
5151
fi
@@ -64,16 +64,16 @@ then
6464
fi
6565

6666
## step 1 - parallel
67-
torque_script=HiCPro_step1_${PBS_SUFFIX}.sh
67+
torque_script=HiCPro_step1_${JOB_NAME}.sh
6868
PPN=$(( ${N_CPU} * 2))
6969
cat > ${torque_script} <<EOF
7070
#!/bin/bash
71-
#PBS -l nodes=1:ppn=${PPN},mem=${PBS_MEM},walltime=${PBS_WALLTIME}
72-
#PBS -M ${PBS_MAIL}
71+
#PBS -l nodes=1:ppn=${PPN},mem=${JOB_MEM},walltime=${JOB_WALLTIME}
72+
#PBS -M ${JOB_MAIL}
7373
#PBS -m ae
7474
#PBS -j eo
75-
#PBS -N HiCpro_s1_${PBS_SUFFIX}
76-
#PBS -q ${PBS_QUEUE}
75+
#PBS -N HiCpro_s1_${JOB_NAME}
76+
#PBS -q ${JOB_QUEUE}
7777
#PBS -V
7878
#PBS -t 1-$count
7979
@@ -103,15 +103,15 @@ then
103103
make_target=$(echo $make_target | sed -e 's/proc_hic//g');
104104
fi
105105

106-
torque_script_s2=HiCPro_step2_${PBS_SUFFIX}.sh
106+
torque_script_s2=HiCPro_step2_${JOB_NAME}.sh
107107
cat > ${torque_script_s2} <<EOF
108108
#!/bin/bash
109-
#PBS -l nodes=1:ppn=1,mem=${PBS_MEM},walltime=${PBS_WALLTIME}
110-
#PBS -M ${PBS_MAIL}
109+
#PBS -l nodes=1:ppn=1,mem=${JOB_MEM},walltime=${JOB_WALLTIME}
110+
#PBS -M ${JOB_MAIL}
111111
#PBS -m ae
112112
#PBS -j eo
113-
#PBS -N HiCpro_s2_${PBS_SUFFIX}
114-
#PBS -q ${PBS_QUEUE}
113+
#PBS -N HiCpro_s2_${JOB_NAME}
114+
#PBS -q ${JOB_QUEUE}
115115
#PBS -V
116116
117117
cd \$PBS_O_WORKDIR

0 commit comments

Comments
 (0)