Skip to content

Commit 5077f12

Browse files
committed
Add CASP-CAPRI parallel processing scripts
1 parent b1b78e4 commit 5077f12

File tree

4 files changed

+98
-1185
lines changed

4 files changed

+98
-1185
lines changed
Lines changed: 33 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,33 @@
1+
#!/bin/bash
2+
3+
####################### Batch Headers #########################
4+
#SBATCH -A BIF135
5+
#SBATCH -p batch
6+
#SBATCH -J make_casp_capri_dataset
7+
#SBATCH -t 0-24:00
8+
#SBATCH --mem 224G
9+
#SBATCH --nodes 4
10+
#SBATCH --ntasks-per-node 1
11+
###############################################################
12+
13+
# Remote paths #
14+
export PROJDIR=/gpfs/alpine/scratch/"$USER"/bif135/Repositories/Lab_Repositories/DIPS-Plus
15+
export PSAIADIR=/ccs/home/"$USER"/Programs/PSAIA_1.0_source/bin/linux/psa
16+
export OMP_NUM_THREADS=8
17+
18+
# Remote Conda environment #
19+
source "$PROJDIR"/miniconda3/bin/activate
20+
conda activate DIPS-Plus
21+
22+
# Load CUDA module for DGL
23+
module load cuda/10.2.89
24+
25+
# Default to using the Big Fantastic Database (BFD) of protein sequences (approx. 270GB compressed)
26+
export HHSUITE_DB=/gpfs/alpine/scratch/$USER/bif132/Data/Databases/bfd_metaclust_clu_complete_id30_c90_final_seq
27+
28+
# Run dataset compilation scripts
29+
cd "$PROJDIR"/project || exit
30+
31+
srun python3 "$PROJDIR"/project/datasets/builder/generate_hhsuite_features.py "$PROJDIR"/project/datasets/CASP-CAPRI/interim/parsed "$PROJDIR"/project/datasets/CASP-CAPRI/interim/parsed "$HHSUITE_DB" "$PROJDIR"/project/datasets/CASP-CAPRI/interim/external_feats --rank "$1" --size "$2" --num_cpu_jobs 4 --num_cpus_per_job 8 --num_iter 2 --source_type casp_capri --write_file
32+
33+
#srun python3 "$PROJDIR"/project/datasets/builder/postprocess_pruned_pairs.py "$PROJDIR"/project/datasets/CASP-CAPRI/raw "$PROJDIR"/project/datasets/CASP-CAPRI/interim/pairs "$PROJDIR"/project/datasets/CASP-CAPRI/interim/external_feats "$PROJDIR"/project/datasets/CASP-CAPRI/final/raw --num_cpus 32 --rank "$1" --size "$2" --source_type CASP-CAPRI

project/datasets/builder/compile_evcoupling_dataset_on_andes.sh

Lines changed: 0 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -31,11 +31,3 @@ cd "$PROJDIR"/project || exit
3131
srun python3 "$PROJDIR"/project/datasets/builder/generate_hhsuite_features.py "$PROJDIR"/project/datasets/EVCoupling/interim/parsed "$PROJDIR"/project/datasets/EVCoupling/interim/parsed "$HHSUITE_DB" "$PROJDIR"/project/datasets/EVCoupling/interim/external_feats --rank "$1" --size "$2" --num_cpu_jobs 4 --num_cpus_per_job 8 --num_iter 2 --source_type evcoupling --read_file
3232

3333
#srun python3 "$PROJDIR"/project/datasets/builder/postprocess_pruned_pairs.py "$PROJDIR"/project/datasets/EVCoupling/raw "$PROJDIR"/project/datasets/EVCoupling/interim/pairs "$PROJDIR"/project/datasets/EVCoupling/interim/external_feats "$PROJDIR"/project/datasets/EVCoupling/final/raw --num_cpus 32 --rank "$1" --size "$2" --source_type EVCoupling
34-
35-
#python3 "$PROJDIR"/project/datasets/builder/partition_dataset_filenames.py "$PROJDIR"/project/datasets/EVCoupling/final/raw --source_type EVCoupling --rank "$1" --size "$2"
36-
#python3 "$PROJDIR"/project/datasets/builder/collect_dataset_statistics.py "$PROJDIR"/project/datasets/EVCoupling/final/raw --rank "$1" --size "$2"
37-
#python3 "$PROJDIR"/project/datasets/builder/log_dataset_statistics.py "$PROJDIR"/project/datasets/EVCoupling/final/raw --rank "$1" --size "$2"
38-
#python3 "$PROJDIR"/project/datasets/builder/impute_missing_feature_values.py "$PROJDIR"/project/datasets/EVCoupling/final/raw --impute_atom_features False --num_cpus 32 --rank "$1" --size "$2"
39-
40-
# Optionally convert each postprocessed (final 'raw') complex into a pair of DGL graphs (final 'processed') with labels
41-
#python3 "$PROJDIR"/project/datasets/builder/convert_complexes_to_graphs.py "$PROJDIR"/project/datasets/EVCoupling/final/raw "$PROJDIR"/project/datasets/EVCoupling/final/processed --num_cpus 32 --edge_dist_cutoff 15.0 --edge_limit 5000 --self_loops True --rank "$1" --size "$2"

0 commit comments

Comments
 (0)