-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Thomas DEPUYDT
committed
Jun 14, 2021
0 parents
commit 64d7d01
Showing
29 changed files
with
2,572 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,18 @@ | ||
# vHRR: co-expression based gene function prediction | ||
|
||
## Installation | ||
|
||
1. Clone repo | ||
2. Add path to go.obo file in go_config.sh | ||
3. Run `bash go_config.sh` | ||
|
||
## Run | ||
|
||
The folders `input` and `nextflow_config` currently contain example data and example configuration files. Refer to these to check the expected data and configuration formats. | ||
|
||
1. Replace the example data in `./input/` with your own. | ||
2. Run `bash preprocess_go_data.sh` (replace `go_gene.tsv` with the filename of your input GO annotation file). | ||
3. Replace the input files in `nextflow_config/vHRR_single.config` or `nextflow_config/vHRR_multiple.config` with your filenames and adapt the nextflow parameters to fit your needs and system. | ||
4. For a single atlas: run `bash vHRR_single.sh`. For multiple atlases: run `bash vHRR_multiple.sh`. | ||
|
||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
module load python

# Location of the go.obo file: edit this line before running the script.
# Kept in a variable (rather than an inline <placeholder>) because '<' and
# '>' are redirection operators in the shell.
GO_OBO=/path/to/go.obo

python3 gba_config.py -obo "$GO_OBO"
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,31 @@ | ||
// Input data | ||
params.wkdir = "/group/transreg/thpuy/vHRR" | ||
params.atlases = "${params.wkdir}/input/example_atlas_*.tsv" | ||
params.go_gene = "${params.wkdir}/input/go_gene_ic.tsv" | ||
params.out_folder = "${params.wkdir}/output" | ||
|
||
|
||
executor{ | ||
queueSize=100 | ||
} | ||
|
||
process { | ||
|
||
withName: prepare_starter_pack { | ||
executor = 'sge' | ||
cpus = 1 | ||
clusterOptions = '-l h_vmem=1G' | ||
} | ||
|
||
withName: scan_HRR { | ||
executor = 'sge' | ||
cpus = 1 | ||
clusterOptions = '-l h_vmem=1G' | ||
} | ||
|
||
withName: evaluate_predictions { | ||
executor = 'sge' | ||
cpus = 1 | ||
clusterOptions = '-l h_vmem=1G' | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,32 @@ | ||
// Input data | ||
params.wkdir = "/group/transreg/thpuy/vHRR" | ||
params.atlas = "${params.wkdir}/input/example_atlas_1.tsv" | ||
params.go_gene = "${params.wkdir}/input/go_gene_ic.tsv" | ||
params.out_folder = "${params.wkdir}/output" | ||
params.sample_weighing = "" | ||
|
||
|
||
executor{ | ||
queueSize=100 | ||
} | ||
|
||
process { | ||
|
||
withName: prepare_starter_pack { | ||
executor = 'sge' | ||
cpus = 1 | ||
clusterOptions = '-l h_vmem=1G' | ||
} | ||
|
||
withName: scan_HRR { | ||
executor = 'sge' | ||
cpus = 1 | ||
clusterOptions = '-l h_vmem=1G' | ||
} | ||
|
||
withName: evaluate_predictions { | ||
executor = 'sge' | ||
cpus = 1 | ||
clusterOptions = '-l h_vmem=1G' | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,6 @@ | ||
module load python

# Input GO annotation file and the output file the preprocessing writes.
INFILE=input/go_gene.tsv
OUTFILE=input/go_gene_ic.tsv

# Quote the paths so that filenames containing spaces or glob characters
# reach the script as single, unmodified arguments.
python3 scripts/preprocess_go_data.py "$INFILE" "$OUTFILE"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,14 @@ | ||
import configparser | ||
from pathlib import Path | ||
|
||
def read_config():
    """Load the GBA configuration stored in ``~/.gbaconfig``.

    Returns:
        configparser.ConfigParser: parser populated from the config file.

    Raises:
        FileNotFoundError: if ``~/.gbaconfig`` does not exist or cannot be
            opened.
        configparser.Error: if the file exists but is not valid INI syntax.
    """
    config = configparser.ConfigParser()
    config_path = '{}/.gbaconfig'.format(Path.home())
    # ConfigParser.read() does not raise for a missing file: it skips it and
    # returns the list of files it actually parsed. An empty list therefore
    # means the config file was not found.
    if not config.read(config_path):
        raise FileNotFoundError('[ERROR] GBA config file missing: run scripts/gba_config.py (usage: python3 gba_config.py [options])')
    return config
|
||
def get_obo():
    """Return the ``go_obo`` value from the ``[GO]`` section of the GBA config."""
    go_section = read_config()['GO']
    return go_section['go_obo']
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,23 @@ | ||
#!/usr/bin/python | ||
from sys import argv | ||
from math import log | ||
|
||
go_files = argv[1:]
go_data = dict()  # { go_id: set(gene_id) }
total_genes = set()


def read_go_file(go_file):
    """Add the GO annotations of one tab-separated file to the global tables.

    The first two columns of every line are read as ``go_id`` and
    ``gene_id``; further columns are ignored. The pair is recorded in
    ``go_data`` and the gene in ``total_genes``.

    Args:
        go_file: path of the tab-separated annotation file.

    Raises:
        ValueError: if a non-blank line has fewer than two columns.
    """
    with open(go_file, 'r') as reader:
        for line_number, line in enumerate(reader, 1):
            stripped = line.strip()
            if not stripped:
                # Blank lines (e.g. a trailing newline) carry no annotation.
                continue
            fields = stripped.split("\t")
            if len(fields) < 2:
                raise ValueError(
                    "{}:{}: expected at least 2 tab-separated columns, got {!r}".format(
                        go_file, line_number, stripped))
            go_id, gene_id = fields[:2]
            go_data.setdefault(go_id, set()).add(gene_id)
            total_genes.add(gene_id)


def _normalized_ic(term_gene_count, gene_count):
    """Return -log10(term_gene_count / gene_count) / log10(gene_count).

    With a single gene the denominator log10(1) is zero; the only possible
    term then covers every gene, so 0.0 is returned instead of dividing.
    """
    if gene_count <= 1:
        return 0.0
    return -1 * log(float(term_gene_count) / gene_count, 10) / log(gene_count, 10)


def main():
    """Read every file in ``go_files`` and print ``go_id<TAB>ICu`` per GO term."""
    for go_file in go_files:
        read_go_file(go_file)

    gene_count = float(len(total_genes))
    for go_id in sorted(go_data):
        ICu = _normalized_ic(len(go_data[go_id]), gene_count)
        print(go_id+"\t"+str(ICu))


if __name__ == "__main__":
    main()
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,14 @@ | ||
#!/usr/bin/python | ||
import go_manipulations | ||
from os import path | ||
from sys import argv | ||
|
||
# Command-line arguments: gene-GO annotation file, then the go.obo file.
annotation_path = argv[1]
obo_path = argv[2]

# Build the ontology, then load the annotations against it.
ontology = go_manipulations.GOtree(obo_path)
annotations = go_manipulations.GOgenes(annotation_path, ontology)

# NOTE(review): presumably extend() adds ancestor terms to each annotation;
# confirm against go_manipulations.
ontology.extend(annotations)
annotations.write()
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,14 @@ | ||
#!/usr/bin/python | ||
import go_manipulations | ||
from os import path | ||
from sys import argv | ||
|
||
# Command-line arguments: gene-GO annotation file, category to filter on,
# then the go.obo file.
annotation_path = argv[1]
category = argv[2]
obo_path = argv[3]

# Build the ontology, then load the annotations against it.
ontology = go_manipulations.GOtree(obo_path)
annotations = go_manipulations.GOgenes(annotation_path, ontology)

# NOTE(review): whether the category is kept or removed is decided inside
# go_manipulations.filter_category; confirm there.
ontology.filter_category(annotations, category)
annotations.write()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,50 @@ | ||
|
||
''' | ||
Setup user to run GBA framework | ||
For now: only sets the go.obo file | ||
usage: python3 gba_config.py -obo <go.obo> | ||
''' | ||
|
||
|
||
import argparse | ||
import configparser | ||
|
||
from sys import stderr | ||
from pathlib import Path | ||
from goatools.obo_parser import GODag | ||
|
||
|
||
# command line arguments
parser = argparse.ArgumentParser()
# -obo is mandatory: without it there is no ontology file to load or record,
# so let argparse report a usage error instead of passing None to GODag.
parser.add_argument("-obo", dest="go_obo", required=True,
                    help="set location of go.obo file")
args = parser.parse_args()


# get version info: the text between 'rel(' and the next ')' in the version
# string reported by GODag; fall back to 'unknown' if that marker is absent.
_, rel_marker, rel_tail = GODag(args.go_obo).version.partition('rel(')
obodag_version = rel_tail.split(')')[0] if rel_marker else 'unknown'


# init
config = configparser.ConfigParser()

# set values
config['GO'] = {'go_obo': args.go_obo}

# write file
with open('{}/.gbaconfig'.format(Path.home()), 'w') as outf:

    # Human-readable header; the lines start with '#', so configparser
    # treats them as comments when the file is read back.
    outf.write(
"""# GBA config file
# ---------------
#
# go.obo :
#    - file: {}
#    - date: {}
""".format(args.go_obo, obodag_version)
    )

    config.write(outf)

stderr.write('[INFO] GBA config file written to ~/.gbaconfig\n')
Oops, something went wrong.