Skip to content

Commit

Permalink
initial commit
Browse files Browse the repository at this point in the history
  • Loading branch information
Thomas DEPUYDT committed Jun 14, 2021
0 parents commit 64d7d01
Show file tree
Hide file tree
Showing 29 changed files with 2,572 additions and 0 deletions.
18 changes: 18 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# vHRR: co-expression based gene function prediction

## Installation

1. Clone repo
2. Add the path to your go.obo file in `go_config.sh`
3. Run `bash go_config.sh`

## Run

The folders `input` and `nextflow_config` currently contain example data. Refer to these examples to check the expected data formats.

1. Replace the example data in `./input/` with your own.
2. Run `bash preprocess_go_data.sh` (replace `go_gene.tsv` with the filename of your input GO annotation file).
3. Replace the input files in `nextflow_config/vHRR_single.config` or `nextflow_config/vHRR_multiple.config` with your filenames and adapt the nextflow parameters to fit your needs and system.
4. For a single atlas: run `bash vHRR_single.sh`. For multiple atlases: run `bash vHRR_multiple.sh`.


3 changes: 3 additions & 0 deletions go_config.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
module load python

# Set this to the location of your go.obo file before running this script.
GO_OBO=/path/to/go.obo

# gba_config.py lives in scripts/ (this script is run from the repository
# root). The path is passed as a quoted variable: the former literal
# "</path/to/go.obo>" placeholder was parsed by the shell as redirections.
python3 scripts/gba_config.py -obo "$GO_OBO"
Binary file added input/example_atlas_1.tsv.gz
Binary file not shown.
Binary file added input/example_atlas_2.tsv.gz
Binary file not shown.
Binary file added input/example_atlas_3.tsv.gz
Binary file not shown.
Binary file added input/go_gene.tsv.gz
Binary file not shown.
31 changes: 31 additions & 0 deletions nextflow_config/vHRR_multiple.config
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
// Input data
// NOTE: wkdir is an absolute, site-specific path; the other paths below are
// derived from it.
params.wkdir      = "/group/transreg/thpuy/vHRR"
// Glob pattern selecting the atlas files.
params.atlases    = "${params.wkdir}/input/example_atlas_*.tsv"
// Same file name that preprocess_go_data.sh writes as its OUTFILE.
params.go_gene    = "${params.wkdir}/input/go_gene_ic.tsv"
params.out_folder = "${params.wkdir}/output"


executor {
    queueSize = 100
}

// Per-process settings: every process is submitted through the 'sge'
// executor with one CPU and '-l h_vmem=1G' passed as cluster option.
process {

    withName: 'prepare_starter_pack' {
        executor = 'sge'
        cpus = 1
        clusterOptions = '-l h_vmem=1G'
    }

    withName: 'scan_HRR' {
        executor = 'sge'
        cpus = 1
        clusterOptions = '-l h_vmem=1G'
    }

    withName: 'evaluate_predictions' {
        executor = 'sge'
        cpus = 1
        clusterOptions = '-l h_vmem=1G'
    }
}
32 changes: 32 additions & 0 deletions nextflow_config/vHRR_single.config
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
// Input data
// NOTE: wkdir is an absolute, site-specific path; the other paths below are
// derived from it.
params.wkdir           = "/group/transreg/thpuy/vHRR"
// The single atlas file to process.
params.atlas           = "${params.wkdir}/input/example_atlas_1.tsv"
// Same file name that preprocess_go_data.sh writes as its OUTFILE.
params.go_gene         = "${params.wkdir}/input/go_gene_ic.tsv"
params.out_folder      = "${params.wkdir}/output"
// Empty by default.
// NOTE(review): the accepted values are defined by the pipeline script -- confirm there.
params.sample_weighing = ""


executor {
    queueSize = 100
}

// Per-process settings: every process is submitted through the 'sge'
// executor with one CPU and '-l h_vmem=1G' passed as cluster option.
process {

    withName: 'prepare_starter_pack' {
        executor = 'sge'
        cpus = 1
        clusterOptions = '-l h_vmem=1G'
    }

    withName: 'scan_HRR' {
        executor = 'sge'
        cpus = 1
        clusterOptions = '-l h_vmem=1G'
    }

    withName: 'evaluate_predictions' {
        executor = 'sge'
        cpus = 1
        clusterOptions = '-l h_vmem=1G'
    }
}
6 changes: 6 additions & 0 deletions preprocess_go_data.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
module load python

# Input GO annotation file and the output file the preprocessing step writes.
INFILE=input/go_gene.tsv
OUTFILE=input/go_gene_ic.tsv

# Quote the expansions so a path containing spaces stays a single argument.
python3 scripts/preprocess_go_data.py "$INFILE" "$OUTFILE"
14 changes: 14 additions & 0 deletions scripts/backend.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
import configparser
from pathlib import Path

def read_config():
    """Load the GBA configuration stored in ``~/.gbaconfig``.

    Returns:
        configparser.ConfigParser: parser populated from the config file.

    Raises:
        FileNotFoundError: if ``~/.gbaconfig`` does not exist or cannot be
            opened.
        configparser.Error: if the file exists but cannot be parsed.
    """
    config = configparser.ConfigParser()
    config_path = Path.home() / '.gbaconfig'
    # ConfigParser.read() silently skips files it cannot open and returns the
    # list of files it actually parsed, so a missing file has to be detected
    # from that return value; it never raises for a missing file.
    if not config.read(str(config_path)):
        raise FileNotFoundError('[ERROR] GBA config file missing: run scripts/gba_config.py (usage: python3 gba_config.py [options])')
    return config

def get_obo():
    """Return the ``go_obo`` value from the ``[GO]`` section of the GBA config."""
    go_section = read_config()['GO']
    return go_section['go_obo']
23 changes: 23 additions & 0 deletions scripts/calculate_ICu_per_GO.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
#!/usr/bin/python
from sys import argv
from math import log

go_files = argv[1:]
go_data = {}  # { go_id: set(gene_id) }
total_genes = set()


def read_go_file(go_file):
    """Add the GO annotations found in *go_file* to the module-level tables.

    Each line is split on tabs; the first column is used as the GO id and the
    second as the gene id, any further columns are ignored. The gene is added
    to ``go_data[go_id]`` and to ``total_genes``.

    Lines with fewer than two columns (for example blank lines) are skipped
    instead of aborting the run with an unpacking error.
    """
    with open(go_file, 'r') as reader:
        for line in reader:
            fields = line.strip().split("\t")
            if len(fields) < 2:
                continue
            go_id, gene_id = fields[:2]
            go_data.setdefault(go_id, set()).add(gene_id)
            total_genes.add(gene_id)
# Load every annotation file given on the command line.
for go_file in go_files:
    read_go_file(go_file)

# From here on total_genes is the number of distinct genes (as a float),
# no longer the set of gene ids.
total_genes = float(len(total_genes))

# One output line per GO id, in sorted order: "<go_id>\t<ICu>", where
# ICu = -log10(genes annotated to the term / all genes) / log10(all genes).
for go_id in sorted(go_data):
    annotated_fraction = float(len(go_data[go_id])) / total_genes
    ICu = -1 * log(annotated_fraction, 10) / log(total_genes, 10)
    print("\t".join((go_id, str(ICu))))

14 changes: 14 additions & 0 deletions scripts/extend_go.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
#!/usr/bin/python
import go_manipulations
from os import path
from sys import argv

# Command line: extend_go.py <gene_go_file> <go_obo>
annotation_path = argv[1]
obo_path = argv[2]

# The GOtree is created from the OBO path first because GOgenes takes it as
# its second argument.
ontology = go_manipulations.GOtree(obo_path)
annotations = go_manipulations.GOgenes(annotation_path, ontology)

# NOTE(review): extend() presumably adds the ancestor terms of each annotated
# GO term -- confirm in go_manipulations.
ontology.extend(annotations)

# write() is called without arguments; where the result goes is decided
# inside go_manipulations.
annotations.write()

14 changes: 14 additions & 0 deletions scripts/filter_go_category.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
#!/usr/bin/python
import go_manipulations
from os import path
from sys import argv

# Command line: filter_go_category.py <gene_go_file> <filter_category> <go_obo>
annotation_path = argv[1]
category = argv[2]
obo_path = argv[3]

# The GOtree is created from the OBO path first because GOgenes takes it as
# its second argument.
ontology = go_manipulations.GOtree(obo_path)
annotations = go_manipulations.GOgenes(annotation_path, ontology)

# NOTE(review): presumably restricts the annotations to the given GO
# category -- confirm in go_manipulations.
ontology.filter_category(annotations, category)

# write() is called without arguments; where the result goes is decided
# inside go_manipulations.
annotations.write()
50 changes: 50 additions & 0 deletions scripts/gba_config.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@

'''
Setup user to run GBA framework
For now: only sets the go.obo file
usage: python3 gba_config.py -obo <go.obo>
'''


import argparse
import configparser

from sys import stderr
from pathlib import Path
from goatools.obo_parser import GODag


# command line arguments
parser = argparse.ArgumentParser()
# -obo is mandatory: without it args.go_obo would be None and the failure
# would only surface later, inside GODag, with a far less helpful message.
parser.add_argument("-obo", dest="go_obo", required=True, help="set location of go.obo file")
args = parser.parse_args()

# Record an absolute path so the config stays valid when the scripts that
# read it are started from a different working directory.
go_obo = str(Path(args.go_obo).absolute())


# get version info
# NOTE(review): assumes GODag.version contains 'rel(<release>)' -- confirm
# against the installed goatools version.
obodag_version = GODag(go_obo).version.split('rel(')[1].split(')')[0]


# init
config = configparser.ConfigParser()

# set values
config['GO'] = {'go_obo': go_obo}

# write file: a commented header first, then the [GO] section itself
with open('{}/.gbaconfig'.format(Path.home()), 'w') as outf:

    outf.write(
"""# GBA config file
# ---------------
#
# go.obo :
# - file: {}
# - date: {}
""".format(go_obo, obodag_version)
    )

    config.write(outf)

stderr.write('[INFO] GBA config file writen to ~/.gbaconfig\n')
Loading

0 comments on commit 64d7d01

Please sign in to comment.