-
Notifications
You must be signed in to change notification settings - Fork 55
/
Copy pathsqanti3_rescue.py
executable file
·143 lines (103 loc) · 5.04 KB
/
sqanti3_rescue.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
#!/usr/bin/env python3
__author__ = "[email protected]"
###################################################
########## SQANTI3 RESCUE WRAPPER ##########
###################################################
#### PREPARATION ####
## Module import
import os
from src.rescue_argparse import rescue_argparse
from src.module_logging import rescue_logger, message, update_logger
from src.logging_config import rescue_art, art_logger
from src.argparse_utils import rescue_args_validation
from src.commands import run_command
from src.config import __version__
from src.rescue_steps import (
run_automatic_rescue,
rescue_candidates, rescue_targets,
run_candidate_mapping, run_rules_rescue, run_ML_rescue
)
from src.utilities.rescue import sq_requant
def main():
    """Run the SQANTI3 rescue pipeline end to end.

    Parses CLI arguments, performs the automatic rescue step, optionally the
    full rescue-by-mapping (rules or ML flavour), writes the final rescued
    GTF, and optionally re-quantifies the rescued transcriptome.

    Raises:
        SystemExit: if requantification is requested without a counts file.
    """
    art_logger.info(rescue_art())
    args = rescue_argparse().parse_args()
    # Point the rescue logger at the output directory with the requested level.
    update_logger(rescue_logger, args.dir, args.log_level)
    # Check if the logs directory exists, if not create it
    rescue_args_validation(args)
    rescue_logger.info(f"Running SQANTI3 rescue pipeline version {__version__}")

    #### RUN AUTOMATIC RESCUE ####
    # This part is run for both rules and ML, once all arg tests have passed.
    message(f"Initializing SQANTI3 rescue pipeline in {args.mode} mode", rescue_logger)
    prefix = f"{args.dir}/{args.output}"
    run_automatic_rescue(args.filter_class, args.rescue_mono_exonic,
                         args.mode, prefix)
    message("Automatic rescue completed", rescue_logger)

    ### RUN FULL RESCUE (IF REQUESTED) ###
    # Path of the inclusion list written by the full-mode steps below;
    # built once and reused when assembling the final GTF.
    inclusion_list = f"{prefix}_rescue_inclusion-list.tsv"
    if args.mode == "full":
        candidates = rescue_candidates(args.filter_class, args.rescue_mono_exonic,
                                       prefix)
        rescue_logger.debug(f"Rescue candidates: {len(candidates)}")
        targets = rescue_targets(args.filter_class, candidates,
                                 args.refGTF, prefix)
        rescue_logger.debug(f"Rescue targets: {len(targets)}")
        #### RUN MAPPING ####
        # In full mode, rescue maps candidates not included in the automatic
        # rescue (ISM, NIC, NNC) to long-read and reference isoforms passing
        # the filter (targets).
        run_candidate_mapping(args, targets, candidates)
        #### RUN ML FILTER RESCUE ####
        # Combines the reference ML filter run with the mapping results,
        # therefore run only for the ML filter.
        if args.subcommand == "ml":
            message("Rescue-by-mapping for ML filter", rescue_logger)
            run_ML_rescue(args)
        #### RUN RULES FILTER RESCUE ####
        # Runs the SQ3 rules filter on the reference transcriptome and
        # combines the results with the mapping hits obtained above.
        if args.subcommand == "rules":
            rescue_logger.info("**** RESCUE-BY-MAPPING FOR RULES FILTER")
            run_rules_rescue(args)
        # Finish print if output exists (same for rules and ML).
        if os.path.isfile(inclusion_list):
            message(f"Rescue {args.subcommand} finished successfully!", rescue_logger)
            rescue_logger.info(f"Final rescued transcript list written to file: {inclusion_list}")

    #### WRITE FINAL OUTPUTS OF RESCUE ####
    # Create a new GTF that includes the rescued transcripts.
    rescue_logger.info("Adding rescued transcripts to provided SQ3 filtered GTF.")
    tmp_gtf = f"{args.dir}/rescued_only_tmp.gtf"
    output_gtf = f"{prefix}_rescued.gtf"
    # The inclusion-list file is only produced for mode = full; in
    # mode = automatic it is replaced by the automatic rescued list.
    if args.mode == "full":
        rescued_list = inclusion_list
    else:
        rescued_list = f"{prefix}_automatic_rescued_list.tsv"
    # Filter the reference GTF down to the rescued IDs (tmp_gtf) ...
    gtf_cmd = f"gffread --ids {rescued_list} -T -o {tmp_gtf} {args.refGTF}"
    run_command(gtf_cmd, rescue_logger, "log/rescue/gtf.log", description="Filter reference GTF to create tmp GTF")
    # ... then concatenate it with the SQ3-filtered GTF.
    cat_cmd = f"cat {args.rescue_gtf} {tmp_gtf} > {output_gtf}"
    run_command(cat_cmd, rescue_logger, "log/rescue/cat.log", description="Concatenate filtered GTF with tmp GTF")
    rescue_logger.info(f"Added rescued reference transcripts to provided GTF ({args.rescue_gtf} )")
    rescue_logger.info(f"Final output GTF written to file: {output_gtf} ")
    os.remove(tmp_gtf)

    ## END ##
    message("Rescue finished successfully!", rescue_logger)
    if args.requant:
        if not args.counts:
            # BUG FIX: previously this only logged the error and fell through
            # into the requantification calls anyway; abort instead.
            rescue_logger.error("Counts file is required for the requantification module.")
            raise SystemExit(1)
        # BUG FIX: this progress message was logged at ERROR level.
        rescue_logger.info("\nRunning requantification.\n")
        rescue_gtf, inclusion_list, counts, rescued = sq_requant.parse_files(args)
        sq_requant.run_requant(rescue_gtf, inclusion_list, counts, rescued, args.dir, args.output)
        sq_requant.to_tpm(rescue_gtf, args.dir, args.output)
        rescue_logger.info("\nRequantification finished!\n")
## Run main()
# Script entry point: execute the rescue pipeline only when this file is
# run directly (not when imported as a module).
if __name__ == "__main__":
    main()