diff --git a/.github/workflows/testing.yml b/.github/workflows/testing.yml index a564e55..705d76c 100644 --- a/.github/workflows/testing.yml +++ b/.github/workflows/testing.yml @@ -21,9 +21,8 @@ jobs: - uses: actions/checkout@v4 - uses: conda-incubator/setup-miniconda@v3 with: - python-version: 3.8 - mamba-version: "*" - channels: conda-forge,bioconda,defaults + miniforge-version: latest + channels: conda-forge,bioconda channel-priority: true - name: install dependancies diff --git a/db/default_template.docx b/db/default_template.docx index 6ad3ba9..9d5213c 100644 Binary files a/db/default_template.docx and b/db/default_template.docx differ diff --git a/tb-profiler b/tb-profiler index 883b8a7..9aa9c62 100644 --- a/tb-profiler +++ b/tb-profiler @@ -34,6 +34,7 @@ discovered_plugins = { __softwarename__ = 'tbprofiler' +__default_db_dir__ = f'{sys.base_prefix}/share/{__softwarename__}' @atexit.register def cleanup(): @@ -137,20 +138,13 @@ def main_profile(args): tbp.process_tb_profiler_args(args) variants_profile = pp.run_profiler(args) + args.notes = [] + for plugin in tbp.ProfilePlugin.__subclasses__(): + plugin().process_variants(args,variants_profile) - notes = set() - if 'rules' in args.conf: - rules_applied = pp.apply_rules(args.conf['rules'], variants_profile, just_make_note=True if args.implement_rules==False else False) - for var in variants_profile: - for ann in var.annotation: - if 'note' in ann: - notes.add(ann['note']) - - else: - rules_applied = [] - - logging.debug(rules_applied) + tbp.clean_up_duplicate_annotations(variants_profile) + # Convert variant objects to DrVariant if they cause resistance for var in variants_profile: var.convert_to_dr_element() @@ -175,20 +169,22 @@ def main_profile(args): result = tbp.create_resistance_result( args = args, - notes=notes, + notes=args.notes, lineage=barcode_result, spoligotype=spoligotype, variants=variants_profile, qc=qc, ) + for plugin in tbp.ProfilePlugin.__subclasses__(): + plugin().process_result(args,result) + if args.snp_dist: tbp.run_snp_dists(args,result) tbp.update_neighbour_snp_dist_output(args,result) - for plugin in tbp.ProfilePlugin.__subclasses__(): - plugin().run(args) + ### Create folders for results if they don't exist ### @@ -257,7 +253,7 @@ def main_update_tbdb(args): extra_args = " ".join(extra_args) with TempFilePrefix() as tmpfile: - pp.run_cmd(f"tb-profiler create_db --prefix {args.prefix} --csv mutations.csv --watchlist watchlist.csv {extra_args} --load") + pp.run_cmd(f"tb-profiler create_db --db_dir {args.db_dir} --prefix {args.prefix} --csv mutations.csv --watchlist watchlist.csv {extra_args} --load") os.chdir("../") pp.logging.info("Sucessfully updated TBDB") @@ -291,9 +287,30 @@ def main_create_db(args): pp.create_db(args,extra_files=extra_files) def main_load_library(args): - variables_file = "%(prefix)s.variables.json" % vars(args) source_dir = os.path.realpath(args.dir) - pp.load_db(variables_file,args.software_name,source_dir=source_dir) + with TempFolder() as tmpfolder: + if args.prefix.endswith(".tar.gz") or args.prefix.endswith(".zip"): + if args.prefix.endswith(".tar.gz"): + import tarfile + with tarfile.open(args.prefix, 'r:gz') as tar_ref: + tar_ref.extractall(tmpfolder) + elif args.prefix.endswith(".zip"): + import zipfile + with zipfile.ZipFile(args.prefix, 'r') as zip_ref: + zip_ref.extractall(tmpfolder) + + variables_files = glob.glob(f"{tmpfolder}/*.variables.json") + if len(variables_files)!=1: + pp.errorlog("Archive must contain only one variables file") + sys.exit() + variables_file = variables_files[0] + args.prefix = "{}/{}".format(tmpfolder,variables_file.split("/")[-1].replace(".variables.json","")) + source_dir = os.path.realpath(tmpfolder) + + + + variables_file = "%(prefix)s.variables.json" % vars(args) + pp.load_db(variables_file,args.software_name,source_dir=source_dir) @@ -459,12 +476,14 @@ for cls in tbp.ProfilePlugin.__subclasses__(): plugins.add_argument(*a['args'],**a['kwargs']) other=parser_sub.add_argument_group("Other options") +other.add_argument('--snpeff_config','--snpeff-config',type=str,help='Set the config filed used by snpEff') other.add_argument('--logging',type=str.upper,default="INFO",choices=["DEBUG","INFO","WARNING","ERROR","CRITICAL"],help='Logging level') other.add_argument('--debug',action='store_true',help=argparse.SUPPRESS) other.add_argument('--delly_vcf','--delly-vcf',help=argparse.SUPPRESS) other.add_argument('--supplementary_bam','--supplementary-bam',help=argparse.SUPPRESS) other.add_argument('--low_dp_mask','--low-dp-mask',help=argparse.SUPPRESS) other.add_argument('--save_low_dp_mask','--save-low-dp-mask',action='store_true',help=argparse.SUPPRESS) +other.add_argument('--db_dir',type=os.path.abspath,default=__default_db_dir__,help='Database directory') other.add_argument('--no_clean','--no-clean', action='store_true',help=argparse.SUPPRESS) other.add_argument('--temp',help="Temp firectory to process all files",type=str,default=".") other.add_argument('--version', action='version', version="tb-profiler version %s" % tbp.__version__) @@ -488,6 +507,7 @@ parser_sub.add_argument('--threads','-t',default=1,help='Threads to use',type=in parser_sub.add_argument('--dir','-d',default=".",help='Storage directory') parser_sub.add_argument('--no_clean','--no-clean', action='store_true',help=argparse.SUPPRESS) parser_sub.add_argument('--temp',help="Temp firectory to process all files",type=str,default=".") +parser_sub.add_argument('--db_dir',type=os.path.abspath,default=__default_db_dir__,help='Database directory') parser_sub.add_argument('--version', action='version', version="tb-profiler version %s" % tbp.__version__) parser_sub.add_argument('--logging',type=str.upper,default="INFO",choices=["DEBUG","INFO","WARNING","ERROR","CRITICAL"],help='Logging level') parser_sub.add_argument('--debug',action='store_true',help=argparse.SUPPRESS) @@ -513,6 +533,7 @@ parser_sub.add_argument('--no_clean','--no-clean', action='store_true',help=argp parser_sub.add_argument('--threads','-t',default=1,help='Threads to use',type=int) parser_sub.add_argument('--ram',default=2,type=int_2_or_more,help='Maximum memory to use in Gb') parser_sub.add_argument('--temp',help="Temp firectory to process all files",type=str,default=".") +parser_sub.add_argument('--db_dir',type=os.path.abspath,default=__default_db_dir__,help='Database directory') parser_sub.add_argument('--version', action='version', version="tb-profiler version %s" % tbp.__version__) parser_sub.add_argument('--logging',type=str.upper,default="INFO",choices=["DEBUG","INFO","WARNING","ERROR","CRITICAL"],help='Logging level') parser_sub.add_argument('--debug',action='store_true',help=argparse.SUPPRESS) @@ -531,6 +552,7 @@ parser_sub.add_argument('--external_db','--external-db',type=str,help='Path to d parser_sub.add_argument('--dir','-d',nargs="+",default=["results"],help='Storage directory') parser_sub.add_argument('--no_clean','--no-clean', action='store_true',help=argparse.SUPPRESS) parser_sub.add_argument('--temp',help="Temp firectory to process all files",type=str,default=".") +parser_sub.add_argument('--db_dir',type=os.path.abspath,default=__default_db_dir__,help='Database directory') parser_sub.add_argument('--version', action='version', version="tb-profiler version %s" % tbp.__version__) parser_sub.add_argument('--logging',type=str.upper,default="INFO",choices=["DEBUG","INFO","WARNING","ERROR","CRITICAL"],help='Logging level') parser_sub.add_argument('--debug',action='store_true',help=argparse.SUPPRESS) @@ -551,6 +573,7 @@ parser_sub.add_argument('--dir','-d',default=".",help='Storage directory') parser_sub.add_argument('--no_clean','--no-clean', action='store_true',help=argparse.SUPPRESS) parser_sub.add_argument('--suspect',action="store_true",help=argparse.SUPPRESS) parser_sub.add_argument('--temp',help="Temp firectory to process all files",type=str,default=".") +parser_sub.add_argument('--db_dir',type=os.path.abspath,default=__default_db_dir__,help='Database directory') parser_sub.add_argument('--version', action='version', version="tb-profiler version %s" % tbp.__version__) parser_sub.add_argument('--logging',type=str.upper,default="INFO",choices=["DEBUG","INFO","WARNING","ERROR","CRITICAL"],help='Logging level') parser_sub.add_argument('--debug',action='store_true',help=argparse.SUPPRESS) @@ -576,6 +599,7 @@ parser_sub.add_argument('--include_original_mutation','--include-original-mutati parser_sub.add_argument('--load',action="store_true", help='Automaticaly load database') parser_sub.add_argument('--no_overwrite','--no-overwrite',action="store_true", help="Don't load if existing database with prefix exists") parser_sub.add_argument('--dir','-d',default=".",help='Storage directory') +parser_sub.add_argument('--db_dir',type=os.path.abspath,default=__default_db_dir__,help='Database directory') parser_sub.add_argument('--no_clean','--no-clean', action='store_true',help=argparse.SUPPRESS) parser_sub.add_argument('--temp',help="Temp firectory to process all files",type=str,default=".") parser_sub.add_argument('--version', action='version', version="tb-profiler version %s" % tbp.__version__) @@ -588,6 +612,7 @@ parser_sub.add_argument('prefix',type=str,help='Prefix to the library files') parser_sub.add_argument('--dir','-d',default=".",help='Storage directory') parser_sub.add_argument('--no_clean','--no-clean', action='store_true',help=argparse.SUPPRESS) parser_sub.add_argument('--temp',help="Temp firectory to process all files",type=str,default=".") +parser_sub.add_argument('--db_dir',type=os.path.abspath,default=__default_db_dir__,help='Database directory') parser_sub.add_argument('--version', action='version', version="tb-profiler version %s" % tbp.__version__) parser_sub.add_argument('--logging',type=str.upper,default="INFO",choices=["DEBUG","INFO","WARNING","ERROR","CRITICAL"],help='Logging level') parser_sub.add_argument('--debug',action='store_true',help=argparse.SUPPRESS) @@ -599,7 +624,8 @@ parser_sub.add_argument('--repo','-r',default="https://github.com/jodyphelan/tbd parser_sub.add_argument('--branch','-b',default="tbdb",help='Branch to pull from') parser_sub.add_argument('--commit','-c',help='Git commit hash to checkout (default: latest)') parser_sub.add_argument('--match_ref','--match-ref',type=str,help='The prefix for all output files') -parser_sub.add_argument('--dir','-d',default=".",help='Storage directory') +parser_sub.add_argument('--dir','-d',default=".",help=argparse.SUPPRESS) +parser_sub.add_argument('--db_dir',type=os.path.abspath,default=__default_db_dir__,help='Database directory') parser_sub.add_argument('--no_clean','--no-clean', action='store_true',help=argparse.SUPPRESS) parser_sub.add_argument('--temp',help="Temp firectory to process all files",type=str,default=".") parser_sub.add_argument('--version', action='version', version="tb-profiler version %s" % tbp.__version__) @@ -659,13 +685,13 @@ if hasattr(args, 'func'): args.dir = os.path.abspath(args.dir) if hasattr(args, 'db'): - if args.db=="tbdb" and not args.external_db and pp.nofile(sys.base_prefix+"/share/tbprofiler/tbdb.fasta"): + if args.db=="tbdb" and not args.external_db and pp.nofile(f"{args.db_dir}/tbdb.fasta"): logging.error("Can't find the tbdb file at %s. Please run 'tb-profiler update_tbdb' to load the default library or specify another using the '--external_db' flag" % sys.base_prefix) raise SystemExit if args.external_db: - args.conf = pp.get_db(args.software_name,args.external_db) + args.conf = pp.get_db(args.db_dir,args.external_db) else: - args.conf = pp.get_db(args.software_name,args.db) + args.conf = pp.get_db(args.db_dir,args.db) if args.conf is None: logging.error("Can't find the database %s. Please run 'tb-profiler create_db' to create the database or specify another using the '--external_db' flag" % args.db) raise SystemExit diff --git a/tbprofiler/__init__.py b/tbprofiler/__init__.py index f78cb59..9187a73 100644 --- a/tbprofiler/__init__.py +++ b/tbprofiler/__init__.py @@ -6,27 +6,7 @@ from .output import * from .snp_dists import * from .docx import * -from abc import ABC, abstractmethod +from .plugins import * +from .rules import * __version__ = "6.5.0" - - -class ProfilePlugin: - """ - A class to define a plugin for tbprofiler - """ - - @abstractmethod - def pre_process(self,args): - """Generic pre-check method""" - pass - - @abstractmethod - def run(self): - """Generic run method""" - pass - - @abstractmethod - def post_process(self,args): - """Generic post-check method""" - pass diff --git a/tbprofiler/docx.py b/tbprofiler/docx.py index 2911d27..fa8c2db 100644 --- a/tbprofiler/docx.py +++ b/tbprofiler/docx.py @@ -194,7 +194,7 @@ def write_output( fail_variants_unique = set([(f['gene'],f['change']) for f in fail_variants]) if len(fail_variants)>0: - result.notes.append(f"{len(fail_variants_unique)} resistance variants failed QC checks. These have not been used to generate the mutation report. See QC failed variants table for details.") + result.notes.append(f"{len(fail_variants_unique)} resistance variant(s) failed QC checks. These have not been used to generate the mutation report. See QC failed variants table for details.") other_variants = [] @@ -224,7 +224,7 @@ def write_output( 'rows':rows, 'comments': comments, 'qc_check': qc_check, - 'notes': ' '.join(result.notes), + 'notes': '\n\n'.join(result.notes), 'fail_variants': fail_variants, 'fail_comments': fail_comments, 'other_variants': other_variants, diff --git a/tbprofiler/plugins.py b/tbprofiler/plugins.py new file mode 100644 index 0000000..6084f5b --- /dev/null +++ b/tbprofiler/plugins.py @@ -0,0 +1,29 @@ +import argparse +from pathogenprofiler.models import Variant +from .models import ProfileResult +from typing import List + +class ProfilePlugin: + """ + A class to define a plugin for tbprofiler + """ + + def pre_process(self,args): + """Generic pre-check method""" + pass + + def run(self): + """Generic run method""" + pass + + def post_process(self,args): + """Generic post-check method""" + pass + + def process_variants(self,args: argparse.Namespace, variants: List[Variant]): + """Generic variant processing method""" + pass + + def process_result(self,args: argparse.Namespace, result: ProfileResult): + """Generic result procesing method""" + pass \ No newline at end of file diff --git a/tbprofiler/rules.py b/tbprofiler/rules.py new file mode 100644 index 0000000..291e239 --- /dev/null +++ b/tbprofiler/rules.py @@ -0,0 +1,143 @@ +from .plugins import ProfilePlugin +import logging +from pathogenprofiler.models import Variant +from .models import ProfileResult +from typing import List +import math +import argparse + + +def search_variant(variants: List[Variant], **kwargs) -> List[Variant]: + type_expansions = { + 'lof': ('frameshift_variant','stop_gained','transcript_ablation','feature_ablation') + } + found_variants = set() + for var in variants: + for csq in var.consequences: + tests = {} + for key,val in kwargs.items(): + + if isinstance(val,str): + values = [val] + else: + values = val + + for ele in values: + if ele.lower() in type_expansions: + values = values + list(type_expansions[ele.lower()]) + + test = False + if (hasattr(csq,key) and vars(csq)[key] in values): + test = True + for ann in csq.annotation: + if key in ann and ann[key] in values: + test = True + + tests[key] = test + + if all(tests.values()): + found_variants.add(var) + + return list(found_variants) + +def inactivate_drug_resistance(variant: Variant): + """ + Inactivate a drug resistance variant + """ + for ann in variant.consequences[0].annotation: + if ann['type']=='drug_resistance': + ann['type'] = 'inactivated_drug_resistance' + for csq in variant.consequences: + for ann in csq.annotation: + if ann['type']=='drug_resistance': + ann['type'] = 'inactivated_drug_resistance' + + + +class MmpR5WHORule(ProfilePlugin): + """ + Epistasis rule for mmpL5/mmpR5 + """ + + def process_variants(self,args,variants: List[Variant]): + """Generic variant processing method""" + + v1 = search_variant(variants,drug='bedaquiline',gene_name='mmpR5') + v2 = search_variant(variants,gene_name='mmpL5',type='LoF') + + v1_total_freq = math.ceil(sum([x.freq*100 for x in v1])) + v1_total_freq = min(v1_total_freq,100) + + v2_total_freq = math.ceil(sum([x.freq*100 for x in v2])) + v2_total_freq = min(v2_total_freq,100) + + if v1 and v2: + v1_changes = ", ".join([x.change for x in v1]) + v2_changes = ", ".join([x.change for x in v2]) + note = f"Loss of function mutation(s) detected in mmpL5 ({v2_changes}) which may abrogate the effect of the genetically linked mmpR5 mutation(s) ({v1_changes})." + + if v2_total_freq==100: + inactivate_drug_resistance(v1) + freq_diff = v1_total_freq - v2_total_freq + if freq_diff > 10: + note += f" However, the combined frequency of the mmpR5 mutation(s) is {freq_diff}% higher than the mmpL5 mutation(s), indicating a potential resistant subpopulation. Please consult the raw data for more information." + + args.notes.append(note) + + +class eisWHORule(ProfilePlugin): + """ + Epistasis rule for mmpL5/mmpR5 + """ + + def process_variants(self,args,variants: List[Variant]): + """Generic variant processing method""" + + v1 = search_variant(variants,drug=['kanamycin','amikacin'],gene_name='eis') + v2 = search_variant(variants,gene_name='eis',type='LoF') + + v1_total_freq = math.ceil(sum([x.freq*100 for x in v1])) + v1_total_freq = min(v1_total_freq,100) + + v2_total_freq = math.ceil(sum([x.freq*100 for x in v2])) + v2_total_freq = min(v2_total_freq,100) + + if v1 and v2: + v1_changes = ", ".join([x.change for x in v1]) + v2_changes = ", ".join([x.change for x in v2]) + note = f"Loss of function mutation(s) detected in eis ({v2_changes}) which may abrogate the effect of the genetically linked eis promoter mutation(s) ({v1_changes})." + + if v2_total_freq==100: + inactivate_drug_resistance(v1) + freq_diff = v1_total_freq - v2_total_freq + if freq_diff > 10: + note += f" However, the combined frequency of the eis promoter mutation(s) is {freq_diff}% higher than the eis coding mutation(s), indicating a potential resistant subpopulation. Please consult the raw data for more information." + + args.notes.append(note) + + +class SetConfidence(ProfilePlugin): + + def process_result(self, args: argparse.Namespace, result: ProfileResult): + for var in result.other_variants: + confidence = {} + for ann in var.annotation: + if 'confidence' in ann: + confidence[ann['drug']] = ann['confidence'] + + for drug in var.gene_associated_drugs: + if drug not in confidence: + if var.type=='synonymous_mutation': + confidence[drug] = 'Not Assoc W R - Interim' + else: + confidence[drug] = 'Uncertiain Significance' + var.annotation.append( + { + 'type':'who_confidence', + 'drug':drug, + 'confidence':confidence[drug], + 'comment':'' + } + ) + logging.debug(f'{var.gene_name} {var.change} does not have a confidence value for {drug}. Setting it to {confidence[drug]}') + diff --git a/tbprofiler/spoligotyping.py b/tbprofiler/spoligotyping.py index f2a70b2..63cf19f 100644 --- a/tbprofiler/spoligotyping.py +++ b/tbprofiler/spoligotyping.py @@ -5,6 +5,8 @@ import logging def spoligotype(args): + if args.bam: + args.bam_file = args.bam if "bam_file" in vars(args) and args.bam_file: result = bam2spoligotype(args.bam_file,args.files_prefix,args.conf,threads=args.threads,max_mem=args.ram) elif args.read1: