Skip to content

Commit

Permalink
snp_dist_search_all
Browse files Browse the repository at this point in the history
  • Loading branch information
jodyphelan committed Mar 23, 2024
1 parent efc3190 commit 5cf1f3d
Show file tree
Hide file tree
Showing 2 changed files with 7 additions and 29 deletions.
1 change: 1 addition & 0 deletions tb-profiler
Original file line number Diff line number Diff line change
Expand Up @@ -381,6 +381,7 @@ algorithm.add_argument('--call_whole_genome',action="store_true",help="Call vari
algorithm.add_argument('--snp_dist',type=int,help="Store variant set and get all samples with snp distance less than this cutoff (experimental feature)")
algorithm.add_argument('--snp_diff_db',type=str,help=argparse.SUPPRESS)
algorithm.add_argument('--snp_diff_no_store',action='store_true',help=argparse.SUPPRESS)
algorithm.add_argument('--snp_dist_search_all',action='store_true',help=argparse.SUPPRESS)
algorithm.add_argument('--no_trim',action="store_true",help="Don't trim files using trimmomatic")
algorithm.add_argument('--no_coverage_qc',action="store_true",help="Don't collect flagstats")
algorithm.add_argument('--no_samclip',action="store_false",help="Don't remove clipped reads from variant calling")
Expand Down
35 changes: 6 additions & 29 deletions tbprofiler/snp_dists.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,32 +14,6 @@
from .models import ProfileResult, LinkedSample
from typing import List, Tuple

# def extract_variant_set_old(vcf_file, exclude_bed, min_cov=10, min_freq=0.8):
# ref_diffs = set()
# missing = set()
# for l in cmd_out(f"bcftools view -V indels -T ^{exclude_bed} {vcf_file} | bcftools query -f '%POS[\t%GT:%AD]\n'"):
# if l[0]=="#": continue
# row = l.strip().split()
# pos = int(row[0])
# gt,ad = row[1].split(":")
# if ad==".": # delly
# continue
# if gt==".":
# missing.add(pos)
# continue
# ad = [int(x) for x in ad.split(",")]
# if sum(ad)<=min_cov:
# missing.add(pos)
# continue
# adf = sorted([float(x/sum(ad)) for x in ad])
# if adf[-1]<min_freq:
# missing.add(pos)
# continue
# if gt=="1/1":
# ref_diffs.add(int(pos))

# return ref_diffs,missing

def extract_variant_set(vcf_file: str) -> Tuple[set,set]:
ref_diffs = set()
missing = set()
Expand Down Expand Up @@ -99,9 +73,12 @@ def store(self,result: ProfileResult, vcf_file: str) -> None:
self.conn.commit()
self.diffs = diffs
self.missing = missing
def search(self,result: ProfileResult, vcf_file: str, cutoff: int = 20) -> List[LinkedSample]:
def search(self,result: ProfileResult, vcf_file: str, cutoff: int = 20, snp_dist_search_all: bool = False) -> List[LinkedSample]:
logging.info("Searching for close samples in %s" % self.filename)
self.c.execute("SELECT sample, diffs, missing FROM variants WHERE lineage=?",(result.sub_lineage,))
if snp_dist_search_all:
self.c.execute("SELECT sample, diffs, missing FROM variants")
else:
self.c.execute("SELECT sample, diffs, missing FROM variants WHERE lineage=?",(result.sub_lineage,))
self.diffs,self.missing = extract_variant_set(vcf_file)
sample_dists = []
for s,d,m in tqdm(self.c.fetchall(),desc="Searching for close samples"):
Expand Down Expand Up @@ -141,7 +118,7 @@ def run_snp_dists(args: argparse.Namespace,result: ProfileResult) -> None:
lock = f"{dbname}.lock"
with filelock.SoftFileLock(lock):
db = DB(dbname)
linked_samples = db.search(result,input_vcf,args.snp_dist)
linked_samples = db.search(result,input_vcf,args.snp_dist,args.snp_dist_search_all)
if not args.snp_diff_no_store:
db.store(result,input_vcf)
result.linked_samples = [d for d in linked_samples if d.sample!=result.id]
Expand Down

0 comments on commit 5cf1f3d

Please sign in to comment.