Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions 2passtools.yml
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@ channels:
- conda-forge
- bioconda
dependencies:
- minimap2
- samtools
- python=3.6
- pip
- numpy
Expand Down
125 changes: 122 additions & 3 deletions lib2pass/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
'''
import os
import logging
import tempfile

import click
import click_log
Expand Down Expand Up @@ -282,10 +283,128 @@ def filter(bed_fn, output_bed_fn, exprs):


@main.command()
@click.argument('fastq-fn', required=True, nargs=1)
@click.option('-b', '--output-bam-prefix', required=True, help='Output bam prefix')
@_common_options(SCORE_MERGE_COMMON_OPTIONS)
@click.option('--stranded/--unstranded', default=True,
help=('Whether input data is stranded or unstranded. '
'direct RNA is stranded, cDNA often isn\'t'))
@click.option('--exprs', required=False, default="decision_tree_2_pred")
@click.option('-mk', '--mm2-k', default=14, type=int, help='Minimap2 minimizer kmer size')
@click.option('-mw', '--mm2-w', default=5, type=int, help='Minimap2 minimizer window size')
@click.option('--mm2-splice-flank/--mm2-no-splice-flank', default=True,
help='Assume the next base to a GU donor site tends to be A/G')
@click.option('-mc', '--mm2-noncanon-splicing-pen', default=9, type=int,
help='Penalty for non-canonical (non GU/AG) splicing')
@click.option('--mm2-junc-bonus', default=12, type=int,
help='Score bonus for a splice donor or acceptor found in guide junctions')
@click.option('-mG', '--mm2-max-intron-size', default=100_000, type=int, help='Maximum intron size')
@click.option('--mm2-end-seed-pen', default=12, type=int, help='helps avoid tiny terminal exons')
@click.option('-p', '--processes', default=1)
@click.option('-s', '--random-seed', default=None, type=int)
@click_log.simple_verbosity_option(log)
def mm2pass():
raise NotImplementedError('TODO: implement convience tool to wrap '
'minimap2 and run two pass alignment')
def mm2pass(fastq_fn, output_bam_prefix,
output_bed_fn, ref_fasta_fn, annot_bed_fn,
jad_size_threshold,
primary_splice_local_dist, canonical_motifs,
lr_window_size, lr_kfold,
lr_low_confidence_threshold, lr_high_confidence_threshold,
classifier_type, keep_all_annot, stranded, exprs,
mm2_k, mm2_w, mm2_splice_flank, mm2_noncanon_splicing_pen,
mm2_junc_bonus, mm2_max_intron_size, mm2_end_seed_pen,
processes, random_seed):
'''
2passtools mm2pass: Wrapper which performs two pass alignment using
minimap2 and 2passtools score/filter.
'''
if random_seed is not None:
np.random.seed(random_seed)

from .minimap2 import map_with_minimap2

## FIRST PASS
first_pass_fn = f'{output_bam_prefix}.1pass.bam'
log.info(f'First pass alignment will be output to {first_pass_fn}')
if annot_bed_fn is not None:
log.info(f'Using guide junctions from {annot_bed_fn}')
map_with_minimap2(
fastq_fn,
ref_fasta_fn,
output_fn=first_pass_fn,
threads=processes,
k=mm2_k, w=mm2_w,
splice_flank=mm2_splice_flank,
noncanon_pen=mm2_noncanon_splicing_pen,
canon_splice_strand='f' if stranded else 'b',
junc_bed=annot_bed_fn,
junc_bonus=mm2_junc_bonus,
max_intron_size=mm2_max_intron_size,
end_seed_pen=mm2_end_seed_pen,
)

## SCORING
log.info(f'Parsing BAM file: {first_pass_fn}')
(introns, motifs, lengths,
counts, jad_labels,
is_primary_donor, is_primary_acceptor) = parse_introns(
first_pass_fn,
primary_splice_local_dist,
stranded,
1_000_000, processes
)
res = zip(*_all_predictions(
introns, motifs, lengths, counts, jad_labels,
is_primary_donor, is_primary_acceptor,
ref_fasta_fn, annot_bed_fn,
canonical_motifs, jad_size_threshold,
lr_window_size, lr_kfold,
lr_low_confidence_threshold,
lr_high_confidence_threshold,
classifier_type, keep_all_annot, processes
))

log.info(f'Writing results to {output_bed_fn}')
with open(output_bed_fn, 'w') as bed:
for i, motif, _, c, jad, pd, pa, d1, lrd, lra, d2 in res:
chrom, start, end, strand = i
bed.write(
f'{chrom:s}\t{start:d}\t{end:d}\t{motif:s}\t{c:d}\t{strand:s}\t'
f'{jad:d}\t{pd:d}\t{pa:d}\t{d1:d}\t'
f'{lrd:.3f}\t{lra:.3f}\t{d2:d}\n'
)

## FILTERING
log.info('Getting filtered guide junctions')
b_handle, filtered_bed_fn = tempfile.mkstemp(suffix='.bed')
with open(filtered_bed_fn, 'w') as bed:
for chrom, start, end, strand, decision in apply_eval_expression(output_bed_fn, exprs):
if decision:
record = f'{chrom}\t{start}\t{end}\tintron\t0\t{strand}\n'
bed.write(record)

## SECOND PASS
second_pass_fn = f'{output_bam_prefix}.2pass.bam'
log.info(f'Second pass alignment will be output to {second_pass_fn}')
map_with_minimap2(
fastq_fn,
ref_fasta_fn,
output_fn=second_pass_fn,
threads=processes,
k=mm2_k, w=mm2_w,
splice_flank=mm2_splice_flank,
noncanon_pen=mm2_noncanon_splicing_pen,
canon_splice_strand='f' if stranded else 'b',
junc_bed=filtered_bed_fn,
junc_bonus=mm2_junc_bonus,
max_intron_size=mm2_max_intron_size,
end_seed_pen=mm2_end_seed_pen,
)
os.close(b_handle)
os.remove(filtered_bed_fn)
log.info('Complete!')





if __name__ == '__main__':
Expand Down
53 changes: 35 additions & 18 deletions lib2pass/minimap2.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,20 @@
import os
import subprocess
import tempfile
import logging

MINIMAP2 = os.path.abspath(
os.path.split(__file__)[0] +
'/../external/minimap2/minimap2'
)
log = logging.getLogger('2passtools')

MINIMAP2 = os.environ.get('MINIMAP2_PATH', 'minimap2')
SAMTOOLS = os.environ.get('SAMTOOLS_PATH', 'samtools')

for program, prg_name in zip([MINIMAP2, SAMTOOLS], ['minimap2', 'samtools']):
try:
subprocess.check_call([program, '--help'],
stdout=subprocess.DEVNULL,
stderr=subprocess.DEVNULL)
except FileNotFoundError:
raise OSError(f'{prg_name} not found at location "{program}"')


def subprocess_command(cmd, stdout_fn):
Expand All @@ -19,50 +28,58 @@ def subprocess_command(cmd, stdout_fn):
if proc.returncode:
raise subprocess.CalledProcessError(stderr.decode())
else:
return stderr.decode()
return stderr.decode().strip()


def map_with_minimap2(fastq_fn, reference_fn, output_fn, threads=1,
use_canon=False, noncanon_pen=9,
junc_bed=None, junc_bonus=9):
def map_with_minimap2(fastq_fn, reference_fn, output_fn=None, threads=1,
k=14, w=5, splice_flank=True, noncanon_pen=9,
canon_splice_strand='f',
junc_bed=None, junc_bonus=12,
max_intron_size=100000,
end_seed_pen=12):
if not os.path.exists(fastq_fn):
raise OSError('fastq_fn not found')
elif not os.path.exists(reference_fn):
raise OSError('reference_fn not found')
splice_flank = 'yes' if use_canon else 'no'
noncanon_pen = noncanon_pen if use_canon else 0
use_canon = 'f' if use_canon else 'n'
splice_flank = 'yes' if splice_flank else 'no'
s_handle, sam_fn = tempfile.mkstemp(suffix='.sam')
b_handle, bam_fn = tempfile.mkstemp(suffix='.bam')

# run minimap
minimap2_cmd = [
MINIMAP2, f'-t{threads}', '-k14', '-w5', '--splice',
'-g2000', '-G10000', '-A1', '-B2', '-O2,32', '-E1,0',
f'-C{noncanon_pen}', f'--splice-flank={splice_flank}', f'-u{use_canon}',
'-z200', '-L', '--cs=long', '-a'
MINIMAP2, f'-t{threads}', '-a', '-x', 'splice',
'-L', '--cs=long', f'-G{max_intron_size}',
f'-C{noncanon_pen}', f'--splice-flank={splice_flank}',
f'-u{canon_splice_strand}', f'--end-seed-pen={end_seed_pen}',
]
if junc_bed is not None:
minimap2_cmd += ['--junc-bed', junc_bed, f'--junc-bonus={junc_bonus}']
minimap2_cmd += [reference_fn, fastq_fn]

log.info('Running minimap2')
log.debug('cmd: ' + ' '.join(minimap2_cmd))
minimap2_stderr = subprocess_command(minimap2_cmd, sam_fn)
log.info('Mapping complete')
log.debug(f'minimap2 stderr:\n{minimap2_stderr}')

# run samtools view
samtools_view_cmd = ['samtools', 'view', '-bS', sam_fn]
samtools_view_cmd = [SAMTOOLS, 'view', '-bS', sam_fn]
samtools_view_stderr = subprocess_command(samtools_view_cmd, bam_fn)


# clean up minimap2 output
os.close(s_handle)
os.remove(sam_fn)

# run samtools sort
samtools_sort_cmd = ['samtools', 'sort', '-@', str(threads), '-o', '-', bam_fn]
samtools_sort_cmd = [SAMTOOLS, 'sort', '-@', str(threads), '-o', '-', bam_fn]
samtools_sort_stderr = subprocess_command(samtools_sort_cmd, output_fn)
log.debug(f'samtools sort stderr:\n{samtools_sort_stderr}')

# clean up samtools view output
os.close(b_handle)
os.remove(bam_fn)

# run samtools index
samtools_index_cmd = ['samtools', 'index', output_fn]
samtools_index_cmd = [SAMTOOLS, 'index', output_fn]
subprocess.check_call(samtools_index_cmd)
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@

setup(
name='2passtools',
version='0.3.1',
version='0.4.1',
description=(
'two pass alignment of long noisy reads'
),
Expand Down