Skip to content

Account for different 'layout's in sampletable #419

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 5 commits into
base: v1.12rc
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
38 changes: 14 additions & 24 deletions lib/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@
from lib import utils
from snakemake.shell import shell
from snakemake.io import expand
from lib import helpers
from pathlib import Path

# List of possible keys in config that are to be interpreted as paths
PATH_KEYS = [
Expand Down Expand Up @@ -623,8 +625,6 @@ def load_config(config, missing_references_ok=False):
Resolves any included references directories/files and runs the deprecation
handler.
"""
if isinstance(config, str):
config = yaml.load(open(config), Loader=yaml.FullLoader)

# Here we populate a list of reference sections. Items later on the list
# will have higher priority
Expand Down Expand Up @@ -722,32 +722,22 @@ def is_paired_end(sampletable, sample):
# We can't fall back to detecting PE based on two fastq files provided for
# each sample when it's an SRA sampletable (which only has SRR accessions).
#
# So first detect if this is an SRA sampletable based on presence of "Run"
# column and all values of that column starting with "SRR", and then raise
# an error if the Layout column does not exist.

if "Run" in sampletable.columns:
if all(sampletable["Run"].str.startswith("SRR")):
if "Layout" not in sampletable.columns and "layout" not in sampletable.columns:
raise ValueError(
"Sampletable appears to be SRA, but no 'Layout' column "
"found. This is required to specify single- or paired-end "
"libraries.")
# So instead first detect if there is in fact a second fastq file listed,
# and if not then check if the layout of the library is listed

row = sampletable.set_index(sampletable.columns[0]).loc[sample]
if 'orig_filename_R2' in row:
return True
if 'layout' in row and 'LibraryLayout' in row:
raise ValueError("Expecting column 'layout' or 'LibraryLayout', "
"not both")
try:
return row['layout'].lower() in ['pe', 'paired']
except KeyError:
pass
try:
return row['LibraryLayout'].lower() in ['pe', 'paired']
except KeyError:
pass
if "Run" in sampletable.columns:
if all(sampletable["Run"].str.startswith("SRR")):
layout_columns = set(sampletable.columns).intersection(['layout', 'LibraryLayout', 'Layout'])
if len(layout_columns) != 1:
raise ValueError("Expected exactly one of ['layout', 'LibraryLayout', 'Layout'] in sample table")
layout_column = list(layout_columns)[0]
try:
return row[layout_column].lower() in ['pe', 'paired']
except KeyError:
pass
return False


Expand Down
20 changes: 20 additions & 0 deletions lib/helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
from snakemake.shell import shell
from snakemake.io import expand, regex
from lib import common
import os


class ConfigurationError(Exception):
Expand Down Expand Up @@ -203,3 +204,22 @@ def strand_arg_lookup(config, lookup):
keys = list(lookup.keys())
raise KeyError(f"'{config.stranded}' not one of {keys}")
return lookup[config.stranded]

def get_top_level_dir(start_dir=None):
    """
    Locate the top-level directory of the repository.

    Starting at `start_dir` (or the current working directory when
    `start_dir` is None or empty), walk upward through the parent directories
    and return the first one that contains both a `.git` entry and a
    `workflows` directory.

    The `.git` entry is only checked for existence, not for being a
    directory: in linked worktrees and submodules `.git` is a plain file
    pointing at the real git directory, and those checkouts should be
    recognized as well.

    Parameters
    ----------
    start_dir : str or None
        Directory to start searching from. Defaults to the current working
        directory.

    Returns
    -------
    Absolute path of the matching directory as a string, or None if the
    filesystem root is reached without finding a match.
    """
    current_dir = os.path.abspath(start_dir or os.getcwd())
    while True:
        has_git = os.path.exists(os.path.join(current_dir, ".git"))
        has_workflows = os.path.isdir(os.path.join(current_dir, "workflows"))
        if has_git and has_workflows:
            return current_dir

        parent_dir = os.path.dirname(current_dir)
        # dirname() of the filesystem root is the root itself, so an
        # unchanged path means there is nowhere left to search.
        if parent_dir == current_dir:
            return None
        current_dir = parent_dir

23 changes: 20 additions & 3 deletions lib/test_suite.py → test/tests/test_suite.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,25 @@
import os
import pprint
import sys
import subprocess
# Put the repository root on sys.path so that `lib` can be imported. The root
# is located relative to this file (test/tests/test_suite.py, i.e. three
# levels up) instead of relative to the current working directory, so the
# result does not depend on where pytest is launched from or on a shell being
# available.
top_level_dir = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
print("top level dir: ", top_level_dir)
sys.path.insert(0, top_level_dir)
import pytest
from textwrap import dedent
from . import common
from lib import common, helpers, patterns_targets

# Reusable config fixture: any test can request it via indirect parametrization
@pytest.fixture
def config(request):
    """
    Build an RNASeqConfig from the config path supplied as `request.param`.

    The path is loaded with `common.load_config(..., test=True)`. The patterns
    file is taken from the loaded config's 'patterns' entry when present,
    otherwise the default rnaseq patterns path is used.
    """
    loaded = common.load_config(request.param, test=True)
    patterns_path = loaded.get('patterns', '../workflows/rnaseq/config/rnaseq_patterns.yaml')
    return patterns_targets.RNASeqConfig(loaded, patterns_path)

# Exercises helpers.detect_layout(), which implicitly tests
# common.is_paired_end().
# TODO: The expected result is hard-coded to match the current example
# sampletable; derive it from the table instead.
@pytest.mark.parametrize("config", ['../../workflows/rnaseq/config/config.yaml'], indirect=True)
def test_is_paired_end(config):
    """The example sampletable must not be detected as paired-end ('PE')."""
    detected_layout = helpers.detect_layout(config.sampletable)
    is_paired = detected_layout == 'PE'
    assert not is_paired, f"Test failed, is_paired = {is_paired}"

def test_config_loading(tmpdir):
f0 = tmpdir.mkdir('subdir').join('file0.yaml')
Expand Down