Skip to content

Commit e65b235

Browse files
authored
Merge pull request #44 from pepkit/dev
Changes that will be shipped with release 0.1.8
2 parents d6d4184 + feca97e commit e65b235

15 files changed

+226
-31
lines changed

docs/changelog.md

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,18 @@
22

33
This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html) and [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) format.
44

5+
## [0.1.8] - 2022-08-29
6+
### Changed
7+
- the way tables are merged for the multiline output format produced by `eido convert`
8+
### Added
9+
- better architecture for output formatters that follows the **open-closed principle**
10+
- use of mocks in some test cases
11+
- test data in the format that previously caused errors
12+
### Fixed
13+
- passing plugin keyword arguments to `run_filter` function
14+
- saving the output file now works for a bare file name such as `file.txt`; passing a full path is no longer required
15+
16+
517
## [0.1.7] - 2022-08-11
618
### Changed
719
- When a validation fails, `eido` will now return all errors instead of just the first one it finds.

eido/_version.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
__version__ = "0.1.7"
1+
__version__ = "0.1.8"

eido/conversion.py

Lines changed: 11 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
from pkg_resources import iter_entry_points
77

88
from .exceptions import *
9+
from typing import NoReturn
910

1011
_LOGGER = getLogger(__name__)
1112

@@ -40,7 +41,7 @@ def convert_project(prj, target_format, plugin_kwargs=None):
4041
:param str target_format: the format to convert the Project object to
4142
:raise EidoFilterError: if the requested filter is not defined
4243
"""
43-
return run_filter(prj, target_format, plugin_kwargs or dict())
44+
return run_filter(prj, target_format, plugin_kwargs=plugin_kwargs or dict())
4445

4546

4647
def run_filter(prj, filter_name, verbose=True, plugin_kwargs=None):
@@ -90,23 +91,24 @@ def run_filter(prj, filter_name, verbose=True, plugin_kwargs=None):
9091
)
9192
else:
9293
# create path if it doesn't exist
93-
if not os.path.exists(result_path):
94+
if not os.path.exists(result_path) and os.path.isdir(
95+
os.path.dirname(result_path)
96+
):
9497
os.makedirs(os.path.dirname(result_path), exist_ok=True)
95-
# write to path
96-
with open(result_path, "w") as f:
97-
f.write(conv_result[result_key])
98+
save_result(result_path, conv_result[result_key])
9899

99100
if verbose:
100101
for result_key in conv_result:
101102
sys.stdout.write(conv_result[result_key])
102-
else:
103-
# simply return from the function with
104-
# conversion results
105-
pass
106103

107104
return conv_result
108105

109106

107+
def save_result(result_path: str, content: str) -> NoReturn:
108+
with open(result_path, "w") as f:
109+
f.write(content)
110+
111+
110112
def get_available_pep_filters():
111113
"""
112114
Get a list of available target formats

eido/conversion_plugins.py

Lines changed: 2 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
""" built-in PEP filters """
22
from typing import Dict
3+
from .output_formatters import MultilineOutputFormatter
34

45

56
def basic_pep_filter(p, **kwargs) -> Dict[str, str]:
@@ -53,21 +54,7 @@ def csv_pep_filter(p, **kwargs) -> Dict[str, str]:
5354
5455
:param peppy.Project p: a Project to run filter on
5556
"""
56-
sample_table_path = kwargs.get("sample_table_path")
57-
subsample_table_path = kwargs.get("subsample_table_path")
58-
sample_table_repr = p.sample_table.to_csv(path_or_buf=sample_table_path)
59-
60-
s = ""
61-
if sample_table_repr is not None:
62-
s += sample_table_repr
63-
if p.subsample_table is not None:
64-
subsample_table_repr = p.subsample_table.to_csv(
65-
path_or_buf=subsample_table_path
66-
)
67-
if subsample_table_repr is not None:
68-
s += subsample_table_repr
69-
70-
return {"samples": s}
57+
return {"samples": MultilineOutputFormatter.format(p.samples)}
7158

7259

7360
def processed_pep_filter(p, **kwargs) -> Dict[str, str]:

eido/output_formatters.py

Lines changed: 125 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,125 @@
1+
from abc import ABC, abstractmethod
2+
from typing import Iterable, List, Union
3+
4+
from peppy.sample import Sample
5+
6+
7+
class BaseOutputFormatter(ABC):
8+
@staticmethod
9+
@abstractmethod
10+
def format(samples: List[Sample]):
11+
"""
12+
Convert the samples to correct format.
13+
"""
14+
pass
15+
16+
17+
class MultilineOutputFormatter(BaseOutputFormatter):
18+
@staticmethod
19+
def format(samples: List[Sample]) -> str:
20+
output_rows = []
21+
sample_attributes = [
22+
attribute
23+
for attribute in samples[0].keys()
24+
if not attribute.startswith("_") and not attribute == "subsample_name"
25+
]
26+
header = MultilineOutputFormatter._get_header(sample_attributes)
27+
28+
for sample in samples:
29+
30+
attribute_with_multiple_properties = MultilineOutputFormatter._get_the_name_of_the_first_attribute_with_multiple_properties(
31+
sample, sample_attributes
32+
)
33+
if attribute_with_multiple_properties:
34+
sample_rows = MultilineOutputFormatter._split_sample_to_multiple_rows(
35+
sample, sample_attributes, attribute_with_multiple_properties
36+
)
37+
output_rows.extend(sample_rows)
38+
else:
39+
one_sample_row = MultilineOutputFormatter._convert_sample_to_row(
40+
sample, sample_attributes
41+
)
42+
output_rows.append(one_sample_row)
43+
44+
return "\n".join(header + output_rows) + "\n"
45+
46+
@staticmethod
47+
def _get_header(header_column_names: List[str]):
48+
return [",".join(header_column_names)]
49+
50+
@staticmethod
51+
def _get_the_name_of_the_first_attribute_with_multiple_properties(
52+
sample: Sample, sample_attributes: List[str]
53+
) -> Union[str, None]:
54+
for attribute in sample_attributes:
55+
if MultilineOutputFormatter._sample_attribute_is_list(sample, attribute):
56+
return attribute
57+
58+
@staticmethod
59+
def _split_sample_to_multiple_rows(
60+
sample: Sample, sample_attributes: List, attribute_with_multiple_properties: str
61+
) -> Iterable[str]:
62+
"""
63+
If one sample object contains array properties instead of single value, then it will be converted
64+
to multiple rows.
65+
66+
Args:
67+
sample: Sample from project.
68+
sample_attributes: List of all sample properties names (name of columns from sample_table).
69+
70+
Returns:
71+
List of rows created from given sample object.
72+
"""
73+
number_of_samples_after_split = len(
74+
getattr(sample, attribute_with_multiple_properties)
75+
)
76+
sample_rows_after_split = []
77+
78+
for sample_index in range(number_of_samples_after_split):
79+
sample_row = MultilineOutputFormatter._convert_sample_to_row(
80+
sample, sample_attributes, sample_index
81+
)
82+
sample_rows_after_split.append(sample_row)
83+
84+
return sample_rows_after_split
85+
86+
@staticmethod
87+
def _convert_sample_to_row(
88+
sample: Sample, sample_attributes: List, sample_index: int = 0
89+
) -> str:
90+
"""
91+
Converts single sample object to CSV row.
92+
93+
Some samples have a list of values instead of single value for given attribute (column), and
94+
sample_index indicates index of the value that will be used to create a row. For samples that don't
95+
have any attributes with given names this will always be zero.
96+
97+
Args:
98+
sample: Single sample object.
99+
sample_attributes: Array of all attributes names (column names) for given sample.
100+
sample_index: Number indicating which value will be used to create the row when an attribute holds a list of values.
101+
102+
Returns:
103+
Representation of sample as a CSV row.
104+
"""
105+
sample_row = []
106+
107+
for attribute in sample_attributes:
108+
109+
if MultilineOutputFormatter._sample_attribute_is_list(sample, attribute):
110+
value = getattr(sample, attribute)[sample_index]
111+
else:
112+
value = getattr(sample, attribute, "")
113+
114+
sample_row.append(value)
115+
116+
return ",".join(sample_row)
117+
118+
@staticmethod
119+
def _sample_attribute_is_list(sample: Sample, attribute: str) -> bool:
120+
return isinstance(getattr(sample, attribute, ""), list)
121+
122+
123+
class SampleSubsampleOutputFormatter(BaseOutputFormatter):
124+
def format(self, samples: List[Sample]):
125+
pass

requirements/requirements-test.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,3 +2,4 @@ coveralls
22
mock>=2.0.0
33
pytest>=4.6.9
44
pytest-cov>=2.8.1
5+
pytest-mock==3.6.1

tests/conftest.py

Lines changed: 29 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
import os
22

3+
import pandas as pd
34
import pytest
45
from peppy import Project
56

@@ -21,7 +22,7 @@ def peps_path(data_path):
2122

2223
@pytest.fixture
2324
def project_file_path(peps_path):
24-
return os.path.join(peps_path, "test_cfg.yaml")
25+
return os.path.join(peps_path, "test_pep", "test_cfg.yaml")
2526

2627

2728
@pytest.fixture
@@ -52,3 +53,30 @@ def schema_sample_invalid_file_path(schemas_path):
5253
@pytest.fixture
5354
def schema_imports_file_path(schemas_path):
5455
return os.path.join(schemas_path, "test_schema_imports.yaml")
56+
57+
58+
@pytest.fixture
59+
def taxprofiler_project_path(peps_path):
60+
return os.path.join(peps_path, "taxprofiler_pep", "config.yaml")
61+
62+
63+
@pytest.fixture
64+
def taxprofiler_project(taxprofiler_project_path):
65+
return Project(taxprofiler_project_path)
66+
67+
68+
@pytest.fixture
69+
def path_to_taxprofiler_csv_multiline_output(peps_path):
70+
return os.path.join(peps_path, "taxprofiler_pep", "multiline_output.csv")
71+
72+
73+
@pytest.fixture
74+
def taxprofiler_csv_multiline_output(path_to_taxprofiler_csv_multiline_output):
75+
return pd.read_csv(path_to_taxprofiler_csv_multiline_output).to_csv(
76+
path_or_buf=None, index=None
77+
)
78+
79+
80+
@pytest.fixture
81+
def save_result_mock(mocker):
82+
return mocker.patch("eido.conversion.save_result")
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
pep_version: "2.0.0"
2+
sample_table: "samplesheet.csv"
3+
subsample_table: "subsamplesheet.csv"
4+
sample_table_index: "sample"
5+
subsample_table_index: "sample"
Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
sample,strandedness,instrument_platform,run_accession,fastq_1,fastq_2
2+
WT_REP1,reverse,ABI_SOLID,runaccession1,https://raw.githubusercontent.com/nf-core/test-datasets/rnaseq/testdata/GSE110004/SRR6357070_1.fastq.gz,https://raw.githubusercontent.com/nf-core/test-datasets/rnaseq/testdata/GSE110004/SRR6357070_2.fastq.gz
3+
WT_REP1,reverse,BGISEQ,runaccession2,https://raw.githubusercontent.com/nf-core/test-datasets/rnaseq/testdata/GSE110004/SRR6357071_1.fastq.gz,https://raw.githubusercontent.com/nf-core/test-datasets/rnaseq/testdata/GSE110004/SRR6357071_2.fastq.gz
4+
WT_REP2,reverse,CAPILLARY,123123123,https://raw.githubusercontent.com/nf-core/test-datasets/rnaseq/testdata/GSE110004/SRR6357072_1.fastq.gz,https://raw.githubusercontent.com/nf-core/test-datasets/rnaseq/testdata/GSE110004/SRR6357072_2.fastq.gz
5+
RAP1_UNINDUCED_REP1,reverse,COMPLETE_GENOMICS,somerunaccesion,https://raw.githubusercontent.com/nf-core/test-datasets/rnaseq/testdata/GSE110004/SRR6357073_1.fastq.gz,
6+
RAP1_UNINDUCED_REP2,reverse,DNBSEQ,ERR2412421,https://raw.githubusercontent.com/nf-core/test-datasets/rnaseq/testdata/GSE110004/SRR6357074_1.fastq.gz,
7+
RAP1_UNINDUCED_REP2,reverse,HELICOS,xxxxxxxxxx,https://raw.githubusercontent.com/nf-core/test-datasets/rnaseq/testdata/GSE110004/SRR6357075_1.fastq.gz,
8+
RAP1_IAA_30M_REP1,reverse,ILLUMINA,None,https://raw.githubusercontent.com/nf-core/test-datasets/rnaseq/testdata/GSE110004/SRR6357076_1.fastq.gz,https://raw.githubusercontent.com/nf-core/test-datasets/rnaseq/testdata/GSE110004/SRR6357076_2.fastq.gz
Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
sample,strandedness
2+
WT_REP1,reverse
3+
WT_REP2,reverse
4+
RAP1_UNINDUCED_REP1,reverse
5+
RAP1_UNINDUCED_REP2,reverse
6+
RAP1_IAA_30M_REP1,reverse
Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
sample,instrument_platform,run_accession,fastq_1,fastq_2
2+
WT_REP1,ABI_SOLID,runaccession1,https://raw.githubusercontent.com/nf-core/test-datasets/rnaseq/testdata/GSE110004/SRR6357070_1.fastq.gz,https://raw.githubusercontent.com/nf-core/test-datasets/rnaseq/testdata/GSE110004/SRR6357070_2.fastq.gz
3+
WT_REP1,BGISEQ,runaccession2,https://raw.githubusercontent.com/nf-core/test-datasets/rnaseq/testdata/GSE110004/SRR6357071_1.fastq.gz,https://raw.githubusercontent.com/nf-core/test-datasets/rnaseq/testdata/GSE110004/SRR6357071_2.fastq.gz
4+
WT_REP2,CAPILLARY,123123123,https://raw.githubusercontent.com/nf-core/test-datasets/rnaseq/testdata/GSE110004/SRR6357072_1.fastq.gz,https://raw.githubusercontent.com/nf-core/test-datasets/rnaseq/testdata/GSE110004/SRR6357072_2.fastq.gz
5+
RAP1_UNINDUCED_REP1,COMPLETE_GENOMICS,somerunaccesion,https://raw.githubusercontent.com/nf-core/test-datasets/rnaseq/testdata/GSE110004/SRR6357073_1.fastq.gz,
6+
RAP1_UNINDUCED_REP2,DNBSEQ,ERR2412421,https://raw.githubusercontent.com/nf-core/test-datasets/rnaseq/testdata/GSE110004/SRR6357074_1.fastq.gz,
7+
RAP1_UNINDUCED_REP2,HELICOS,xxxxxxxxxx,https://raw.githubusercontent.com/nf-core/test-datasets/rnaseq/testdata/GSE110004/SRR6357075_1.fastq.gz,
8+
RAP1_IAA_30M_REP1,ILLUMINA,None,https://raw.githubusercontent.com/nf-core/test-datasets/rnaseq/testdata/GSE110004/SRR6357076_1.fastq.gz,https://raw.githubusercontent.com/nf-core/test-datasets/rnaseq/testdata/GSE110004/SRR6357076_2.fastq.gz

tests/test_conversions.py

Lines changed: 17 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,4 @@
11
from eido.conversion import *
2-
import peppy
32

43

54
class TestConversionInfrastructure:
@@ -20,12 +19,27 @@ def test_plugins_are_callable(self):
2019
[callable(plugin_fun) for plugin_name, plugin_fun in avail_plugins.items()]
2120
)
2221

23-
def test_basic_filter(self, project_object):
22+
def test_basic_filter(self, save_result_mock, project_object):
2423
conv_result = run_filter(
2524
project_object,
2625
"basic",
2726
verbose=False,
2827
plugin_kwargs={"paths": {"project": "out/basic_prj.txt"}},
2928
)
30-
# the basic filter just converts to a string
29+
30+
assert save_result_mock.called
3131
assert conv_result["project"] == str(project_object)
32+
33+
def test_csv_filter(
34+
self, save_result_mock, taxprofiler_project, taxprofiler_csv_multiline_output
35+
):
36+
37+
conv_result = run_filter(
38+
taxprofiler_project,
39+
"csv",
40+
verbose=False,
41+
plugin_kwargs={"paths": {"samples": "out/basic_prj.txt"}},
42+
)
43+
44+
assert save_result_mock.called
45+
assert conv_result["samples"] == taxprofiler_csv_multiline_output

tests/test_validations.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -136,6 +136,5 @@ def test_validate_detects_invalid(self, schema_invalid_file_path, remote_pep_cfg
136136
_check_remote_file_accessible(remote_pep_cfg)
137137
with pytest.raises(EidoValidationError):
138138
validate_project(
139-
project=Project(remote_pep_cfg),
140-
schema=schema_invalid_file_path,
139+
project=Project(remote_pep_cfg), schema=schema_invalid_file_path
141140
)

0 commit comments

Comments
 (0)