
Commit 2963adb

Backport PR #837 to 1.0.x (#842)
* Concatenate_h5mu: fix writing multi-dimensional annotation frames. (#837)
* Concatenate_h5mu: fix writing multidimensional annotation frames.
* Undo some changes
* Update test
* Add PR number
* Trigger CI
* Update CHANGELOG
* deploy: 57add3f
* Update CHANGELOG

Co-authored-by: DriesSchaumont <[email protected]>
1 parent 0ead6c6 commit 2963adb

573 files changed (+2368 additions, -2251 deletions)


CHANGELOG.md

Lines changed: 9 additions & 0 deletions
@@ -1,3 +1,12 @@
+# openpipelines 1.0.2
+
+## BUG FIXES
+
+* `dataflow/concatenate_h5mu`: fix writing out multidimensional annotation dataframes (e.g. `.varm`) that had their
+  data type (dtype) changed as a result of adding more observations after concatenation, causing a `TypeError`.
+  One notable example of this happening is when one of the samples does not have a multimodal annotation dataframe
+  which is present in another sample, causing the values to be filled with `NA` (PR #842, backported from PR #837).
+
 # openpipelines 1.0.1
 
 ## BUG FIXES
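For context, the failure mode described in the changelog entry can be reproduced with plain pandas: filling missing entries with `NA` during an outer join silently promotes a column to a mixed dtype, which writers then reject. The snippet below is only an illustrative sketch (generic column and feature names, not openpipelines code); casting to pandas' nullable extension dtypes is one way to get a writable column back.

```python
import numpy as np
import pandas as pd

# One sample has a boolean annotation column; the other sample lacks the frame entirely.
df_a = pd.DataFrame({"is_filtered": [True, False, True]}, index=["gene1", "gene2", "gene3"])

# Mimic what concatenation does: re-index to the union of features, so new rows are
# filled with NaN and the bool column is promoted to a mixed 'object' column,
# which HDF5-backed writers typically reject with a TypeError.
joined = df_a.reindex(["gene1", "gene2", "gene3", "gene4"])
print(joined["is_filtered"].dtype)  # object (True/False mixed with NaN)

# Casting to pandas' nullable extension dtypes restores a writable column.
print(joined.convert_dtypes()["is_filtered"].dtype)  # boolean, missing entries become <NA>
```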

src/dataflow/concatenate_h5mu/script.py

Lines changed: 9 additions & 1 deletion
@@ -220,7 +220,15 @@ def split_conflicts_modalities(n_processes: int, samples: dict[str, anndata.AnnD
         output_index = getattr(output, matrix_name).index
         conflicts, concatenated_matrix = concatenate_matrices(n_processes, matrices, output_index)
         if concatenated_matrix.empty:
-            concatenated_matrix.index = output_index
+            concatenated_matrix.index = output_index
+
+        # Even though we did not touch the varm and obsm matrices that were already present,
+        # the joining of observations might have caused a dtype change in these matrices as well
+        # so these also need to be casted to a writable dtype...
+        for multidim_name, multidim_data in getattr(output, f"{matrix_name}m").items():
+            new_data = cast_to_writeable_dtype(multidim_data) if isinstance(multidim_data, pd.DataFrame) else multidim_data
+            getattr(output, f"{matrix_name}m")[multidim_name] = new_data
+
         # Write the conflicts to the output
         for conflict_name, conflict_data in conflicts.items():
             getattr(output, f"{matrix_name}m")[conflict_name] = conflict_data
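The new loop delegates to `cast_to_writeable_dtype`, a helper that already exists in this component but whose body is not part of this diff. Purely as a hedged sketch of what such a helper might do (an assumption for illustration, not the component's actual implementation), the idea is to map NA-promoted columns back to pandas' nullable extension dtypes and categoricals:

```python
import pandas as pd

def cast_to_writeable_dtype_sketch(annotation_frame: pd.DataFrame) -> pd.DataFrame:
    """Hypothetical stand-in for the component's cast_to_writeable_dtype helper."""
    # Map object/float64 columns created by NA-filling back to nullable
    # extension dtypes (Int64, boolean, string), which serialise cleanly.
    result = annotation_frame.convert_dtypes()
    # String-like columns are commonly stored as categoricals in .var/.varm frames.
    for column in result.columns:
        if pd.api.types.is_string_dtype(result[column]):
            result[column] = result[column].astype("category")
    return result
```

The test changes below pin down the behaviour that actually matters: the dtypes expected after concatenation.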

src/dataflow/concatenate_h5mu/test.py

Lines changed: 57 additions & 5 deletions
@@ -478,9 +478,14 @@ def test_concat_different_columns_per_modality_and_per_sample(run_component, sam
     non_shared_features = data_sample1.var_names.difference(data_sample2.var_names)
     assert concatenated_data.var.loc[non_shared_features, 'mod2:Feat4'].isna().all()
 
-@pytest.mark.parametrize("test_value,expected", [("bar", "bar"), (True, True), (0.1, 0.1), (np.nan, pd.NA)])
+@pytest.mark.parametrize("test_value,test_value_dtype,expected", [("bar", "str", "bar"),
+                                                                  (True, pd.BooleanDtype(), True),
+                                                                  (1, pd.Int16Dtype(), 1),
+                                                                  (0.1, float, 0.1),
+                                                                  (0.1, np.float64, 0.1),
+                                                                  (np.nan, np.float64, pd.NA)])
 def test_concat_remove_na(run_component, sample_1_h5mu, sample_2_h5mu,
-                          write_mudata_to_file, random_h5mu_path, test_value, expected,
+                          write_mudata_to_file, random_h5mu_path, test_value, test_value_dtype, expected,
                           change_column_contents):
     """
     Test concatenation of samples where the column from one sample contains NA values
@@ -492,7 +497,7 @@ def test_concat_remove_na(run_component, sample_1_h5mu, sample_2_h5mu,
     """
     change_column_contents(sample_1_h5mu, 'var', 'Shared_feat', {'mod1': np.nan, 'mod2': np.nan})
     change_column_contents(sample_2_h5mu, 'var', 'Shared_feat', {'mod1': test_value, 'mod2': np.nan})
-
+    sample_2_h5mu.var['Shared_feat'] = sample_2_h5mu.var['Shared_feat'].astype(test_value_dtype)
     output_path = random_h5mu_path()
 
     run_component([
@@ -547,9 +552,17 @@ def test_concat_invalid_h5_error_includes_path(run_component, tmp_path,
                   err.value.stdout.decode('utf-8'))
 
 
-@pytest.mark.parametrize("test_value_1,test_value_2,expected", [(1, "1", pd.CategoricalDtype(categories=['1.0', '1']))])
+@pytest.mark.parametrize("test_value_1,value_1_dtype,test_value_2,value_2_dtype,expected",
+                         [(1, float, "1", str, pd.CategoricalDtype(categories=['1.0', '1'])),
+                          (1, np.float64, "1", str, pd.CategoricalDtype(categories=['1.0', '1'])),
+                          (1, pd.Int16Dtype(), 2.0, pd.Int16Dtype(), pd.Int64Dtype()),
+                          (True, bool, False, bool, pd.BooleanDtype()),
+                          (True, pd.BooleanDtype(), False, bool, pd.BooleanDtype()),
+                          ("foo", str, "bar", str, pd.CategoricalDtype(categories=['bar', 'foo'])),
+                         ]
+                         )
 def test_concat_dtypes_per_modality(run_component, write_mudata_to_file, change_column_contents,
-                                    sample_1_h5mu, sample_2_h5mu, test_value_1, test_value_2,
+                                    sample_1_h5mu, sample_2_h5mu, test_value_1, value_1_dtype, test_value_2, value_2_dtype,
                                     expected, random_h5mu_path):
     """
     Test joining column with different dtypes to make sure that they are writable.
@@ -561,7 +574,10 @@ def test_concat_dtypes_per_modality(run_component, write_mudata_to_file, change_
     for the test column in mod2 is still writable.
     """
     change_column_contents(sample_1_h5mu, "var", "test_col", {"mod1": test_value_1, "mod2": test_value_1})
+    sample_1_h5mu.var['test_col'] = sample_1_h5mu.var['test_col'].astype(value_1_dtype)
     change_column_contents(sample_2_h5mu, "var", "test_col", {"mod1": test_value_2, "mod2": test_value_2})
+    sample_2_h5mu.var['test_col'] = sample_2_h5mu.var['test_col'].astype(value_2_dtype)
+
     output_file = random_h5mu_path()
     run_component([
         "--input_id", "sample1;sample2",
@@ -573,6 +589,40 @@ def test_concat_dtypes_per_modality(run_component, write_mudata_to_file, change_
     concatenated_data = md.read(output_file)
     assert concatenated_data['mod2'].var['test_col'].dtype == expected
 
+
+@pytest.mark.parametrize("test_value,value_dtype,expected",
+                         [(1, float, pd.Int64Dtype()),
+                          (1, np.float64, pd.Int64Dtype()),
+                          (1, pd.Int16Dtype(), pd.Int16Dtype()),
+                          (True, bool, pd.BooleanDtype()),
+                          (True, pd.BooleanDtype(), pd.BooleanDtype()),
+                          ("foo", str, pd.CategoricalDtype(categories=['foo'])),
+                         ]
+                         )
+def test_concat_dtypes_per_modality_multidim(run_component, write_mudata_to_file,
+                                             sample_1_h5mu, sample_2_h5mu, test_value, value_dtype,
+                                             expected, random_h5mu_path):
+    """
+    Test if the result of concatenation is still writable when the input already contain
+    data in .varm and this data is kept. Because we are joining observations, the dtype of this
+    data may change and the result might not be writable anymore
+    """
+    sample_1_h5mu['mod1'].varm['test_df'] = pd.DataFrame(index=sample_1_h5mu['mod1'].var_names)
+    sample_1_h5mu['mod1'].varm['test_df']['test_col'] = test_value
+    sample_1_h5mu['mod1'].varm['test_df']['test_col'] = sample_1_h5mu['mod1'].varm['test_df']['test_col'].astype(value_dtype)
+
+    output_file = random_h5mu_path()
+    run_component([
+        "--input_id", "sample1;sample2",
+        "--input", write_mudata_to_file(sample_1_h5mu),
+        "--input", write_mudata_to_file(sample_2_h5mu),
+        "--output", output_file,
+        "--other_axis_mode", "move"
+    ])
+    concatenated_data = md.read(output_file)
+    assert concatenated_data['mod1'].varm['test_df']['test_col'].dtype == expected
+
 @pytest.mark.parametrize("test_value_1,test_value_2,expected", [(1, "1", pd.CategoricalDtype(categories=['1.0', '1']))])
 def test_concat_dtypes_global(run_component, write_mudata_to_file, change_column_contents,
                               sample_1_h5mu, sample_2_h5mu, test_value_1, test_value_2,
@@ -622,6 +672,8 @@ def test_non_overlapping_modalities(run_component, sample_2_h5mu, sample_3_h5mu,
         "--output", output_path,
         "--other_axis_mode", "move"
     ])
+    output_data = md.read(output_path)
+    assert set(output_data.mod.keys()) == {"mod1", "mod2", "mod3"}
 
 
 def test_resolve_annotation_conflict_missing_column(run_component, sample_1_h5mu,
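The expected dtypes in the new parametrisations line up with pandas' nullable-dtype inference: columns that already use nullable extension dtypes (such as `Int16`) survive the NA-filling join unchanged, while plain NumPy-typed columns get promoted and are then inferred as the corresponding nullable type (`Int64`, `boolean`) or a categorical for strings. A small standalone illustration (assuming only pandas and numpy; this is not part of the test suite):

```python
import numpy as np
import pandas as pd

# A nullable Int16 column can hold <NA> natively, so reindexing keeps its dtype.
int16_col = pd.Series([1, 2], dtype=pd.Int16Dtype()).reindex(range(3))
print(int16_col.dtype)  # Int16

# A plain float64 column holding integral values is promoted to hold NaN,
# then inferred back as nullable Int64 by convert_dtypes().
float_col = pd.Series([1.0, 2.0], dtype=np.float64).reindex(range(3))
print(float_col.convert_dtypes().dtype)  # Int64
```

This mirrors the `(1, pd.Int16Dtype(), pd.Int16Dtype())` and `(1, float, pd.Int64Dtype())` cases in the parametrisations above.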

target/docker/annotate/popv/.config.vsh.yaml

Lines changed: 3 additions & 3 deletions
@@ -1,7 +1,7 @@
 functionality:
   name: "popv"
   namespace: "annotate"
-  version: "1.0.1"
+  version: "1.0.2"
   authors:
   - name: "Matthias Beyens"
     roles:
@@ -352,6 +352,6 @@ info:
   output: "/home/runner/work/openpipeline/openpipeline/target/docker/annotate/popv"
   executable: "/home/runner/work/openpipeline/openpipeline/target/docker/annotate/popv/popv"
   viash_version: "0.8.6"
-  git_commit: "8ba584e550ac93c484a8b6ddea0c44618d02cf3f"
+  git_commit: "57add3ff137ac44d67bc7456cb9146bc6dc633cc"
   git_remote: "https://github.com/openpipelines-bio/openpipeline"
-  git_tag: "1.0.0-1-g8ba584e550"
+  git_tag: "1.0.1-3-g57add3ff13"

target/docker/annotate/popv/popv

Lines changed: 6 additions & 6 deletions
@@ -1,6 +1,6 @@
 #!/usr/bin/env bash
 
-# popv 1.0.1
+# popv 1.0.2
 #
 # This wrapper script is auto-generated by viash 0.8.6 and is thus a derivative
 # work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data
@@ -174,7 +174,7 @@ VIASH_META_TEMP_DIR="$VIASH_TEMP"
 
 # ViashHelp: Display helpful explanation about this executable
 function ViashHelp {
-  echo "popv 1.0.1"
+  echo "popv 1.0.2"
   echo ""
   echo "Performs popular major vote cell typing on single cell sequence data using"
   echo "multiple algorithms. Note that this is a one-shot version of PopV."
@@ -503,10 +503,10 @@ RUN cd /opt && git clone --depth 1 https://github.com/YosefLab/PopV.git && \
 
 LABEL org.opencontainers.image.authors="Matthias Beyens, Robrecht Cannoodt"
 LABEL org.opencontainers.image.description="Companion container for running component annotate popv"
-LABEL org.opencontainers.image.created="2024-06-18T14:14:56Z"
+LABEL org.opencontainers.image.created="2024-07-22T07:16:09Z"
 LABEL org.opencontainers.image.source="https://github.com/openpipelines-bio/openpipeline"
-LABEL org.opencontainers.image.revision="8ba584e550ac93c484a8b6ddea0c44618d02cf3f"
-LABEL org.opencontainers.image.version="1.0.1"
+LABEL org.opencontainers.image.revision="57add3ff137ac44d67bc7456cb9146bc6dc633cc"
+LABEL org.opencontainers.image.version="1.0.2"
 
 VIASHDOCKER
 }
@@ -661,7 +661,7 @@ while [[ $# -gt 0 ]]; do
            shift 1
            ;;
        --version)
-           echo "popv 1.0.1"
+           echo "popv 1.0.2"
            exit
            ;;
        --input)

target/docker/cluster/leiden/.config.vsh.yaml

Lines changed: 3 additions & 3 deletions
@@ -1,7 +1,7 @@
 functionality:
   name: "leiden"
   namespace: "cluster"
-  version: "1.0.1"
+  version: "1.0.2"
   authors:
   - name: "Dries De Maeyer"
     roles:
@@ -230,6 +230,6 @@ info:
   output: "/home/runner/work/openpipeline/openpipeline/target/docker/cluster/leiden"
   executable: "/home/runner/work/openpipeline/openpipeline/target/docker/cluster/leiden/leiden"
   viash_version: "0.8.6"
-  git_commit: "8ba584e550ac93c484a8b6ddea0c44618d02cf3f"
+  git_commit: "57add3ff137ac44d67bc7456cb9146bc6dc633cc"
   git_remote: "https://github.com/openpipelines-bio/openpipeline"
-  git_tag: "1.0.0-1-g8ba584e550"
+  git_tag: "1.0.1-3-g57add3ff13"

target/docker/cluster/leiden/leiden

Lines changed: 6 additions & 6 deletions
@@ -1,6 +1,6 @@
 #!/usr/bin/env bash
 
-# leiden 1.0.1
+# leiden 1.0.2
 #
 # This wrapper script is auto-generated by viash 0.8.6 and is thus a derivative
 # work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data
@@ -173,7 +173,7 @@ VIASH_META_TEMP_DIR="$VIASH_TEMP"
 
 # ViashHelp: Display helpful explanation about this executable
 function ViashHelp {
-  echo "leiden 1.0.1"
+  echo "leiden 1.0.2"
   echo ""
   echo "Cluster cells using the [Leiden algorithm] [Traag18] implemented in the [Scanpy"
   echo "framework] [Wolf18]."
@@ -460,10 +460,10 @@ RUN pip install --upgrade pip && \
 
 LABEL org.opencontainers.image.authors="Dries De Maeyer"
 LABEL org.opencontainers.image.description="Companion container for running component cluster leiden"
-LABEL org.opencontainers.image.created="2024-06-18T14:14:57Z"
+LABEL org.opencontainers.image.created="2024-07-22T07:16:05Z"
 LABEL org.opencontainers.image.source="https://github.com/openpipelines-bio/openpipeline"
-LABEL org.opencontainers.image.revision="8ba584e550ac93c484a8b6ddea0c44618d02cf3f"
-LABEL org.opencontainers.image.version="1.0.1"
+LABEL org.opencontainers.image.revision="57add3ff137ac44d67bc7456cb9146bc6dc633cc"
+LABEL org.opencontainers.image.version="1.0.2"
 
 VIASHDOCKER
 }
@@ -618,7 +618,7 @@ while [[ $# -gt 0 ]]; do
            shift 1
            ;;
        --version)
-           echo "leiden 1.0.1"
+           echo "leiden 1.0.2"
            exit
            ;;
        --input)

target/docker/compression/compress_h5mu/.config.vsh.yaml

Lines changed: 3 additions & 3 deletions
@@ -1,7 +1,7 @@
 functionality:
   name: "compress_h5mu"
   namespace: "compression"
-  version: "1.0.1"
+  version: "1.0.2"
   authors:
   - name: "Dries Schaumont"
     roles:
@@ -171,6 +171,6 @@ info:
   output: "/home/runner/work/openpipeline/openpipeline/target/docker/compression/compress_h5mu"
   executable: "/home/runner/work/openpipeline/openpipeline/target/docker/compression/compress_h5mu/compress_h5mu"
   viash_version: "0.8.6"
-  git_commit: "8ba584e550ac93c484a8b6ddea0c44618d02cf3f"
+  git_commit: "57add3ff137ac44d67bc7456cb9146bc6dc633cc"
   git_remote: "https://github.com/openpipelines-bio/openpipeline"
-  git_tag: "1.0.0-1-g8ba584e550"
+  git_tag: "1.0.1-3-g57add3ff13"

target/docker/compression/compress_h5mu/compress_h5mu

Lines changed: 6 additions & 6 deletions
@@ -1,6 +1,6 @@
 #!/usr/bin/env bash
 
-# compress_h5mu 1.0.1
+# compress_h5mu 1.0.2
 #
 # This wrapper script is auto-generated by viash 0.8.6 and is thus a derivative
 # work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data
@@ -173,7 +173,7 @@ VIASH_META_TEMP_DIR="$VIASH_TEMP"
 
 # ViashHelp: Display helpful explanation about this executable
 function ViashHelp {
-  echo "compress_h5mu 1.0.1"
+  echo "compress_h5mu 1.0.2"
   echo ""
   echo "Compress a MuData file."
   echo ""
@@ -423,10 +423,10 @@ RUN pip install --upgrade pip && \
 
 LABEL org.opencontainers.image.authors="Dries Schaumont"
 LABEL org.opencontainers.image.description="Companion container for running component compression compress_h5mu"
-LABEL org.opencontainers.image.created="2024-06-18T14:14:58Z"
+LABEL org.opencontainers.image.created="2024-07-22T07:16:06Z"
 LABEL org.opencontainers.image.source="https://github.com/openpipelines-bio/openpipeline"
-LABEL org.opencontainers.image.revision="8ba584e550ac93c484a8b6ddea0c44618d02cf3f"
-LABEL org.opencontainers.image.version="1.0.1"
+LABEL org.opencontainers.image.revision="57add3ff137ac44d67bc7456cb9146bc6dc633cc"
+LABEL org.opencontainers.image.version="1.0.2"
 
 VIASHDOCKER
 }
@@ -581,7 +581,7 @@ while [[ $# -gt 0 ]]; do
            shift 1
            ;;
        --version)
-           echo "compress_h5mu 1.0.1"
+           echo "compress_h5mu 1.0.2"
            exit
            ;;
        --input)

target/docker/compression/tar_extract/.config.vsh.yaml

Lines changed: 3 additions & 3 deletions
@@ -1,7 +1,7 @@
 functionality:
   name: "tar_extract"
   namespace: "compression"
-  version: "1.0.1"
+  version: "1.0.2"
   arguments:
   - type: "file"
     name: "--input"
@@ -107,6 +107,6 @@ info:
  output: "/home/runner/work/openpipeline/openpipeline/target/docker/compression/tar_extract"
  executable: "/home/runner/work/openpipeline/openpipeline/target/docker/compression/tar_extract/tar_extract"
  viash_version: "0.8.6"
- git_commit: "8ba584e550ac93c484a8b6ddea0c44618d02cf3f"
+ git_commit: "57add3ff137ac44d67bc7456cb9146bc6dc633cc"
  git_remote: "https://github.com/openpipelines-bio/openpipeline"
- git_tag: "1.0.0-1-g8ba584e550"
+ git_tag: "1.0.1-3-g57add3ff13"
