
Commit f6afb13

DriesSchaumont authored and github-actions[bot] committed

deploy: aa74d10

1 parent: e56b8a4

File tree

576 files changed: +4070 −3689 lines


CHANGELOG.md

Lines changed: 7 additions & 0 deletions
```diff
@@ -1,3 +1,10 @@
+# openpipelines 1.0.0-rc6
+
+## BUG FIXES
+
+* `dataflow/concatenate_h5mu`: fix regression bug where observations are no longer linked to the correct metadata
+  after concatenation (PR #807)
+
 # openpipelines 1.0.0-rc5
 
 ## BUG FIXES
```

src/base/openpipelinetestutils/utils.py

Lines changed: 4 additions & 3 deletions
```diff
@@ -47,9 +47,10 @@ def _get_columns_in_all_modalities(annotation_object, axis_string: str):
                                  if column_name not in global_columns]
         extra_cols_to_remove += [column_name for column_name in column_names
                                  if column_name in global_columns]
-        axis_setter(annotation_object, axis_getter(annotation_object).drop(extra_cols_to_remove,
-                                                                           axis="columns",
-                                                                           inplace=False))
+        if modality_name:
+            axis_setter(annotation_object, axis_getter(annotation_object).drop(extra_cols_to_remove,
+                                                                               axis="columns",
+                                                                               inplace=False))
 
     for mod_name in modality_names:
         modality = annotation_object.mod[mod_name]
```
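
The fix wraps the column drop in `if modality_name:` so that columns shared across all modalities are only pruned from a modality's annotation frame, never from the global one. A minimal pandas-only sketch of the guarded drop (the helper below is hypothetical, standing in for the `axis_getter`/`axis_setter` machinery in utils.py):

```python
import pandas as pd

def drop_shared_columns(frame: pd.DataFrame, shared: list[str],
                        modality_name: str | None) -> pd.DataFrame:
    # Only prune per-modality frames; the global frame keeps its columns.
    if modality_name:
        return frame.drop([c for c in shared if c in frame.columns],
                          axis="columns", inplace=False)
    return frame

obs = pd.DataFrame({"sample_id": ["a", "b"], "qc": [0.9, 0.8]},
                   index=["cell1", "cell2"])
print(drop_shared_columns(obs, ["sample_id"], "rna").columns.tolist())  # ['qc']
print(drop_shared_columns(obs, ["sample_id"], None).columns.tolist())   # ['sample_id', 'qc']
```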

src/dataflow/concat/config.vsh.yaml

Lines changed: 2 additions & 2 deletions
```diff
@@ -63,7 +63,7 @@ functionality:
       - path: /resources_test/concat_test_data/human_brain_3k_filtered_feature_bc_matrix_subset_unique_obs.h5mu
 platforms:
   - type: docker
-    image: python:3.10-slim
+    image: python:3.11-slim
     setup:
       - type: apt
         packages:
@@ -72,9 +72,9 @@ platforms:
         __merge__: [/src/base/requirements/anndata_mudata.yaml, .]
         packages:
           - pandas~=2.1.1
+    __merge__: [ /src/base/requirements/python_test_setup.yaml, .]
     test_setup:
       - type: python
-        __merge__: [ /src/base/requirements/viashpy.yaml, .]
         packages:
           - muon
   - type: native
```

src/dataflow/concatenate_h5mu/config.vsh.yaml

Lines changed: 2 additions & 2 deletions
```diff
@@ -62,7 +62,7 @@ functionality:
       - path: /resources_test/concat_test_data/human_brain_3k_filtered_feature_bc_matrix_subset_unique_obs.h5mu
 platforms:
   - type: docker
-    image: python:3.10-slim
+    image: python:3.11-slim
     setup:
       - type: apt
         packages:
@@ -71,9 +71,9 @@ platforms:
         __merge__: [/src/base/requirements/anndata_mudata.yaml, .]
         packages:
           - pandas~=2.1.1
+    __merge__: [ /src/base/requirements/python_test_setup.yaml, .]
     test_setup:
       - type: python
-        __merge__: [ /src/base/requirements/viashpy.yaml, .]
         packages:
           - muon
   - type: native
```

src/dataflow/concatenate_h5mu/script.py

Lines changed: 7 additions & 10 deletions
```diff
@@ -121,7 +121,7 @@ def any_row_contains_duplicate_values(n_processes: int, frame: pd.DataFrame) ->
         is_duplicated = pool.map(nunique, iter(numpy_array))
     return any(is_duplicated)
 
-def concatenate_matrices(n_processes: int, matrices: dict[str, pd.DataFrame], align_to: pd.Index | None) \
+def concatenate_matrices(n_processes: int, matrices: dict[str, pd.DataFrame], align_to: pd.Index) \
     -> tuple[dict[str, pd.DataFrame], pd.DataFrame | None, dict[str, pd.core.dtypes.dtypes.Dtype]]:
     """
     Merge matrices by combining columns that have the same name.
@@ -152,7 +152,7 @@ def get_first_non_na_value_vector(df):
 def split_conflicts_and_concatenated_columns(n_processes: int,
                                              matrices: dict[str, pd.DataFrame],
                                              column_names: Iterable[str],
-                                             align_to: pd.Index | None = None) -> \
+                                             align_to: pd.Index) -> \
         tuple[dict[str, pd.DataFrame], pd.DataFrame]:
     """
     Retrieve columns with the same name from a list of dataframes which are
```
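
Both hunks tighten the same contract: `align_to` loses its `| None` escape hatch and becomes a required `pd.Index`, so callers can no longer skip alignment by omission. A toy illustration of the difference (not the real functions):

```python
import pandas as pd

# Before (sketch): alignment was optional and silently skipped when None.
def align_optional(df: pd.DataFrame, align_to: pd.Index | None = None) -> pd.DataFrame:
    return df if align_to is None else df.reindex(align_to, copy=False)

# After (sketch): alignment is unconditional, and forgetting the index
# is an immediate TypeError instead of a silent misalignment.
def align_required(df: pd.DataFrame, align_to: pd.Index) -> pd.DataFrame:
    return df.reindex(align_to, copy=False)
```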
```diff
@@ -172,8 +172,7 @@ def split_conflicts_and_concatenated_columns(n_processes: int,
                                           join="outer", sort=False)
         if any_row_contains_duplicate_values(n_processes, concatenated_columns):
             concatenated_columns.columns = columns.keys() # Use the sample id as column name
-            if align_to is not None:
-                concatenated_columns = concatenated_columns.reindex(align_to, copy=False)
+            concatenated_columns = concatenated_columns.reindex(align_to, copy=False)
             conflicts[f'conflict_{column_name}'] = concatenated_columns
         else:
             unique_values = get_first_non_na_value_vector(concatenated_columns)
@@ -182,8 +181,7 @@
         return conflicts, pd.DataFrame(index=align_to)
     concatenated_matrix = pd.concat(concatenated_matrix, join="outer",
                                     axis=1, sort=False)
-    if align_to is not None:
-        concatenated_matrix = concatenated_matrix.reindex(align_to, copy=False)
+    concatenated_matrix = concatenated_matrix.reindex(align_to, copy=False)
     return conflicts, concatenated_matrix
 
 def cast_to_writeable_dtype(result: pd.DataFrame) -> pd.DataFrame:
@@ -220,8 +218,7 @@ def split_conflicts_modalities(n_processes: int, samples: dict[str, anndata.AnnD
     for matrix_name in matrices_to_parse:
         matrices = {sample_id: getattr(sample, matrix_name) for sample_id, sample in samples.items()}
         output_index = getattr(output, matrix_name).index
-        align_to = output_index if matrix_name == "var" else None
-        conflicts, concatenated_matrix = concatenate_matrices(n_processes, matrices, align_to)
+        conflicts, concatenated_matrix = concatenate_matrices(n_processes, matrices, output_index)
         if concatenated_matrix.empty:
             concatenated_matrix.index = output_index
         # Write the conflicts to the output
```
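
The last hunk above is the regression fix called out in the CHANGELOG: previously `align_to` was set to the output index only for `var`, leaving `obs` metadata in whatever row order the outer-join concatenation produced. A small pandas illustration of why the unconditional reindex matters (toy data, not the pipeline's):

```python
import pandas as pd

output_index = pd.Index(["cell1", "cell2", "cell3"])

# Metadata concatenated from two samples comes back in sample order,
# which need not match the row order of the concatenated data matrix.
metadata = pd.concat([
    pd.DataFrame({"batch": ["s2"]}, index=["cell3"]),
    pd.DataFrame({"batch": ["s1", "s1"]}, index=["cell1", "cell2"]),
], join="outer", sort=False)
print(metadata.index.tolist())  # ['cell3', 'cell1', 'cell2'] -- misaligned

aligned = metadata.reindex(output_index, copy=False)
print(aligned.index.tolist())   # ['cell1', 'cell2', 'cell3'] -- matches the data
```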
```diff
@@ -238,7 +235,7 @@ def concatenate_modality(n_processes: int, mod: str, input_files: Iterable[str |
                          other_axis_mode: str, input_ids: tuple[str]) -> anndata.AnnData:
 
     concat_modes = {
-        "move": None,
+        "move": "unique",
     }
     other_axis_mode_to_apply = concat_modes.get(other_axis_mode, other_axis_mode)
 
```
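
`other_axis_mode_to_apply` presumably ends up as the `merge` strategy of `anndata.concat` (an assumption; the call site is outside this hunk). `merge=None` drops the other axis' annotation columns entirely, while `"unique"` keeps columns whose values agree across samples, so mapping `"move"` to `"unique"` preserves the shared annotations. A small sketch of the two strategies:

```python
import anndata as ad
import numpy as np
import pandas as pd

var = pd.DataFrame({"gene_name": ["g1", "g2"]}, index=["g1", "g2"])
a = ad.AnnData(np.ones((2, 2)), obs=pd.DataFrame(index=["c1", "c2"]), var=var)
b = ad.AnnData(np.zeros((2, 2)), obs=pd.DataFrame(index=["c3", "c4"]), var=var.copy())

# merge=None discards var columns; merge="unique" keeps columns whose
# value is identical for each variable across all inputs.
print(ad.concat([a, b], merge=None).var.columns.tolist())      # []
print(ad.concat([a, b], merge="unique").var.columns.tolist())  # ['gene_name']
```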
```diff
@@ -247,7 +244,7 @@
         try:
             mod_data[input_id] = mu.read_h5ad(input_file, mod=mod)
         except KeyError as e: # Modality does not exist for this sample, skip it
-            if f"Unable to open object '{mod}' doesn't exist" not in str(e):
+            if f"Unable to synchronously open object (object '{mod}' doesn't exist)" not in str(e):
                 raise e
             pass
     check_observations_unique(mod_data.values())
```
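
The old string no longer matches what recent HDF5/h5py releases raise: their error text gained a "synchronously" wording, so missing modalities started to re-raise instead of being skipped. If both generations of the message need to be tolerated, matching only the stable tail is one option; a hedged sketch (hypothetical helper, mirroring the script's `mu.read_h5ad` call):

```python
import mudata as mu

def try_read_modality(input_file: str, mod: str):
    """Return the AnnData for `mod`, or None when the file lacks that modality."""
    try:
        return mu.read_h5ad(input_file, mod=mod)
    except KeyError as e:
        # h5py's wording changed across releases ("Unable to open object ..."
        # vs "Unable to synchronously open object (...)"); match the stable part.
        if f"object '{mod}' doesn't exist" in str(e):
            return None
        raise
```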
