Skip to content

Commit 52a26e1

Browse files
authored
Switch type for element metadata slot to BiocFrame (#13)
1 parent 69e0170 commit 52a26e1

File tree

10 files changed

+57
-30
lines changed

10 files changed

+57
-30
lines changed

CHANGELOG.md

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,12 @@
11
# Changelog
22

3-
## Version 0.4.0 - 0.4.3
3+
## Version 0.4.0 - 0.4.4
44

55
- Classes extend `BiocObject` from biocutils. `metadata` is a named list.
66
- Update actions to run from 3.10-3.14
77
- Support empty compressed list objects of size `n`.
88
- Implement combine generic for compressed lists.
9+
- The element metadata slot is now a `BiocFrame` (previously a `dict`).
910

1011
## Version 0.3.0
1112

setup.cfg

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -49,9 +49,9 @@ python_requires = >=3.9
4949
# For more information, check out https://semver.org/.
5050
install_requires =
5151
importlib-metadata; python_version<"3.8"
52-
biocutils>=0.3.1
52+
biocutils>=0.3.3
5353
numpy
54-
biocframe>=0.7.1
54+
biocframe>=0.7.2
5555

5656

5757
[options.packages.find]

src/compressed_lists/base.py

Lines changed: 41 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
from warnings import warn
55

66
import biocutils as ut
7+
from biocframe import BiocFrame
78
import numpy as np
89

910
from .partition import Partitioning
@@ -13,6 +14,31 @@
1314
__license__ = "MIT"
1415

1516

17+
def is_pandas(x: Any) -> bool:
    """Check if ``x`` is a :py:class:`~pandas.DataFrame`.

    The check is a real ``isinstance`` test rather than duck typing on a
    ``dtypes`` attribute, so objects such as :py:class:`~pandas.Series` or
    frames from other libraries are not mistaken for a pandas ``DataFrame``.

    Args:
        x:
            Any object.

    Returns:
        True if ``x`` is a :py:class:`~pandas.DataFrame`.
    """
    import sys

    # An instance of ``pandas.DataFrame`` can only exist if pandas has already
    # been imported, so consulting ``sys.modules`` avoids importing pandas
    # (and avoids requiring it as a dependency) just to perform this check.
    pandas_module = sys.modules.get("pandas")
    if pandas_module is None:
        return False

    frame_type = getattr(pandas_module, "DataFrame", None)
    return frame_type is not None and isinstance(x, frame_type)
31+
32+
33+
def _sanitize_frame(frame, num_rows: int):
    """Normalize an element-metadata value.

    Args:
        frame:
            The supplied element metadata; may be ``None``.

        num_rows:
            Number of rows given to the empty ``BiocFrame`` that is created
            when ``frame`` is ``None``.

    Returns:
        An empty ``BiocFrame`` with ``num_rows`` rows when ``frame`` is
        ``None``; the result of ``BiocFrame.from_pandas`` when ``is_pandas``
        reports True for the value; otherwise the value unchanged.
    """
    if frame is None:
        frame = BiocFrame({}, number_of_rows=num_rows)

    # The pandas check runs on whatever value we now hold, including a freshly
    # created default frame, matching the original evaluation order.
    return BiocFrame.from_pandas(frame) if is_pandas(frame) else frame
40+
41+
1642
def _validate_data_and_partitions(unlist_data, partition):
1743
if len(unlist_data) != partition.nobj():
1844
raise ValueError(
@@ -33,7 +59,7 @@ def __init__(
3359
unlist_data: Any,
3460
partitioning: Partitioning,
3561
element_type: Any = None,
36-
element_metadata: Optional[dict] = None,
62+
element_metadata: Optional[BiocFrame] = None,
3763
metadata: Optional[Union[Dict[str, Any], ut.NamedList]] = None,
3864
_validate: bool = True,
3965
):
@@ -64,7 +90,7 @@ class for the type of elements.
6490
self._unlist_data = unlist_data
6591
self._partitioning = partitioning
6692
self._element_type = element_type
67-
self._element_metadata = element_metadata or {}
93+
self._element_metadata = _sanitize_frame(element_metadata, len(partitioning))
6894

6995
if _validate:
7096
_validate_data_and_partitions(self._unlist_data, self._partitioning)
@@ -93,6 +119,7 @@ def __deepcopy__(self, memo=None, _nil=[]):
93119
element_type=_elem_type_copy,
94120
element_metadata=_elem_metadata_copy,
95121
metadata=_metadata_copy,
122+
_validate=False,
96123
)
97124

98125
def __copy__(self):
@@ -107,6 +134,7 @@ def __copy__(self):
107134
element_type=self._element_type,
108135
element_metadata=self._element_metadata,
109136
metadata=self._metadata,
137+
_validate=False,
110138
)
111139

112140
def copy(self):
@@ -150,8 +178,7 @@ def __repr__(self) -> str:
150178
_etype_name = self._element_type.__name__
151179
output += ", element_type=" + _etype_name
152180

153-
if len(self._element_metadata) > 0:
154-
output += ", element_metadata=" + ut.print_truncated_dict(self._element_metadata)
181+
output += ", element_metadata=" + self._element_metadata.__repr__()
155182

156183
if len(self._metadata) > 0:
157184
output += ", metadata=" + ut.print_truncated_dict(self._metadata)
@@ -178,7 +205,7 @@ def __str__(self) -> str:
178205

179206
output += f"partitioning: {ut.print_truncated_list(self._partitioning)}\n"
180207

181-
output += f"element_metadata({str(len(self._element_metadata))}): {ut.print_truncated_list(list(self._element_metadata.keys()), sep=' ', include_brackets=False, transform=lambda y: y)}\n"
208+
output += f"element_metadata({str(len(self._element_metadata))} rows): {ut.print_truncated_list(list(self._element_metadata.get_column_names()), sep=' ', include_brackets=False, transform=lambda y: y)}\n"
182209
output += f"metadata({str(len(self._metadata))}): {ut.print_truncated_list(list(self._metadata.keys()), sep=' ', include_brackets=False, transform=lambda y: y)}\n"
183210

184211
return output
@@ -303,14 +330,14 @@ def unlist_data(self, unlist_data: Any):
303330
######>> element metadata <<#######
304331
###################################
305332

306-
def get_element_metadata(self) -> dict:
333+
def get_element_metadata(self) -> BiocFrame:
307334
"""
308335
Returns:
309336
A ``BiocFrame`` containing metadata for each element in this object.
310337
"""
311338
return self._element_metadata
312339

313-
def set_element_metadata(self, element_metadata: dict, in_place: bool = False) -> CompressedList:
340+
def set_element_metadata(self, element_metadata: BiocFrame, in_place: bool = False) -> CompressedList:
314341
"""Set new element metadata.
315342
316343
Args:
@@ -324,19 +351,20 @@ def set_element_metadata(self, element_metadata: dict, in_place: bool = False) -
324351
A modified ``CompressedList`` object, either as a copy of the original
325352
or as a reference to the (in-place-modified) original.
326353
"""
327-
if not isinstance(element_metadata, dict):
328-
raise TypeError(f"`element_metadata` must be a dictionary, provided {type(element_metadata)}.")
354+
if not isinstance(element_metadata, BiocFrame):
355+
raise TypeError(f"`element_metadata` must be a BiocFrame, provided {type(element_metadata)}.")
356+
329357
output = self._define_output(in_place)
330-
output._element_metadata = element_metadata
358+
output._element_metadata = _sanitize_frame(element_metadata, len(self._partitioning))
331359
return output
332360

333361
@property
334-
def element_metadata(self) -> dict:
362+
def element_metadata(self) -> BiocFrame:
335363
"""Alias for :py:attr:`~get_element_metadata`."""
336364
return self.get_element_metadata()
337365

338366
@element_metadata.setter
339-
def element_metadata(self, element_metadata: dict):
367+
def element_metadata(self, element_metadata: BiocFrame):
340368
"""Alias for :py:attr:`~set_element_metadata` with ``in_place = True``.
341369
342370
As this mutates the original object, a warning is raised.
@@ -576,7 +604,7 @@ def extract_subset(self, indices: Sequence[int]) -> CompressedList:
576604
new_data,
577605
new_partitioning,
578606
element_type=self._element_type,
579-
element_metadata={k: v for k, v in self._element_metadata.items() if k in indices},
607+
element_metadata=self._element_metadata[indices,],
580608
metadata=self._metadata.copy(),
581609
)
582610

src/compressed_lists/biocframe_list.py

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -133,8 +133,7 @@ def __repr__(self) -> str:
133133
else self._element_type
134134
)
135135

136-
if len(self._element_metadata) > 0:
137-
output += ", element_metadata=" + ut.print_truncated_dict(self._element_metadata)
136+
output += ", element_metadata=" + self._element_metadata.__repr__()
138137

139138
if len(self._metadata) > 0:
140139
output += ", metadata=" + ut.print_truncated_dict(self._metadata)
@@ -155,7 +154,7 @@ def __str__(self) -> str:
155154

156155
output += f"partitioning: {ut.print_truncated_list(self._partitioning)}\n"
157156

158-
output += f"element_metadata({str(len(self._element_metadata))}): {ut.print_truncated_list(list(self._element_metadata.keys()), sep=' ', include_brackets=False, transform=lambda y: y)}\n"
157+
output += f"element_metadata({str(len(self._element_metadata))} rows): {ut.print_truncated_list(list(self._element_metadata.get_column_names()), sep=' ', include_brackets=False, transform=lambda y: y)}\n"
159158
output += f"metadata({str(len(self._metadata))}): {ut.print_truncated_list(list(self._metadata.keys()), sep=' ', include_brackets=False, transform=lambda y: y)}\n"
160159

161160
return output

tests/test_base.py

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
import pytest
55

66
from compressed_lists import CompressedList
7+
from biocframe import BiocFrame
78

89
__author__ = "Jayaram Kancherla"
910
__copyright__ = "Jayaram Kancherla"
@@ -95,14 +96,13 @@ def test_base_metadata(base_list):
9596
base_list.metadata = meta
9697
assert base_list.metadata == ut.NamedList.from_dict({"source": "test"})
9798

98-
el_meta = {"info": "details"}
99+
el_meta = BiocFrame({"score": [1, 2, 3]})
99100
cl_el_meta = base_list.set_element_metadata(el_meta, in_place=False)
100-
assert base_list.element_metadata == {}
101-
assert cl_el_meta.element_metadata == {"info": "details"}
101+
assert len(base_list.element_metadata) == 3
102+
assert cl_el_meta.element_metadata.get_column("score") == el_meta.get_column("score")
102103

103-
with pytest.warns(UserWarning, match="Setting property 'element_metadata'"):
104-
base_list.element_metadata = el_meta
105-
assert base_list.element_metadata == {"info": "details"}
104+
with pytest.raises(Exception):
105+
base_list.element_metadata = {"info": "details"}
106106

107107

108108
def test_base_copying(base_list):

tests/test_comp_biocframe.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,6 @@ def test_bframe_typed_list_column():
4646

4747
def test_split_biocframe(frame_data):
4848
frame_data.set_column("groups", [0, 0, 1], in_place=True)
49-
print(frame_data)
5049
clist = splitAsCompressedList(frame_data, groups_or_partitions=frame_data.get_column("groups"))
5150

5251
assert isinstance(clist, CompressedSplitBiocFrameList)

tests/test_comp_bool.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
import pytest
2-
from biocutils.boolean_list import BooleanList
2+
from biocutils import BooleanList
33

44
from compressed_lists import CompressedBooleanList
55

tests/test_comp_float.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
import pytest
2-
from biocutils.float_list import FloatList
2+
from biocutils import FloatList
33

44
from compressed_lists import CompressedFloatList
55

tests/test_comp_int.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
import numpy as np
22
import pytest
3-
from biocutils.integer_list import IntegerList
3+
from biocutils import IntegerList
44

55
from compressed_lists import CompressedIntegerList, Partitioning
66

tests/test_comp_str.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
import pytest
2-
from biocutils.string_list import StringList
2+
from biocutils import StringList
33

44
from compressed_lists import CompressedStringList
55

0 commit comments

Comments
 (0)