44from warnings import warn
55
66import biocutils as ut
7+ from biocframe import BiocFrame
78import numpy as np
89
910from .partition import Partitioning
1314__license__ = "MIT"
1415
1516
def is_pandas(x: Any) -> bool:
    """Check if ``x`` is a :py:class:`~pandas.DataFrame`.

    pandas is treated as an optional dependency: it is never imported here.
    An object can only be a ``DataFrame`` if pandas has already been loaded,
    so the class is looked up through :py:data:`sys.modules` instead.

    Args:
        x:
            Any object.

    Returns:
        True if ``x`` is a :py:class:`~pandas.DataFrame` (or a subclass of
        it), otherwise False.
    """
    import sys

    # Checking only for a ``dtypes`` attribute also matches ``pandas.Series``
    # and unrelated objects, so compare against the real class instead.
    pandas_module = sys.modules.get("pandas")
    frame_cls = getattr(pandas_module, "DataFrame", None)
    return frame_cls is not None and isinstance(x, frame_cls)
31+
32+
def _sanitize_frame(frame, num_rows: int):
    """Normalize element metadata before it is stored.

    Args:
        frame:
            The supplied element metadata; may be None.

        num_rows:
            Row count given to the empty ``BiocFrame`` that is created
            when ``frame`` is None.

    Returns:
        A column-less ``BiocFrame`` with ``num_rows`` rows when ``frame`` is
        None, the result of ``BiocFrame.from_pandas`` when ``is_pandas``
        accepts ``frame``, and ``frame`` itself otherwise.
    """
    if frame is None:
        # Nothing supplied: substitute an empty frame of the requested height.
        frame = BiocFrame({}, number_of_rows=num_rows)

    if is_pandas(frame):
        frame = BiocFrame.from_pandas(frame)

    return frame
40+
41+
1642def _validate_data_and_partitions (unlist_data , partition ):
1743 if len (unlist_data ) != partition .nobj ():
1844 raise ValueError (
@@ -33,7 +59,7 @@ def __init__(
3359 unlist_data : Any ,
3460 partitioning : Partitioning ,
3561 element_type : Any = None ,
36- element_metadata : Optional [dict ] = None ,
62+ element_metadata : Optional [BiocFrame ] = None ,
3763 metadata : Optional [Union [Dict [str , Any ], ut .NamedList ]] = None ,
3864 _validate : bool = True ,
3965 ):
@@ -64,7 +90,7 @@ class for the type of elements.
6490 self ._unlist_data = unlist_data
6591 self ._partitioning = partitioning
6692 self ._element_type = element_type
67- self ._element_metadata = element_metadata or {}
93+ self ._element_metadata = _sanitize_frame ( element_metadata , len ( partitioning ))
6894
6995 if _validate :
7096 _validate_data_and_partitions (self ._unlist_data , self ._partitioning )
@@ -93,6 +119,7 @@ def __deepcopy__(self, memo=None, _nil=[]):
93119 element_type = _elem_type_copy ,
94120 element_metadata = _elem_metadata_copy ,
95121 metadata = _metadata_copy ,
122+ _validate = False ,
96123 )
97124
98125 def __copy__ (self ):
@@ -107,6 +134,7 @@ def __copy__(self):
107134 element_type = self ._element_type ,
108135 element_metadata = self ._element_metadata ,
109136 metadata = self ._metadata ,
137+ _validate = False ,
110138 )
111139
112140 def copy (self ):
@@ -150,8 +178,7 @@ def __repr__(self) -> str:
150178 _etype_name = self ._element_type .__name__
151179 output += ", element_type=" + _etype_name
152180
153- if len (self ._element_metadata ) > 0 :
154- output += ", element_metadata=" + ut .print_truncated_dict (self ._element_metadata )
181+ output += ", element_metadata=" + self ._element_metadata .__repr__ ()
155182
156183 if len (self ._metadata ) > 0 :
157184 output += ", metadata=" + ut .print_truncated_dict (self ._metadata )
@@ -178,7 +205,7 @@ def __str__(self) -> str:
178205
179206 output += f"partitioning: { ut .print_truncated_list (self ._partitioning )} \n "
180207
181- output += f"element_metadata({ str (len (self ._element_metadata ))} ): { ut .print_truncated_list (list (self ._element_metadata .keys ()), sep = ' ' , include_brackets = False , transform = lambda y : y )} \n "
208+ output += f"element_metadata({ str (len (self ._element_metadata ))} rows ): { ut .print_truncated_list (list (self ._element_metadata .get_column_names ()), sep = ' ' , include_brackets = False , transform = lambda y : y )} \n "
182209 output += f"metadata({ str (len (self ._metadata ))} ): { ut .print_truncated_list (list (self ._metadata .keys ()), sep = ' ' , include_brackets = False , transform = lambda y : y )} \n "
183210
184211 return output
@@ -303,14 +330,14 @@ def unlist_data(self, unlist_data: Any):
303330 ######>> element metadata <<#######
304331 ###################################
305332
306- def get_element_metadata (self ) -> dict :
333+ def get_element_metadata (self ) -> BiocFrame :
307334 """
308335 Returns:
309336 Dictionary of metadata for each element in this object.
310337 """
311338 return self ._element_metadata
312339
313- def set_element_metadata (self , element_metadata : dict , in_place : bool = False ) -> CompressedList :
340+ def set_element_metadata (self , element_metadata : BiocFrame , in_place : bool = False ) -> CompressedList :
314341 """Set new element metadata.
315342
316343 Args:
@@ -324,19 +351,20 @@ def set_element_metadata(self, element_metadata: dict, in_place: bool = False) -
324351 A modified ``CompressedList`` object, either as a copy of the original
325352 or as a reference to the (in-place-modified) original.
326353 """
327- if not isinstance (element_metadata , dict ):
328- raise TypeError (f"`element_metadata` must be a dictionary, provided { type (element_metadata )} ." )
354+ if not isinstance (element_metadata , BiocFrame ):
355+ raise TypeError (f"`element_metadata` must be a BiocFrame, provided { type (element_metadata )} ." )
356+
329357 output = self ._define_output (in_place )
330- output ._element_metadata = element_metadata
358+ output ._element_metadata = _sanitize_frame ( element_metadata , len ( self . _partitioning ))
331359 return output
332360
333361 @property
334- def element_metadata (self ) -> dict :
362+ def element_metadata (self ) -> BiocFrame :
335363 """Alias for :py:attr:`~get_element_metadata`."""
336364 return self .get_element_metadata ()
337365
338366 @element_metadata .setter
339- def element_metadata (self , element_metadata : dict ):
367+ def element_metadata (self , element_metadata : BiocFrame ):
340368 """Alias for :py:attr:`~set_element_metadata` with ``in_place = True``.
341369
342370 As this mutates the original object, a warning is raised.
@@ -576,7 +604,7 @@ def extract_subset(self, indices: Sequence[int]) -> CompressedList:
576604 new_data ,
577605 new_partitioning ,
578606 element_type = self ._element_type ,
579- element_metadata = { k : v for k , v in self ._element_metadata . items () if k in indices } ,
607+ element_metadata = self ._element_metadata [ indices ,] ,
580608 metadata = self ._metadata .copy (),
581609 )
582610
0 commit comments