9
9
from databento .common .data import (
10
10
COLUMNS ,
11
11
DEFINITION_CHARARRAY_COLUMNS ,
12
+ DEFINITION_PRICE_COLUMNS ,
13
+ DEFINITION_TYPE_MAX_MAP ,
12
14
DERIV_SCHEMAS ,
13
15
STRUCT_MAP ,
14
16
)
@@ -442,8 +444,20 @@ def to_df(
442
444
"""
443
445
df = pd .DataFrame (self .to_ndarray ())
444
446
df .set_index (self ._get_index_column (), inplace = True )
447
+ df = self ._cleanup_dataframe (df )
445
448
446
- # Cleanup dataframe
449
+ if pretty_ts :
450
+ df = self ._apply_pretty_ts (df )
451
+
452
+ if pretty_px :
453
+ df = self ._apply_pretty_px (df )
454
+
455
+ if map_symbols and self .schema != Schema .DEFINITION :
456
+ df = self ._map_symbols (df , pretty_ts )
457
+
458
+ return df
459
+
460
+ def _cleanup_dataframe (self , df : pd .DataFrame ) -> pd .DataFrame :
447
461
df .drop (["length" , "rtype" ], axis = 1 , inplace = True )
448
462
if self .schema == Schema .MBO or self .schema in DERIV_SCHEMAS :
449
463
df = df .reindex (columns = COLUMNS [self .schema ])
@@ -453,39 +467,52 @@ def to_df(
453
467
elif self .schema == Schema .DEFINITION :
454
468
for column in DEFINITION_CHARARRAY_COLUMNS :
455
469
df [column ] = df [column ].str .decode ("utf-8" )
470
+ for column , type_max in DEFINITION_TYPE_MAX_MAP .items ():
471
+ if column in df .columns :
472
+ df [column ] = df [column ].where (df [column ] != type_max , np .nan )
456
473
457
- if pretty_ts :
458
- df .index = pd .to_datetime (df .index , utc = True )
459
- for column in df .columns :
460
- if column .startswith ("ts_" ) and "delta" not in column :
461
- df [column ] = pd .to_datetime (df [column ], utc = True )
474
+ return df
462
475
463
- if self .schema == Schema .DEFINITION :
464
- df ["expiration" ] = pd .to_datetime (df ["expiration" ], utc = True )
465
- df ["activation" ] = pd .to_datetime (df ["activation" ], utc = True )
476
def _apply_pretty_ts(self, df: pd.DataFrame) -> pd.DataFrame:
    """
    Convert the index and timestamp columns of `df` to UTC datetimes.

    The index and every column whose name starts with ``ts_`` and does not
    contain ``delta`` are passed through `pd.to_datetime` with ``utc=True``.
    For the definition schema, the ``expiration`` and ``activation``
    columns are converted as well.

    Parameters
    ----------
    df : pd.DataFrame
        The frame to convert. Columns are reassigned on this object.

    Returns
    -------
    pd.DataFrame
        The same frame, with converted index and columns.

    """
    df.index = pd.to_datetime(df.index, utc=True)

    ts_columns = [
        column
        for column in df.columns
        if column.startswith("ts_") and "delta" not in column
    ]

    if self.schema == Schema.DEFINITION:
        # Only convert the definition timestamps that are actually present,
        # so a frame lacking them does not raise a KeyError.
        ts_columns.extend(
            column
            for column in ("expiration", "activation")
            if column in df.columns
        )

    for column in ts_columns:
        df[column] = pd.to_datetime(df[column], utc=True)

    return df
487
+
488
def _apply_pretty_px(self, df: pd.DataFrame) -> pd.DataFrame:
    """
    Scale the price columns of `df` by 1e-9.

    A column is scaled when it is named ``price``, ``open``, ``high``,
    ``low`` or ``close``, or when its name starts with ``bid_px`` or
    ``ask_px``. For the definition schema, every column listed in
    `DEFINITION_PRICE_COLUMNS` that is present in the frame is scaled too.
    Each column is scaled at most once.

    Parameters
    ----------
    df : pd.DataFrame
        The frame to convert. Columns are reassigned on this object.

    Returns
    -------
    pd.DataFrame
        The same frame, with scaled price columns.

    """
    px_names = ("price", "open", "high", "low", "close")
    px_prefixes = ("bid_px", "ask_px")  # MBP

    targets = [
        column
        for column in df.columns
        if column in px_names or column.startswith(px_prefixes)
    ]

    if self.schema == Schema.DEFINITION:
        # Skip absent columns (avoids a KeyError) and columns already
        # selected above (avoids scaling the same column twice).
        targets.extend(
            column
            for column in DEFINITION_PRICE_COLUMNS
            if column in df.columns and column not in targets
        )

    for column in targets:
        df[column] = df[column] * 1e-9

    return df
502
+
503
+ def _map_symbols (self , df : pd .DataFrame , pretty_ts : bool ) -> pd .DataFrame :
504
+ # Build product ID index
505
+ if not self ._product_id_index :
506
+ self ._product_id_index = self ._build_product_id_index ()
507
+
508
+ # Map product IDs to native symbols
509
+ if self ._product_id_index :
510
+ df_index = df .index if pretty_ts else pd .to_datetime (df .index , utc = True )
511
+ dates = [ts .date () for ts in df_index ]
512
+ df ["symbol" ] = [
513
+ self ._product_id_index [dates [i ]][p ]
514
+ for i , p in enumerate (df ["product_id" ])
515
+ ]
489
516
490
517
return df
491
518
0 commit comments