Skip to content

Commit fbfa989

Browse files
committed
ADD: Add support for definition schema
1 parent 5953d18 commit fbfa989

File tree

6 files changed: +85 −37 lines changed

CHANGELOG.md

Lines changed: 4 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,7 +1,10 @@
11
# Changelog
22

3-
## TBD
3+
## 0.7.0 - 2023-01-10
4+
- Added support for `definition` schema
45
- Updated `Flags` enum
6+
- Upgraded `dbz-python` to `0.2.1`
7+
- Upgraded `zstandard` to `0.19.0`
58

69
## 0.6.0 - 2022-12-02
710
- Added `metadata.get_dataset_condition` method to `Historical` client

README.md

Lines changed: 4 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -31,11 +31,11 @@ The library is fully compatible with the latest distribution of Anaconda 3.7 and
3131
The minimum dependencies as found in the `requirements.txt` are also listed below:
3232
- Python (>=3.7)
3333
- aiohttp (>=3.7.2)
34-
- dbz-python (>=0.2.0)
34+
- dbz-python (>=0.2.1)
3535
- numpy (>=1.17.0)
3636
- pandas (>=1.1.3)
3737
- requests (>=2.24.0)
38-
- zstandard (>=0.18.0)
38+
- zstandard (>=0.19.0)
3939

4040
## Installation
4141
To install the latest stable version of the package from PyPI:
@@ -56,6 +56,8 @@ import databento as db
5656
client = db.Historical('YOUR_API_KEY')
5757
data = client.timeseries.stream(
5858
dataset='GLBX.MDP3',
59+
symbols='ES.FUT',
60+
stype_in='smart',
5961
start='2022-06-10T14:30',
6062
end='2022-06-10T14:40',
6163
)

databento/common/bento.py

Lines changed: 58 additions & 31 deletions
Original file line number | Diff line number | Diff line change
@@ -9,6 +9,8 @@
99
from databento.common.data import (
1010
COLUMNS,
1111
DEFINITION_CHARARRAY_COLUMNS,
12+
DEFINITION_PRICE_COLUMNS,
13+
DEFINITION_TYPE_MAX_MAP,
1214
DERIV_SCHEMAS,
1315
STRUCT_MAP,
1416
)
@@ -442,8 +444,20 @@ def to_df(
442444
"""
443445
df = pd.DataFrame(self.to_ndarray())
444446
df.set_index(self._get_index_column(), inplace=True)
447+
df = self._cleanup_dataframe(df)
445448

446-
# Cleanup dataframe
449+
if pretty_ts:
450+
df = self._apply_pretty_ts(df)
451+
452+
if pretty_px:
453+
df = self._apply_pretty_px(df)
454+
455+
if map_symbols and self.schema != Schema.DEFINITION:
456+
df = self._map_symbols(df, pretty_ts)
457+
458+
return df
459+
460+
def _cleanup_dataframe(self, df: pd.DataFrame) -> pd.DataFrame:
447461
df.drop(["length", "rtype"], axis=1, inplace=True)
448462
if self.schema == Schema.MBO or self.schema in DERIV_SCHEMAS:
449463
df = df.reindex(columns=COLUMNS[self.schema])
@@ -453,39 +467,52 @@ def to_df(
453467
elif self.schema == Schema.DEFINITION:
454468
for column in DEFINITION_CHARARRAY_COLUMNS:
455469
df[column] = df[column].str.decode("utf-8")
470+
for column, type_max in DEFINITION_TYPE_MAX_MAP.items():
471+
if column in df.columns:
472+
df[column] = df[column].where(df[column] != type_max, np.nan)
456473

457-
if pretty_ts:
458-
df.index = pd.to_datetime(df.index, utc=True)
459-
for column in df.columns:
460-
if column.startswith("ts_") and "delta" not in column:
461-
df[column] = pd.to_datetime(df[column], utc=True)
474+
return df
462475

463-
if self.schema == Schema.DEFINITION:
464-
df["expiration"] = pd.to_datetime(df["expiration"], utc=True)
465-
df["activation"] = pd.to_datetime(df["activation"], utc=True)
476+
def _apply_pretty_ts(self, df: pd.DataFrame) -> pd.DataFrame:
477+
df.index = pd.to_datetime(df.index, utc=True)
478+
for column in df.columns:
479+
if column.startswith("ts_") and "delta" not in column:
480+
df[column] = pd.to_datetime(df[column], utc=True)
466481

467-
if pretty_px:
468-
for column in list(df.columns):
469-
if (
470-
column in ("price", "open", "high", "low", "close")
471-
or column.startswith("bid_px") # MBP
472-
or column.startswith("ask_px") # MBP
473-
):
474-
df[column] = df[column] * 1e-9
475-
476-
if map_symbols:
477-
# Build product ID index
478-
if not self._product_id_index:
479-
self._product_id_index = self._build_product_id_index()
480-
481-
# Map product IDs to native symbols
482-
if self._product_id_index:
483-
df_index = df.index if pretty_ts else pd.to_datetime(df.index, utc=True)
484-
dates = [ts.date() for ts in df_index]
485-
df["symbol"] = [
486-
self._product_id_index[dates[i]][p]
487-
for i, p in enumerate(df["product_id"])
488-
]
482+
if self.schema == Schema.DEFINITION:
483+
df["expiration"] = pd.to_datetime(df["expiration"], utc=True)
484+
df["activation"] = pd.to_datetime(df["activation"], utc=True)
485+
486+
return df
487+
488+
def _apply_pretty_px(self, df: pd.DataFrame) -> pd.DataFrame:
489+
for column in list(df.columns):
490+
if (
491+
column in ("price", "open", "high", "low", "close")
492+
or column.startswith("bid_px") # MBP
493+
or column.startswith("ask_px") # MBP
494+
):
495+
df[column] = df[column] * 1e-9
496+
497+
if self.schema == Schema.DEFINITION:
498+
for column in DEFINITION_PRICE_COLUMNS:
499+
df[column] = df[column] * 1e-9
500+
501+
return df
502+
503+
def _map_symbols(self, df: pd.DataFrame, pretty_ts: bool) -> pd.DataFrame:
504+
# Build product ID index
505+
if not self._product_id_index:
506+
self._product_id_index = self._build_product_id_index()
507+
508+
# Map product IDs to native symbols
509+
if self._product_id_index:
510+
df_index = df.index if pretty_ts else pd.to_datetime(df.index, utc=True)
511+
dates = [ts.date() for ts in df_index]
512+
df["symbol"] = [
513+
self._product_id_index[dates[i]][p]
514+
for i, p in enumerate(df["product_id"])
515+
]
489516

490517
return df
491518

databento/common/data.py

Lines changed: 16 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -204,6 +204,22 @@ def get_deriv_ba_types(level: int) -> List[Tuple[str, Union[type, str]]]:
204204
"user_defined_instrument",
205205
]
206206

207+
DEFINITION_PRICE_COLUMNS = [
208+
"min_price_increment",
209+
"display_factor",
210+
"high_limit_price",
211+
"low_limit_price",
212+
"max_price_variation",
213+
"trading_reference_price",
214+
"min_price_increment_amount",
215+
]
216+
217+
DEFINITION_TYPE_MAX_MAP = {
218+
x[0]: np.iinfo(x[1]).max
219+
for x in STRUCT_MAP[Schema.DEFINITION]
220+
if not isinstance(x[1], str)
221+
}
222+
207223
################################################################################
208224
# DBZ fields
209225
################################################################################

databento/version.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1 +1 @@
1-
__version__ = "0.6.0"
1+
__version__ = "0.7.0"

requirements.txt

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,6 @@
11
aiohttp>=3.7.2
2-
dbz-python>=0.2.0
2+
dbz-python>=0.2.1
33
numpy>=1.17.0
44
pandas>=1.1.3
55
requests>=2.24.0
6-
zstandard>=0.18.0
6+
zstandard>=0.19.0

0 commit comments

Comments
 (0)