ADD: Add support for definition schema

cjdsellers · cjdsellers · commit fbfa9897a1c5 · 2023-01-10T05:14:13.000Z
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -1,7 +1,10 @@
 # Changelog
 
-## TBD
+## 0.7.0 - 2023-01-10
+- Added support for `definition` schema
 - Updated `Flags` enum
+- Upgraded `dbz-python` to `0.2.1`
+- Upgraded `zstandard` to `0.19.0`
 
 ## 0.6.0 - 2022-12-02
 - Added `metadata.get_dataset_condition` method to `Historical` client
diff --git a/README.md b/README.md
@@ -31,11 +31,11 @@ The library is fully compatible with the latest distribution of Anaconda 3.7 and
 The minimum dependencies as found in the `requirements.txt` are also listed below:
 - Python (>=3.7)
 - aiohttp (>=3.7.2)
-- dbz-python (>=0.2.0)
+- dbz-python (>=0.2.1)
 - numpy (>=1.17.0)
 - pandas (>=1.1.3)
 - requests (>=2.24.0)
-- zstandard (>=0.18.0)
+- zstandard (>=0.19.0)
 
 ## Installation
 To install the latest stable version of the package from PyPI:
@@ -56,6 +56,8 @@ import databento as db
 client = db.Historical('YOUR_API_KEY')
 data = client.timeseries.stream(
     dataset='GLBX.MDP3',
+    symbols='ES.FUT',
+    stype_in='smart',
     start='2022-06-10T14:30',
     end='2022-06-10T14:40',
 )
diff --git a/databento/common/bento.py b/databento/common/bento.py
@@ -9,6 +9,8 @@
 from databento.common.data import (
     COLUMNS,
     DEFINITION_CHARARRAY_COLUMNS,
+    DEFINITION_PRICE_COLUMNS,
+    DEFINITION_TYPE_MAX_MAP,
     DERIV_SCHEMAS,
     STRUCT_MAP,
 )
@@ -442,8 +444,20 @@ def to_df(
         """
         df = pd.DataFrame(self.to_ndarray())
         df.set_index(self._get_index_column(), inplace=True)
+        df = self._cleanup_dataframe(df)
 
-        # Cleanup dataframe
+        if pretty_ts:
+            df = self._apply_pretty_ts(df)
+
+        if pretty_px:
+            df = self._apply_pretty_px(df)
+
+        if map_symbols and self.schema != Schema.DEFINITION:
+            df = self._map_symbols(df, pretty_ts)
+
+        return df
+
+    def _cleanup_dataframe(self, df: pd.DataFrame) -> pd.DataFrame:
         df.drop(["length", "rtype"], axis=1, inplace=True)
         if self.schema == Schema.MBO or self.schema in DERIV_SCHEMAS:
             df = df.reindex(columns=COLUMNS[self.schema])
@@ -453,39 +467,52 @@ def to_df(
         elif self.schema == Schema.DEFINITION:
             for column in DEFINITION_CHARARRAY_COLUMNS:
                 df[column] = df[column].str.decode("utf-8")
+            for column, type_max in DEFINITION_TYPE_MAX_MAP.items():
+                if column in df.columns:
+                    df[column] = df[column].where(df[column] != type_max, np.nan)
 
-        if pretty_ts:
-            df.index = pd.to_datetime(df.index, utc=True)
-            for column in df.columns:
-                if column.startswith("ts_") and "delta" not in column:
-                    df[column] = pd.to_datetime(df[column], utc=True)
+        return df
 
-            if self.schema == Schema.DEFINITION:
-                df["expiration"] = pd.to_datetime(df["expiration"], utc=True)
-                df["activation"] = pd.to_datetime(df["activation"], utc=True)
+    def _apply_pretty_ts(self, df: pd.DataFrame) -> pd.DataFrame:
+        df.index = pd.to_datetime(df.index, utc=True)
+        for column in df.columns:
+            if column.startswith("ts_") and "delta" not in column:
+                df[column] = pd.to_datetime(df[column], utc=True)
 
-        if pretty_px:
-            for column in list(df.columns):
-                if (
-                    column in ("price", "open", "high", "low", "close")
-                    or column.startswith("bid_px")  # MBP
-                    or column.startswith("ask_px")  # MBP
-                ):
-                    df[column] = df[column] * 1e-9
-
-        if map_symbols:
-            # Build product ID index
-            if not self._product_id_index:
-                self._product_id_index = self._build_product_id_index()
-
-            # Map product IDs to native symbols
-            if self._product_id_index:
-                df_index = df.index if pretty_ts else pd.to_datetime(df.index, utc=True)
-                dates = [ts.date() for ts in df_index]
-                df["symbol"] = [
-                    self._product_id_index[dates[i]][p]
-                    for i, p in enumerate(df["product_id"])
-                ]
+        if self.schema == Schema.DEFINITION:
+            df["expiration"] = pd.to_datetime(df["expiration"], utc=True)
+            df["activation"] = pd.to_datetime(df["activation"], utc=True)
+
+        return df
+
+    def _apply_pretty_px(self, df: pd.DataFrame) -> pd.DataFrame:
+        for column in list(df.columns):
+            if (
+                column in ("price", "open", "high", "low", "close")
+                or column.startswith("bid_px")  # MBP
+                or column.startswith("ask_px")  # MBP
+            ):
+                df[column] = df[column] * 1e-9
+
+        if self.schema == Schema.DEFINITION:
+            for column in DEFINITION_PRICE_COLUMNS:
+                df[column] = df[column] * 1e-9
+
+        return df
+
+    def _map_symbols(self, df: pd.DataFrame, pretty_ts: bool) -> pd.DataFrame:
+        # Build product ID index
+        if not self._product_id_index:
+            self._product_id_index = self._build_product_id_index()
+
+        # Map product IDs to native symbols
+        if self._product_id_index:
+            df_index = df.index if pretty_ts else pd.to_datetime(df.index, utc=True)
+            dates = [ts.date() for ts in df_index]
+            df["symbol"] = [
+                self._product_id_index[dates[i]][p]
+                for i, p in enumerate(df["product_id"])
+            ]
 
         return df
 
diff --git a/databento/common/data.py b/databento/common/data.py
@@ -204,6 +204,22 @@ def get_deriv_ba_types(level: int) -> List[Tuple[str, Union[type, str]]]:
     "user_defined_instrument",
 ]
 
+DEFINITION_PRICE_COLUMNS = [
+    "min_price_increment",
+    "display_factor",
+    "high_limit_price",
+    "low_limit_price",
+    "max_price_variation",
+    "trading_reference_price",
+    "min_price_increment_amount",
+]
+
+DEFINITION_TYPE_MAX_MAP = {
+    x[0]: np.iinfo(x[1]).max
+    for x in STRUCT_MAP[Schema.DEFINITION]
+    if not isinstance(x[1], str)
+}
+
 ################################################################################
 # DBZ fields
 ################################################################################
diff --git a/databento/version.py b/databento/version.py
@@ -1 +1 @@
-__version__ = "0.6.0"
+__version__ = "0.7.0"
diff --git a/requirements.txt b/requirements.txt
@@ -1,6 +1,6 @@
 aiohttp>=3.7.2
-dbz-python>=0.2.0
+dbz-python>=0.2.1
 numpy>=1.17.0
 pandas>=1.1.3
 requests>=2.24.0
-zstandard>=0.18.0
+zstandard>=0.19.0

| Original file line number | Diff line number | Diff line change |
| --- | --- | --- |
| | | `@@ -1 +1 @@` |
| `1` | | `-__version__ = "0.6.0"` |
| | `1` | `+__version__ = "0.7.0"` |