1. add auth tests

2. fix badges 3. add codecov automation and details 4. implement process() function, switch to live end-point 5. extend support for partial download of data, expose additional functions 6. additional documentation
amor71 · Jun 26, 2022 · fbbb74d · fbbb74d
1 parent 64b9896
commit fbbb74d
Show file tree

Hide file tree

Showing 10 changed files with 198 additions and 5 deletions.
diff --git a/.github/workflows/python-publish.yml b/.github/workflows/python-publish.yml
@@ -21,6 +21,15 @@ jobs:
     runs-on: ubuntu-latest
 
     steps:
+    - uses: actions/checkout@master
+    - uses: codecov/codecov-action@v2
+      with:
+        token: ${{ secrets.CODECOV_TOKEN }} # not required for public repos
+        files: ./coverage1.xml,./coverage2.xml # optional
+        flags: unittests # optional
+        name: codecov-umbrella # optional
+        fail_ci_if_error: true # optional (default = false)
+        verbose: true # optional (default = false)
     - uses: actions/checkout@v3
     - name: Set up Python
       uses: actions/setup-python@v3
@@ -37,3 +46,4 @@ jobs:
       with:
         user: __token__
         password: ${{ secrets.PYPI_API }}
+
diff --git a/.gitignore b/.gitignore
@@ -2,6 +2,7 @@
 __pycache__/
 *.py[cod]
 *$py.class
+tools/
 
 # C extensions
 *.so

diff --git a/README.md b/README.md
@@ -1,6 +1,7 @@
-[![Updates](https://pyup.io/repos/github/amor71/finrashortdata/shield.svg)](https://pyup.io/repos/github/amor71/finrashortdata/)
-[![Python 3](https://pyup.io/repos/github/amor71/finrashortdata/python-3-shield.svg)](https://pyup.io/repos/github/amor71/finrashortdata/)
+[![Python 3](https://pyup.io/repos/github/amor71/FINRAShortData/python-3-shield.svg)](https://pyup.io/repos/github/amor71/FINRAShortData/)
+[![Updates](https://pyup.io/repos/github/amor71/FINRAShortData/shield.svg)](https://pyup.io/repos/github/amor71/FINRAShortData/)
 [![Sourcery](https://img.shields.io/badge/Sourcery-enabled-brightgreen)](https://sourcery.ai)
+[![codecov](https://codecov.io/gh/amor71/FINRAShortData/branch/main/graph/badge.svg?token=Gy7JKcpOqh)](https://codecov.io/gh/amor71/FINRAShortData)
 
 # FINRAShortData
 Process FINRA Short Daily Data [feeds](https://developer.finra.org/docs#query_api-equity-equity_short_interest_standardized)
@@ -22,18 +23,26 @@ To install the package type:
 ### Authenticate
 
 ```python
- from finrashortdata import auth
+from finrashortdata import auth
 token = auth(client_id=<your api client id>, secret=<your api secret>)
 ```
 
-### Basic data loading & processing
+### Example 1: Basic data loading & processing
 
 ```python
- from finrashortdata import process
+from finrashortdata import process
 import pandas as pd
 df : pd.DataFrame = process(token)
 ```
 
+### Example 2: load latest data
+
+```python
+from finrashortdata import get_chunk_and_size, process
+
+chunk, max_data = get_chunk_and_size(token)
+df : pd.DataFrame = process(token=token, offset=max_data-10*chunk)
+```
+
+
+
 ## Licensing
 
 [GNU GPL v.3](https://github.com/amor71/FINRAShortData/blob/main/LICENSE)

diff --git a/finrashortdata/__init__.py b/finrashortdata/__init__.py
@@ -1,3 +1,4 @@
 __version__ = "0.0.01"
 
 from .auth import auth
+from .equity import get_chunk_and_size, process
diff --git a/finrashortdata/auth.py b/finrashortdata/auth.py
@@ -12,6 +12,9 @@ def auth(client_id: str, secret: str) -> str:
     Returns: If successful returns generated token, otherwise throws exception.
     """
 
+    if not client_id or not secret:
+        raise TypeError("client_id and secret can not be None")
+
     url = "https://ews.fip.finra.org/fip/rest/ews/oauth2/access_token"
     params = {"grant_type": "client_credentials"}
     auth = HTTPBasicAuth(client_id, secret)

diff --git a/finrashortdata/equity.py b/finrashortdata/equity.py
@@ -0,0 +1,92 @@
+import asyncio
+import concurrent.futures
+import time
+from typing import Optional, Tuple
+
+import pandas as pd
+import requests
+
+url: str = "https://api.finra.org/data/group/OTCMarket/name/regShoDaily"
+
+
+def _requests_get(token: str, chunk_size: int, offset: int) -> pd.DataFrame:
+    """Fetch one page of records from the FINRA end-point as a DataFrame.
+
+    Input Arguments:
+        token -> obtained from the auth() function.
+        chunk_size -> number of records to request (sent as 'limit').
+        offset -> position of the first record to request.
+    Returns: DataFrame of the page, with the
+        'securitiesInformationProcessorSymbolIdentifier' column renamed to
+        'symbol' and the 'reportingFacilityCode' / 'marketCode' columns dropped.
+    Raises: requests.HTTPError on any error status other than 429 / 502,
+        which are retried after a 10 second pause.
+    """
+    while True:
+        r = requests.get(
+            url=url,
+            headers={
+                "Authorization": f"Bearer {token}",
+                "Accept": "application/json",
+            },
+            params={"limit": chunk_size, "offset": offset},
+        )
+        # The retryable statuses must be tested *before* raise_for_status():
+        # it raises on any 4xx/5xx, which would make the retry unreachable.
+        if r.status_code not in (429, 502):
+            break
+
+        print(f"{url} return {r.status_code}, waiting and re-trying")
+        time.sleep(10)
+
+    r.raise_for_status()
+
+    df = pd.DataFrame(r.json())
+    df.rename(
+        columns={"securitiesInformationProcessorSymbolIdentifier": "symbol"},
+        inplace=True,
+    )
+    df.drop(["reportingFacilityCode", "marketCode"], axis=1, inplace=True)
+    return df
+
+
+def get_chunk_and_size(token: str) -> Tuple[int, int]:
+    """Ask the FINRA end-point for its chunk size and total record count.
+
+    A single-record request is issued and the answer is read from the
+    'Record-Max-Limit' and 'Record-Total' response headers. The chunk size
+    is what process() uses as its paging step, and is the natural step for
+    the 'offset' argument when calling process() with restrictions.
+
+    Input Arguments: token obtained from the auth() function.
+    Returns: tuple with chunk size followed by number of data-points to be loaded from FINRA end-point.
+    """
+    request_headers = {
+        "Authorization": f"Bearer {token}",
+        "Accept": "application/json",
+    }
+    response = requests.get(
+        url=url, headers=request_headers, params={"limit": 1}
+    )
+    response.raise_for_status()
+
+    chunk_size = int(response.headers["Record-Max-Limit"])
+    total_records = int(response.headers["Record-Total"])
+    return chunk_size, total_records
+
+
+async def process(
+    token: str, offset: int = 0, limit: Optional[int] = None
+) -> pd.DataFrame:
+    """Download Daily Short details
+
+    Pages are fetched concurrently on a thread pool, concatenated, and
+    summed per ('tradeReportDate', 'symbol').
+
+    Input Arguments:
+        token -> obtained from the auth() function.
+        offset -> starting point (default 0).
+        limit -> end point (default not limit).
+    Returns: If successful returns DataFrame with all details; an empty
+        DataFrame when the requested range contains no records.
+    """
+    chunk_size, max_records = get_chunk_and_size(token)
+
+    if limit:
+        max_records = min(max_records, limit)
+
+    print(
+        f"loading data (chunk_size={chunk_size}, max_records={max_records-offset})..."
+    )
+
+    chunk_offsets = range(offset, max_records, chunk_size)
+    if not chunk_offsets:
+        # Nothing to download; pd.concat() raises ValueError on an empty list.
+        return pd.DataFrame()
+
+    # Inside a coroutine the running loop is the one to schedule work on.
+    loop = asyncio.get_running_loop()
+    with concurrent.futures.ThreadPoolExecutor() as executor:
+        futures = [
+            loop.run_in_executor(
+                executor, _requests_get, token, chunk_size, chunk_offset
+            )
+            for chunk_offset in chunk_offsets
+        ]
+        returned_df: pd.DataFrame = pd.concat(await asyncio.gather(*futures))
+        print(returned_df.shape, max_records)
+
+    return returned_df.groupby(["tradeReportDate", "symbol"]).sum()
diff --git a/pytest.ini b/pytest.ini
@@ -0,0 +1,2 @@
+[pytest]
+asyncio_mode=auto
diff --git a/requirements/dev.txt b/requirements/dev.txt
@@ -6,3 +6,6 @@ bandit
 isort
 mypy
 requests
+pytest
+pytest-asyncio
+
diff --git a/tests/test_auth.py b/tests/test_auth.py
@@ -0,0 +1,48 @@
+import os
+
+import pytest
+
+from finrashortdata import auth
+
+
+def test_auth_positive() -> None:
+    """auth() returns a non-empty token for valid credentials.
+
+    Credentials are read from the TEST_API_CLIENT_ID / TEST_API_SECRET
+    environment variables.
+    """
+    client_id = os.getenv("TEST_API_CLIENT_ID", None)
+    secret = os.getenv("TEST_API_SECRET", None)
+
+    if not client_id or not secret:
+        raise AssertionError(
+            "tests require env variables TEST_API_CLIENT_ID, TEST_API_SECRET"
+        )
+
+    token = auth(client_id, secret)
+
+    # The token is a live credential: check it, but never print it to logs.
+    assert token
+
+
+def test_auth_no_type() -> None:
+    """auth() called without any argument raises TypeError."""
+    with pytest.raises(TypeError):
+        auth()  # type: ignore
+
+
+def test_auth_no_secret() -> None:
+    """auth() called without the secret argument raises TypeError."""
+    with pytest.raises(TypeError):
+        auth("id1")  # type: ignore
+
+
+def test_auth_none_values() -> None:
+    """auth() called with a None client_id raises TypeError."""
+    with pytest.raises(TypeError):
+        auth(None, "secret")  # type: ignore
diff --git a/tests/test_process.py b/tests/test_process.py
@@ -0,0 +1,24 @@
+import os
+
+import pandas as pd
+import pytest
+
+from finrashortdata import auth, get_chunk_and_size, process
+
+
+async def test_process_positive() -> None:
+    """process() returns a DataFrame for the last (up to) 10 chunks of data.
+
+    Credentials are read from the TEST_API_CLIENT_ID / TEST_API_SECRET
+    environment variables.
+    """
+    client_id = os.getenv("TEST_API_CLIENT_ID", None)
+    secret = os.getenv("TEST_API_SECRET", None)
+
+    if not client_id or not secret:
+        raise AssertionError(
+            "tests require env variables TEST_API_CLIENT_ID, TEST_API_SECRET"
+        )
+    token = auth(client_id, secret)
+    chunk, max_data = get_chunk_and_size(token)
+    # Clamp at 0 so a feed shorter than 10 chunks does not yield a negative offset.
+    _df: pd.DataFrame = await process(
+        token=token, offset=max(0, max_data - 10 * chunk)
+    )
+    print(_df)
+
+    assert isinstance(_df, pd.DataFrame)
-Original file line number
+Diff line change
@@ Expand Up / @@ -6,3 +6,6 @@ bandit @@
     isort
     mypy
     requests
+    pytest
+    pytest-asyncio