Skip to content

Commit

Permalink
1. add auth tests
Browse files Browse the repository at this point in the history
2. fix badges
3. add codecov automation and details
4. implement process() function, Switch to live end-point,
5. Extend support for partial download of data, expose additional
   functions.
6. Additional documentation
  • Loading branch information
amor71 committed Jun 26, 2022
1 parent 64b9896 commit fbbb74d
Show file tree
Hide file tree
Showing 10 changed files with 198 additions and 5 deletions.
10 changes: 10 additions & 0 deletions .github/workflows/python-publish.yml
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,15 @@ jobs:
runs-on: ubuntu-latest

steps:
- uses: actions/checkout@master
- uses: codecov/codecov-action@v2
with:
token: ${{ secrets.CODECOV_TOKEN }} # not required for public repos
files: ./coverage1.xml,./coverage2.xml # optional
flags: unittests # optional
name: codecov-umbrella # optional
fail_ci_if_error: true # optional (default = false)
verbose: true # optional (default = false)
- uses: actions/checkout@v3
- name: Set up Python
uses: actions/setup-python@v3
Expand All @@ -37,3 +46,4 @@ jobs:
with:
user: __token__
password: ${{ secrets.PYPI_API }}

1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
__pycache__/
*.py[cod]
*$py.class
tools/

# C extensions
*.so
Expand Down
19 changes: 14 additions & 5 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
[![Updates](https://pyup.io/repos/github/amor71/finrashortdata/shield.svg)](https://pyup.io/repos/github/amor71/finrashortdata/)
[![Python 3](https://pyup.io/repos/github/amor71/finrashortdata/python-3-shield.svg)](https://pyup.io/repos/github/amor71/finrashortdata/)
[![Python 3](https://pyup.io/repos/github/amor71/FINRAShortData/python-3-shield.svg)](https://pyup.io/repos/github/amor71/FINRAShortData/)
[![Updates](https://pyup.io/repos/github/amor71/FINRAShortData/shield.svg)](https://pyup.io/repos/github/amor71/FINRAShortData/)
[![Sourcery](https://img.shields.io/badge/Sourcery-enabled-brightgreen)](https://sourcery.ai)
[![codecov](https://codecov.io/gh/amor71/FINRAShortData/branch/main/graph/badge.svg?token=Gy7JKcpOqh)](https://codecov.io/gh/amor71/FINRAShortData)

# FINRAShortData
Process FINRA Short Daily Data [feeds](https://developer.finra.org/docs#query_api-equity-equity_short_interest_standardized)
Expand All @@ -22,18 +23,26 @@ To install the package type:
### Authenticate

```python
from finrashortdata import auth
from finrashortdata import auth
token = auth(client_id=<your api client id>, secret=<your api secret>)
```

### Basic data loading & processing
### Example 1: Basic data loading & processing

```python
from finrashortdata import process
from finrashortdata import process
import asyncio
import pandas as pd
df : pd.DataFrame = asyncio.run(process(token))
```

### Example 2: load latest data

```python
import asyncio

import pandas as pd
from finrashortdata import get_chunk_and_size, process

chunk, max_data = get_chunk_and_size(token)
df : pd.DataFrame = asyncio.run(process(token=token, offset=max_data-10*chunk))
```



## Licensing

[GNU GPL v.3](https://github.com/amor71/FINRAShortData/blob/main/LICENSE)
Expand Down
1 change: 1 addition & 0 deletions finrashortdata/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
# Package version string.
__version__ = "0.0.01"

from .auth import auth
from .equity import get_chunk_and_size, process
3 changes: 3 additions & 0 deletions finrashortdata/auth.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,9 @@ def auth(client_id: str, secret: str) -> str:
Returns: If successful returns generated token, otherwise throws exception.
"""

if not client_id or not secret:
raise TypeError("client_id and secret can not be None")

url = "https://ews.fip.finra.org/fip/rest/ews/oauth2/access_token"
params = {"grant_type": "client_credentials"}
auth = HTTPBasicAuth(client_id, secret)
Expand Down
92 changes: 92 additions & 0 deletions finrashortdata/equity.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@
import asyncio
import concurrent.futures
import time
from typing import Optional, Tuple

import pandas as pd
import requests

# FINRA end-point (OTCMarket group, regShoDaily dataset) that the request
# helpers in this module send their GET requests to.
url: str = "https://api.finra.org/data/group/OTCMarket/name/regShoDaily"


def _requests_get(token: str, chunk_size: int, offset: int) -> pd.DataFrame:
    """Fetch one chunk of records from the FINRA end-point as a DataFrame.

    Input Arguments:
        token -> bearer token sent in the Authorization header.
        chunk_size -> sent as the 'limit' query parameter.
        offset -> sent as the 'offset' query parameter.
    Returns: DataFrame built from the JSON payload, with the
        'securitiesInformationProcessorSymbolIdentifier' column renamed to
        'symbol' and the 'reportingFacilityCode' / 'marketCode' columns dropped.
    Raises: requests.HTTPError for any error status other than 429 / 502,
        which are retried after a 10 second pause.
    """
    while True:
        r = requests.get(
            url=url,
            headers={
                "Authorization": f"Bearer {token}",
                "Accept": "application/json",
            },
            params={"limit": chunk_size, "offset": offset},
        )
        # The retry check has to run BEFORE raise_for_status(): 429 and 502
        # are error statuses, so raising first makes the retry unreachable.
        if r.status_code not in (429, 502):
            break

        print(f"{url} return {r.status_code}, waiting and re-trying")
        time.sleep(10)

    r.raise_for_status()

    df = pd.DataFrame(r.json())
    df.rename(
        columns={"securitiesInformationProcessorSymbolIdentifier": "symbol"},
        inplace=True,
    )
    df.drop(["reportingFacilityCode", "marketCode"], axis=1, inplace=True)
    return df


def get_chunk_and_size(token: str) -> Tuple[int, int]:
    """Query the FINRA end-point for its chunk size and total record count.

    A single one-record request is issued and the answer is read from the
    'Record-Max-Limit' and 'Record-Total' response headers. The chunk size is
    what process() uses as its step between requests, and it is also the
    natural 'offset' step when calling process() with restrictions.
    Input Arguments: token obtained from the auth() function.
    Returns: tuple of (chunk size, number of data-points available).
    Raises: requests.HTTPError when the end-point answers with an error status.
    """
    request_headers = {
        "Authorization": f"Bearer {token}",
        "Accept": "application/json",
    }
    response = requests.get(
        url=url, headers=request_headers, params={"limit": 1}
    )
    response.raise_for_status()

    chunk_size = int(response.headers["Record-Max-Limit"])
    total_records = int(response.headers["Record-Total"])
    return chunk_size, total_records


async def process(
    token: str, offset: int = 0, limit: Optional[int] = None
) -> pd.DataFrame:
    """Download Daily Short details

    Input Arguments:
        token -> obtained from the auth() function.
        offset -> position of the first record to load (default 0).
        limit -> upper bound on the record position to start a chunk from;
            it is compared against the total record count, not added to
            offset (default: no limit).
    Returns: DataFrame with the downloaded chunks concatenated, grouped by
        'tradeReportDate' and 'symbol' and summed. An empty DataFrame is
        returned when the offset / limit combination selects no chunk.
    """
    chunk_size, max_records = get_chunk_and_size(token)

    # 'is not None' so that an explicit limit of 0 is honoured rather than
    # being treated as "no limit".
    if limit is not None:
        max_records = min(max_records, limit)

    print(
        f"loading data (chunk_size={chunk_size}, max_records={max_records-offset})..."
    )

    chunk_offsets = range(offset, max_records, chunk_size)
    if not chunk_offsets:
        # pd.concat() raises ValueError on an empty sequence; nothing to load.
        return pd.DataFrame()

    # Inside a coroutine the running loop is the one to schedule work on.
    loop = asyncio.get_running_loop()
    with concurrent.futures.ThreadPoolExecutor() as executor:
        # The blocking HTTP calls run in worker threads, one per chunk.
        futures = [
            loop.run_in_executor(
                executor, _requests_get, token, chunk_size, chunk_offset
            )
            for chunk_offset in chunk_offsets
        ]
        returned_df = pd.concat(await asyncio.gather(*futures))
    print(returned_df.shape, max_records)

    return returned_df.groupby(["tradeReportDate", "symbol"]).sum()
2 changes: 2 additions & 0 deletions pytest.ini
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
[pytest]
asyncio_mode=auto
3 changes: 3 additions & 0 deletions requirements/dev.txt
Original file line number Diff line number Diff line change
Expand Up @@ -6,3 +6,6 @@ bandit
isort
mypy
requests
pytest
pytest-asyncio

48 changes: 48 additions & 0 deletions tests/test_auth.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
import os

import pytest

from finrashortdata import auth


def test_auth_positive() -> None:
    """auth() yields a token for the credentials found in the environment."""
    client_id = os.getenv("TEST_API_CLIENT_ID", None)
    secret = os.getenv("TEST_API_SECRET", None)

    if not client_id or not secret:
        raise AssertionError(
            "tests require env variables TEST_API_CLIENT_ID, TEST_API_SECRET"
        )

    token = auth(client_id, secret)

    # Verify the token instead of printing it: a bearer token is a credential
    # and should not be written to test / CI logs.
    assert token


def test_auth_no_type() -> None:
    """auth() called with no arguments raises TypeError."""
    with pytest.raises(TypeError):
        auth()  # type: ignore


def test_auth_no_secret() -> None:
    """auth() called without the secret argument raises TypeError."""
    with pytest.raises(TypeError):
        auth("id1")  # type: ignore


def test_auth_none_values() -> None:
    """auth() called with a None client_id raises TypeError."""
    with pytest.raises(TypeError):
        auth(None, "secret")  # type: ignore
24 changes: 24 additions & 0 deletions tests/test_process.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
import os

import pandas as pd
import pytest

from finrashortdata import auth, get_chunk_and_size, process


async def test_process_positive() -> None:
    """process() returns a DataFrame for the last ten chunks of data."""
    client_id = os.getenv("TEST_API_CLIENT_ID", None)
    secret = os.getenv("TEST_API_SECRET", None)

    if not client_id or not secret:
        raise AssertionError(
            "tests require env variables TEST_API_CLIENT_ID, TEST_API_SECRET"
        )
    token = auth(client_id, secret)
    chunk, max_data = get_chunk_and_size(token)
    # Clamp at 0 so a data set smaller than ten chunks does not produce a
    # negative offset.
    _df: pd.DataFrame = await process(
        token=token, offset=max(0, max_data - 10 * chunk)
    )
    print(_df)

    assert isinstance(_df, pd.DataFrame)

0 comments on commit fbbb74d

Please sign in to comment.