Skip to content

Commit

Permalink
switch from APIs to processing files
Browse files Browse the repository at this point in the history
  • Loading branch information
amor71 committed Sep 23, 2022
1 parent e95a48e commit 90803a7
Show file tree
Hide file tree
Showing 6 changed files with 549 additions and 198 deletions.
35 changes: 14 additions & 21 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,8 @@
[![codecov](https://codecov.io/gh/amor71/FINRAShortData/branch/main/graph/badge.svg?token=Gy7JKcpOqh)](https://codecov.io/gh/amor71/FINRAShortData)

# FINRAShortData
Process FINRA Short Daily Data [feeds](https://developer.finra.org/docs#query_api-equity-equity_short_interest_standardized)

## Prerequisite

* FINRA Developer Credentials are required. If you do not yet have an account, [create one here](https://developer.finra.org/create-account?Forward_URL=https://gateway.finra.org/app/dfo-console?rcpRedirNum=1).

* Once you have access, you will need to create an API key. Daily Short Data feeds are free. [click here](https://gateway.finra.org/app/api-console/add-credential) to create API credential and follow the instructions.
Process FINRA Short Daily Data [feeds](https://www.finra.org/finra-data/browse-catalog/short-sale-volume-data/daily-short-sale-volume-files)

## Install

Expand All @@ -20,29 +15,30 @@ To install the package type:

## Quick start

### Authenticate
### Example 1: Daily Short Volumes for the past 2 days (inclusive)

```python
from finrashortdata import auth
token = auth(client_id=<your api client id>, secret=<your api secret>)
import asyncio
from finrashortdata import daily_shorts
import pandas as pd

df : pd.DataFrame = asyncio.run(daily_shorts(offset=2))
```

### Example 1: Basic data loading & processing
### Example 2: Daily Short Volumes for a date range

```python
import asyncio
from finrashortdata import daily_shorts
from datetime import date
import pandas as pd
df : pd.DataFrame = daily_shorts(token)
```

### Example 2: load latest data
```python
from finrashortdata import daily_shorts_chunk_and_size, daily_shorts

chunk, max_data = daily_shorts_chunk_and_size(token)
df : pd.DataFrame = daily_shorts(token=token, offset=max_data-10*chunk)
df : pd.DataFrame = asyncio.run(daily_shorts(
start_date=date(year=2022, month=9, day=1),
end_date=date(year=2022, month=9, day=10)))
```

*Scripts work as-is*

## Licensing

Expand All @@ -55,6 +51,3 @@ Use the [Issues](https://github.com/amor71/FINRAShortData/issues) section
## Contributing

If you'd like to contribute to the project, drop me a line at mailto:[email protected]



5 changes: 2 additions & 3 deletions finrashortdata/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
__version__ = "0.0.13"
__version__ = "0.1.0"

from .auth import auth
from .daily import daily_shorts, daily_shorts_chunk_and_size
from .daily import daily_shorts
143 changes: 67 additions & 76 deletions finrashortdata/daily.py
Original file line number Diff line number Diff line change
@@ -1,103 +1,94 @@
import asyncio
import concurrent.futures
import time
from typing import Optional, Tuple
import io
from datetime import date, datetime, timezone
from typing import List, Optional

import pandas as pd
import pandas_market_calendars
import requests

from .decorators import timeit

url: str = "https://api.finra.org/data/group/OTCMarket/name/regShoDaily"


def _requests_get(token: str, chunk_size: int, offset: int) -> pd.DataFrame:
r = requests.get(
url=url,
headers={
"Authorization": f"Bearer {token}",
"Accept": "application/json",
},
params={"limit": chunk_size, "offset": offset},
)
r.raise_for_status()

if r.status_code in (429, 502):
print(f"{url} return {r.status_code}, waiting and re-trying")
time.sleep(10)
return _requests_get(token, chunk_size, offset)

x = r.json()
df = pd.DataFrame(x)
df.rename(
columns={
"securitiesInformationProcessorSymbolIdentifier": "symbol",
"totalParQuantity": "volume",
"shortParQuantity": "shorts",
"shortExemptParQuantity": "exempt",
},
inplace=True,
def _short_by_date(d: datetime) -> pd.DataFrame:
    """Download and parse the FINRA daily short-sale volume file for one day.

    Input Arguments:
        d -> day to load; it selects the file name and fills the "date" index level.
    Returns: DataFrame indexed by (Symbol, date) holding the file's columns
        (without "Date") plus ShortPercent and ShortExemptPercent. An empty
        DataFrame is returned when no data is available for that day.
    Raises: requests.HTTPError when the server answers with an unexpected
        error status.
    """
    base_url = f'https://cdn.finra.org/equity/regsho/daily/CNMSshvol{d.strftime("%Y%m%d")}.txt'
    # A timeout keeps a stalled connection from blocking the caller forever.
    response = requests.get(base_url, timeout=30)

    # NOTE(review): assumes the CDN answers 403/404 when a day's file has not
    # been published (yet) - confirm. Such days are reported as "no data"
    # instead of parsing the error page as if it were CSV.
    if response.status_code in (403, 404):
        return pd.DataFrame()
    response.raise_for_status()

    df = pd.read_csv(
        io.StringIO(response.content.decode("utf-8")),
        sep="|",
        engine="python",  # skipfooter is only supported by the python engine
        skipfooter=1,  # drop the final line (presumably a trailer - confirm)
        keep_default_na=False,  # keep values such as "NA" as plain strings
    )
    df["date"] = d.date()

    if df.empty:
        return df

    # The file's own "Date" column is replaced by the "date" column set above.
    del df["Date"]
    df["ShortPercent"] = round(
        100.0 * df["ShortVolume"] / df["TotalVolume"], 2
    )
    df["ShortExemptPercent"] = round(
        100.0 * df["ShortExemptVolume"] / df["TotalVolume"], 2
    )
    return df.set_index(["Symbol", "date"]).sort_index().dropna()


def daily_shorts_chunk_and_size(token: str) -> Tuple[int, int]:
"""Return the optimal chunk size and total number of data-points,
def _get_trading_holidays(
    mcal: pandas_market_calendars.MarketCalendar,
) -> List[str]:
    """Return the holidays known to the given market calendar.

    Input Arguments:
        mcal -> market calendar to query.
    Returns: the `holidays` attribute of the object produced by mcal.holidays().
    """
    # NOTE(review): annotated as List[str]; the actual container/element type
    # is whatever pandas_market_calendars exposes - confirm.
    holiday_calendar = mcal.holidays()
    return holiday_calendar.holidays

Chunk size is used internally, by the daily_shorts() function
to reduce the number of calls to the FINRA end-point,
it is also used as the 'offset' step when calling daily_shorts() directly with restrictions.

Input Arguments: token obtained from the auth() function.
Returns: tuple with chunk size followed by number of data-points to be loaded from FINRA end-point.
"""
r = requests.get(
url=url,
headers={
"Authorization": f"Bearer {token}",
"Accept": "application/json",
},
params={"limit": 1},
def _calc_start_date_from_offset(
    mcal: pandas_market_calendars.MarketCalendar, end_date: date, offset: int
) -> date:
    """Return the first day of a window of `offset` trading days ending at end_date.

    Trading days are weekdays that are not in the calendar's holiday list.

    Input Arguments:
        mcal -> market calendar supplying the holidays.
        end_date -> last day (inclusive) of the window.
        offset -> number of trading days in the window, expected to be >= 1.
    Returns: start date of the window.
    """
    holidays = _get_trading_holidays(mcal)

    # Anchor the window on the caller's end_date (not on "now"), moved back to
    # the latest trading day when end_date itself is a weekend or holiday.
    # Without this, a zero-step offset would roll such a date forward, past
    # the end of the window.
    trading_day = pd.tseries.offsets.CustomBusinessDay(holidays=holidays)
    last_trading_day = trading_day.rollback(pd.Timestamp(end_date))

    # The last trading day counts as day one, hence offset - 1 steps back.
    steps_back = pd.tseries.offsets.CustomBusinessDay(
        n=offset - 1, holidays=holidays
    )
    return (last_trading_day - steps_back).date()


def _short_iterator(days: List) -> pd.DataFrame:
    """Load the short-volume data of every given day into one DataFrame.

    Input Arguments:
        days -> days to load, each one passed to _short_by_date().
    Returns: the non-empty per-day frames concatenated and sorted by index,
        or an empty DataFrame when no day produced data.
    """
    frames = [
        day_df for day_df in map(_short_by_date, days) if not day_df.empty
    ]

    if not frames:
        return pd.DataFrame()
    if len(frames) == 1:
        return frames[0]

    # Concatenate and sort once, instead of re-copying and re-sorting the
    # accumulated frame for every additional day.
    return pd.concat(frames, axis=0).sort_index()


@timeit
async def daily_shorts(
    start_date: Optional[date] = None,
    end_date: Optional[date] = None,
    offset: Optional[int] = None,
) -> pd.DataFrame:
    """Download Daily Short details
    Input Arguments:
        start_date -> Optional, start date for pulling short data.
        end_date -> last date (inclusive) for pulling short data (default: today).
        offset -> If start_date not provided, calculate start date as offset from end_date.
    Returns: If successful returns DataFrame with all details
    Raises: ValueError if neither start_date nor a valid offset (>= 1) is given.
    """
    # Resolve "today" per call; a date.today() default in the signature would
    # be evaluated only once, when the module is imported.
    if end_date is None:
        end_date = date.today()

    if start_date is None:
        if offset is None:
            raise ValueError(
                "daily_shorts(): must have either start_date or offset"
            )
        if offset < 1:
            raise ValueError("daily_shorts(): offset >= 1")

    nyse = pandas_market_calendars.get_calendar("NYSE")
    if start_date is None:
        # offset was validated above, so it is a positive int at this point.
        start_date = _calc_start_date_from_offset(nyse, end_date, offset)  # type: ignore

    # The schedule index lists the NYSE sessions between the two dates.
    schedule = nyse.schedule(start_date=start_date, end_date=end_date)
    days = schedule.index.to_list()

    return _short_iterator(days) if days else pd.DataFrame()
Loading

0 comments on commit 90803a7

Please sign in to comment.