-
Notifications
You must be signed in to change notification settings - Fork 3
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
switch from APIs to processing files
- Loading branch information
Showing
6 changed files
with
549 additions
and
198 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -4,13 +4,8 @@ | |
[codecov](https://codecov.io/gh/amor71/FINRAShortData) | ||
|
||
# FINRAShortData | ||
Process FINRA Short Daily Data [feeds](https://developer.finra.org/docs#query_api-equity-equity_short_interest_standardized) | ||
|
||
## Prerequisite | ||
|
||
* FINRA Developer Credentials are required. If you do not yet have an account, [create one here](https://developer.finra.org/create-account?Forward_URL=https://gateway.finra.org/app/dfo-console?rcpRedirNum=1). | ||
|
||
* Once you have access, you will need to create an API key. Daily Short Data feeds are free. [click here](https://gateway.finra.org/app/api-console/add-credential) to create API credential and follow the instructions. | ||
Process FINRA Short Daily Data [feeds](https://www.finra.org/finra-data/browse-catalog/short-sale-volume-data/daily-short-sale-volume-files) | ||
|
||
## Install | ||
|
||
|
@@ -20,29 +15,30 @@ To install the package type: | |
|
||
## Quick start | ||
|
||
### Authenticate | ||
### Example 1: Daily Short Volumes for past 2 days (inclusive) | ||
|
||
```python | ||
from finrashortdata import auth | ||
token = auth(client_id=<your api client id>, secret=<your api secret>) | ||
import asyncio | ||
from finrashortdata import daily_shorts | ||
import pandas as pd | ||
|
||
df : pd.DataFrame = asyncio.run(daily_shorts(offset=2)) | ||
``` | ||
|
||
### Example 1: Basic data loading & processing | ||
### Example 2: Daily Short Volumes for a date range | ||
|
||
```python | ||
import asyncio | ||
from finrashortdata import daily_shorts | ||
from datetime import date | ||
import pandas as pd | ||
df : pd.DataFrame = daily_shorts(token) | ||
``` | ||
|
||
### Example 2: load latest data | ||
```python | ||
from finrashortdata import daily_shorts_chunk_and_size, daily_shorts | ||
|
||
chunk, max_data = daily_shorts_chunk_and_size(token) | ||
df : pd.DataFrame = daily_shorts(token=token, offset=max_data-10*chunk) | ||
df : pd.DataFrame = asyncio.run(daily_shorts( | ||
start_date=date(year=2022, month=9, day=1), | ||
end_date=date(year=2022, month=9, day=10))) | ||
``` | ||
|
||
*Scripts work as-is* | ||
|
||
## Licensing | ||
|
||
|
@@ -55,6 +51,3 @@ Use the [Issues](https://github.com/amor71/FINRAShortData/issues) section | |
## Contributing | ||
|
||
If you'd like to contribute to the project, drop me a line at mailto:[email protected] | ||
|
||
|
||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,4 +1,3 @@ | ||
__version__ = "0.0.13" | ||
__version__ = "0.1.0" | ||
|
||
from .auth import auth | ||
from .daily import daily_shorts, daily_shorts_chunk_and_size | ||
from .daily import daily_shorts |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,103 +1,94 @@ | ||
import asyncio | ||
import concurrent.futures | ||
import time | ||
from typing import Optional, Tuple | ||
import io | ||
from datetime import date, datetime, timezone | ||
from typing import List, Optional | ||
|
||
import pandas as pd | ||
import pandas_market_calendars | ||
import requests | ||
|
||
from .decorators import timeit | ||
|
||
url: str = "https://api.finra.org/data/group/OTCMarket/name/regShoDaily" | ||
|
||
|
||
def _requests_get(token: str, chunk_size: int, offset: int) -> pd.DataFrame: | ||
r = requests.get( | ||
url=url, | ||
headers={ | ||
"Authorization": f"Bearer {token}", | ||
"Accept": "application/json", | ||
}, | ||
params={"limit": chunk_size, "offset": offset}, | ||
) | ||
r.raise_for_status() | ||
|
||
if r.status_code in (429, 502): | ||
print(f"{url} return {r.status_code}, waiting and re-trying") | ||
time.sleep(10) | ||
return _requests_get(token, chunk_size, offset) | ||
|
||
x = r.json() | ||
df = pd.DataFrame(x) | ||
df.rename( | ||
columns={ | ||
"securitiesInformationProcessorSymbolIdentifier": "symbol", | ||
"totalParQuantity": "volume", | ||
"shortParQuantity": "shorts", | ||
"shortExemptParQuantity": "exempt", | ||
}, | ||
inplace=True, | ||
def _short_by_date(d: datetime) -> pd.DataFrame:
    """Download and parse FINRA's daily short-sale volume file for one day.

    The file is fetched from ``cdn.finra.org`` (``CNMSshvolYYYYMMDD.txt``),
    parsed as pipe-delimited text and enriched with two percentage columns.

    Input Arguments:
        d -> day to load; only its calendar date is used.
    Returns:
        DataFrame indexed by (``Symbol``, ``date``) with ``ShortPercent`` and
        ``ShortExemptPercent`` added. An empty DataFrame is returned when no
        file is available for that day or the file holds no rows.
    Raises:
        requests.HTTPError for any other unsuccessful HTTP response.
    """
    base_url = f'https://cdn.finra.org/equity/regsho/daily/CNMSshvol{d.strftime("%Y%m%d")}.txt'
    # A timeout keeps one stalled download from hanging the whole run.
    response = requests.get(base_url, timeout=30)

    # NOTE(review): assumes the CDN answers 403/404 when no file has been
    # published for the day (e.g. today's file is not out yet) -- confirm.
    # Without this check the error page itself would be parsed as data.
    if response.status_code in (403, 404):
        return pd.DataFrame()
    response.raise_for_status()

    df = pd.read_csv(
        io.StringIO(response.content.decode("utf-8")),
        sep="|",
        engine="python",  # skipfooter is only supported by the python engine
        skipfooter=1,  # last line of the file is not a data row
        keep_default_na=False,  # keep text such as "NA" as a string, not NaN
    )
    df["date"] = d.date()

    if df.empty:
        return df

    # The file's own "Date" column is superseded by the typed "date" column.
    del df["Date"]
    df["ShortPercent"] = round(
        100.0 * df["ShortVolume"] / df["TotalVolume"], 2
    )
    df["ShortExemptPercent"] = round(
        100.0 * df["ShortExemptVolume"] / df["TotalVolume"], 2
    )
    # dropna() discards rows holding NaN (e.g. a 0/0 percentage).
    return df.set_index(["Symbol", "date"]).sort_index().dropna()
|
||
|
||
def daily_shorts_chunk_and_size(token: str) -> Tuple[int, int]: | ||
"""Return the optimal chunk size and total number of data-points, | ||
def _get_trading_holidays(
    mcal: pandas_market_calendars.MarketCalendar,
) -> List[str]:
    """Return the holidays known to the given market calendar.

    Input Arguments:
        mcal -> market calendar whose holidays are requested.
    Returns:
        The ``holidays`` attribute of the object produced by
        ``mcal.holidays()``.
    """
    # NOTE(review): the annotation promises List[str]; the real element type
    # is whatever pandas_market_calendars stores -- confirm.
    holiday_calendar = mcal.holidays()
    return holiday_calendar.holidays
|
||
Chunk size is used internally, by the daily_shorts() function | ||
to reduce the number of calls to the FINRA end-point, | ||
it is also used as the 'offset' step when calling daily_shorts() directly with restrictions. | ||
|
||
Input Arguments: token obtained from the auth() function. | ||
Returns: tuple with chunk size followed by number of data-points to be loaded from FINRA end-point. | ||
""" | ||
r = requests.get( | ||
url=url, | ||
headers={ | ||
"Authorization": f"Bearer {token}", | ||
"Accept": "application/json", | ||
}, | ||
params={"limit": 1}, | ||
def _calc_start_date_from_offset(
    mcal: pandas_market_calendars.MarketCalendar, end_date: date, offset: int
) -> date:
    """Return the first day of a window of ``offset`` trading days.

    The window ends on ``end_date`` (inclusive). If ``end_date`` is not a
    trading day, it ends on the closest earlier trading day, so the window
    always spans ``offset`` sessions.

    Input Arguments:
        mcal     -> market calendar supplying the trading holidays.
        end_date -> last day of the window.
        offset   -> number of trading days in the window (expected >= 1).
    Returns:
        The date of the first trading day in the window.
    """
    holidays = _get_trading_holidays(mcal)
    one_session = pd.tseries.offsets.CustomBusinessDay(n=1, holidays=holidays)

    # Anchor on the caller's end_date rather than "now", then snap back to a
    # trading day so weekends/holidays do not shorten the window.
    anchor = one_session.rollback(pd.Timestamp(end_date).normalize())

    if offset > 1:
        anchor = anchor - pd.tseries.offsets.CustomBusinessDay(
            n=offset - 1, holidays=holidays
        )
    return anchor.date()
|
||
|
||
def _short_iterator(days: List) -> pd.DataFrame: | ||
df = pd.DataFrame() | ||
for day in days: | ||
day_df = _short_by_date(day) | ||
if not day_df.empty: | ||
df = ( | ||
day_df | ||
if df.empty | ||
else pd.concat([df, day_df], axis=0).sort_index() | ||
) | ||
|
||
return df | ||
|
||
|
||
@timeit
async def daily_shorts(
    start_date: Optional[date] = None,
    end_date: Optional[date] = None,
    offset: Optional[int] = None,
) -> pd.DataFrame:
    """Download Daily Short details
    Input Arguments:
        start_date -> Optional, first date for pulling short data.
        end_date -> last date (inclusive) for pulling short data,
                    defaults to today's date at call time.
        offset -> If start_date is not provided, the start date is calculated
                  as an offset (in trading days) from end_date.
    Returns: If successful returns DataFrame with all details; an empty
             DataFrame when the range holds no NYSE trading day.
    Raises: ValueError when neither start_date nor a positive offset is given.
    """
    # Function-scope import keeps this block self-contained.
    import asyncio

    # Resolve "today" per call: a date.today() default in the signature is
    # evaluated once at import time and goes stale in long-running processes.
    if end_date is None:
        end_date = date.today()

    if not start_date:
        if not offset:
            raise ValueError(
                "daily_shorts(): must have either start_date or offset"
            )
        if offset < 1:
            raise ValueError("daily_shorts(): offset >= 1")

    nyse = pandas_market_calendars.get_calendar("NYSE")
    if not start_date:
        start_date = _calc_start_date_from_offset(nyse, end_date, offset)

    schedule = nyse.schedule(start_date=start_date, end_date=end_date)
    days = schedule.index.to_list()
    if not days:
        return pd.DataFrame()

    # The downloads are synchronous; run them in a worker thread so this
    # coroutine does not block the caller's event loop.
    loop = asyncio.get_running_loop()
    return await loop.run_in_executor(None, _short_iterator, days)
Oops, something went wrong.