Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
70 changes: 3 additions & 67 deletions backend/ingest/nse_importer.py
Original file line number Diff line number Diff line change
Expand Up @@ -438,72 +438,6 @@ def _process_file(self, db: Session, key: str, trade_date: date, results: dict,



# Synthesize CorporateAction records for parsed dividends

synthesized_ca_records = []
if key == 'board_meetings':
for r in records:
ext_amt = r.get('extracted_dividend_amount')
if ext_amt is not None and ext_amt > 0:
ext_rec_date_str = r.get('extracted_record_date')
parsed_rec_date = None
if ext_rec_date_str:
from backend.ingest.field_mapper import parse_nse_date
parsed_rec_date = parse_nse_date(ext_rec_date_str)

# By strictly using exact strings without the appended board meeting purpose,
# we allow the generic unique constraints ['date', 'symbol', 'purpose']
# to squash multiple same-day board meeting updates (e.g. Intimations + Financial Results)
# into a single upcoming dividend record.
purpose_str = "Dividend" if parsed_rec_date else "Dividend - Record date not yet declared"

synthesized_ca_records.append({
'date': r.get('date'),
'symbol': r.get('symbol'),
'company_name': r.get('company_name'),
'purpose': purpose_str,
'parsed_dividend_amount': ext_amt,
'dividend_type': r.get('extracted_dividend_type') or 'Final',
'ex_date': parsed_rec_date,
'record_date': parsed_rec_date,
'broadcast_date': r.get('broadcast_date'),
})
if synthesized_ca_records:
ca_model = self._get_model_class('corporate_actions')
ca_unique = self._get_unique_fields('corporate_actions')
synthesized_ca_records = self._deduplicate_records(synthesized_ca_records, ca_unique)

# Delete old synthesized records before inserting to prevent duplicates
# We identify synthesized records by their specific "Dividend" format string
try:
from sqlalchemy import delete
# To effectively deduplicate synthesized corporate actions that might have
# drifted across different `trade_date` imports but belong to the same symbol/purpose:
for rec in synthesized_ca_records:
from sqlalchemy import or_
# Crucially, do not filter deletions by `parsed_dividend_amount`, to ensure intimation records
# (no amount) are properly overwritten by subsequent announcement records (with amount).
# Crucial fix to preserve actual historical dividends!
# We only want to delete the synthesized records that are being replaced BY THIS EXACT EVENT.
# So we only delete synthesized placeholders from the SAME date or later (which means it's the exact same lifecycle event).
from datetime import timedelta
threshold_date = rec['date'] - timedelta(days=60) # Lifecycle events happen closely

stmt = delete(ca_model).where(
ca_model.symbol == rec['symbol'],
ca_model.date >= threshold_date,
or_(
ca_model.purpose.like('%not yet declared%'),
ca_model.purpose == 'Dividend'
)
)
db.execute(stmt)

self._insert_batch(db, ca_model, synthesized_ca_records)
logger.info(f"Inserted {len(synthesized_ca_records)} synthesized corporate actions for dividends from board meetings.")
except Exception as e:
logger.error(f"Failed to insert synthesized corporate actions: {e}")

if key == 'bhavcopy_fo':
for r in records:
if 'instrument_type' in r and isinstance(r['instrument_type'], str):
Expand Down Expand Up @@ -531,7 +465,9 @@ def _process_file(self, db: Session, key: str, trade_date: date, results: dict,
records = self._deduplicate_records(records, unique_fields)

# Special handling for Deals, Actions, Meetings: Delete & Insert
if key == 'nse_security':
if key in ['corporate_actions', 'board_meetings']:
inserted, updated = self._upsert_batch(db, model_class, records, unique_fields)
elif key == 'nse_security':
# Security Master doesn't have a date column and isn't a hypertable. We upsert on fin_instrm_id.
inserted, updated = self._upsert_batch(db, model_class, records, unique_fields)
else:
Expand Down
64 changes: 50 additions & 14 deletions backend/ingest/nse_lib.py
Original file line number Diff line number Diff line change
Expand Up @@ -617,12 +617,40 @@ def get_board_meetings(self, trade_date: date) -> pd.DataFrame:

purpose = str(item.get('bm_purpose', '')).lower()
desc = str(item.get('bm_desc', '')).lower()
symbol = item.get('bm_symbol')

if 'dividend' in purpose or 'dividend' in desc:
symbol = item.get('bm_symbol')
# We want to check for dividend announcements even if the main purpose says "Financial Results"
# But we only proceed if we find a dividend mention in the purpose, desc, OR if there's a matching corporate announcement
# We MUST correlate the dates to prevent flagging every board meeting for this company!
has_dividend_mention = 'dividend' in purpose or 'dividend' in desc

try:
bm_date_obj_check = datetime.strptime(item.get('bm_date', ''), "%d-%b-%Y").date()
except ValueError:
bm_date_obj_check = None

if not has_dividend_mention and symbol and symbol in symbol_announcements and bm_date_obj_check:
for ann in symbol_announcements[symbol]:
if 'dividend' in str(ann.get('subject', '')).lower():
ann_date_str = ann.get('an_dt', '')
try:
ann_date_obj = datetime.strptime(ann_date_str.split(' ')[0], "%d-%b-%Y").date()
if abs((ann_date_obj - bm_date_obj_check).days) <= 5:
has_dividend_mention = True
break
except ValueError:
pass

is_agm = 'annual general meeting' in purpose or 'agm' in purpose

if has_dividend_mention or is_agm:
found_amount = None
found_record_date = None
found_type = 'Final'
found_type = 'Final' if 'interim' not in purpose and 'special' not in purpose else ('Interim' if 'interim' in purpose else 'Special')

if is_agm:
found_type = 'AGM'
item['bm_purpose'] = 'Annual General Meeting'

# First try mapping to CA data for dates
if symbol and symbol in symbol_ca_map:
Expand Down Expand Up @@ -687,17 +715,25 @@ def get_board_meetings(self, trade_date: date) -> pd.DataFrame:

# Extract Amount
if found_amount is None:
_clean_text = re.sub(r'(?:face value|fv|paid-up capital|paid up capital|equity shares? of|shares? of)\s*(?:of\s*)?(?:rs\.?|re\.?|rupees?|inr|[-/]|\s|\u20b9)*\d+(?:\.\d+)?(?:/-)?(?:\s*each)?', '', attchmntText, flags=re.IGNORECASE)

if 'including' in _clean_text.lower() or 'includes' in _clean_text.lower():
match = re.search(r'(?:rs\.?|re\.?|rupees?|inr|\u20b9)\s*(\d+(?:\.\d+)?)', _clean_text, re.IGNORECASE)
if match:
found_amount = float(match.group(1))
else:
div_pattern = re.compile(r'(?:rs\.?|re\.?|rupees?|inr|\u20b9)\s*(\d+(?:\.\d+)?)', re.IGNORECASE)
matches = div_pattern.findall(_clean_text)
if matches:
found_amount = sum(float(m) for m in matches)
# Check XBRL format first (e.g. <in-capmkt:RateOfFinalDividendRecommendedPerEquityShare>Rs 0.50 per share</in-capmkt:RateOfFinalDividendRecommendedPerEquityShare>)
xbrl_matches = re.findall(r'<[^>]*Dividend[^>]*>.*?Rs\.?\s*(\d+(?:\.\d+)?).*?</[^>]*>', attchmntText, re.IGNORECASE)
if not xbrl_matches:
xbrl_matches = re.findall(r'<[^>]*Dividend[^>]*>.*?(\d+(?:\.\d+)?).*?</[^>]*>', attchmntText, re.IGNORECASE)
if xbrl_matches:
found_amount = sum(float(m) for m in xbrl_matches)

if found_amount is None:
_clean_text = re.sub(r'(?:face value|fv|paid-up capital|paid up capital|equity shares? of|shares? of)\s*(?:of\s*)?(?:rs\.?|re\.?|rupees?|inr|[-/]|\s|\u20b9)*\d+(?:\.\d+)?(?:/-)?(?:\s*each)?', '', attchmntText, flags=re.IGNORECASE)

if 'including' in _clean_text.lower() or 'includes' in _clean_text.lower():
match = re.search(r'(?:rs\.?|re\.?|rupees?|inr|\u20b9)\s*(\d+(?:\.\d+)?)', _clean_text, re.IGNORECASE)
if match:
found_amount = float(match.group(1))
else:
div_pattern = re.compile(r'(?:rs\.?|re\.?|rupees?|inr|\u20b9)\s*(\d+(?:\.\d+)?)', re.IGNORECASE)
matches = div_pattern.findall(_clean_text)
if matches:
found_amount = sum(float(m) for m in matches)

# Extract Record Date
if found_record_date is None:
Expand Down
9 changes: 9 additions & 0 deletions backend/tests/test_ui_verify.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
from playwright.sync_api import sync_playwright

def run_cuj(page):
    """Placeholder UI verification step; performs no work yet.

    Author's recorded intent: for a purely backend-rendered UI or a locally
    hosted app, attempt to load the page. Because the project uses FastAPI,
    the application has to be running before that can be done.

    Args:
        page: Accepted but currently unused.
            NOTE(review): presumably a Playwright page object, given the
            module imports ``sync_playwright`` -- confirm with the caller.

    Returns:
        None.
    """
    return None

# Script entry point. Intentionally a no-op for now: running this file
# directly neither starts a browser nor invokes run_cuj.
if __name__ == "__main__":
    pass
Loading