diff --git a/backend/ingest/nse_importer.py b/backend/ingest/nse_importer.py index 801e9778..f1581f5c 100644 --- a/backend/ingest/nse_importer.py +++ b/backend/ingest/nse_importer.py @@ -438,72 +438,6 @@ def _process_file(self, db: Session, key: str, trade_date: date, results: dict, - # Synthesize CorporateAction records for parsed dividends - - synthesized_ca_records = [] - if key == 'board_meetings': - for r in records: - ext_amt = r.get('extracted_dividend_amount') - if ext_amt is not None and ext_amt > 0: - ext_rec_date_str = r.get('extracted_record_date') - parsed_rec_date = None - if ext_rec_date_str: - from backend.ingest.field_mapper import parse_nse_date - parsed_rec_date = parse_nse_date(ext_rec_date_str) - - # By strictly using exact strings without the appended board meeting purpose, - # we allow the generic unique constraints ['date', 'symbol', 'purpose'] - # to squash multiple same-day board meeting updates (e.g. Intimations + Financial Results) - # into a single upcoming dividend record. - purpose_str = "Dividend" if parsed_rec_date else "Dividend - Record date not yet declared" - - synthesized_ca_records.append({ - 'date': r.get('date'), - 'symbol': r.get('symbol'), - 'company_name': r.get('company_name'), - 'purpose': purpose_str, - 'parsed_dividend_amount': ext_amt, - 'dividend_type': r.get('extracted_dividend_type') or 'Final', - 'ex_date': parsed_rec_date, - 'record_date': parsed_rec_date, - 'broadcast_date': r.get('broadcast_date'), - }) - if synthesized_ca_records: - ca_model = self._get_model_class('corporate_actions') - ca_unique = self._get_unique_fields('corporate_actions') - synthesized_ca_records = self._deduplicate_records(synthesized_ca_records, ca_unique) - - # Delete old synthesized records before inserting to prevent duplicates - # We identify synthesized records by their specific "Dividend" format string - try: - from sqlalchemy import delete - # To effectively deduplicate synthesized corporate actions that might have - # drifted across different `trade_date` imports but belong to the same symbol/purpose: - for rec in synthesized_ca_records: - from sqlalchemy import or_ - # Crucially, do not filter deletions by `parsed_dividend_amount`, to ensure intimation records - # (no amount) are properly overwritten by subsequent announcement records (with amount). - # Crucial fix to preserve actual historical dividends! - # We only want to delete the synthesized records that are being replaced BY THIS EXACT EVENT. - # So we only delete synthesized placeholders from the SAME date or later (which means it's the exact same lifecycle event). - from datetime import timedelta - threshold_date = rec['date'] - timedelta(days=60) # Lifecycle events happen closely - - stmt = delete(ca_model).where( - ca_model.symbol == rec['symbol'], - ca_model.date >= threshold_date, - or_( - ca_model.purpose.like('%not yet declared%'), - ca_model.purpose == 'Dividend' - ) - ) - db.execute(stmt) - - self._insert_batch(db, ca_model, synthesized_ca_records) - logger.info(f"Inserted {len(synthesized_ca_records)} synthesized corporate actions for dividends from board meetings.") - except Exception as e: - logger.error(f"Failed to insert synthesized corporate actions: {e}") - if key == 'bhavcopy_fo': for r in records: if 'instrument_type' in r and isinstance(r['instrument_type'], str): @@ -531,7 +465,9 @@ def _process_file(self, db: Session, key: str, trade_date: date, results: dict, records = self._deduplicate_records(records, unique_fields) # Special handling for Deals, Actions, Meetings: Delete & Insert - if key == 'nse_security': + if key in ['corporate_actions', 'board_meetings']: + inserted, updated = self._upsert_batch(db, model_class, records, unique_fields) + elif key == 'nse_security': # Security Master doesn't have a date column and isn't a hypertable. We upsert on fin_instrm_id. inserted, updated = self._upsert_batch(db, model_class, records, unique_fields) else: diff --git a/backend/ingest/nse_lib.py b/backend/ingest/nse_lib.py index 568de6bb..e0cad615 100644 --- a/backend/ingest/nse_lib.py +++ b/backend/ingest/nse_lib.py @@ -617,12 +617,40 @@ def get_board_meetings(self, trade_date: date) -> pd.DataFrame: purpose = str(item.get('bm_purpose', '')).lower() desc = str(item.get('bm_desc', '')).lower() + symbol = item.get('bm_symbol') - if 'dividend' in purpose or 'dividend' in desc: - symbol = item.get('bm_symbol') + # We want to check for dividend announcements even if the main purpose says "Financial Results" + # But we only proceed if we find a dividend mention in the purpose, desc, OR if there's a matching corporate announcement + # We MUST correlate the dates to prevent flagging every board meeting for this company! + has_dividend_mention = 'dividend' in purpose or 'dividend' in desc + + try: + bm_date_obj_check = datetime.strptime(item.get('bm_date', ''), "%d-%b-%Y").date() + except ValueError: + bm_date_obj_check = None + + if not has_dividend_mention and symbol and symbol in symbol_announcements and bm_date_obj_check: + for ann in symbol_announcements[symbol]: + if 'dividend' in str(ann.get('subject', '')).lower(): + ann_date_str = ann.get('an_dt', '') + try: + ann_date_obj = datetime.strptime(ann_date_str.split(' ')[0], "%d-%b-%Y").date() + if abs((ann_date_obj - bm_date_obj_check).days) <= 5: + has_dividend_mention = True + break + except ValueError: + pass + + is_agm = 'annual general meeting' in purpose or 'agm' in purpose + + if has_dividend_mention or is_agm: found_amount = None found_record_date = None - found_type = 'Final' + found_type = 'Final' if 'interim' not in purpose and 'special' not in purpose else ('Interim' if 'interim' in purpose else 'Special') + + if is_agm: + found_type = 'AGM' + item['bm_purpose'] = 'Annual General Meeting' # First try mapping to CA data for dates if symbol and symbol in symbol_ca_map: @@ -687,17 +715,25 @@ def get_board_meetings(self, trade_date: date) -> pd.DataFrame: # Extract Amount if found_amount is None: - _clean_text = re.sub(r'(?:face value|fv|paid-up capital|paid up capital|equity shares? of|shares? of)\s*(?:of\s*)?(?:rs\.?|re\.?|rupees?|inr|[-/]|\s|\u20b9)*\d+(?:\.\d+)?(?:/-)?(?:\s*each)?', '', attchmntText, flags=re.IGNORECASE) - - if 'including' in _clean_text.lower() or 'includes' in _clean_text.lower(): - match = re.search(r'(?:rs\.?|re\.?|rupees?|inr|\u20b9)\s*(\d+(?:\.\d+)?)', _clean_text, re.IGNORECASE) - if match: - found_amount = float(match.group(1)) - else: - div_pattern = re.compile(r'(?:rs\.?|re\.?|rupees?|inr|\u20b9)\s*(\d+(?:\.\d+)?)', re.IGNORECASE) - matches = div_pattern.findall(_clean_text) - if matches: - found_amount = sum(float(m) for m in matches) + # Check XBRL format first (e.g. Rs 0.50 per share) + xbrl_matches = re.findall(r'<[^>]*Dividend[^>]*>.*?Rs\.?\s*(\d+(?:\.\d+)?).*?]*>', attchmntText, re.IGNORECASE) + if not xbrl_matches: + xbrl_matches = re.findall(r'<[^>]*Dividend[^>]*>.*?(\d+(?:\.\d+)?).*?]*>', attchmntText, re.IGNORECASE) + if xbrl_matches: + found_amount = sum(float(m) for m in xbrl_matches) + + if found_amount is None: + _clean_text = re.sub(r'(?:face value|fv|paid-up capital|paid up capital|equity shares? of|shares? of)\s*(?:of\s*)?(?:rs\.?|re\.?|rupees?|inr|[-/]|\s|\u20b9)*\d+(?:\.\d+)?(?:/-)?(?:\s*each)?', '', attchmntText, flags=re.IGNORECASE) + + if 'including' in _clean_text.lower() or 'includes' in _clean_text.lower(): + match = re.search(r'(?:rs\.?|re\.?|rupees?|inr|\u20b9)\s*(\d+(?:\.\d+)?)', _clean_text, re.IGNORECASE) + if match: + found_amount = float(match.group(1)) + else: + div_pattern = re.compile(r'(?:rs\.?|re\.?|rupees?|inr|\u20b9)\s*(\d+(?:\.\d+)?)', re.IGNORECASE) + matches = div_pattern.findall(_clean_text) + if matches: + found_amount = sum(float(m) for m in matches) # Extract Record Date if found_record_date is None: diff --git a/backend/tests/test_ui_verify.py b/backend/tests/test_ui_verify.py new file mode 100644 index 00000000..0aa0a2d7 --- /dev/null +++ b/backend/tests/test_ui_verify.py @@ -0,0 +1,9 @@ +from playwright.sync_api import sync_playwright + +def run_cuj(page): + # For a purely backend-rendered UI or locally hosted app, we'll try to load the page. + # Given the project uses FastAPI, we first need to make sure the app is running. + pass + +if __name__ == "__main__": + pass diff --git a/backend/ui/templates/workbench.html b/backend/ui/templates/workbench.html index 2a60f899..70296c55 100644 --- a/backend/ui/templates/workbench.html +++ b/backend/ui/templates/workbench.html @@ -3229,7 +3229,10 @@

API Key Management (Secure Session)

const meetings = meetingsBySymbol[sym]; meetings.forEach(m => { const purpose = (m.purpose || '').toLowerCase(); - if (purpose.includes('dividend') || purpose.includes('bonus') || purpose.includes('split') || purpose.includes('sub-division')) { + const hasAmount = m.extracted_dividend_amount != null && m.extracted_dividend_amount !== ''; + const isAGM = purpose.includes('agm') || purpose.includes('annual general meeting'); + + if (purpose.includes('dividend') || purpose.includes('bonus') || purpose.includes('split') || purpose.includes('sub-division') || hasAmount || isAGM) { // Check if there is a corporate action after this meeting date const mDate = m.meeting_date ? new Date(m.meeting_date) : null; let hasLinkedAction = false; @@ -3294,7 +3297,7 @@

API Key Management (Secure Session)

let amount = parseFloat(m.extracted_dividend_amount) || null; if (amount == null && m.parsed_dividend_amount) amount = m.parsed_dividend_amount; let divType = '-'; - if (purpose.includes('dividend')) { + if (purpose.includes('dividend') || hasAmount) { divType = 'Dividend'; if (purpose.includes('interim')) divType = 'Interim'; if (purpose.includes('special')) divType = 'Special'; @@ -3346,24 +3349,60 @@

API Key Management (Secure Session)

Object.values(symGroups).forEach(group => { let processedOfficials = new Set(); - // Try to link each intimation (synthetic) to its final corporate action (official) + // Deduplicate synthetics (multiple board meetings for the same event) + let deduplicatedSynthetics = []; + group.synthetics.sort((a, b) => new Date(b.broadcast_date || b.date) - new Date(a.broadcast_date || a.date)); // Sort newest first + group.synthetics.forEach(syn => { + let isDuplicate = false; + const synMeetingDate = syn._matchedMeeting && syn._matchedMeeting.meeting_date ? new Date(syn._matchedMeeting.meeting_date).getTime() : NaN; + + for (let existing of deduplicatedSynthetics) { + const existingMeetingDate = existing._matchedMeeting && existing._matchedMeeting.meeting_date ? new Date(existing._matchedMeeting.meeting_date).getTime() : NaN; + // If meetings are within 5 days and have the same purpose/type, treat as duplicate + if (!isNaN(synMeetingDate) && !isNaN(existingMeetingDate)) { + const diffDays = Math.abs(synMeetingDate - existingMeetingDate) / (1000 * 60 * 60 * 24); + if (diffDays <= 5 && syn.dividend_type === existing.dividend_type) { + isDuplicate = true; + // Update amount if the newer duplicate has it + if ((!existing.parsed_dividend_amount || existing.parsed_dividend_amount === '-') && syn.parsed_dividend_amount) { + existing.parsed_dividend_amount = syn.parsed_dividend_amount; + } + break; + } + } + } + + if (!isDuplicate) { + deduplicatedSynthetics.push(syn); + } + }); + + // Try to link each intimation (synthetic) to its final corporate action (official) + deduplicatedSynthetics.forEach(syn => { let matched = false; const synDate = new Date(syn.broadcast_date || syn.date).getTime(); - for (let off of group.officials) { - if (processedOfficials.has(off)) continue; + // Sort officials by closest date first to prevent linking to a distant corporate action + let availableOfficials = group.officials.filter(o => !processedOfficials.has(o)); + availableOfficials.sort((a, b) => { + const aDate = new Date(a.ex_date || a.record_date || a.broadcast_date || a.date).getTime(); + const bDate = new Date(b.ex_date || b.record_date || b.broadcast_date || b.date).getTime(); + return Math.abs(aDate - synDate) - Math.abs(bDate - synDate); + }); + for (let off of availableOfficials) { const offDate = new Date(off.ex_date || off.record_date || off.broadcast_date || off.date).getTime(); if (!isNaN(synDate) && !isNaN(offDate)) { const diffDays = (offDate - synDate) / (1000 * 60 * 60 * 24); - // If the corporate action happens 0 to 90 days after the board meeting, it's the same cycle! - // We also check that the dividend types match (e.g. Interim goes to Interim). + // If the corporate action happens -10 to 90 days after the board meeting, it's the same cycle! + // We strictly check that the dividend types match. if (diffDays >= -10 && diffDays <= 90 && (syn.dividend_type === off.dividend_type || syn.dividend_type === '-')) { - // Link them! Merge the intimation's broadcast date into the official corporate action. - off.broadcast_date = syn.broadcast_date || off.broadcast_date; + // Link them! + // Do not overwrite the official broadcast_date with the intimation's date, + // as we want to preserve the actual corporate action announcement time. if (!off.parsed_dividend_amount || off.parsed_dividend_amount === "-") { off.parsed_dividend_amount = syn.parsed_dividend_amount; } @@ -3494,6 +3533,25 @@

API Key Management (Secure Session)

tbodyActions.innerHTML = 'No data found matching criteria.'; } + const formatDateTime = (dateStr) => { + if (!dateStr || dateStr === '-') return '-'; + try { + let dt = new Date(dateStr); + if (isNaN(dt.getTime())) return dateStr; // fallback if invalid + let y = dt.getFullYear(), m = ('0' + (dt.getMonth() + 1)).slice(-2), dy = ('0' + dt.getDate()).slice(-2); + let datePart = `${y}-${m}-${dy}`; + + // Try to extract time from original string + let timePart = dateStr.includes('T') ? dateStr.split('T')[1].split('+')[0].split('.')[0] : (dateStr.includes(' ') ? dateStr.split(' ')[1] : null); + if (timePart && timePart !== '00:00:00') { + return `${datePart} ${timePart}`; + } + return datePart; + } catch(e) { + return dateStr; + } + }; + filteredActions.forEach(d => { const tr = document.createElement('tr'); @@ -3508,18 +3566,9 @@

API Key Management (Secure Session)

if (d.is_synthetic && d._matchedMeeting) { let matchedMeeting = d._matchedMeeting; - bmd = matchedMeeting.meeting_date || '-'; + bmd = formatDateTime(matchedMeeting.meeting_date); bmPurpose = matchedMeeting.purpose || '-'; - - let bcdRaw = matchedMeeting.broadcast_date || matchedMeeting.date; - if (bcdRaw) { - try { - let dt = new Date(bcdRaw); - if (!isNaN(dt.getTime())) { - bcd = dt.toISOString().split('T')[0]; - } - } catch(e) {} - } + bcd = formatDateTime(matchedMeeting.broadcast_date || matchedMeeting.date); } else if (matchingMeetings.length > 0) { let exDateObj = d.ex_date ? new Date(d.ex_date) : null; @@ -3554,29 +3603,18 @@

API Key Management (Secure Session)

} if (matchedMeeting) { - bmd = matchedMeeting.meeting_date || '-'; + bmd = formatDateTime(matchedMeeting.meeting_date); bmPurpose = matchedMeeting.purpose || '-'; - - let bcdRaw = matchedMeeting.broadcast_date || matchedMeeting.date; - if (bcdRaw) { - try { - let dt = new Date(bcdRaw); - if (!isNaN(dt.getTime())) { - bcd = dt.toISOString().split('T')[0]; - } - } catch(e) {} - } } } - // final fallback if no meeting was matched - if (bcd === '-' && d.broadcast_date) { - try { - let dt = new Date(d.broadcast_date); - if (!isNaN(dt.getTime())) { - bcd = dt.toISOString().split('T')[0]; - } - } catch(e) {} + // Always prioritize the official action's broadcast date if it has one, + // otherwise fallback to the intimation's broadcast date. + let officialBcd = formatDateTime(d.broadcast_date); + if (officialBcd !== '-') { + bcd = officialBcd; + } else if (bcd === '-') { + bcd = formatDateTime(d.date); } let fullPurpose = d.subject || d.purpose || '-'; diff --git a/backend/web/api/data/special_sit_routes.py b/backend/web/api/data/special_sit_routes.py index 91ff0aab..675485a0 100644 --- a/backend/web/api/data/special_sit_routes.py +++ b/backend/web/api/data/special_sit_routes.py @@ -181,16 +181,28 @@ def get_special_sit_dividends(db: Session = Depends(get_db)): for h in history: # We match to a BM just to get its intimation date (broadcast_date), nothing else. We don't delete anything. if h.get('dividend_type') not in ['Bonus', 'Split', 'Demerger']: - for bm in bms: - if bm.extracted_dividend_type == h['dividend_type'] or not bm.extracted_dividend_type: - ca_date = h['ex_date_obj'] or h.get('announcement_date_obj') - if ca_date and bm.date: - diff = (ca_date - bm.date).days - if -10 <= diff <= 90: - h['broadcast_date'] = bm.broadcast_date or h.get('broadcast_date') - h['announcement_date_obj'] = bm.date - bms.remove(bm) # Consume the BM so it doesn't duplicate - break + # Sort board meetings by proximity to the corporate action to find the best match + ca_date = h['ex_date_obj'] or h.get('announcement_date_obj') + if ca_date: + best_bm = None + min_diff = float('inf') + for bm in bms: + if bm.extracted_dividend_type == h['dividend_type'] or not bm.extracted_dividend_type: + if bm.date: + diff = (ca_date - bm.date).days + # Accept if CA happens -10 to 90 days after BM + if -10 <= diff <= 90 and abs(diff) < min_diff: + min_diff = abs(diff) + best_bm = bm + if best_bm: + h['broadcast_date'] = best_bm.broadcast_date or h.get('broadcast_date') + h['announcement_date_obj'] = best_bm.date + # If the CA is missing an amount but the BM has it, backfill it + if not h.get('amount') and best_bm.extracted_dividend_amount: + h['amount'] = best_bm.extracted_dividend_amount + h['raw_amount'] = best_bm.extracted_dividend_amount + bms.remove(best_bm) # Consume the BM so it doesn't duplicate + chained_history.append(h) # Append remaining BMs that haven't dropped an official CA yet (Upcoming Dividends/Intimations)