diff --git a/docs/api/NewsService.openapi.json b/docs/api/NewsService.openapi.json index 962295777d..b7c1a5c334 100644 --- a/docs/api/NewsService.openapi.json +++ b/docs/api/NewsService.openapi.json @@ -1 +1 @@ -{"components":{"schemas":{"CategoriesEntry":{"properties":{"key":{"type":"string"},"value":{"$ref":"#/components/schemas/CategoryBucket"}},"type":"object"},"CategoryBucket":{"properties":{"items":{"items":{"$ref":"#/components/schemas/NewsItem"},"type":"array"}},"type":"object"},"Error":{"description":"Error is returned when a handler encounters an error. It contains a simple error message that the developer can customize.","properties":{"message":{"description":"Error message (e.g., 'user not found', 'database connection failed')","type":"string"}},"type":"object"},"FeedStatusesEntry":{"properties":{"key":{"type":"string"},"value":{"type":"string"}},"type":"object"},"FieldViolation":{"description":"FieldViolation describes a single validation error for a specific field.","properties":{"description":{"description":"Human-readable description of the validation violation (e.g., 'must be a valid email address', 'required field missing')","type":"string"},"field":{"description":"The field path that failed validation (e.g., 'user.email' for nested fields). For header validation, this will be the header name (e.g., 'X-API-Key')","type":"string"}},"required":["field","description"],"type":"object"},"GeoCoordinates":{"description":"GeoCoordinates represents a geographic location using WGS84 coordinates.","properties":{"latitude":{"description":"Latitude in decimal degrees (-90 to 90).","format":"double","maximum":90,"minimum":-90,"type":"number"},"longitude":{"description":"Longitude in decimal degrees (-180 to 180).","format":"double","maximum":180,"minimum":-180,"type":"number"}},"type":"object"},"GetSummarizeArticleCacheRequest":{"description":"GetSummarizeArticleCacheRequest looks up a pre-computed summary by cache key.","properties":{"cacheKey":{"description":"Deterministic cache key computed by buildSummaryCacheKey().","type":"string"}},"type":"object"},"ListFeedDigestRequest":{"properties":{"lang":{"description":"ISO 639-1 language code (en, fr, ar, etc.)","type":"string"},"variant":{"description":"Site variant: full, tech, finance, happy","type":"string"}},"type":"object"},"ListFeedDigestResponse":{"properties":{"categories":{"additionalProperties":{"$ref":"#/components/schemas/CategoryBucket"},"description":"Per-category buckets — keys match category names from feed config","type":"object"},"feedStatuses":{"additionalProperties":{"type":"string"},"description":"Per-feed status — only non-ok states emitted; absent key implies ok.\n Values: empty (feed returned 0 items), timeout (timed out during fetch).","type":"object"},"generatedAt":{"description":"ISO 8601 timestamp of when this digest was generated","type":"string"}},"type":"object"},"NewsItem":{"description":"NewsItem represents a single news article from RSS feed aggregation.","properties":{"corroborationCount":{"description":"Number of distinct sources that reported the same story in this digest cycle.","format":"int32","type":"integer"},"importanceScore":{"description":"Composite importance score (0-100): severity × 40% + source tier × 20% + corroboration × 30% + recency × 10%.\n Absent (0) when not yet scored.","format":"int32","type":"integer"},"isAlert":{"description":"Whether this article triggered an alert condition.","type":"boolean"},"link":{"description":"Article URL.","type":"string"},"location":{"$ref":"#/components/schemas/GeoCoordinates"},"locationName":{"description":"Human-readable location name.","type":"string"},"publishedAt":{"description":"Publication time, as Unix epoch milliseconds.. Warning: Values \u003e 2^53 may lose precision in JavaScript","format":"int64","type":"integer"},"source":{"description":"Source feed name.","minLength":1,"type":"string"},"storyPhase":{"description":"StoryPhase represents the lifecycle stage of a tracked news story.","enum":["STORY_PHASE_UNSPECIFIED","STORY_PHASE_BREAKING","STORY_PHASE_DEVELOPING","STORY_PHASE_SUSTAINED","STORY_PHASE_FADING"],"type":"string"},"threat":{"$ref":"#/components/schemas/ThreatClassification"},"title":{"description":"Article headline.","minLength":1,"type":"string"}},"required":["source","title"],"type":"object"},"SummarizeArticleRequest":{"description":"SummarizeArticleRequest specifies parameters for LLM article summarization.","properties":{"geoContext":{"description":"Geographic signal context to include in the prompt.","type":"string"},"headlines":{"items":{"description":"Headlines to summarize (max 8 used).","minItems":1,"type":"string"},"minItems":1,"type":"array"},"lang":{"description":"Output language code, default \"en\".","type":"string"},"mode":{"description":"Summarization mode: \"brief\", \"analysis\", \"translate\", \"\" (default).","type":"string"},"provider":{"description":"LLM provider: \"ollama\", \"groq\", \"openrouter\"","minLength":1,"type":"string"},"systemAppend":{"description":"Optional system prompt append for analytical framework instructions.","type":"string"},"variant":{"description":"Variant: \"full\", \"tech\", or target language for translate mode.","type":"string"}},"required":["provider"],"type":"object"},"SummarizeArticleResponse":{"description":"SummarizeArticleResponse contains the LLM summarization result.","properties":{"error":{"description":"Error message if the request failed.","type":"string"},"errorType":{"description":"Error type/name (e.g. \"TypeError\").","type":"string"},"fallback":{"description":"Whether the client should try the next provider in the fallback chain.","type":"boolean"},"model":{"description":"Model identifier used for generation.","type":"string"},"provider":{"description":"Provider that produced the result (or \"cache\").","type":"string"},"status":{"description":"SummarizeStatus indicates the outcome of a summarization request.","enum":["SUMMARIZE_STATUS_UNSPECIFIED","SUMMARIZE_STATUS_SUCCESS","SUMMARIZE_STATUS_CACHED","SUMMARIZE_STATUS_SKIPPED","SUMMARIZE_STATUS_ERROR"],"type":"string"},"statusDetail":{"description":"Human-readable detail for non-success statuses (skip reason, etc.).","type":"string"},"summary":{"description":"The generated summary text.","type":"string"},"tokens":{"description":"Token count from the LLM response.","format":"int32","type":"integer"}},"type":"object"},"ThreatClassification":{"description":"ThreatClassification represents an AI-assessed threat level for a news item.","properties":{"category":{"description":"Event category.","type":"string"},"confidence":{"description":"Confidence score (0.0 to 1.0).","format":"double","maximum":1,"minimum":0,"type":"number"},"level":{"description":"ThreatLevel represents the assessed threat level of a news event.","enum":["THREAT_LEVEL_UNSPECIFIED","THREAT_LEVEL_LOW","THREAT_LEVEL_MEDIUM","THREAT_LEVEL_HIGH","THREAT_LEVEL_CRITICAL"],"type":"string"},"source":{"description":"Classification source — \"keyword\", \"ml\", or \"llm\".","type":"string"}},"type":"object"},"ValidationError":{"description":"ValidationError is returned when request validation fails. It contains a list of field violations describing what went wrong.","properties":{"violations":{"description":"List of validation violations","items":{"$ref":"#/components/schemas/FieldViolation"},"type":"array"}},"required":["violations"],"type":"object"}}},"info":{"title":"NewsService API","version":"1.0.0"},"openapi":"3.1.0","paths":{"/api/news/v1/list-feed-digest":{"get":{"description":"ListFeedDigest returns a pre-aggregated digest of all RSS feeds for a site variant.","operationId":"ListFeedDigest","parameters":[{"description":"Site variant: full, tech, finance, happy","in":"query","name":"variant","required":false,"schema":{"type":"string"}},{"description":"ISO 639-1 language code (en, fr, ar, etc.)","in":"query","name":"lang","required":false,"schema":{"type":"string"}}],"responses":{"200":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListFeedDigestResponse"}}},"description":"Successful response"},"400":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/ValidationError"}}},"description":"Validation error"},"default":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/Error"}}},"description":"Error response"}},"summary":"ListFeedDigest","tags":["NewsService"]}},"/api/news/v1/summarize-article":{"post":{"description":"SummarizeArticle generates an LLM summary with provider selection and fallback support.","operationId":"SummarizeArticle","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/SummarizeArticleRequest"}}},"required":true},"responses":{"200":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/SummarizeArticleResponse"}}},"description":"Successful response"},"400":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/ValidationError"}}},"description":"Validation error"},"default":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/Error"}}},"description":"Error response"}},"summary":"SummarizeArticle","tags":["NewsService"]}},"/api/news/v1/summarize-article-cache":{"get":{"description":"GetSummarizeArticleCache looks up a cached summary by deterministic key (CDN-cacheable GET).","operationId":"GetSummarizeArticleCache","parameters":[{"description":"Deterministic cache key computed by buildSummaryCacheKey().","in":"query","name":"cache_key","required":false,"schema":{"type":"string"}}],"responses":{"200":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/SummarizeArticleResponse"}}},"description":"Successful response"},"400":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/ValidationError"}}},"description":"Validation error"},"default":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/Error"}}},"description":"Error response"}},"summary":"GetSummarizeArticleCache","tags":["NewsService"]}}}} \ No newline at end of file +{"components":{"schemas":{"CategoriesEntry":{"properties":{"key":{"type":"string"},"value":{"$ref":"#/components/schemas/CategoryBucket"}},"type":"object"},"CategoryBucket":{"properties":{"items":{"items":{"$ref":"#/components/schemas/NewsItem"},"type":"array"}},"type":"object"},"Error":{"description":"Error is returned when a handler encounters an error. It contains a simple error message that the developer can customize.","properties":{"message":{"description":"Error message (e.g., 'user not found', 'database connection failed')","type":"string"}},"type":"object"},"FeedStatusesEntry":{"properties":{"key":{"type":"string"},"value":{"type":"string"}},"type":"object"},"FieldViolation":{"description":"FieldViolation describes a single validation error for a specific field.","properties":{"description":{"description":"Human-readable description of the validation violation (e.g., 'must be a valid email address', 'required field missing')","type":"string"},"field":{"description":"The field path that failed validation (e.g., 'user.email' for nested fields). For header validation, this will be the header name (e.g., 'X-API-Key')","type":"string"}},"required":["field","description"],"type":"object"},"GeoCoordinates":{"description":"GeoCoordinates represents a geographic location using WGS84 coordinates.","properties":{"latitude":{"description":"Latitude in decimal degrees (-90 to 90).","format":"double","maximum":90,"minimum":-90,"type":"number"},"longitude":{"description":"Longitude in decimal degrees (-180 to 180).","format":"double","maximum":180,"minimum":-180,"type":"number"}},"type":"object"},"GetSummarizeArticleCacheRequest":{"description":"GetSummarizeArticleCacheRequest looks up a pre-computed summary by cache key.","properties":{"cacheKey":{"description":"Deterministic cache key computed by buildSummaryCacheKey().","type":"string"}},"type":"object"},"ListFeedDigestRequest":{"properties":{"lang":{"description":"ISO 639-1 language code (en, fr, ar, etc.)","type":"string"},"variant":{"description":"Site variant: full, tech, finance, happy","type":"string"}},"type":"object"},"ListFeedDigestResponse":{"properties":{"categories":{"additionalProperties":{"$ref":"#/components/schemas/CategoryBucket"},"description":"Per-category buckets — keys match category names from feed config","type":"object"},"feedStatuses":{"additionalProperties":{"type":"string"},"description":"Per-feed status — only non-ok states emitted; absent key implies ok.\n Values: empty (feed returned 0 items), timeout (timed out during fetch).","type":"object"},"generatedAt":{"description":"ISO 8601 timestamp of when this digest was generated","type":"string"}},"type":"object"},"NewsItem":{"description":"NewsItem represents a single news article from RSS feed aggregation.","properties":{"corroborationCount":{"description":"Number of distinct sources that reported the same story in this digest cycle.","format":"int32","type":"integer"},"importanceScore":{"description":"Composite importance score (0-100): severity × 40% + source tier × 20% + corroboration × 30% + recency × 10%.","format":"int32","type":"integer"},"isAlert":{"description":"Whether this article triggered an alert condition.","type":"boolean"},"link":{"description":"Article URL.","type":"string"},"location":{"$ref":"#/components/schemas/GeoCoordinates"},"locationName":{"description":"Human-readable location name.","type":"string"},"publishedAt":{"description":"Publication time, as Unix epoch milliseconds.. Warning: Values \u003e 2^53 may lose precision in JavaScript","format":"int64","type":"integer"},"source":{"description":"Source feed name.","minLength":1,"type":"string"},"storyMeta":{"$ref":"#/components/schemas/StoryMeta"},"threat":{"$ref":"#/components/schemas/ThreatClassification"},"title":{"description":"Article headline.","minLength":1,"type":"string"}},"required":["source","title"],"type":"object"},"StoryMeta":{"description":"StoryMeta carries cross-cycle persistence data attached to each news item.","properties":{"firstSeen":{"description":"Epoch ms when the story first appeared in any digest cycle.. Warning: Values \u003e 2^53 may lose precision in JavaScript","format":"int64","type":"integer"},"mentionCount":{"description":"Total number of digest cycles in which this story appeared.","format":"int32","type":"integer"},"phase":{"description":"StoryPhase represents the lifecycle stage of a tracked news story.","enum":["STORY_PHASE_UNSPECIFIED","STORY_PHASE_BREAKING","STORY_PHASE_DEVELOPING","STORY_PHASE_SUSTAINED","STORY_PHASE_FADING"],"type":"string"},"sourceCount":{"description":"Number of unique sources that reported this story (cached from Redis Set).","format":"int32","type":"integer"}},"type":"object"},"SummarizeArticleRequest":{"description":"SummarizeArticleRequest specifies parameters for LLM article summarization.","properties":{"geoContext":{"description":"Geographic signal context to include in the prompt.","type":"string"},"headlines":{"items":{"description":"Headlines to summarize (max 8 used).","minItems":1,"type":"string"},"minItems":1,"type":"array"},"lang":{"description":"Output language code, default \"en\".","type":"string"},"mode":{"description":"Summarization mode: \"brief\", \"analysis\", \"translate\", \"\" (default).","type":"string"},"provider":{"description":"LLM provider: \"ollama\", \"groq\", \"openrouter\"","minLength":1,"type":"string"},"systemAppend":{"description":"Optional system prompt append for analytical framework instructions.","type":"string"},"variant":{"description":"Variant: \"full\", \"tech\", or target language for translate mode.","type":"string"}},"required":["provider"],"type":"object"},"SummarizeArticleResponse":{"description":"SummarizeArticleResponse contains the LLM summarization result.","properties":{"error":{"description":"Error message if the request failed.","type":"string"},"errorType":{"description":"Error type/name (e.g. \"TypeError\").","type":"string"},"fallback":{"description":"Whether the client should try the next provider in the fallback chain.","type":"boolean"},"model":{"description":"Model identifier used for generation.","type":"string"},"provider":{"description":"Provider that produced the result (or \"cache\").","type":"string"},"status":{"description":"SummarizeStatus indicates the outcome of a summarization request.","enum":["SUMMARIZE_STATUS_UNSPECIFIED","SUMMARIZE_STATUS_SUCCESS","SUMMARIZE_STATUS_CACHED","SUMMARIZE_STATUS_SKIPPED","SUMMARIZE_STATUS_ERROR"],"type":"string"},"statusDetail":{"description":"Human-readable detail for non-success statuses (skip reason, etc.).","type":"string"},"summary":{"description":"The generated summary text.","type":"string"},"tokens":{"description":"Token count from the LLM response.","format":"int32","type":"integer"}},"type":"object"},"ThreatClassification":{"description":"ThreatClassification represents an AI-assessed threat level for a news item.","properties":{"category":{"description":"Event category.","type":"string"},"confidence":{"description":"Confidence score (0.0 to 1.0).","format":"double","maximum":1,"minimum":0,"type":"number"},"level":{"description":"ThreatLevel represents the assessed threat level of a news event.","enum":["THREAT_LEVEL_UNSPECIFIED","THREAT_LEVEL_LOW","THREAT_LEVEL_MEDIUM","THREAT_LEVEL_HIGH","THREAT_LEVEL_CRITICAL"],"type":"string"},"source":{"description":"Classification source — \"keyword\", \"ml\", or \"llm\".","type":"string"}},"type":"object"},"ValidationError":{"description":"ValidationError is returned when request validation fails. It contains a list of field violations describing what went wrong.","properties":{"violations":{"description":"List of validation violations","items":{"$ref":"#/components/schemas/FieldViolation"},"type":"array"}},"required":["violations"],"type":"object"}}},"info":{"title":"NewsService API","version":"1.0.0"},"openapi":"3.1.0","paths":{"/api/news/v1/list-feed-digest":{"get":{"description":"ListFeedDigest returns a pre-aggregated digest of all RSS feeds for a site variant.","operationId":"ListFeedDigest","parameters":[{"description":"Site variant: full, tech, finance, happy","in":"query","name":"variant","required":false,"schema":{"type":"string"}},{"description":"ISO 639-1 language code (en, fr, ar, etc.)","in":"query","name":"lang","required":false,"schema":{"type":"string"}}],"responses":{"200":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListFeedDigestResponse"}}},"description":"Successful response"},"400":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/ValidationError"}}},"description":"Validation error"},"default":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/Error"}}},"description":"Error response"}},"summary":"ListFeedDigest","tags":["NewsService"]}},"/api/news/v1/summarize-article":{"post":{"description":"SummarizeArticle generates an LLM summary with provider selection and fallback support.","operationId":"SummarizeArticle","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/SummarizeArticleRequest"}}},"required":true},"responses":{"200":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/SummarizeArticleResponse"}}},"description":"Successful response"},"400":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/ValidationError"}}},"description":"Validation error"},"default":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/Error"}}},"description":"Error response"}},"summary":"SummarizeArticle","tags":["NewsService"]}},"/api/news/v1/summarize-article-cache":{"get":{"description":"GetSummarizeArticleCache looks up a cached summary by deterministic key (CDN-cacheable GET).","operationId":"GetSummarizeArticleCache","parameters":[{"description":"Deterministic cache key computed by buildSummaryCacheKey().","in":"query","name":"cache_key","required":false,"schema":{"type":"string"}}],"responses":{"200":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/SummarizeArticleResponse"}}},"description":"Successful response"},"400":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/ValidationError"}}},"description":"Validation error"},"default":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/Error"}}},"description":"Error response"}},"summary":"GetSummarizeArticleCache","tags":["NewsService"]}}}} \ No newline at end of file diff --git a/docs/api/NewsService.openapi.yaml b/docs/api/NewsService.openapi.yaml index bfc943c694..81afc1b1b1 100644 --- a/docs/api/NewsService.openapi.yaml +++ b/docs/api/NewsService.openapi.yaml @@ -296,22 +296,13 @@ components: importanceScore: type: integer format: int32 - description: |- - Composite importance score (0-100): severity × 40% + source tier × 20% + corroboration × 30% + recency × 10%. - Absent (0) when not yet scored. + description: 'Composite importance score (0-100): severity × 40% + source tier × 20% + corroboration × 30% + recency × 10%.' corroborationCount: type: integer format: int32 description: Number of distinct sources that reported the same story in this digest cycle. - storyPhase: - type: string - enum: - - STORY_PHASE_UNSPECIFIED - - STORY_PHASE_BREAKING - - STORY_PHASE_DEVELOPING - - STORY_PHASE_SUSTAINED - - STORY_PHASE_FADING - description: StoryPhase represents the lifecycle stage of a tracked news story. + storyMeta: + $ref: '#/components/schemas/StoryMeta' required: - source - title @@ -357,3 +348,28 @@ components: format: double description: Longitude in decimal degrees (-180 to 180). description: GeoCoordinates represents a geographic location using WGS84 coordinates. + StoryMeta: + type: object + properties: + firstSeen: + type: integer + format: int64 + description: 'Epoch ms when the story first appeared in any digest cycle.. Warning: Values > 2^53 may lose precision in JavaScript' + mentionCount: + type: integer + format: int32 + description: Total number of digest cycles in which this story appeared. + sourceCount: + type: integer + format: int32 + description: Number of unique sources that reported this story (cached from Redis Set). + phase: + type: string + enum: + - STORY_PHASE_UNSPECIFIED + - STORY_PHASE_BREAKING + - STORY_PHASE_DEVELOPING + - STORY_PHASE_SUSTAINED + - STORY_PHASE_FADING + description: StoryPhase represents the lifecycle stage of a tracked news story. + description: StoryMeta carries cross-cycle persistence data attached to each news item. diff --git a/proto/worldmonitor/news/v1/news_item.proto b/proto/worldmonitor/news/v1/news_item.proto index 15e9063bf2..3b70eb1a5d 100644 --- a/proto/worldmonitor/news/v1/news_item.proto +++ b/proto/worldmonitor/news/v1/news_item.proto @@ -31,12 +31,11 @@ message NewsItem { // Human-readable location name. string location_name = 8; // Composite importance score (0-100): severity × 40% + source tier × 20% + corroboration × 30% + recency × 10%. - // Absent (0) when not yet scored. int32 importance_score = 9; // Number of distinct sources that reported the same story in this digest cycle. int32 corroboration_count = 10; - // Story lifecycle phase derived at read time from story:track metadata. - StoryPhase story_phase = 11; + // Story lifecycle metadata derived from cross-cycle persistence data. + StoryMeta story_meta = 11; } // StoryMeta carries cross-cycle persistence data attached to each news item. @@ -47,7 +46,7 @@ message StoryMeta { int32 mention_count = 2; // Number of unique sources that reported this story (cached from Redis Set). int32 source_count = 3; - // Story lifecycle phase. + // Story lifecycle phase derived from persistence data. StoryPhase phase = 4; } diff --git a/server/_shared/cache-keys.ts b/server/_shared/cache-keys.ts index 2fe6535f43..433f3e1133 100644 --- a/server/_shared/cache-keys.ts +++ b/server/_shared/cache-keys.ts @@ -1,3 +1,15 @@ +// ── Story persistence tracking keys (E3) ───────────────────────────────────── +// Hash: firstSeen, lastSeen, mentionCount, sourceCount, currentScore, peakScore, title, link, severity +export const STORY_TRACK_KEY_PREFIX = 'story:track:v1:'; +// Set: unique feed names that have mentioned this story +export const STORY_SOURCES_KEY_PREFIX = 'story:sources:v1:'; +// Sorted set: single member "peak" with score = highest importanceScore seen +export const STORY_PEAK_KEY_PREFIX = 'story:peak:v1:'; +// Sorted set: accumulator for digest mode notifications (score = pubDate epoch ms) +export const DIGEST_ACCUMULATOR_KEY_PREFIX = 'digest:accumulator:v1:'; +// TTL for all story tracking keys (48 hours) +export const STORY_TRACKING_TTL_S = 172800; + /** * Story tracking keys — written by list-feed-digest.ts, read by digest cron (E2). * All keys use 32-char SHA-256 hex prefix of the normalised title as ${titleHash}. diff --git a/server/worldmonitor/news/v1/list-feed-digest.ts b/server/worldmonitor/news/v1/list-feed-digest.ts index 7ab8ebb867..ac58369a62 100644 --- a/server/worldmonitor/news/v1/list-feed-digest.ts +++ b/server/worldmonitor/news/v1/list-feed-digest.ts @@ -5,6 +5,7 @@ import type { CategoryBucket, NewsItem as ProtoNewsItem, ThreatLevel as ProtoThreatLevel, + StoryMeta as ProtoStoryMeta, StoryPhase as ProtoStoryPhase, } from '../../../../src/generated/server/worldmonitor/news/v1/service_server'; import { cachedFetchJson, getCachedJsonBatch, runRedisPipeline } from '../../../_shared/redis'; @@ -20,6 +21,7 @@ import { STORY_PEAK_KEY, DIGEST_ACCUMULATOR_KEY, STORY_TTL, + STORY_TRACK_KEY_PREFIX, } from '../../../_shared/cache-keys'; import { getRelayBaseUrl, getRelayHeaders } from '../../../_shared/relay'; @@ -63,21 +65,6 @@ const SCORE_WEIGHTS = { recency: 0.1, } as const; -/** Derive story lifecycle phase from Redis-stored tracking data. */ -function computePhase( - mentionCount: number, - firstSeenMs: number, - lastSeenMs: number, - now: number, -): ProtoStoryPhase { - const ageH = (now - firstSeenMs) / 3_600_000; - const silenceH = (now - lastSeenMs) / 3_600_000; - if (silenceH > 24) return 'STORY_PHASE_FADING'; - if (mentionCount >= 3 && ageH >= 12) return 'STORY_PHASE_SUSTAINED'; - if (mentionCount >= 2) return 'STORY_PHASE_DEVELOPING'; - if (ageH < 2) return 'STORY_PHASE_BREAKING'; - return 'STORY_PHASE_UNSPECIFIED'; -} interface ParsedItem { source: string; @@ -91,13 +78,7 @@ interface ParsedItem { classSource: 'keyword' | 'llm'; importanceScore: number; corroborationCount: number; - storyPhase: ProtoStoryPhase; -} - -function normalizeTitle(title: string): string { - // 120-char window provides high headline discrimination in practice; - // see todo #102 if hash collision accuracy becomes a concern. - return title.toLowerCase().replace(/[^\w\s]/g, '').replace(/\s+/g, ' ').trim().slice(0, 120); + titleHash?: string; } function computeImportanceScore( @@ -246,7 +227,6 @@ function parseRssXml(xml: string, feed: ServerFeed, variant: string): ParsedItem classSource: 'keyword', importanceScore: 0, corroborationCount: 1, - storyPhase: 'STORY_PHASE_UNSPECIFIED', }); } @@ -316,13 +296,78 @@ async function enrichWithAiCache(items: ParsedItem[]): Promise { } } -function toProtoItem(item: ParsedItem): ProtoNewsItem { +// ── Story persistence tracking ──────────────────────────────────────────────── + +function normalizeTitle(title: string): string { + // \p{L} = any Unicode letter; \p{N} = any Unicode number. + // The `u` flag is required for Unicode property escapes — without it \w + // matches only ASCII [A-Za-z0-9_], stripping all Arabic/CJK/Cyrillic chars + // and collapsing every non-Latin title to the same empty hash. + return title + .toLowerCase() + .replace(/[^\p{L}\p{N}\s]/gu, '') + .replace(/\s+/g, ' ') + .trim() + .slice(0, 120); +} + +interface StoryTrack { + firstSeen: number; + lastSeen: number; + mentionCount: number; + sourceCount: number; + currentScore: number; + peakScore: number; +} + +function derivePhase(track: StoryTrack): ProtoStoryPhase { + const ageMs = Date.now() - track.firstSeen; + if (track.mentionCount <= 1) return 'STORY_PHASE_BREAKING'; + if (track.mentionCount <= 5 && ageMs < 2 * 60 * 60 * 1000) return 'STORY_PHASE_DEVELOPING'; + // FADING requires real scores from E1. Until E1 ships, currentScore and + // peakScore are both 0 (HSETNX placeholders), so this branch is intentionally + // inactive — stories fall through to SUSTAINED rather than incorrectly FADING. + if (track.currentScore > 0 && track.peakScore > 0 && track.currentScore < track.peakScore * 0.5) return 'STORY_PHASE_FADING'; + return 'STORY_PHASE_SUSTAINED'; +} + +/** + * Batch-read existing story:track hashes from Redis for a list of title hashes. + * Returns a Map. Missing entries are absent from the map. + */ +async function readStoryTracks(titleHashes: string[]): Promise> { + if (titleHashes.length === 0) return new Map(); + const fields = ['firstSeen', 'lastSeen', 'mentionCount', 'sourceCount', 'currentScore', 'peakScore']; + const commands = titleHashes.map(h => [ + 'HMGET', `${STORY_TRACK_KEY_PREFIX}${h}`, ...fields, + ]); + const results = await runRedisPipeline(commands, true); + const map = new Map(); + for (let i = 0; i < titleHashes.length; i++) { + const vals = results[i]?.result as string[] | null; + if (!vals || !vals[0]) continue; // firstSeen missing → new story + map.set(titleHashes[i]!, { + firstSeen: Number(vals[0]), + lastSeen: Number(vals[1] ?? 0), + mentionCount: Number(vals[2] ?? 0), + sourceCount: Number(vals[3] ?? 0), + currentScore: Number(vals[4] ?? 0), + peakScore: Number(vals[5] ?? 0), + }); + } + return map; +} + +function toProtoItem(item: ParsedItem, storyMeta?: ProtoStoryMeta): ProtoNewsItem { return { source: item.source, title: item.title, link: item.link, publishedAt: item.publishedAt, isAlert: item.isAlert, + importanceScore: item.importanceScore, + corroborationCount: item.corroborationCount ?? 0, + storyMeta, threat: { level: LEVEL_TO_PROTO[item.level], category: item.category, @@ -330,9 +375,6 @@ function toProtoItem(item: ParsedItem): ProtoNewsItem { source: item.classSource, }, locationName: '', - importanceScore: item.importanceScore, - corroborationCount: item.corroborationCount, - storyPhase: item.storyPhase, }; } @@ -483,38 +525,34 @@ async function buildDigest(variant: string, lang: string): Promise>(); - for (const items of results.values()) { - for (const item of items) { - const norm = normalizeTitle(item.title); - const sources = corroborationMap.get(norm) ?? new Set(); - sources.add(item.source); - corroborationMap.set(norm, sources); - } + await Promise.all(allItems.map(async (item) => { + const hash = await sha256Hex(normalizeTitle(item.title)); + item.titleHash = hash; + const sources = corroborationMap.get(hash) ?? new Set(); + sources.add(item.source); + corroborationMap.set(hash, sources); + })); + + for (const item of allItems) { + item.corroborationCount = corroborationMap.get(item.titleHash!)?.size ?? 1; } // Enrich ALL items with the AI classification cache BEFORE scoring so that - // importanceScore uses the final (post-LLM) threat level, and the subsequent - // truncation discards items based on their true score. Running enrichment - // after slicing was a bug: upgraded items could have been already cut, and - // downgraded items kept a score they no longer deserved. - const allItems = [...results.values()].flat(); + // importanceScore uses the final (post-LLM) threat level, and truncation + // discards items based on their true score. await enrichWithAiCache(allItems); - // Assign corroboration count and compute importance score using final levels. - for (const items of results.values()) { - for (const item of items) { - const norm = normalizeTitle(item.title); - item.corroborationCount = corroborationMap.get(norm)?.size ?? 1; - item.importanceScore = computeImportanceScore( - item.level, - item.source, - item.corroborationCount, - item.publishedAt, - ); - } + // Compute importance score using final (post-enrichment) threat levels. + for (const item of allItems) { + item.importanceScore = computeImportanceScore( + item.level, item.source, item.corroborationCount, item.publishedAt, + ); } // Sort by importanceScore desc, then pubDate desc; then truncate per category. @@ -527,41 +565,48 @@ async function buildDigest(variant: string, lang: string): Promise i.titleHash!); - // Pre-compute title hashes once — reused for tracking write and phase read. - const titleHashes = await Promise.all( - allSliced.map(item => sha256Hex(normalizeTitle(item.title))), - ); + const now = Date.now(); - // Write tracking FIRST so phase read sees this cycle's mentionCount/firstSeen. - // Without this ordering, first-time stories never return STORY_PHASE_BREAKING - // and all stories lag by one digest cycle. Awaited here so the write completes - // before the isolate moves on (digest is cached 15 min, negligible extra latency). + // Read existing story tracking BEFORE writing so we know the previous cycle's + // mentionCount. We merge read state + this cycle's increment in memory to + // produce accurate, current StoryMeta without a second Redis round-trip. + const uniqueHashes = [...new Set(titleHashes)]; + const storyTracks = await readStoryTracks(uniqueHashes).catch(() => new Map()); + + // Write story tracking. Errors never fail the digest build. await writeStoryTracking(allSliced, variant, titleHashes).catch((err: unknown) => console.warn('[digest] story tracking write failed:', err), ); - // Batch-read story tracking hashes (HGETALL) to assign lifecycle phases. - // Reads post-write data so first-time stories correctly get STORY_PHASE_BREAKING. - const trackResults = await runRedisPipeline( - titleHashes.map(h => ['HGETALL', STORY_TRACK_KEY(h)]), - ); - const phaseNow = Date.now(); - for (let i = 0; i < allSliced.length; i++) { - const raw = trackResults[i]?.result as Record | null | undefined; - if (raw && typeof raw === 'object' && raw.firstSeen) { - allSliced[i]!.storyPhase = computePhase( - Number(raw.mentionCount ?? '1'), - Number(raw.firstSeen), - Number(raw.lastSeen ?? raw.firstSeen), - phaseNow, - ); - } - } - for (const [category, sliced] of slicedByCategory) { categories[category] = { - items: sliced.map(toProtoItem), + items: sliced.map((item) => { + const hash = item.titleHash!; + const sourceCount = corroborationMap.get(hash)?.size ?? 1; + const stale = storyTracks.get(hash); + // Merge stale state + this cycle's HINCRBY to get the current mentionCount. + // New stories (stale = undefined) start at mentionCount=1 this cycle. + const mentionCount = stale ? stale.mentionCount + 1 : 1; + const firstSeen = stale?.firstSeen ?? now; + const merged: StoryTrack = { + firstSeen, + lastSeen: now, + mentionCount, + sourceCount, + currentScore: stale?.currentScore ?? 0, + peakScore: stale?.peakScore ?? 0, + }; + const storyMeta: ProtoStoryMeta = { + firstSeen, + mentionCount, + sourceCount, + phase: derivePhase(merged), + }; + return toProtoItem(item, storyMeta); + }), }; } diff --git a/src/app/data-loader.ts b/src/app/data-loader.ts index 877791f426..b8983ea1af 100644 --- a/src/app/data-loader.ts +++ b/src/app/data-loader.ts @@ -199,6 +199,13 @@ const PROTO_TO_CLIENT_LEVEL: Record = { THREAT_LEVEL_CRITICAL: 'critical', }; +const PROTO_TO_CLIENT_PHASE: Record = { + STORY_PHASE_BREAKING: 'breaking', + STORY_PHASE_DEVELOPING: 'developing', + STORY_PHASE_SUSTAINED: 'sustained', + STORY_PHASE_FADING: 'fading', +}; + function protoItemToNewsItem(p: ProtoNewsItem): NewsItem { const level = PROTO_TO_CLIENT_LEVEL[p.threat?.level ?? 'THREAT_LEVEL_UNSPECIFIED']; return { @@ -207,6 +214,14 @@ function protoItemToNewsItem(p: ProtoNewsItem): NewsItem { link: p.link, pubDate: new Date(p.publishedAt), isAlert: p.isAlert, + importanceScore: p.importanceScore || undefined, + corroborationCount: p.corroborationCount || undefined, + storyMeta: p.storyMeta && p.storyMeta.phase !== 'STORY_PHASE_UNSPECIFIED' ? { + firstSeen: p.storyMeta.firstSeen, + mentionCount: p.storyMeta.mentionCount, + sourceCount: p.storyMeta.sourceCount, + phase: PROTO_TO_CLIENT_PHASE[p.storyMeta.phase] ?? 'breaking', + } : undefined, threat: p.threat ? { level, category: p.threat.category as import('@/services/threat-classifier').EventCategory, diff --git a/src/components/NewsPanel.ts b/src/components/NewsPanel.ts index 1070c80d70..aff4f98ce0 100644 --- a/src/components/NewsPanel.ts +++ b/src/components/NewsPanel.ts @@ -455,6 +455,9 @@ export class NewsPanel extends Panel {
${escapeHtml(item.source)} ${item.lang && item.lang !== getCurrentLanguage() ? `${item.lang.toUpperCase()}` : ''} + ${item.storyMeta?.phase === 'breaking' ? 'BREAKING' : ''} + ${item.storyMeta?.phase === 'developing' ? `DEVELOPING${item.storyMeta.mentionCount > 1 ? ` ×${item.storyMeta.mentionCount}` : ''}` : ''} + ${item.storyMeta?.phase === 'sustained' ? 'ONGOING' : ''} ${item.isAlert ? 'ALERT' : ''}
${escapeHtml(item.title)} diff --git a/src/generated/client/worldmonitor/news/v1/service_client.ts b/src/generated/client/worldmonitor/news/v1/service_client.ts index fe1dd7acb9..28866cd113 100644 --- a/src/generated/client/worldmonitor/news/v1/service_client.ts +++ b/src/generated/client/worldmonitor/news/v1/service_client.ts @@ -54,7 +54,7 @@ export interface NewsItem { locationName: string; importanceScore: number; corroborationCount: number; - storyPhase: StoryPhase; + storyMeta?: StoryMeta; } export interface ThreatClassification { @@ -69,6 +69,13 @@ export interface GeoCoordinates { longitude: number; } +export interface StoryMeta { + firstSeen: number; + mentionCount: number; + sourceCount: number; + phase: StoryPhase; +} + export type StoryPhase = "STORY_PHASE_UNSPECIFIED" | "STORY_PHASE_BREAKING" | "STORY_PHASE_DEVELOPING" | "STORY_PHASE_SUSTAINED" | "STORY_PHASE_FADING"; export type SummarizeStatus = "SUMMARIZE_STATUS_UNSPECIFIED" | "SUMMARIZE_STATUS_SUCCESS" | "SUMMARIZE_STATUS_CACHED" | "SUMMARIZE_STATUS_SKIPPED" | "SUMMARIZE_STATUS_ERROR"; diff --git a/src/generated/server/worldmonitor/news/v1/service_server.ts b/src/generated/server/worldmonitor/news/v1/service_server.ts index 2a2485a6cb..bb43887108 100644 --- a/src/generated/server/worldmonitor/news/v1/service_server.ts +++ b/src/generated/server/worldmonitor/news/v1/service_server.ts @@ -54,7 +54,7 @@ export interface NewsItem { locationName: string; importanceScore: number; corroborationCount: number; - storyPhase: StoryPhase; + storyMeta?: StoryMeta; } export interface ThreatClassification { @@ -69,6 +69,13 @@ export interface GeoCoordinates { longitude: number; } +export interface StoryMeta { + firstSeen: number; + mentionCount: number; + sourceCount: number; + phase: StoryPhase; +} + export type StoryPhase = "STORY_PHASE_UNSPECIFIED" | "STORY_PHASE_BREAKING" | "STORY_PHASE_DEVELOPING" | "STORY_PHASE_SUSTAINED" | "STORY_PHASE_FADING"; export type SummarizeStatus = "SUMMARIZE_STATUS_UNSPECIFIED" | "SUMMARIZE_STATUS_SUCCESS" | "SUMMARIZE_STATUS_CACHED" | "SUMMARIZE_STATUS_SKIPPED" | "SUMMARIZE_STATUS_ERROR"; diff --git a/src/services/breaking-news-alerts.ts b/src/services/breaking-news-alerts.ts index 211b6d6a2d..2c00d3e211 100644 --- a/src/services/breaking-news-alerts.ts +++ b/src/services/breaking-news-alerts.ts @@ -222,6 +222,11 @@ export function checkBatchForBreakingAlerts(items: NewsItem[]): void { const key = makeAlertKey(item.title, item.source, item.link); if (isDuplicate(key)) continue; + // Sustained/fading stories are already well-covered; only break/develop phases + // warrant a banner interrupt. Unspecified (no storyMeta) passes through. + const phase = item.storyMeta?.phase; + if (phase === 'sustained' || phase === 'fading') continue; + const isBetter = !best || (level === 'critical' && best.threatLevel !== 'critical') || (level === best.threatLevel && item.pubDate.getTime() > best.timestamp.getTime()); diff --git a/src/types/index.ts b/src/types/index.ts index af9896a76f..f256a8eee0 100644 --- a/src/types/index.ts +++ b/src/types/index.ts @@ -92,6 +92,14 @@ export interface ThreatClassification { export type StoryPhase = 'breaking' | 'developing' | 'sustained' | 'fading'; +export interface StoryMeta { + firstSeen: number; // epoch ms + mentionCount: number; + sourceCount: number; + phase: StoryPhase; +} + + export interface NewsItem { source: string; title: string; @@ -109,7 +117,7 @@ export interface NewsItem { imageUrl?: string; importanceScore?: number; corroborationCount?: number; - storyPhase?: StoryPhase; + storyMeta?: StoryMeta; } export type VelocityLevel = 'normal' | 'elevated' | 'spike';