diff --git a/packages/core/src/constants/search.constant.ts b/packages/core/src/constants/search.constant.ts index d00f16d8..a90b9333 100644 --- a/packages/core/src/constants/search.constant.ts +++ b/packages/core/src/constants/search.constant.ts @@ -12,3 +12,23 @@ export type SearchOperator = (typeof SEARCH_OPERATORS)[number]["operator"]; export const STATE_SUBMENU_VALUES = ["open", "closed", "all"] as const; export type StateSubmenuValue = (typeof STATE_SUBMENU_VALUES)[number]; + +// Ranking weights (should sum to 1) +export const RANKING_WEIGHTS = { + SEMANTIC_SIMILARITY: 0.5, // Start with higher weight for semantic search + COMMENT_COUNT: 0.25, // Activity level + RECENCY: 0.2, // Recent updates + ISSUE_STATE: 0.05, // Small bonus for open issues +} as const; + +// Time-based constants +export const TIME_CONSTANTS = { + // Base time unit in days for recency calculation + RECENCY_BASE_DAYS: 30, +} as const; + +// Score multipliers +export const SCORE_MULTIPLIERS = { + OPEN_ISSUE: 1.0, + CLOSED_ISSUE: 0.8, +} as const; diff --git a/packages/core/src/db/index.ts b/packages/core/src/db/index.ts index 6f1b50e7..c405adc0 100644 --- a/packages/core/src/db/index.ts +++ b/packages/core/src/db/index.ts @@ -39,6 +39,7 @@ export function createDb(config: { connectionString: string; isProd: boolean; }) { + // Disable prepared statements (prepare: false) as they are not supported for "Transaction" pool mode const client = postgres(config.connectionString, { prepare: false }); return { db: drizzle(client, { diff --git a/packages/core/src/semsearch.ts b/packages/core/src/semsearch.ts index 47c02f0a..39d90277 100644 --- a/packages/core/src/semsearch.ts +++ b/packages/core/src/semsearch.ts @@ -1,4 +1,9 @@ import type { RateLimiter } from "./constants/rate-limit.constant"; +import { + RANKING_WEIGHTS, + SCORE_MULTIPLIERS, + TIME_CONSTANTS, +} from "./constants/search.constant"; import type { DbClient } from "./db"; import { and, cosineDistance, desc, eq, gt, ilike, or, sql } from "./db"; import { comments } 
from "./db/schema/entities/comment.sql"; @@ -50,7 +55,49 @@ export namespace SemanticSearch { }, openai, ); - const similarity = sql`1-(${cosineDistance(issueTable.embedding, embedding)})`; + const similarity = sql`(1-(${cosineDistance(issueTable.embedding, embedding)}))::float`; + + // Exponential decay for recency score + // exp(-t/τ) where: + // t is time elapsed in days + // τ (tau) is the characteristic decay time in days + // After 30 days (RECENCY_BASE_DAYS), score will be ~0.37 (1/e) + // After 60 days, score will be ~0.14 (1/e²) + // Score approaches but never reaches 0 + const recencyScore = sql` + EXP( + -1.0 * + EXTRACT(EPOCH FROM (NOW() - ${issueTable.issueUpdatedAt}))::float / + (86400 * ${TIME_CONSTANTS.RECENCY_BASE_DAYS}) -- Convert decay time to seconds + )::float + `; + + // Logarithmic comment score normalization + // ln(x + 1) gives us: + // 0 comments = 0.0 + // 4 comments ≈ 1.6 + // 5 comments ≈ 1.8 + // 10 comments ≈ 2.4 + // 20 comments ≈ 3.0 + // 50 comments ≈ 3.9 + // Then scale by dividing by ln(50 + 1) so 50 comments maps to 1.0 (not clamped: more than 50 comments scores above 1) + const commentScore = sql` + LN(GREATEST(count(${comments.id})::float + 1, 1)) / + LN(51) -- ln(50 + 1) ≈ 3.93 as normalizing factor + `; + + // Combined ranking score + const rankingScore = sql` + (${RANKING_WEIGHTS.SEMANTIC_SIMILARITY}::float * ${similarity}) + + (${RANKING_WEIGHTS.RECENCY}::float * ${recencyScore}) + + (${RANKING_WEIGHTS.COMMENT_COUNT}::float * ${commentScore}) + + (${RANKING_WEIGHTS.ISSUE_STATE}::float * ( + CASE + WHEN ${issueTable.issueState} = 'OPEN' THEN ${SCORE_MULTIPLIERS.OPEN_ISSUE}::float + ELSE ${SCORE_MULTIPLIERS.CLOSED_ISSUE}::float + END + )) + `; const selected = db .select({ @@ -83,6 +130,7 @@ export namespace SemanticSearch { repoOwnerName: repos.owner, repoLastSyncedAt: repos.lastSyncedAt, commentCount: count(comments.id).as("comment_count"), + rankingScore, }) .from(issueTable) .leftJoin(repos, eq(issueTable.repoId, repos.id)) @@ -95,10 +143,11 @@ export namespace SemanticSearch { 
repos.owner, repos.lastSyncedAt, ) - .orderBy(desc(similarity)) + .orderBy(desc(rankingScore)) .where( and( eq(repos.initStatus, "completed"), + // TODO: consider applying the threshold to rankingScore instead of raw similarity gt(similarity, SIMILARITY_THRESHOLD), // general substring queries match either title or body ...substringQueries.map((subQuery) => diff --git a/packages/web/src/components/IssueCard.tsx b/packages/web/src/components/IssueCard.tsx index c40c3f16..d5baec0e 100644 --- a/packages/web/src/components/IssueCard.tsx +++ b/packages/web/src/components/IssueCard.tsx @@ -153,10 +153,13 @@ function IssueTitleWithLabels({ issue }: { issue: Issue }) {