Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 20 additions & 0 deletions packages/core/src/constants/search.constant.ts
Original file line number Diff line number Diff line change
Expand Up @@ -12,3 +12,23 @@ export type SearchOperator = (typeof SEARCH_OPERATORS)[number]["operator"];

/** Allowed state submenu values, kept as a readonly tuple of string literals. */
export const STATE_SUBMENU_VALUES = ["open", "closed", "all"] as const;
/** Union of the string literals listed in `STATE_SUBMENU_VALUES`. */
export type StateSubmenuValue = (typeof STATE_SUBMENU_VALUES)[number];

/**
 * Relative weight of each signal in the combined search ranking score.
 * The four weights are intended to sum to 1.
 */
export const RANKING_WEIGHTS = {
  /** Semantic similarity; starts out with the highest weight. */
  SEMANTIC_SIMILARITY: 0.5,
  /** Comment count, used as an activity-level signal. */
  COMMENT_COUNT: 0.25,
  /** How recently the issue was updated. */
  RECENCY: 0.2,
  /** Small bonus for open issues. */
  ISSUE_STATE: 0.05,
} as const;

/** Time-based constants used when scoring search results. */
export const TIME_CONSTANTS = {
  /** Base time unit, in days, for the recency calculation. */
  RECENCY_BASE_DAYS: 30,
} as const;

/** Multipliers applied to the issue-state component of the ranking score. */
export const SCORE_MULTIPLIERS = {
  /** Open issues receive the full issue-state weight. */
  OPEN_ISSUE: 1.0,
  /** Closed issues receive 80% of the issue-state weight. */
  CLOSED_ISSUE: 0.8,
} as const;
1 change: 1 addition & 0 deletions packages/core/src/db/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ export function createDb(config: {
connectionString: string;
isProd: boolean;
}) {
// Disable prepared statements (`prepare: false`), as they are not supported for "Transaction" pool mode
const client = postgres(config.connectionString, { prepare: false });
return {
db: drizzle(client, {
Expand Down
53 changes: 51 additions & 2 deletions packages/core/src/semsearch.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,9 @@
import type { RateLimiter } from "./constants/rate-limit.constant";
import {
RANKING_WEIGHTS,
SCORE_MULTIPLIERS,
TIME_CONSTANTS,
} from "./constants/search.constant";
import type { DbClient } from "./db";
import { and, cosineDistance, desc, eq, gt, ilike, or, sql } from "./db";
import { comments } from "./db/schema/entities/comment.sql";
Expand Down Expand Up @@ -50,7 +55,49 @@ export namespace SemanticSearch {
},
openai,
);
const similarity = sql<number>`1-(${cosineDistance(issueTable.embedding, embedding)})`;
const similarity = sql<number>`(1-(${cosineDistance(issueTable.embedding, embedding)}))::float`;

// Recency score: exponential decay exp(-t/τ) of the time since the issue was last updated.
//   t = NOW() - issueUpdatedAt, taken in seconds via EXTRACT(EPOCH FROM ...)
//   τ (tau) = RECENCY_BASE_DAYS days, converted to seconds by multiplying by 86400
// With τ = 30 days: a just-updated issue scores 1.0, after 30 days the score is
// ~0.37 (1/e), after 60 days ~0.14 (1/e²). The score approaches but never reaches 0.
// NOTE(review): if issueUpdatedAt is later than the database's NOW(), t is negative
// and the score exceeds 1 — confirm whether that can occur.
const recencyScore = sql<number>`
EXP(
-1.0 *
EXTRACT(EPOCH FROM (NOW() - ${issueTable.issueUpdatedAt}))::float /
(86400 * ${TIME_CONSTANTS.RECENCY_BASE_DAYS}) -- Convert decay time to seconds
)::float
`;

// Logarithmic comment score, normalized and capped to the 0-1 range.
// ln(x + 1) gives us:
// 0 comments = 0.0
// 4 comments ≈ 1.6
// 5 comments ≈ 1.8
// 10 comments ≈ 2.4
// 20 comments ≈ 3.0
// 50 comments ≈ 3.9
// The value is divided by ln(50 + 1) ≈ 3.93, so 50 comments maps to 1.0.
// Issues with more than 50 comments would otherwise score above 1, so the
// result is capped with LEAST(..., 1.0) to keep this component within the
// 0-1 range that the ranking weights assume.
const commentScore = sql<number>`
  LEAST(
    LN(GREATEST(count(${comments.id})::float + 1, 1)) / LN(51),
    1.0::float
  )
`;

// Combined ranking score: weighted sum of four signals, each multiplied by its
// RANKING_WEIGHTS entry:
//   - semantic similarity (1 - cosine distance to the query embedding)
//   - recency score
//   - comment score
//   - issue-state multiplier: OPEN_ISSUE when issueState = 'OPEN', otherwise CLOSED_ISSUE
// Each interpolated weight/multiplier is cast to float inside the SQL expression.
const rankingScore = sql<number>`
(${RANKING_WEIGHTS.SEMANTIC_SIMILARITY}::float * ${similarity}) +
(${RANKING_WEIGHTS.RECENCY}::float * ${recencyScore}) +
(${RANKING_WEIGHTS.COMMENT_COUNT}::float * ${commentScore}) +
(${RANKING_WEIGHTS.ISSUE_STATE}::float * (
CASE
WHEN ${issueTable.issueState} = 'OPEN' THEN ${SCORE_MULTIPLIERS.OPEN_ISSUE}::float
ELSE ${SCORE_MULTIPLIERS.CLOSED_ISSUE}::float
END
))
`;

const selected = db
.select({
Expand Down Expand Up @@ -83,6 +130,7 @@ export namespace SemanticSearch {
repoOwnerName: repos.owner,
repoLastSyncedAt: repos.lastSyncedAt,
commentCount: count(comments.id).as("comment_count"),
rankingScore,
})
.from(issueTable)
.leftJoin(repos, eq(issueTable.repoId, repos.id))
Expand All @@ -95,10 +143,11 @@ export namespace SemanticSearch {
repos.owner,
repos.lastSyncedAt,
)
.orderBy(desc(similarity))
.orderBy(desc(rankingScore))
.where(
and(
eq(repos.initStatus, "completed"),
// probably should switch to ranking score?
gt(similarity, SIMILARITY_THRESHOLD),
// general substring queries match either title or body
...substringQueries.map((subQuery) =>
Expand Down
11 changes: 7 additions & 4 deletions packages/web/src/components/IssueCard.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -153,10 +153,13 @@ function IssueTitleWithLabels({ issue }: { issue: Issue }) {
<span
className="[word-break:break-word]"
dangerouslySetInnerHTML={{
__html: DOMPurify.sanitize(`${processTitle(issue.title)}`, {
ALLOWED_TAGS: ["code"],
ALLOWED_ATTR: [],
}),
__html: DOMPurify.sanitize(
`(${(issue.rankingScore * 100).toFixed(1)}%) ${processTitle(issue.title)}`,
{
ALLOWED_TAGS: ["code"],
ALLOWED_ATTR: [],
},
),
}}
/>
</a>
Expand Down
Loading