Skip to content
3 changes: 3 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
- Skills/Web: show skill owner avatar + handle on skill cards, lists, and detail pages (#312) (thanks @ianalloway).
- Skills/Web: add file viewer for skill version files on detail page (#44) (thanks @regenrek).
- CLI: add `uninstall` command for skills (#241) (thanks @superlowburn).
- CI/Security: add TruffleHog pull-request scanning for verified leaked credentials (#505) (thanks @akses0).

### Changed
- Quality gate: language-aware word counting (`Intl.Segmenter`) and new `cjkChars` signal to reduce false rejects for non-Latin docs.
Expand All @@ -17,6 +18,8 @@
- Skills: reserve deleted slugs for prior owners (90-day cooldown) to prevent squatting; add admin reclaim flow (#298) (thanks @autogame-17).
- Moderation: ban flow soft-deletes owned skills (reversible) and removes them from vector search (#298) (thanks @autogame-17).
- LLM helpers: centralize OpenAI Responses text extraction for changelog/summary/eval flows (#502) (thanks @ianalloway).
- Rate limiting: apply authenticated quotas by user bucket (vs shared IP), emit delay-based reset headers, and improve CLI 429 guidance/retries (#412) (thanks @lc0rp).
- Search/listing performance: cut embedding hydration and badge read bandwidth via `embeddingSkillMap` + denormalized skill badges; shift stat-doc sync to low-frequency cron (#441) (thanks @sethconvex).

### Fixed
- Admin API: `POST /api/v1/users/reclaim` now performs non-destructive root-slug owner transfer
Expand Down
16 changes: 14 additions & 2 deletions convex/crons.ts
Original file line number Diff line number Diff line change
Expand Up @@ -19,18 +19,30 @@ crons.interval(

crons.interval(
'skill-stats-backfill',
{ minutes: 10 },
{ hours: 6 },
internal.statsMaintenance.runSkillStatBackfillInternal,
{ batchSize: 200, maxBatches: 5 },
)

// Runs frequently to keep dailyStats/trending accurate,
// but does NOT patch skill documents (only writes to skillDailyStats).
crons.interval(
'skill-stat-events',
{ minutes: 5 },
{ minutes: 15 },
internal.skillStatEvents.processSkillStatEventsAction,
{},
)

// Syncs accumulated stat deltas to skill documents every 6 hours.
// Runs infrequently to avoid thundering-herd reactive query invalidation.
// Uses processedAt field to track progress (independent of the action cursor).
// NOTE(review): batchSize=500 is presumably the per-run page size of the
// internal mutation — confirm against skillStatEvents before tuning.
crons.interval(
  'skill-doc-stat-sync',
  { hours: 6 },
  internal.skillStatEvents.processSkillStatEventsInternal,
  { batchSize: 500 },
)

crons.interval(
'global-stats-update',
{ minutes: 60 },
Expand Down
1 change: 1 addition & 0 deletions convex/devSeed.ts
Original file line number Diff line number Diff line change
Expand Up @@ -435,6 +435,7 @@ export const seedSkillMutation = internalMutation({
visibility: 'latest-approved',
updatedAt: now,
})
await ctx.db.insert('embeddingSkillMap', { embeddingId, skillId })

await ctx.db.patch(skillId, {
latestVersionId: versionId,
Expand Down
2 changes: 1 addition & 1 deletion convex/lib/badges.ts
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ export async function getSkillBadgeMap(
const records = await ctx.db
.query('skillBadges')
.withIndex('by_skill', (q) => q.eq('skillId', skillId))
.collect()
.take(10)
return buildBadgeMap(records)
}

Expand Down
76 changes: 76 additions & 0 deletions convex/maintenance.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ const {
backfillSkillSummariesInternalHandler,
cleanupEmptySkillsInternalHandler,
nominateEmptySkillSpammersInternalHandler,
upsertSkillBadgeRecordInternal,
} = await import('./maintenance')
const { internal } = await import('./_generated/api')
const { generateSkillSummary } = await import('./lib/skillSummary')
Expand Down Expand Up @@ -196,6 +197,81 @@ describe('maintenance backfill', () => {
})
})

describe('maintenance badge denormalization', () => {
  // Structural type for the raw Convex mutation handler invoked directly in
  // tests; replaces the unsafe bare `Function` type with a precise signature.
  type UpsertHandler = (
    ctx: unknown,
    args: { skillId: string; kind: string; byUserId: string; at: number },
  ) => Promise<{ inserted: boolean }>

  const handler = (upsertSkillBadgeRecordInternal as unknown as { _handler: UpsertHandler })
    ._handler

  // Builds a minimal mock Convex db: the by_skill_kind lookup resolves to
  // `existingRecord`, and the skill-doc read resolves to `skillDoc`.
  // Returns the spies needed for assertions alongside the ctx.
  const makeDb = (existingRecord: unknown, skillDoc: unknown) => {
    const unique = vi.fn().mockResolvedValue(existingRecord)
    const query = vi.fn().mockReturnValue({
      withIndex: () => ({ unique }),
    })
    const insert = vi.fn().mockResolvedValue('skillBadges:1')
    const get = vi.fn().mockResolvedValue(skillDoc)
    const patch = vi.fn().mockResolvedValue(undefined)
    return { ctx: { db: { query, insert, get, patch } } as never, insert, patch }
  }

  it('upserts table badge and keeps skill.badges in sync', async () => {
    // No existing table record; skill doc has no denormalized badges yet.
    const { ctx, insert, patch } = makeDb(null, { _id: 'skills:1', badges: undefined })

    const result = await handler(ctx, {
      skillId: 'skills:1',
      kind: 'highlighted',
      byUserId: 'users:1',
      at: 123,
    })

    expect(result).toEqual({ inserted: true })
    expect(insert).toHaveBeenCalledWith('skillBadges', {
      skillId: 'skills:1',
      kind: 'highlighted',
      byUserId: 'users:1',
      at: 123,
    })
    expect(patch).toHaveBeenCalledWith('skills:1', {
      badges: {
        highlighted: { byUserId: 'users:1', at: 123 },
      },
    })
  })

  it('resyncs denormalized badge even when table record already exists', async () => {
    // Table record already present: no insert, but skill doc still re-synced.
    const { ctx, insert, patch } = makeDb(
      { _id: 'skillBadges:existing' },
      { _id: 'skills:1', badges: {} },
    )

    const result = await handler(ctx, {
      skillId: 'skills:1',
      kind: 'official',
      byUserId: 'users:2',
      at: 456,
    })

    expect(result).toEqual({ inserted: false })
    expect(insert).not.toHaveBeenCalled()
    expect(patch).toHaveBeenCalledWith('skills:1', {
      badges: {
        official: { byUserId: 'users:2', at: 456 },
      },
    })
  })
})

describe('maintenance fingerprint backfill', () => {
it('backfills fingerprint field and inserts index entry', async () => {
const { hashSkillFiles } = await import('./lib/skills')
Expand Down
120 changes: 119 additions & 1 deletion convex/maintenance.ts
Original file line number Diff line number Diff line change
Expand Up @@ -642,17 +642,32 @@ export const upsertSkillBadgeRecordInternal = internalMutation({
at: v.number(),
},
handler: async (ctx, args) => {
const syncDenormalizedBadge = async () => {
const skill = await ctx.db.get(args.skillId)
if (!skill) return
await ctx.db.patch(args.skillId, {
badges: {
...(skill.badges as Record<string, unknown> | undefined),
[args.kind]: { byUserId: args.byUserId, at: args.at },
},
})
}

const existing = await ctx.db
.query('skillBadges')
.withIndex('by_skill_kind', (q) => q.eq('skillId', args.skillId).eq('kind', args.kind))
.unique()
if (existing) return { inserted: false as const }
if (existing) {
await syncDenormalizedBadge()
return { inserted: false as const }
}
await ctx.db.insert('skillBadges', {
skillId: args.skillId,
kind: args.kind,
byUserId: args.byUserId,
at: args.at,
})
await syncDenormalizedBadge()
return { inserted: true as const }
},
})
Expand Down Expand Up @@ -1411,6 +1426,109 @@ export const nominateEmptySkillSpammers: ReturnType<typeof action> = action({
},
})

// Backfill embeddingSkillMap from existing skillEmbeddings.
// Run once after deploying the schema change:
// npx convex run maintenance:backfillEmbeddingSkillMapInternal --prod
export const backfillEmbeddingSkillMapInternal = internalMutation({
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

consider adding a backfill to populate skill.badges from existing skillBadges table records. currently only new badge changes will sync to skill docs, leaving existing skills without denormalized badges until their badges are modified

Prompt To Fix With AI
This is a comment left during a code review.
Path: convex/maintenance.ts
Line: 1417

Comment:
consider adding a backfill to populate `skill.badges` from existing `skillBadges` table records. currently only new badge changes will sync to skill docs, leaving existing skills without denormalized badges until their badges are modified

How can I resolve this? If you propose a fix, please make it concise.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Added backfillDenormalizedBadgesInternal in a22521f — reads from skillBadges table and syncs to skill.badges field. Self-scheduling paginated mutation, included in the migration steps.

args: {
cursor: v.optional(v.string()),
batchSize: v.optional(v.number()),
},
handler: async (ctx, args) => {
const batchSize = clampInt(args.batchSize ?? 200, 10, 500)
const { page, continueCursor, isDone } = await ctx.db
.query('skillEmbeddings')
.paginate({ cursor: args.cursor ?? null, numItems: batchSize })

let inserted = 0
for (const embedding of page) {
const existing = await ctx.db
.query('embeddingSkillMap')
.withIndex('by_embedding', (q) => q.eq('embeddingId', embedding._id))
.unique()
if (!existing) {
await ctx.db.insert('embeddingSkillMap', {
embeddingId: embedding._id,
skillId: embedding.skillId,
})
inserted++
}
}

if (!isDone) {
await ctx.scheduler.runAfter(0, internal.maintenance.backfillEmbeddingSkillMapInternal, {
cursor: continueCursor,
batchSize: args.batchSize,
})
}

return { inserted, isDone, scanned: page.length }
},
})

// Sync skillBadges table → denormalized skill.badges field.
// Run after deploying the badge-read removal to ensure all skills
// have up-to-date badges on the skill doc itself.
export const backfillDenormalizedBadgesInternal = internalMutation({
  args: {
    cursor: v.optional(v.string()),
    batchSize: v.optional(v.number()),
  },
  handler: async (ctx, args) => {
    const pageSize = clampInt(args.batchSize ?? 100, 10, 200)
    const { page, continueCursor, isDone } = await ctx.db
      .query('skills')
      .paginate({ cursor: args.cursor ?? null, numItems: pageSize })

    let patchedCount = 0
    for (const skillDoc of page) {
      const badgeRows = await ctx.db
        .query('skillBadges')
        .withIndex('by_skill', (q) => q.eq('skillId', skillDoc._id))
        .take(10)

      // Canonical badge map derived from the skillBadges table.
      const canonical: Record<string, { byUserId: Id<'users'>; at: number }> = {}
      for (const row of badgeRows) {
        canonical[row.kind] = { byUserId: row.byUserId, at: row.at }
      }

      // Current denormalized copy on the skill doc (keys + values).
      const denormalized = (skillDoc.badges ?? {}) as Record<
        string,
        { byUserId?: Id<'users'>; at?: number } | undefined
      >
      const presentKeys = Object.keys(denormalized).filter(
        (key) => denormalized[key] !== undefined,
      )
      const wantedKeys = Object.keys(canonical)

      // Patch only on drift, to keep reactive query invalidation minimal.
      const inSync =
        wantedKeys.length === presentKeys.length &&
        wantedKeys.every((key) => {
          const have = denormalized[key]
          const want = canonical[key]
          return have !== undefined && have.byUserId === want.byUserId && have.at === want.at
        })

      if (!inSync) {
        await ctx.db.patch(skillDoc._id, { badges: canonical })
        patchedCount += 1
      }
    }

    // Self-schedule the next page until all skills have been scanned.
    if (!isDone) {
      await ctx.scheduler.runAfter(0, internal.maintenance.backfillDenormalizedBadgesInternal, {
        cursor: continueCursor,
        batchSize: args.batchSize,
      })
    }

    return { patched: patchedCount, isDone, scanned: page.length }
  },
})

function clampInt(value: number, min: number, max: number) {
const rounded = Math.trunc(value)
if (!Number.isFinite(rounded)) return min
Expand Down
9 changes: 9 additions & 0 deletions convex/schema.ts
Original file line number Diff line number Diff line change
Expand Up @@ -310,6 +310,14 @@ const skillEmbeddings = defineTable({
filterFields: ['visibility'],
})

// Lightweight lookup: embeddingId → skillId (~100 bytes per doc).
// Avoids reading full skillEmbeddings docs (~12KB each with vector)
// during search hydration. Indexed by embeddingId so each vector-search
// hit can be resolved to its skill with a single point read.
const embeddingSkillMap = defineTable({
  embeddingId: v.id('skillEmbeddings'),
  skillId: v.id('skills'),
}).index('by_embedding', ['embeddingId'])

const skillDailyStats = defineTable({
skillId: v.id('skills'),
day: v.number(),
Expand Down Expand Up @@ -576,6 +584,7 @@ export default defineSchema({
skillBadges,
soulVersionFingerprints,
skillEmbeddings,
embeddingSkillMap,
soulEmbeddings,
skillDailyStats,
skillLeaderboards,
Expand Down
Loading
Loading