diff --git a/components/layout/Search.tsx b/components/layout/Search.tsx index 868a85f8..8cd3e5cb 100644 --- a/components/layout/Search.tsx +++ b/components/layout/Search.tsx @@ -13,5 +13,5 @@ const typesenseClient = new Client({ }); export default function NcSearchDialog(props: SharedProps) { - return ; + return ; } \ No newline at end of file diff --git a/scripts/syncIndex.ts b/scripts/syncIndex.ts index 2dbb18c5..ce522749 100644 --- a/scripts/syncIndex.ts +++ b/scripts/syncIndex.ts @@ -25,7 +25,7 @@ const client = new Client({ apiKey: process.env?.TYPE_SENSE_ADMIN_API, }) -const collectionName = 'noco-docs-v2'; +const collectionName = 'noco-docs-v3'; sync(client, { collection: collectionName, diff --git a/utils/search/client.ts b/utils/search/client.ts index 63103042..bc9ada97 100644 --- a/utils/search/client.ts +++ b/utils/search/client.ts @@ -54,26 +54,30 @@ export async function searchDocs( query: string, tag?: string, ): Promise { - // Define search parameters based on Typesense capabilities + // Define search parameters with DocSearch-style hierarchical boosting const searchParams: any = { q: query, - query_by: 'title,section,content', - query_by_weights: '6,4,1', // Give even higher weight to title - prefix: true, // Enable prefix searching - infix: 'always', // Enable infix searching to match parts of words + query_by: 'title,lvl0,lvl1,lvl2,lvl3,lvl4,lvl5,section,content', + query_by_weights: '10,9,8,6,4,3,2,5,1', // Hierarchical weighting typo_tolerance: true, // Enable typo tolerance - num_typos: 2, // Allow up to 2 typos + num_typos: 4, // Raised from 2 to 4 (not reduced); NOTE(review): Typesense docs list num_typos as 0-2, confirm 4 is accepted boost: { - is_root_heading: 2, // Boost root headings - heading_level: { - value: 1, // Boost h1 headings - function: "reciprocal" // Lower heading levels get less boost - } + type: { + 'lvl0': 10, // Highest boost for main sections + 'lvl1': 8, // High boost for page titles + 'lvl2': 6, // Medium-high boost for major headings + 'lvl3': 4, // Medium boost for subsections + 'lvl4': 
3, // Lower boost for minor headings + 'lvl5': 2, // Lowest boost for smallest headings + 'content': 1 // Base boost for content + }, + is_root_heading: 2, // Additional boost for root headings + is_main_content: 1 // Boost main content }, - sort_by: '_text_match:asc', - per_page: 15, // Increase results per page + sort_by: '_text_match:desc', // Sort by relevance + per_page: 12, // Results per page (lowered from 15) filter_by: undefined as string | undefined, - contextual_search: true, + contextual_search: false, }; if (tag) { @@ -86,9 +90,10 @@ export async function searchDocs( .search({ ...searchParams, q: '*', - per_page: 8, + per_page: 12, group_by: 'page_id', - group_limit: 1 + group_limit: 1, + sort_by: 'type:asc' // Intended to show pages first; NOTE(review): if string sort is lexicographic, 'content' precedes 'lvl*' - confirm }); return groupResults(results.hits || []) diff --git a/utils/search/typesense.ts b/utils/search/typesense.ts index 06682051..5d15a082 100644 --- a/utils/search/typesense.ts +++ b/utils/search/typesense.ts @@ -38,6 +38,29 @@ export interface BaseIndex { section_id?: string; content: string; description?: string; + + // DocSearch-style hierarchy + lvl0?: string; // Main section/category + lvl1?: string; // Page title + lvl2?: string; // Major headings (h2) + lvl3?: string; // Subsections (h3) + lvl4?: string; // Minor headings (h4) + lvl5?: string; // Smallest headings (h5; h6 text is also stored here) + + // Enhanced fields + type: 'lvl0' | 'lvl1' | 'lvl2' | 'lvl3' | 'lvl4' | 'lvl5' | 'content'; + hierarchy: { + lvl0?: string; + lvl1?: string; + lvl2?: string; + lvl3?: string; + lvl4?: string; + lvl5?: string; + }; + anchor?: string; + heading_level?: number; + is_root_heading?: boolean; + is_main_content: boolean; [key: string]: unknown; } @@ -88,6 +111,17 @@ export async function createOrUpdateCollection( {name: 'content', type: 'string'}, {name: 'heading_level', type: 'int32', optional: true}, {name: 'is_root_heading', type: 'bool', optional: true}, + + // DocSearch-style hierarchy fields + {name: 'lvl0', type: 'string', optional: true}, + {name: 'lvl1', type: 
'string', optional: true}, + {name: 'lvl2', type: 'string', optional: true}, + {name: 'lvl3', type: 'string', optional: true}, + {name: 'lvl4', type: 'string', optional: true}, + {name: 'lvl5', type: 'string', optional: true}, + {name: 'type', type: 'string', facet: true, sort: true}, + {name: 'anchor', type: 'string', optional: true}, + {name: 'is_main_content', type: 'bool', optional: true}, ], }; @@ -106,18 +140,81 @@ function toIndex(page: DocumentRecord): BaseIndex[] { const scannedHeadings = new Set(); let relatedTopicsFound = false; + // Track hierarchical context + const hierarchyContext = { + lvl0: getPageCategory(page), + lvl1: page.title, + lvl2: undefined as string | undefined, + lvl3: undefined as string | undefined, + lvl4: undefined as string | undefined, + lvl5: undefined as string | undefined, + }; + // Get heading level (h1, h2, etc.) function getHeadingLevel(heading: any): number { if (!heading || !heading.depth) return 0; return heading.depth; } + // Get page category: the page tag if set, otherwise the first path segment after /docs/ when more segments follow (hyphens become spaces), otherwise 'Documentation' + function getPageCategory(page: DocumentRecord): string { + if (page.tag) return page.tag; + + // Extract category from URL path + const urlPath = page.url.replace(/^.*\/docs\//, ''); + const pathParts = urlPath.split('/'); + + if (pathParts.length > 1) { + return pathParts[0].replace(/-/g, ' '); + } + + return 'Documentation'; + } + + // Update hierarchy context: store the heading text at its level (h6 shares lvl5) and clear every deeper level + function updateHierarchyContext(heading: any, content: string) { + const level = getHeadingLevel(heading); + + switch (level) { + case 1: + hierarchyContext.lvl1 = content; + hierarchyContext.lvl2 = undefined; + hierarchyContext.lvl3 = undefined; + hierarchyContext.lvl4 = undefined; + hierarchyContext.lvl5 = undefined; + break; + case 2: + hierarchyContext.lvl2 = content; + hierarchyContext.lvl3 = undefined; + hierarchyContext.lvl4 = undefined; + hierarchyContext.lvl5 = undefined; + break; + case 3: + hierarchyContext.lvl3 = content; + hierarchyContext.lvl4 = undefined; + 
hierarchyContext.lvl5 = undefined; + break; + case 4: + hierarchyContext.lvl4 = content; + hierarchyContext.lvl5 = undefined; + break; + case 5: + case 6: + hierarchyContext.lvl5 = content; + break; + } + } + + // Create an index record carrying a snapshot copy of the current hierarchy context function createIndex( section: string | undefined, sectionId: string | undefined, content: string, + type: BaseIndex['type'] = 'content', additionalFields: Partial = {} ): BaseIndex { + const currentHierarchy = { ...hierarchyContext }; + return { id: `${page._id}-${(id++).toString()}`, title: page.title, @@ -128,13 +225,35 @@ function toIndex(page: DocumentRecord): BaseIndex[] { section_id: sectionId, content, description: page.description, + + // DocSearch-style hierarchy + lvl0: currentHierarchy.lvl0, + lvl1: currentHierarchy.lvl1, + lvl2: currentHierarchy.lvl2, + lvl3: currentHierarchy.lvl3, + lvl4: currentHierarchy.lvl4, + lvl5: currentHierarchy.lvl5, + + // Enhanced fields + type, + hierarchy: currentHierarchy, + anchor: sectionId, + is_main_content: type === 'content', + ...(page.extra_data || {}), ...additionalFields }; } + // Add page title as lvl1 record + indexes.push(createIndex(undefined, undefined, page.title, 'lvl1', { + heading_level: 1, + is_root_heading: true + })); + + // Add page description as content if available if (page.description) { - indexes.push(createIndex(undefined, undefined, page.description)); + indexes.push(createIndex(undefined, undefined, page.description, 'content')); } // Find if there's a "Related topics" heading @@ -143,7 +262,8 @@ function toIndex(page: DocumentRecord): BaseIndex[] { h.content.toLowerCase() === 'related topic' || h.content.toLowerCase() === 'related resources' || h.content.toLowerCase() === 'related resource' || - h.content.toLowerCase() === 'related fields' + h.content.toLowerCase() === 'related fields' || + h.content.toLowerCase() === 'available tools' ); page.structured.contents.forEach((p) => { @@ -158,11 +278,6 @@ function toIndex(page: DocumentRecord): 
BaseIndex[] { // Check if we've reached the "Related topics" section if (heading && relatedTopicsHeading && heading.id === relatedTopicsHeading.id) { relatedTopicsFound = true; - // Include the "Related topics" heading itself - if (!scannedHeadings.has(heading.id)) { - scannedHeadings.add(heading.id); - indexes.push(createIndex(heading.content, heading.id, heading.content)); - } return; } @@ -171,21 +286,32 @@ function toIndex(page: DocumentRecord): BaseIndex[] { return; } - if (p.content && p.content !== heading?.content) { - indexes.push(createIndex(heading?.content, heading?.id, p.content)); - } - + // Process heading first to update hierarchy context if (heading && !scannedHeadings.has(heading.id)) { scannedHeadings.add(heading.id); - // Add heading level as a field to improve search relevance const headingLevel = getHeadingLevel(heading); - // Root page headings (h1) should get higher priority const isRootHeading = headingLevel === 1 || heading.content === page.title; - indexes.push(createIndex(heading.content, heading.id, heading.content, { + + // Update hierarchy context + updateHierarchyContext(heading, heading.content); + + // Determine record type: levels 1-5 become 'lvl1'..'lvl5'; any other level (0 or 6) stays 'content', even though h6 text is stored in lvl5 + let recordType: BaseIndex['type'] = 'content'; + if (headingLevel >= 1 && headingLevel <= 5) { + recordType = `lvl${headingLevel}` as BaseIndex['type']; + } + + // Add heading as separate record + indexes.push(createIndex(heading.content, heading.id, heading.content, recordType, { heading_level: headingLevel, is_root_heading: isRootHeading })); } + + // Add content record with current hierarchy context + if (p.content && p.content !== heading?.content) { + indexes.push(createIndex(heading?.content, heading?.id, p.content, 'content')); + } }); return indexes;