Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion components/layout/Search.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -13,5 +13,5 @@ const typesenseClient = new Client({
});

export default function NcSearchDialog(props: SharedProps) {
return <SearchDialog typesenseClient={typesenseClient} typesenseCollection="noco-docs-v2" {...props} />;
return <SearchDialog typesenseClient={typesenseClient} typesenseCollection="noco-docs-v3" {...props} />;
}
2 changes: 1 addition & 1 deletion scripts/syncIndex.ts
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ const client = new Client({
apiKey: process.env?.TYPE_SENSE_ADMIN_API,
})

const collectionName = 'noco-docs-v2';
const collectionName = 'noco-docs-v3';

sync(client, {
collection: collectionName,
Expand Down
37 changes: 21 additions & 16 deletions utils/search/client.ts
Original file line number Diff line number Diff line change
Expand Up @@ -54,26 +54,30 @@ export async function searchDocs(
query: string,
tag?: string,
): Promise<SortedResult[]> {
// Define search parameters based on Typesense capabilities
// Define search parameters with DocSearch-style hierarchical boosting
const searchParams: any = {
q: query,
query_by: 'title,section,content',
query_by_weights: '6,4,1', // Give even higher weight to title
prefix: true, // Enable prefix searching
infix: 'always', // Enable infix searching to match parts of words
query_by: 'title,lvl0,lvl1,lvl2,lvl3,lvl4,lvl5,section,content',
query_by_weights: '10,9,8,6,4,3,2,5,1', // Hierarchical weighting
typo_tolerance: true, // Enable typo tolerance
num_typos: 2, // Allow up to 2 typos
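num_typos: 4, // NOTE(review): raised from 2, so this loosens rather than reduces typo matching; Typesense documents 0-2 as the accepted range - confirm 4 is honored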
boost: {
is_root_heading: 2, // Boost root headings
heading_level: {
value: 1, // Boost h1 headings
function: "reciprocal" // Lower heading levels get less boost
}
type: {
'lvl0': 10, // Highest boost for main sections
'lvl1': 8, // High boost for page titles
'lvl2': 6, // Medium-high boost for major headings
'lvl3': 4, // Medium boost for subsections
'lvl4': 3, // Lower boost for minor headings
'lvl5': 2, // Lowest boost for smallest headings
'content': 1 // Base boost for content
},
is_root_heading: 2, // Additional boost for root headings
is_main_content: 1 // Boost main content
},
sort_by: '_text_match:asc',
per_page: 15, // Increase results per page
sort_by: '_text_match:desc', // Sort by relevance
per_page: 12, // Results per page (lowered from 15)
filter_by: undefined as string | undefined,
contextual_search: true,
contextual_search: false,
};

if (tag) {
Expand All @@ -86,9 +90,10 @@ export async function searchDocs(
.search({
...searchParams,
q: '*',
per_page: 8,
per_page: 12,
group_by: 'page_id',
group_limit: 1
group_limit: 1,
sort_by: 'type:asc' // Show pages first
});

return groupResults(results.hits || [])
Expand Down
154 changes: 140 additions & 14 deletions utils/search/typesense.ts
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,29 @@ export interface BaseIndex {
section_id?: string;
content: string;
description?: string;

// DocSearch-style hierarchy
lvl0?: string; // Main section/category
lvl1?: string; // Page title
lvl2?: string; // Major headings (h2)
lvl3?: string; // Subsections (h3)
lvl4?: string; // Minor headings (h4)
lvl5?: string; // Smallest headings (h5)

// Enhanced fields
type: 'lvl0' | 'lvl1' | 'lvl2' | 'lvl3' | 'lvl4' | 'lvl5' | 'content';
hierarchy: {
lvl0?: string;
lvl1?: string;
lvl2?: string;
lvl3?: string;
lvl4?: string;
lvl5?: string;
};
anchor?: string;
heading_level?: number;
is_root_heading?: boolean;
is_main_content: boolean;
[key: string]: unknown;
}

Expand Down Expand Up @@ -88,6 +111,17 @@ export async function createOrUpdateCollection(
{name: 'content', type: 'string'},
{name: 'heading_level', type: 'int32', optional: true},
{name: 'is_root_heading', type: 'bool', optional: true},

// DocSearch-style hierarchy fields
{name: 'lvl0', type: 'string', optional: true},
{name: 'lvl1', type: 'string', optional: true},
{name: 'lvl2', type: 'string', optional: true},
{name: 'lvl3', type: 'string', optional: true},
{name: 'lvl4', type: 'string', optional: true},
{name: 'lvl5', type: 'string', optional: true},
{name: 'type', type: 'string', facet: true, sort: true},
{name: 'anchor', type: 'string', optional: true},
{name: 'is_main_content', type: 'bool', optional: true},
],
};

Expand All @@ -106,18 +140,81 @@ function toIndex(page: DocumentRecord): BaseIndex[] {
const scannedHeadings = new Set<string>();
let relatedTopicsFound = false;

// Track hierarchical context
const hierarchyContext = {
lvl0: getPageCategory(page),
lvl1: page.title,
lvl2: undefined as string | undefined,
lvl3: undefined as string | undefined,
lvl4: undefined as string | undefined,
lvl5: undefined as string | undefined,
};

/**
 * Reads the numeric depth of a heading (h1, h2, etc.).
 *
 * @param heading - Heading object; may be null/undefined or lack a `depth`.
 * @returns `heading.depth` when it is truthy, otherwise 0.
 */
function getHeadingLevel(heading: any): number {
  // A missing heading and a missing/zero depth both collapse to 0.
  return heading?.depth || 0;
}

/**
 * Derives the category label for a page.
 *
 * Resolution order:
 *   1. `page.tag`, when set, is returned as-is.
 *   2. Otherwise the first path segment after `/docs/` is used, with hyphens
 *      turned into spaces - but only when the path has more than one segment.
 *   3. Otherwise the fallback label `'Documentation'` is returned.
 *
 * @param page - Page record; only `tag` and `url` are read here.
 * @returns The category label (never an empty string from URL parsing).
 */
function getPageCategory(page: DocumentRecord): string {
  if (page.tag) return page.tag;

  // Drop everything up to and including the last `/docs/`, then any leading
  // slashes. Without the second step a URL such as `/guides/intro` (no
  // `/docs/` prefix) splits into ['', 'guides', 'intro'] and the category
  // would come out as an empty string.
  const urlPath = page.url
    .replace(/^.*\/docs\//, '')
    .replace(/^\/+/, '');
  const pathParts = urlPath.split('/');
  const firstSegment = pathParts[0];

  // A single-segment path is the page itself, not a category.
  if (pathParts.length > 1 && firstSegment) {
    return firstSegment.replace(/-/g, ' ');
  }

  return 'Documentation';
}

/**
 * Records a heading in the running hierarchy context.
 *
 * The heading's text is stored in the slot matching its depth and every
 * deeper slot is reset, so later records do not inherit stale sub-headings.
 * Depth 6 shares the `lvl5` slot. Any other depth (including 0 for a missing
 * heading) leaves the context untouched.
 *
 * @param heading - Heading object whose depth is read via `getHeadingLevel`.
 * @param content - Heading text to store in the matching slot.
 */
function updateHierarchyContext(heading: any, content: string) {
  const depth = getHeadingLevel(heading);

  // Strict membership test: only the exact numbers 1-6 trigger an update.
  if (![1, 2, 3, 4, 5, 6].includes(depth)) return;

  // h6 is folded into the lvl5 slot.
  const slot = Math.min(depth, 5);

  if (slot === 1) {
    hierarchyContext.lvl1 = content;
  }

  // lvl2..lvl5: write the matching slot, clear anything deeper.
  const nestedKeys = ['lvl2', 'lvl3', 'lvl4', 'lvl5'] as const;
  nestedKeys.forEach((key, offset) => {
    const keyLevel = offset + 2;
    if (keyLevel === slot) {
      hierarchyContext[key] = content;
    } else if (keyLevel > slot) {
      hierarchyContext[key] = undefined;
    }
  });
}

// Create hierarchical index record
function createIndex(
section: string | undefined,
sectionId: string | undefined,
content: string,
type: BaseIndex['type'] = 'content',
additionalFields: Partial<BaseIndex> = {}
): BaseIndex {
const currentHierarchy = { ...hierarchyContext };

return {
id: `${page._id}-${(id++).toString()}`,
title: page.title,
Expand All @@ -128,13 +225,35 @@ function toIndex(page: DocumentRecord): BaseIndex[] {
section_id: sectionId,
content,
description: page.description,

// DocSearch-style hierarchy
lvl0: currentHierarchy.lvl0,
lvl1: currentHierarchy.lvl1,
lvl2: currentHierarchy.lvl2,
lvl3: currentHierarchy.lvl3,
lvl4: currentHierarchy.lvl4,
lvl5: currentHierarchy.lvl5,

// Enhanced fields
type,
hierarchy: currentHierarchy,
anchor: sectionId,
is_main_content: type === 'content',

...(page.extra_data || {}),
...additionalFields
};
}

// Add page title as lvl1 record
indexes.push(createIndex(undefined, undefined, page.title, 'lvl1', {
heading_level: 1,
is_root_heading: true
}));

// Add page description as content if available
if (page.description) {
indexes.push(createIndex(undefined, undefined, page.description));
indexes.push(createIndex(undefined, undefined, page.description, 'content'));
}

// Find if there's a "Related topics" heading
Expand All @@ -143,7 +262,8 @@ function toIndex(page: DocumentRecord): BaseIndex[] {
h.content.toLowerCase() === 'related topic' ||
h.content.toLowerCase() === 'related resources' ||
h.content.toLowerCase() === 'related resource' ||
h.content.toLowerCase() === 'related fields'
h.content.toLowerCase() === 'related fields' ||
h.content.toLowerCase() === 'available tools'
);

page.structured.contents.forEach((p) => {
Expand All @@ -158,11 +278,6 @@ function toIndex(page: DocumentRecord): BaseIndex[] {
// Check if we've reached the "Related topics" section
if (heading && relatedTopicsHeading && heading.id === relatedTopicsHeading.id) {
relatedTopicsFound = true;
// Include the "Related topics" heading itself
if (!scannedHeadings.has(heading.id)) {
scannedHeadings.add(heading.id);
indexes.push(createIndex(heading.content, heading.id, heading.content));
}
return;
}

Expand All @@ -171,21 +286,32 @@ function toIndex(page: DocumentRecord): BaseIndex[] {
return;
}

if (p.content && p.content !== heading?.content) {
indexes.push(createIndex(heading?.content, heading?.id, p.content));
}

// Process heading first to update hierarchy context
if (heading && !scannedHeadings.has(heading.id)) {
scannedHeadings.add(heading.id);
// Add heading level as a field to improve search relevance
const headingLevel = getHeadingLevel(heading);
// Root page headings (h1) should get higher priority
const isRootHeading = headingLevel === 1 || heading.content === page.title;
indexes.push(createIndex(heading.content, heading.id, heading.content, {

// Update hierarchy context
updateHierarchyContext(heading, heading.content);

// Determine record type based on heading level
let recordType: BaseIndex['type'] = 'content';
if (headingLevel >= 1 && headingLevel <= 5) {
recordType = `lvl${headingLevel}` as BaseIndex['type'];
}

// Add heading as separate record
indexes.push(createIndex(heading.content, heading.id, heading.content, recordType, {
heading_level: headingLevel,
is_root_heading: isRootHeading
}));
}

// Add content record with current hierarchy context
if (p.content && p.content !== heading?.content) {
indexes.push(createIndex(heading?.content, heading?.id, p.content, 'content'));
}
});

return indexes;
Expand Down
Loading