From 6af79ec926d0e93ddf73899cbdd10b2b83b1ef7c Mon Sep 17 00:00:00 2001 From: Matt Muller Date: Sat, 28 Feb 2026 17:15:37 +1100 Subject: [PATCH 1/8] fix(salesforce): resolve searchContact query failures and createCase status ID bug searchContact: - Replace SELECT FIELDS(All) with a two-step approach (SELECT Id + sobject.retrieve) to avoid requiring the "View All Data" Salesforce permission, which was causing 400 errors across orgs - Change LIKE to = to support orgs with Salesforce Shield encryption on fields such as Email, where LIKE is not a supported operator - Add ORDER BY CreatedDate DESC for deterministic results when multiple records match - Add onErrorContact child node so errors route correctly instead of crashing with "Cannot read properties of undefined (reading 'id')" - Add null guard on onErrorChild to prevent cascade crash if child is not yet wired in an existing flow - Improve error logging to include Salesforce response body for easier debugging createCase: - Fix Status optionsResolver returning status.Id (the CaseStatus metadata record ID) instead of status.MasterLabel (the actual picklist string value e.g. 
"New"), which was causing cases to be created with an ID in the Status field rather than the human-readable value authenticate: - Fix missing semicolon on closing brace of exported arrow function (lint error) --- extensions/salesforce/package.json | 4 +- extensions/salesforce/src/authenticate.ts | 2 +- extensions/salesforce/src/module.ts | 3 +- extensions/salesforce/src/nodes/createCase.ts | 2 +- .../salesforce/src/nodes/searchContact.ts | 81 ++++++++++++++----- 5 files changed, 66 insertions(+), 26 deletions(-) diff --git a/extensions/salesforce/package.json b/extensions/salesforce/package.json index a4e2120e2..1207166fd 100644 --- a/extensions/salesforce/package.json +++ b/extensions/salesforce/package.json @@ -1,6 +1,6 @@ { "name": "salesforce", - "version": "4.4.0", + "version": "4.5.0", "description": "This Extension integrates with all Salesforce Clouds", "main": "build/module.js", "scripts": { @@ -32,4 +32,4 @@ "@cognigy/extension-tools": "^0.16.1", "axios": "^1.13.5" } -} +} \ No newline at end of file diff --git a/extensions/salesforce/src/authenticate.ts b/extensions/salesforce/src/authenticate.ts index 3c5c83b11..bb566411c 100644 --- a/extensions/salesforce/src/authenticate.ts +++ b/extensions/salesforce/src/authenticate.ts @@ -133,4 +133,4 @@ export const authenticate = async (oauthConnection: IConnection["oauthConnection query, sobject }; -} \ No newline at end of file +}; \ No newline at end of file diff --git a/extensions/salesforce/src/module.ts b/extensions/salesforce/src/module.ts index 813431fb9..e97ea2502 100644 --- a/extensions/salesforce/src/module.ts +++ b/extensions/salesforce/src/module.ts @@ -4,7 +4,7 @@ import { onEmptyQueryResults, onFoundQueryResults, queryNode } from "./nodes/que import { createCaseNode, onErrorCreateCase, onSuccessCreateCase } from "./nodes/createCase"; import { getCaseNode, onErrorGetCase, onSuccessGetCase } from "./nodes/getCase"; import { entityRequestNode, onErrorEntityRequest, onSuccessEntityRequest } from 
"./nodes/entityRequest"; -import { onFoundContact, onNotFoundContact, searchContactNode } from "./nodes/searchContact"; +import { onErrorContact, onFoundContact, onNotFoundContact, searchContactNode } from "./nodes/searchContact"; export default createExtension({ nodes: [ @@ -19,6 +19,7 @@ export default createExtension({ searchContactNode, onFoundContact, onNotFoundContact, + onErrorContact, queryNode, onFoundQueryResults, diff --git a/extensions/salesforce/src/nodes/createCase.ts b/extensions/salesforce/src/nodes/createCase.ts index 55f60d27d..f05e5abd5 100644 --- a/extensions/salesforce/src/nodes/createCase.ts +++ b/extensions/salesforce/src/nodes/createCase.ts @@ -101,7 +101,7 @@ export const createCaseNode = createNodeDescriptor({ // Step 3: Map statuses to "options array" return statuses.map((status: ISalesforceCaseStatus) => ({ label: status.MasterLabel, - value: status.Id, + value: status.MasterLabel, })); } catch (error) { const errorMessage = error instanceof Error diff --git a/extensions/salesforce/src/nodes/searchContact.ts b/extensions/salesforce/src/nodes/searchContact.ts index 649b1ceeb..d05b53320 100644 --- a/extensions/salesforce/src/nodes/searchContact.ts +++ b/extensions/salesforce/src/nodes/searchContact.ts @@ -200,7 +200,8 @@ export const searchContactNode = createNodeDescriptor({ dependencies: { children: [ "onFoundContact", - "onNotFoundContact" + "onNotFoundContact", + "onErrorContact" ] }, function: async ({ cognigy, config, childConfigs }: ISearchContactParams) => { @@ -208,35 +209,50 @@ export const searchContactNode = createNodeDescriptor({ const { contactField, contactFieldValue, oauthConnection, storeLocation, contextKey, inputKey } = config; try { - const salesforceConnection = await authenticate(oauthConnection); - const soql: string = `SELECT FIELDS(All) FROM Contact WHERE ${contactField} LIKE '${contactFieldValue}' LIMIT 200`; - const record = await salesforceConnection.query(soql, { autoFetch: true, maxFetch: 1 }); + // Step 1: 
Find the contact ID using the specified field + // Note: LIMIT 1 is used here because only one record is ever stored (records[0]). + // sobject.retrieve() is used in Step 2 to return all standard and custom fields + // without requiring the "View All Data" permission that FIELDS(All) demands. + const soql: string = `SELECT Id FROM Contact WHERE ${contactField} = '${contactFieldValue}' ORDER BY CreatedDate DESC LIMIT 1`; + const result = await salesforceConnection.query(soql); - if (record.records.length === 0) { - const onEmptyQueryResultsChild = childConfigs.find(child => child.type === "onNotFoundContact"); - api.setNextNode(onEmptyQueryResultsChild.id); + if (result.records.length === 0) { + const onNotFoundChild = childConfigs.find(child => child.type === "onNotFoundContact"); + api.setNextNode(onNotFoundChild.id); } else { - const onFoundQueryResultsChild = childConfigs.find(child => child.type === "onFoundContact"); - api.setNextNode(onFoundQueryResultsChild.id); - } + // Step 2: Retrieve the full contact record by ID — returns all standard and custom fields + const contactId = result.records[0].Id; + const fullContact = await salesforceConnection.sobject("Contact").retrieve(contactId); - if (storeLocation === "context") { - api.addToContext(contextKey, record?.records[0], "simple"); - } else { - // @ts-ignore - api.addToInput(inputKey, record?.records[0]); + const onFoundChild = childConfigs.find(child => child.type === "onFoundContact"); + api.setNextNode(onFoundChild.id); + + if (storeLocation === "context") { + api.addToContext(contextKey, fullContact, "simple"); + } else { + // @ts-ignore + api.addToInput(inputKey, fullContact); + } } } catch (error) { - const errorMessage = error instanceof Error - ? error.message - : JSON.stringify(error); + let errorMessage: string; + if (error instanceof Error) { + const axiosResponseData = (error as any)?.response?.data; + errorMessage = axiosResponseData + ? 
`${error.message} — ${JSON.stringify(axiosResponseData)}` + : error.message; + } else { + errorMessage = JSON.stringify(error); + } api.log("error", `searchContact execution failed: ${errorMessage}`); - const onErrorChild = childConfigs.find(child => child.type === "onErrorGetCase"); - api.setNextNode(onErrorChild.id); + const onErrorChild = childConfigs.find(child => child.type === "onErrorContact"); + if (onErrorChild) { + api.setNextNode(onErrorChild.id); + } if (storeLocation === "context") { api.addToContext(contextKey, errorMessage, "simple"); @@ -292,4 +308,27 @@ export const onNotFoundContact = createNodeDescriptor({ variant: "mini", showIcon: false } -}); \ No newline at end of file +}); + +export const onErrorContact = createNodeDescriptor({ + type: "onErrorContact", + parentType: "searchContact", + defaultLabel: "On Error", + constraints: { + editable: false, + deletable: false, + creatable: false, + movable: false, + placement: { + predecessor: { + whitelist: [] + } + } + }, + appearance: { + color: "#cf142b", + textColor: "white", + variant: "mini", + showIcon: false + } +}); From f52aec0862f80b53e2c07722bb5d6bdaae56fa9f Mon Sep 17 00:00:00 2001 From: Matt Muller Date: Sat, 28 Feb 2026 21:55:43 +1100 Subject: [PATCH 2/8] add Salesforce Knowledge Connector with role-based access --- .../salesforceKnowledgeConnector.ts | 292 ++++++++++++++++++ extensions/salesforce/src/module.ts | 5 + 2 files changed, 297 insertions(+) create mode 100644 extensions/salesforce/src/knowledge-connectors/salesforceKnowledgeConnector.ts diff --git a/extensions/salesforce/src/knowledge-connectors/salesforceKnowledgeConnector.ts b/extensions/salesforce/src/knowledge-connectors/salesforceKnowledgeConnector.ts new file mode 100644 index 000000000..dcfdbd613 --- /dev/null +++ b/extensions/salesforce/src/knowledge-connectors/salesforceKnowledgeConnector.ts @@ -0,0 +1,292 @@ +import { createKnowledgeConnector } from "@cognigy/extension-tools"; +import { authenticate } from 
"../authenticate"; + +interface IOAuthConnection { + consumerKey: string; + consumerSecret: string; + instanceUrl: string; +} + +/** + * Strip HTML tags and decode entities into plain readable text. + * Preserves structure for tables (pipe-separated) and lists (bullet points). + */ +function stripHtml(html: string): string { + if (!html) return ""; + return html + .replace(//gi, "\n") + .replace(/<\/p>/gi, "\n") + .replace(/<\/h[1-6]>/gi, "\n") + .replace(/<\/li>/gi, "\n") + .replace(/<\/tr>/gi, "\n") + .replace(/<\/td>/gi, " | ") + .replace(/<\/th>/gi, " | ") + .replace(/]*>/gi, "• ") + .replace(/<[^>]+>/g, "") + .replace(/&/g, "&") + .replace(/</g, "<") + .replace(/>/g, ">") + .replace(/ /g, " ") + .replace(/"/g, '"') + .replace(/'/g, "'") + .replace(/&[a-z]+;/gi, " ") + .replace(/[ \t]+/g, " ") + .replace(/\n{3,}/g, "\n\n") + .trim(); +} + +/** + * Remove null bytes and non-printable control characters that can cause + * embedding/store failures. Preserves standard whitespace (newline, tab). + */ +function sanitizeText(text: string): string { + if (!text) return ""; + // Remove null bytes and ASCII control characters except \t (9), \n (10), \r (13) + return text.replace(/[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]/g, "").trim(); +} + +/** + * Split long text into chunks at natural boundaries (paragraphs → sentences → words). + */ +function chunkContent(text: string, maxSize: number = 800): string[] { + if (!text || text.length === 0) return []; + if (text.length <= maxSize) return [text]; + + const chunks: string[] = []; + let remaining = text; + + while (remaining.length > 0) { + if (remaining.length <= maxSize) { + chunks.push(remaining.trim()); + break; + } + + let splitAt = remaining.lastIndexOf("\n\n", maxSize); + if (splitAt < maxSize * 0.5) splitAt = remaining.lastIndexOf("\n", maxSize); + if (splitAt < maxSize * 0.5) splitAt = remaining.lastIndexOf(". 
", maxSize); + if (splitAt < maxSize * 0.5) splitAt = remaining.lastIndexOf(" ", maxSize); + if (splitAt <= 0) splitAt = maxSize; + + const chunk = remaining.substring(0, splitAt).trim(); + if (chunk.length > 0) chunks.push(chunk); + remaining = remaining.substring(splitAt).trim(); + } + + return chunks.filter(c => c.length > 0); +} + +/** + * Build readable plain text from a set of article fields. + * Each non-empty field is prefixed with a human-readable label. + */ +function buildArticleText(title: string, summary: string, fields: string[], article: any): string { + const sections: string[] = []; + + if (title) sections.push(`# ${sanitizeText(title)}`); + if (summary) sections.push(sanitizeText(stripHtml(summary))); + + for (const field of fields) { + const raw = article[field]; + if (!raw) continue; + const content = sanitizeText(stripHtml(String(raw))); + if (!content) continue; + // Convert field API name to a readable label: Manager_Actions__c → Manager Actions + const label = field.replace(/__c$/i, "").replace(/_/g, " "); + sections.push(`${label}:\n${content}`); + } + + return sections.join("\n\n"); +} + +export const salesforceKnowledgeConnector = createKnowledgeConnector({ + type: "salesforceKnowledgeConnector", + label: "Salesforce Knowledge", + summary: "Imports published Salesforce Knowledge articles into Cognigy Knowledge AI with role-based separation for supervisors and managers", + fields: [ + { + key: "oauthConnection", + label: "Salesforce Connected App", + type: "connection", + params: { + connectionType: "oauth", + required: true + } + }, + { + key: "knowledgeApiName", + label: "Knowledge Article Object API Name", + type: "text", + defaultValue: "Knowledge_Article__kav", + description: "The API name of your Salesforce Knowledge Article object, e.g. 
Knowledge_Article__kav", + params: { required: true } + }, + { + key: "language", + label: "Language", + type: "text", + defaultValue: "en_US", + description: "Language code to filter published articles, e.g. en_US", + params: { required: true } + }, + { + key: "agentFields", + label: "Agent Content Fields", + type: "textArray", + defaultValue: [ + "Overview__c", + "Details__c", + "Inclusions__c", + "Exclusions__c", + "Information__c", + "Processing_Steps_Text__c", + "Questions__c", + "Answer__c", + "Scripting__c", + "Actions__c" + ], + description: "API names of article fields to include in agent-accessible knowledge. Content from these fields will be indexed for all agent flows.", + params: { required: true } + }, + { + key: "agentTags", + label: "Agent Knowledge Tags", + type: "chipInput", + defaultValue: ["agent"], + description: "Tags applied to agent knowledge sources. Agent Copilot should filter by this tag. Supervisor Copilot should filter by both this tag and the supervisor tag to see all content. Press ENTER to add a tag." + }, + { + key: "supervisorFields", + label: "Supervisor / Manager Content Fields", + type: "textArray", + defaultValue: [ + "Manager_Actions__c", + "Manager_information__c", + "Manager_processing_steps__c", + "Manager_scripting__c" + ], + description: "API names of fields containing manager or supervisor-only content. A separate knowledge source tagged for supervisors will be created for each article that has content in these fields.", + params: { required: false } + }, + { + key: "supervisorTags", + label: "Supervisor Knowledge Tags", + type: "chipInput", + defaultValue: ["supervisor"], + description: "Tags applied to supervisor-only knowledge sources. Supervisor Copilot should filter by both this tag and the agent tag to see full article content including manager sections. Press ENTER to add a tag." 
+ } + ] as const, + sections: [ + { + key: "supervisorAccess", + label: "Supervisor / Manager Access", + defaultCollapsed: true, + fields: ["supervisorFields", "supervisorTags"] + } + ], + form: [ + { type: "field", key: "oauthConnection" }, + { type: "field", key: "knowledgeApiName" }, + { type: "field", key: "language" }, + { type: "field", key: "agentFields" }, + { type: "field", key: "agentTags" }, + { type: "section", key: "supervisorAccess" } + ], + function: async ({ config, api }) => { + const { + oauthConnection, + knowledgeApiName, + language, + agentFields, + agentTags, + supervisorFields, + supervisorTags + } = config; + + const salesforceConnection = await authenticate(oauthConnection as IOAuthConnection); + + // Combine all content fields for a single SOQL query, deduplicated + const allContentFields = [...new Set([ + ...(agentFields as string[]), + ...(supervisorFields as string[]) + ])]; + + const soql = [ + `SELECT Id, KnowledgeArticleId, ArticleNumber, Title, Summary, UrlName,`, + `Language, LastPublishedDate, ${allContentFields.join(", ")}`, + `FROM ${knowledgeApiName}`, + `WHERE PublishStatus = 'Online'`, + `AND Language = '${language}'`, + `AND IsLatestVersion = true`, + `ORDER BY Title ASC` + ].join(" "); + + const result = await salesforceConnection.query(soql, { autoFetch: true }); + const articles = result.records; + + for (const article of articles) { + const title = article.Title || `Article ${article.ArticleNumber}`; + const summary = article.Summary || ""; + + const articleMeta = { + articleId: String(article.Id || ""), + articleNumber: String(article.ArticleNumber || ""), + knowledgeArticleId: String(article.KnowledgeArticleId || ""), + urlName: String(article.UrlName || ""), + language: String(article.Language || language), + lastPublishedDate: String(article.LastPublishedDate || "") + }; + + // --- Agent Knowledge Source --- + const agentText = buildArticleText(title, summary, agentFields as string[], article); + const agentChunks = 
chunkContent(agentText); + + if (agentChunks.length > 0) { + console.log(`[Salesforce KC] Agent: "${title}" (${articleMeta.articleNumber}) — ${agentChunks.length} chunk(s)`); + const { knowledgeSourceId: agentSourceId } = await api.createKnowledgeSource({ + name: `[${articleMeta.articleNumber}] ${title}`, + tags: agentTags as string[], + chunkCount: agentChunks.length + }); + + for (let i = 0; i < agentChunks.length; i++) { + console.log(`[Salesforce KC] Agent chunk ${i + 1}/${agentChunks.length} for "${title}"`); + await api.createKnowledgeChunk({ + knowledgeSourceId: agentSourceId, + text: agentChunks[i], + data: { ...articleMeta, role: "agent" } + }); + } + } + + // --- Supervisor Knowledge Source --- + // Only created if at least one supervisor field has actual content + const hasSupervisorContent = (supervisorFields as string[]).some(field => { + const raw = article[field]; + return raw && sanitizeText(stripHtml(String(raw))).length > 0; + }); + + if (hasSupervisorContent) { + const supervisorText = buildArticleText(title, summary, supervisorFields as string[], article); + const supervisorChunks = chunkContent(supervisorText); + + if (supervisorChunks.length > 0) { + console.log(`[Salesforce KC] Supervisor: "${title}" (${articleMeta.articleNumber}) — ${supervisorChunks.length} chunk(s)`); + const { knowledgeSourceId: supervisorSourceId } = await api.createKnowledgeSource({ + name: `[${articleMeta.articleNumber}] ${title} — Manager`, + tags: supervisorTags as string[], + chunkCount: supervisorChunks.length + }); + + for (let i = 0; i < supervisorChunks.length; i++) { + console.log(`[Salesforce KC] Supervisor chunk ${i + 1}/${supervisorChunks.length} for "${title}"`); + await api.createKnowledgeChunk({ + knowledgeSourceId: supervisorSourceId, + text: supervisorChunks[i], + data: { ...articleMeta, role: "supervisor" } + }); + } + } + } + } + } +}); diff --git a/extensions/salesforce/src/module.ts b/extensions/salesforce/src/module.ts index e97ea2502..5e8ca1ece 
100644 --- a/extensions/salesforce/src/module.ts +++ b/extensions/salesforce/src/module.ts @@ -1,5 +1,6 @@ import { createExtension } from "@cognigy/extension-tools"; import { oauth } from "./connections/oauth"; +import { salesforceKnowledgeConnector } from "./knowledge-connectors/salesforceKnowledgeConnector"; import { onEmptyQueryResults, onFoundQueryResults, queryNode } from "./nodes/query"; import { createCaseNode, onErrorCreateCase, onSuccessCreateCase } from "./nodes/createCase"; import { getCaseNode, onErrorGetCase, onSuccessGetCase } from "./nodes/getCase"; @@ -34,6 +35,10 @@ export default createExtension({ oauth ], + knowledge: [ + salesforceKnowledgeConnector + ], + options: { label: "Salesforce" } From d438a54fce9ce23acccf60fddf47edff3220ae4e Mon Sep 17 00:00:00 2001 From: Matt Muller Date: Wed, 4 Mar 2026 23:05:59 +1100 Subject: [PATCH 3/8] Salesforce knowledge improvements * Features: * - Local state file (import-state.json) tracks what's been imported and when * - Skip articles that are already up-to-date (same LastPublishedDate) * - Delete + recreate articles whose content has changed * - Retry with exponential backoff on 405/429/502/503 * - --from=N flag to resume from a specific article index (1-based) --- extensions/salesforce/import-all.ts | 493 ++++++++++++++++++ extensions/salesforce/import-one.ts | 210 ++++++++ extensions/salesforce/package-lock.json | 198 ++++++- extensions/salesforce/package.json | 7 +- .../cognigyManagementApi.ts | 74 +++ .../salesforceKnowledgeConnector.ts | 187 ++++++- 6 files changed, 1153 insertions(+), 16 deletions(-) create mode 100644 extensions/salesforce/import-all.ts create mode 100644 extensions/salesforce/import-one.ts create mode 100644 extensions/salesforce/src/knowledge-connectors/cognigyManagementApi.ts diff --git a/extensions/salesforce/import-all.ts b/extensions/salesforce/import-all.ts new file mode 100644 index 000000000..0488ae275 --- /dev/null +++ b/extensions/salesforce/import-all.ts @@ -0,0 +1,493 
@@ +/** + * Full import of all published Salesforce Knowledge articles into Cognigy Knowledge AI. + * Same logic as the connector — fetches from Salesforce, processes text, creates + * sources + chunks via the Cognigy REST API. + * Run: npx ts-node import-all.ts + * Resume: npx ts-node import-all.ts --from=235 + */ + +import * as dotenv from "dotenv"; +dotenv.config(); + +import axios, { AxiosInstance } from "axios"; +import * as fs from "fs"; +import * as path from "path"; +import { authenticate } from "./src/authenticate"; + +// ─── State file ─────────────────────────────────────────────────────────────── + +const STATE_FILE = path.join(__dirname, "import-state.json"); + +interface ArticleState { + agentSourceId?: string; + supervisorSourceId?: string; + lastPublishedDate: string; +} + +type ImportState = Record; // keyed by articleNumber + +function loadState(): ImportState { + try { + if (fs.existsSync(STATE_FILE)) { + return JSON.parse(fs.readFileSync(STATE_FILE, "utf8")); + } + } catch { + console.warn("Warning: Could not read import-state.json, starting fresh."); + } + return {}; +} + +function saveState(state: ImportState): void { + fs.writeFileSync(STATE_FILE, JSON.stringify(state, null, 2), "utf8"); +} + +// ─── Text helpers ───────────────────────────────────────────────────────────── + +function stripHtml(html: string): string { + if (!html) return ""; + return html + .replace(//gi, "\n") + .replace(/<\/p>/gi, "\n") + .replace(/<\/h[1-6]>/gi, "\n") + .replace(/<\/li>/gi, "\n") + .replace(/<\/tr>/gi, "\n") + .replace(/<\/td>/gi, " | ") + .replace(/<\/th>/gi, " | ") + .replace(/]*>/gi, "- ") + .replace(/<[^>]+>/g, "") + .replace(/&/g, "&") + .replace(/</g, "<") + .replace(/>/g, ">") + .replace(/ /g, " ") + .replace(/"/g, '"') + .replace(/'/g, "'") + .replace(/&[a-z]+;/gi, " ") + .replace(/[ \t]+/g, " ") + .replace(/\n{3,}/g, "\n\n") + .trim(); +} + +function sanitizeText(text: string): string { + if (!text) return ""; + return text + 
.replace(/[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]/g, "") + .replace(/[^\x09\x0A\x0D\x20-\x7E]/g, "") + .replace(/[ \t]{2,}/g, " ") + .trim(); +} + +function chunkContent(text: string, maxSize: number = 2000): string[] { + if (!text || text.length === 0) return []; + if (text.length <= maxSize) return [text]; + const chunks: string[] = []; + let remaining = text; + while (remaining.length > 0) { + if (remaining.length <= maxSize) { + chunks.push(remaining.trim()); + break; + } + let splitAt = remaining.lastIndexOf("\n\n", maxSize); + if (splitAt < maxSize * 0.5) splitAt = remaining.lastIndexOf("\n", maxSize); + if (splitAt < maxSize * 0.5) splitAt = remaining.lastIndexOf(". ", maxSize); + if (splitAt < maxSize * 0.5) splitAt = remaining.lastIndexOf(" ", maxSize); + if (splitAt <= 0) splitAt = maxSize; + const chunk = remaining.substring(0, splitAt).trim(); + if (chunk.length > 0) chunks.push(chunk); + remaining = remaining.substring(splitAt).trim(); + } + return chunks.filter((c) => c.length > 0); +} + +function sanitizeSourceName(name: string): string { + return name + .replace(/\u2013|\u2014/g, "-") + .replace(/&/g, "and") + .replace(/[/?!:()#*+<>=^~%@\\]/g, " ") + .replace(/[ \t]{2,}/g, " ") + .replace(/-{2,}/g, "-") + .trim(); +} + +function buildArticleText( + title: string, + summary: string, + fields: string[], + article: any, +): string { + const sections: string[] = []; + if (title) sections.push(`# ${sanitizeText(title)}`); + if (summary) sections.push(sanitizeText(stripHtml(summary))); + for (const field of fields) { + const raw = article[field]; + if (!raw) continue; + const content = sanitizeText(stripHtml(String(raw))); + if (!content) continue; + const label = field.replace(/__c$/i, "").replace(/_/g, " "); + sections.push(`${label}:\n${content}`); + } + return sections.join("\n\n"); +} + +// ─── Cognigy REST API helpers ───────────────────────────────────────────────── + +const STORE_ID = process.env.COGNIGY_KNOWLEDGE_STORE_ID!; + +function makeCognigyApi(): 
AxiosInstance { + return axios.create({ + baseURL: process.env.COGNIGY_API_BASE, + headers: { "X-API-Key": process.env.COGNIGY_API_KEY }, + }); +} + +const sleep = (ms: number) => new Promise((r) => setTimeout(r, ms)); + +/** Detect AWS WAF / CAPTCHA HTML response instead of JSON */ +function isWafResponse(err: any): boolean { + const body = err.response?.data; + if (typeof body === "string" && body.includes("( + fn: () => Promise, + label: string, + maxRetries = 5, +): Promise { + let delay = 2000; + for (let attempt = 1; attempt <= maxRetries; attempt++) { + try { + return await fn(); + } catch (err: any) { + const status = err.response?.status; + + if (isWafResponse(err)) { + console.warn( + ` [WAF detected] ${label} — AWS WAF challenge returned. Waiting 60s before retry...`, + ); + await sleep(60000); + // Reset delay after WAF cooldown + delay = 2000; + continue; + } + + const retryable = + status === 405 || + status === 429 || + status === 503 || + status === 502 || + !status; + if (retryable && attempt < maxRetries) { + console.warn( + ` [retry ${attempt}/${maxRetries - 1}] ${label} — HTTP ${status ?? 
err.message}, waiting ${delay / 1000}s...`, + ); + await sleep(delay); + delay = Math.min(delay * 2, 30000); + } else { + throw err; + } + } + } + throw new Error(`${label} failed after ${maxRetries} attempts`); +} + +/** Returns true if the error message indicates a duplicate name conflict */ +function isDuplicateNameError(err: any): boolean { + const detail: string = err.response?.data?.detail || err.message || ""; + return detail.toLowerCase().includes("already exists"); +} + +async function createSource(api: AxiosInstance, name: string): Promise { + const resp = await withRetry( + () => + api.post(`/v2.0/knowledgestores/${STORE_ID}/sources`, { + name, + type: "manual", + }), + `createSource(${name})`, + ); + return resp.data.knowledgeSource._id as string; +} + +async function createChunk( + api: AxiosInstance, + sourceId: string, + text: string, + order: number, + data: Record, +): Promise { + await withRetry( + () => + api.post(`/v2.0/knowledgestores/${STORE_ID}/sources/${sourceId}/chunks`, { + text, + order, + data, + }), + `createChunk(order=${order})`, + ); +} + +async function deleteSource( + api: AxiosInstance, + sourceId: string, +): Promise { + try { + await api.delete(`/v2.0/knowledgestores/${STORE_ID}/sources/${sourceId}`); + } catch { + // best-effort cleanup + } +} + +// ─── Field config ───────────────────────────────────────────────────────────── + +const agentFields = [ + "Overview__c", + "Details__c", + "Inclusions__c", + "Exclusions__c", + "Information__c", + "Processing_Steps_Text__c", + "Questions__c", + "Answer__c", + "Scripting__c", + "Actions__c", +]; +const supervisorFields = [ + "Manager_Actions__c", + "Manager_information__c", + "Manager_processing_steps__c", + "Manager_scripting__c", +]; + +// ─── Per-article import ─────────────────────────────────────────────────────── + +type ImportResult = "imported" | "updated" | "skipped" | "failed" | "empty"; + +async function importArticle( + cognigy: AxiosInstance, + article: any, + 
instanceUrl: string, + index: number, + total: number, + state: ImportState, +): Promise { + const title = article.Title || `Article ${article.ArticleNumber}`; + const summary = article.Summary || ""; + const articleNum = String(article.ArticleNumber || ""); + const knowledgeArticleId = String(article.KnowledgeArticleId || ""); + const articleUrl = `${instanceUrl}/lightning/articles/${knowledgeArticleId}`; + const lastPublishedDate = String(article.LastPublishedDate || ""); + const prefix = `[${index}/${total}] [${articleNum}]`; + + const agentText = buildArticleText(title, summary, agentFields, article); + const agentChunks = chunkContent(agentText); + + const hasSupervisor = supervisorFields.some((f) => { + const raw = article[f]; + return raw && sanitizeText(stripHtml(String(raw))).length > 0; + }); + const supervisorChunks = hasSupervisor + ? chunkContent(buildArticleText(title, summary, supervisorFields, article)) + : []; + + if (agentChunks.length === 0 && supervisorChunks.length === 0) { + console.log(`${prefix} SKIP — no content`); + return "empty"; + } + + // ── Check if already imported and up-to-date ────────────────────────────── + const existing = state[articleNum]; + if (existing) { + if (existing.lastPublishedDate === lastPublishedDate) { + console.log(`${prefix} SKIP — up to date (${lastPublishedDate})`); + return "skipped"; + } + // Content may have changed — delete old sources and reimport + console.log( + `${prefix} UPDATE — date changed (${existing.lastPublishedDate} → ${lastPublishedDate})`, + ); + if (existing.agentSourceId) { + await deleteSource(cognigy, existing.agentSourceId); + } + if (existing.supervisorSourceId) { + await deleteSource(cognigy, existing.supervisorSourceId); + } + delete state[articleNum]; + saveState(state); + } + + let agentSourceId: string | undefined; + let supervisorSourceId: string | undefined; + let anyFailed = false; + + // ── Agent source ────────────────────────────────────────────────────────── + if 
(agentChunks.length > 0) { + const sourceName = sanitizeSourceName(`[${articleNum}] ${title}`); + try { + agentSourceId = await createSource(cognigy, sourceName); + for (let i = 0; i < agentChunks.length; i++) { + await createChunk(cognigy, agentSourceId, agentChunks[i], i, { + articleNumber: articleNum, + role: "agent", + url: articleUrl, + }); + } + console.log(`${prefix} Agent OK — ${agentChunks.length} chunk(s)`); + } catch (err: any) { + const msg = err.response?.data?.detail || err.message; + + // If duplicate error and we don't have a state entry, the source + // exists from a previous run that we have no record of — treat as + // up-to-date (can't retrieve the source ID without the list API). + if (isDuplicateNameError(err)) { + console.warn( + `${prefix} Agent SKIP — source already exists (no state record; treating as current)`, + ); + } else { + console.error(`${prefix} Agent FAILED — ${msg}`); + anyFailed = true; + } + if (agentSourceId) await deleteSource(cognigy, agentSourceId); + agentSourceId = undefined; + } + } + + // ── Supervisor source ───────────────────────────────────────────────────── + if (supervisorChunks.length > 0) { + const sourceName = sanitizeSourceName(`[${articleNum}] ${title} - Manager`); + try { + supervisorSourceId = await createSource(cognigy, sourceName); + for (let i = 0; i < supervisorChunks.length; i++) { + await createChunk(cognigy, supervisorSourceId, supervisorChunks[i], i, { + articleNumber: articleNum, + role: "supervisor", + url: articleUrl, + }); + } + console.log( + `${prefix} Supervisor OK — ${supervisorChunks.length} chunk(s)`, + ); + } catch (err: any) { + const msg = err.response?.data?.detail || err.message; + if (isDuplicateNameError(err)) { + console.warn( + `${prefix} Supervisor SKIP — source already exists (no state record; treating as current)`, + ); + } else { + console.error(`${prefix} Supervisor FAILED — ${msg}`); + anyFailed = true; + } + if (supervisorSourceId) await deleteSource(cognigy, 
supervisorSourceId); + supervisorSourceId = undefined; + } + } + + // ── Persist state ───────────────────────────────────────────────────────── + // Record even partial success so we can skip on re-run + const hadPreviousEntry = !!existing; + state[articleNum] = { + ...(agentSourceId ? { agentSourceId } : {}), + ...(supervisorSourceId ? { supervisorSourceId } : {}), + lastPublishedDate, + }; + saveState(state); + + if (anyFailed) return "failed"; + return hadPreviousEntry ? "updated" : "imported"; +} + +// ─── Main ───────────────────────────────────────────────────────────────────── + +async function main() { + console.log("Authenticating with Salesforce..."); + const sf = await authenticate({ + consumerKey: process.env.SF_CONSUMER_KEY!, + consumerSecret: process.env.SF_CONSUMER_SECRET!, + instanceUrl: process.env.SF_INSTANCE_URL!, + }); + console.log(`Authenticated. Instance: ${sf.instanceUrl}\n`); + + const allFields = [...new Set([...agentFields, ...supervisorFields])]; + const soql = [ + `SELECT Id, KnowledgeArticleId, ArticleNumber, Title, Summary, UrlName,`, + `Language, LastPublishedDate, ${allFields.join(", ")}`, + `FROM Knowledge_Article__kav`, + `WHERE PublishStatus = 'Online'`, + `AND Language = 'en_US'`, + `AND IsLatestVersion = true`, + `ORDER BY Title ASC`, + ].join(" "); + + console.log("Fetching all published en_US articles..."); + const result = await sf.query(soql, { autoFetch: true }); + const articles = result.records; + console.log(`Found ${articles.length} articles.\n`); + + // Support --from N to resume from a specific article index (1-based) + const fromArg = process.argv.find((a) => a.startsWith("--from=")); + const startFrom = fromArg + ? 
Math.max(0, parseInt(fromArg.split("=")[1], 10) - 1) + : 0; + if (startFrom > 0) + console.log(`Resuming from article ${startFrom + 1}/${articles.length}\n`); + + const state = loadState(); + const stateCount = Object.keys(state).length; + if (stateCount > 0) { + console.log( + `State file loaded: ${stateCount} article(s) already tracked.\n`, + ); + } + + console.log("=".repeat(70)); + + const cognigy = makeCognigyApi(); + const startTime = Date.now(); + + let imported = 0; + let updated = 0; + let skipped = 0; + let empty = 0; + let failed = 0; + + for (let i = startFrom; i < articles.length; i++) { + try { + const outcome = await importArticle( + cognigy, + articles[i], + sf.instanceUrl, + i + 1, + articles.length, + state, + ); + if (outcome === "imported") imported++; + else if (outcome === "updated") updated++; + else if (outcome === "skipped") skipped++; + else if (outcome === "empty") empty++; + else if (outcome === "failed") failed++; + } catch (err: any) { + failed++; + console.error( + `[${i + 1}/${articles.length}] Unexpected error: ${err.message}`, + ); + } + // Pause between every article to avoid WAF rate limiting + await sleep(1500); + } + + const elapsed = Math.round((Date.now() - startTime) / 1000); + + console.log("=".repeat(70)); + console.log(`\nDone in ${elapsed}s`); + console.log(` Imported (new): ${imported}`); + console.log(` Updated (changed): ${updated}`); + console.log(` Skipped (up to date): ${skipped}`); + console.log(` Skipped (no content): ${empty}`); + console.log(` Errors: ${failed}`); +} + +main().catch((err) => { + console.error("Fatal:", err.response?.data || err.message); + process.exit(1); +}); diff --git a/extensions/salesforce/import-one.ts b/extensions/salesforce/import-one.ts new file mode 100644 index 000000000..84a1ef383 --- /dev/null +++ b/extensions/salesforce/import-one.ts @@ -0,0 +1,210 @@ +/** + * Import one Salesforce Knowledge article directly into Cognigy Knowledge AI + * via REST API (bypasses the extension 
runtime). + * + * Run: npx ts-node import-one.ts + */ + +import * as dotenv from "dotenv"; +dotenv.config(); + +import axios from "axios"; +import { authenticate } from "./src/authenticate"; + +// ─── Same helpers as salesforceKnowledgeConnector.ts ───────────────────────── + +function stripHtml(html: string): string { + if (!html) return ""; + return html + .replace(//gi, "\n") + .replace(/<\/p>/gi, "\n") + .replace(/<\/h[1-6]>/gi, "\n") + .replace(/<\/li>/gi, "\n") + .replace(/<\/tr>/gi, "\n") + .replace(/<\/td>/gi, " | ") + .replace(/<\/th>/gi, " | ") + .replace(/]*>/gi, "- ") + .replace(/<[^>]+>/g, "") + .replace(/&/g, "&") + .replace(/</g, "<") + .replace(/>/g, ">") + .replace(/ /g, " ") + .replace(/"/g, '"') + .replace(/'/g, "'") + .replace(/&[a-z]+;/gi, " ") + .replace(/[ \t]+/g, " ") + .replace(/\n{3,}/g, "\n\n") + .trim(); +} + +function sanitizeText(text: string): string { + if (!text) return ""; + return text + .replace(/[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]/g, "") + .replace(/[^\x09\x0A\x0D\x20-\x7E]/g, "") + .replace(/[ \t]{2,}/g, " ") + .trim(); +} + +function chunkContent(text: string, maxSize: number = 2000): string[] { + if (!text || text.length === 0) return []; + if (text.length <= maxSize) return [text]; + + const chunks: string[] = []; + let remaining = text; + + while (remaining.length > 0) { + if (remaining.length <= maxSize) { + chunks.push(remaining.trim()); + break; + } + + let splitAt = remaining.lastIndexOf("\n\n", maxSize); + if (splitAt < maxSize * 0.5) splitAt = remaining.lastIndexOf("\n", maxSize); + if (splitAt < maxSize * 0.5) splitAt = remaining.lastIndexOf(". 
", maxSize); + if (splitAt < maxSize * 0.5) splitAt = remaining.lastIndexOf(" ", maxSize); + if (splitAt <= 0) splitAt = maxSize; + + const chunk = remaining.substring(0, splitAt).trim(); + if (chunk.length > 0) chunks.push(chunk); + remaining = remaining.substring(splitAt).trim(); + } + + return chunks.filter(c => c.length > 0); +} + +function buildArticleText(title: string, summary: string, fields: string[], article: any): string { + const sections: string[] = []; + if (title) sections.push(`# ${sanitizeText(title)}`); + if (summary) sections.push(sanitizeText(stripHtml(summary))); + for (const field of fields) { + const raw = article[field]; + if (!raw) continue; + const content = sanitizeText(stripHtml(String(raw))); + if (!content) continue; + const label = field.replace(/__c$/i, "").replace(/_/g, " "); + sections.push(`${label}:\n${content}`); + } + return sections.join("\n\n"); +} + +// ─── Cognigy Knowledge REST API ─────────────────────────────────────────────── + +const cognigyApi = axios.create({ + baseURL: process.env.COGNIGY_API_BASE, + headers: { "X-API-Key": process.env.COGNIGY_API_KEY } +}); + +const STORE_ID = process.env.COGNIGY_KNOWLEDGE_STORE_ID!; + +async function createKnowledgeSource(name: string, tags: string[]): Promise { + const resp = await cognigyApi.post(`/v2.0/knowledgestores/${STORE_ID}/sources`, { + name, + type: "manual" + }); + const sourceId: string = resp.data.knowledgeSource._id; + + // Add tags via PATCH if any + if (tags.length > 0) { + try { + await cognigyApi.patch(`/v2.0/knowledgestores/${STORE_ID}/sources/${sourceId}`, { + metaData: { tags } + }); + } catch { + console.warn(` Note: Could not set tags on source (non-critical)`); + } + } + return sourceId; +} + +async function createKnowledgeChunk(sourceId: string, text: string, order: number, metaData?: Record): Promise { + await cognigyApi.post(`/v2.0/knowledgestores/${STORE_ID}/sources/${sourceId}/chunks`, { + text, + order, + ...(metaData ? 
{ data: metaData } : {}) + }); +} + +async function deleteKnowledgeSource(sourceId: string): Promise { + try { + await cognigyApi.delete(`/v2.0/knowledgestores/${STORE_ID}/sources/${sourceId}`); + } catch (e: any) { + console.warn(` Warning: Could not delete source ${sourceId}: ${e.message}`); + } +} + +// ─── Main ───────────────────────────────────────────────────────────────────── + +async function main() { + const agentFields = [ + "Overview__c", "Details__c", "Inclusions__c", "Exclusions__c", + "Information__c", "Processing_Steps_Text__c", "Questions__c", + "Answer__c", "Scripting__c", "Actions__c" + ]; + + console.log("Authenticating with Salesforce..."); + const sf = await authenticate({ + consumerKey: process.env.SF_CONSUMER_KEY!, + consumerSecret: process.env.SF_CONSUMER_SECRET!, + instanceUrl: process.env.SF_INSTANCE_URL! + }); + console.log("Authenticated.\n"); + + // Fetch just ONE article (alphabetically first) + const soql = [ + `SELECT Id, KnowledgeArticleId, ArticleNumber, Title, Summary, UrlName,`, + `Language, LastPublishedDate, ${agentFields.join(", ")}`, + `FROM Knowledge_Article__kav`, + `WHERE PublishStatus = 'Online'`, + `AND Language = 'en_US'`, + `AND IsLatestVersion = true`, + `ORDER BY Title ASC`, + `LIMIT 1` + ].join(" "); + + console.log("Fetching one article from Salesforce..."); + const result = await sf.query(soql); + const article = result.records[0]; + if (!article) throw new Error("No article found"); + + const title = article.Title || `Article ${article.ArticleNumber}`; + const summary = article.Summary || ""; + const articleNum = String(article.ArticleNumber || ""); + const knowledgeArticleId = String(article.KnowledgeArticleId || ""); + const articleUrl = `${process.env.SF_INSTANCE_URL}/lightning/articles/${knowledgeArticleId}`; + + console.log(`Article: [${articleNum}] ${title}`); + + const agentText = buildArticleText(title, summary, agentFields, article); + const agentChunks = chunkContent(agentText); + + 
console.log(`Chunks: ${agentChunks.length}`); + agentChunks.forEach((c, i) => console.log(` Chunk ${i + 1}: ${c.length} chars`)); + + const sourceName = `[${articleNum}] ${title}`; + console.log(`\nCreating knowledge source: "${sourceName}"...`); + const sourceId = await createKnowledgeSource(sourceName, ["agent"]); + console.log(` Source ID: ${sourceId}`); + + console.log("Creating chunks..."); + for (let i = 0; i < agentChunks.length; i++) { + process.stdout.write(` Chunk ${i + 1}/${agentChunks.length}... `); + try { + await createKnowledgeChunk(sourceId, agentChunks[i], i, { articleNumber: articleNum, role: "agent", url: articleUrl }); + console.log("OK"); + } catch (err: any) { + console.log(`FAILED: ${err.response?.data?.detail || err.message}`); + console.log(" Cleaning up source..."); + await deleteKnowledgeSource(sourceId); + throw new Error(`Chunk ${i + 1} failed: ${err.response?.data?.detail || err.message}`); + } + } + + console.log(`\nDone! Article "[${articleNum}] ${title}" imported successfully.`); + console.log(`Check: https://jetstar-dev.cognigy.cloud/project/68e5bf2c1271c80086891c47/68e5bf2cd3f7785588c6af0b/knowledge/69a2c04eba85740ed7854db3`); +} + +main().catch(err => { + console.error("\nFatal error:", err.response?.data || err.message || err); + process.exit(1); +}); diff --git a/extensions/salesforce/package-lock.json b/extensions/salesforce/package-lock.json index 4ca87622c..b26efe03c 100644 --- a/extensions/salesforce/package-lock.json +++ b/extensions/salesforce/package-lock.json @@ -1,20 +1,22 @@ { "name": "salesforce", - "version": "4.4.0", + "version": "4.5.0", "lockfileVersion": 3, "requires": true, "packages": { "": { "name": "salesforce", - "version": "4.4.0", + "version": "4.5.0", "license": "MIT", "dependencies": { "@cognigy/extension-tools": "^0.16.1", "axios": "^1.13.5" }, "devDependencies": { - "@types/node": "^10.12.5", + "@types/node": "^10.17.60", "@types/qs": "^6.9.17", + "dotenv": "^17.3.1", + "ts-node": "^10.9.2", "tslint": 
"^6.1.2", "typescript": "^4.9.5" } @@ -50,6 +52,75 @@ "integrity": "sha512-hWvUZsdDnsfsncIryMolrij2SVMdiKQC5d5zQdF7snNPBiz3Bb2Pg3PD0mlWRMAQtxwXbvRt03jpH3QMiXS38w==", "license": "SEE LICENSE IN LICENSE" }, + "node_modules/@cspotcode/source-map-support": { + "version": "0.8.1", + "resolved": "https://registry.npmjs.org/@cspotcode/source-map-support/-/source-map-support-0.8.1.tgz", + "integrity": "sha512-IchNf6dN4tHoMFIn/7OE8LWZ19Y6q/67Bmf6vnGREv8RSbBVb9LPJxEcnwrcwX6ixSvaiGoomAUvu4YSxXrVgw==", + "dev": true, + "license": "MIT", + "dependencies": { + "@jridgewell/trace-mapping": "0.3.9" + }, + "engines": { + "node": ">=12" + } + }, + "node_modules/@jridgewell/resolve-uri": { + "version": "3.1.2", + "resolved": "https://registry.npmjs.org/@jridgewell/resolve-uri/-/resolve-uri-3.1.2.tgz", + "integrity": "sha512-bRISgCIjP20/tbWSPWMEi54QVPRZExkuD9lJL+UIxUKtwVJA8wW1Trb1jMs1RFXo1CBTNZ/5hpC9QvmKWdopKw==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=6.0.0" + } + }, + "node_modules/@jridgewell/sourcemap-codec": { + "version": "1.5.5", + "resolved": "https://registry.npmjs.org/@jridgewell/sourcemap-codec/-/sourcemap-codec-1.5.5.tgz", + "integrity": "sha512-cYQ9310grqxueWbl+WuIUIaiUaDcj7WOq5fVhEljNVgRfOUhY9fy2zTvfoqWsnebh8Sl70VScFbICvJnLKB0Og==", + "dev": true, + "license": "MIT" + }, + "node_modules/@jridgewell/trace-mapping": { + "version": "0.3.9", + "resolved": "https://registry.npmjs.org/@jridgewell/trace-mapping/-/trace-mapping-0.3.9.tgz", + "integrity": "sha512-3Belt6tdc8bPgAtbcmdtNJlirVoTmEb5e2gC94PnkwEW9jI6CAHUeoG85tjWP5WquqfavoMtMwiG4P926ZKKuQ==", + "dev": true, + "license": "MIT", + "dependencies": { + "@jridgewell/resolve-uri": "^3.0.3", + "@jridgewell/sourcemap-codec": "^1.4.10" + } + }, + "node_modules/@tsconfig/node10": { + "version": "1.0.12", + "resolved": "https://registry.npmjs.org/@tsconfig/node10/-/node10-1.0.12.tgz", + "integrity": "sha512-UCYBaeFvM11aU2y3YPZ//O5Rhj+xKyzy7mvcIoAjASbigy8mHMryP5cK7dgjlz2hWxh1g5pLw084E0a/wlUSFQ==", + "dev": 
true, + "license": "MIT" + }, + "node_modules/@tsconfig/node12": { + "version": "1.0.11", + "resolved": "https://registry.npmjs.org/@tsconfig/node12/-/node12-1.0.11.tgz", + "integrity": "sha512-cqefuRsh12pWyGsIoBKJA9luFu3mRxCA+ORZvA4ktLSzIuCUtWVxGIuXigEwO5/ywWFMZ2QEGKWvkZG1zDMTag==", + "dev": true, + "license": "MIT" + }, + "node_modules/@tsconfig/node14": { + "version": "1.0.3", + "resolved": "https://registry.npmjs.org/@tsconfig/node14/-/node14-1.0.3.tgz", + "integrity": "sha512-ysT8mhdixWK6Hw3i1V2AeRqZ5WfXg1G43mqoYlM2nc6388Fq5jcXyr5mRsqViLx/GJYdoL0bfXD8nmF+Zn/Iow==", + "dev": true, + "license": "MIT" + }, + "node_modules/@tsconfig/node16": { + "version": "1.0.4", + "resolved": "https://registry.npmjs.org/@tsconfig/node16/-/node16-1.0.4.tgz", + "integrity": "sha512-vxhUy4J8lyeyinH7Azl1pdd43GJhZH/tP2weN8TntQblOY+A0XbT8DJk1/oCPuOOyg/Ja757rG0CgHcWC8OfMA==", + "dev": true, + "license": "MIT" + }, "node_modules/@types/node": { "version": "10.17.60", "resolved": "https://registry.npmjs.org/@types/node/-/node-10.17.60.tgz", @@ -64,6 +135,32 @@ "dev": true, "license": "MIT" }, + "node_modules/acorn": { + "version": "8.16.0", + "resolved": "https://registry.npmjs.org/acorn/-/acorn-8.16.0.tgz", + "integrity": "sha512-UVJyE9MttOsBQIDKw1skb9nAwQuR5wuGD3+82K6JgJlm/Y+KI92oNsMNGZCYdDsVtRHSak0pcV5Dno5+4jh9sw==", + "dev": true, + "license": "MIT", + "bin": { + "acorn": "bin/acorn" + }, + "engines": { + "node": ">=0.4.0" + } + }, + "node_modules/acorn-walk": { + "version": "8.3.5", + "resolved": "https://registry.npmjs.org/acorn-walk/-/acorn-walk-8.3.5.tgz", + "integrity": "sha512-HEHNfbars9v4pgpW6SO1KSPkfoS0xVOM/9UzkJltjlsHZmJasxg8aXkuZa7SMf8vKGIBhpUsPluQSqhJFCqebw==", + "dev": true, + "license": "MIT", + "dependencies": { + "acorn": "^8.11.0" + }, + "engines": { + "node": ">=0.4.0" + } + }, "node_modules/ansi-styles": { "version": "3.2.1", "resolved": "https://registry.npmjs.org/ansi-styles/-/ansi-styles-3.2.1.tgz", @@ -77,6 +174,13 @@ "node": ">=4" } }, + "node_modules/arg": { 
+ "version": "4.1.3", + "resolved": "https://registry.npmjs.org/arg/-/arg-4.1.3.tgz", + "integrity": "sha512-58S9QDqG0Xx27YwPSt9fJxivjYl432YCwfDMfZ+71RAqUrZef7LrKQZ3LHLOwCS4FLNBplP533Zx895SeOCHvA==", + "dev": true, + "license": "MIT" + }, "node_modules/argparse": { "version": "1.0.10", "resolved": "https://registry.npmjs.org/argparse/-/argparse-1.0.10.tgz", @@ -203,6 +307,13 @@ "dev": true, "license": "MIT" }, + "node_modules/create-require": { + "version": "1.1.1", + "resolved": "https://registry.npmjs.org/create-require/-/create-require-1.1.1.tgz", + "integrity": "sha512-dcKFX3jn0MpIaXjisoRvexIJVEKzaq7z2rZKxf+MSr9TkdmHmsU4m2lcLojrj/FHl8mk5VxMmYA+ftRkP/3oKQ==", + "dev": true, + "license": "MIT" + }, "node_modules/delayed-stream": { "version": "1.0.0", "resolved": "https://registry.npmjs.org/delayed-stream/-/delayed-stream-1.0.0.tgz", @@ -222,6 +333,19 @@ "node": ">=0.3.1" } }, + "node_modules/dotenv": { + "version": "17.3.1", + "resolved": "https://registry.npmjs.org/dotenv/-/dotenv-17.3.1.tgz", + "integrity": "sha512-IO8C/dzEb6O3F9/twg6ZLXz164a2fhTnEWb95H23Dm4OuN+92NmEAlTrupP9VW6Jm3sO26tQlqyvyi4CsnY9GA==", + "dev": true, + "license": "BSD-2-Clause", + "engines": { + "node": ">=12" + }, + "funding": { + "url": "https://dotenvx.com" + } + }, "node_modules/dunder-proto": { "version": "1.0.1", "resolved": "https://registry.npmjs.org/dunder-proto/-/dunder-proto-1.0.1.tgz", @@ -533,6 +657,13 @@ "js-yaml": "bin/js-yaml.js" } }, + "node_modules/make-error": { + "version": "1.3.6", + "resolved": "https://registry.npmjs.org/make-error/-/make-error-1.3.6.tgz", + "integrity": "sha512-s8UhlNe7vPKomQhC1qFelMokr/Sc3AgNbso3n74mVPA5LTZwkB9NlXf4XPamLxJE8h0gh73rM94xvwRT2CVInw==", + "dev": true, + "license": "ISC" + }, "node_modules/math-intrinsics": { "version": "1.1.0", "resolved": "https://registry.npmjs.org/math-intrinsics/-/math-intrinsics-1.1.0.tgz", @@ -703,6 +834,50 @@ "url": "https://github.com/sponsors/ljharb" } }, + "node_modules/ts-node": { + "version": "10.9.2", + 
"resolved": "https://registry.npmjs.org/ts-node/-/ts-node-10.9.2.tgz", + "integrity": "sha512-f0FFpIdcHgn8zcPSbf1dRevwt047YMnaiJM3u2w2RewrB+fob/zePZcrOyQoLMMO7aBIddLcQIEK5dYjkLnGrQ==", + "dev": true, + "license": "MIT", + "dependencies": { + "@cspotcode/source-map-support": "^0.8.0", + "@tsconfig/node10": "^1.0.7", + "@tsconfig/node12": "^1.0.7", + "@tsconfig/node14": "^1.0.0", + "@tsconfig/node16": "^1.0.2", + "acorn": "^8.4.1", + "acorn-walk": "^8.1.1", + "arg": "^4.1.0", + "create-require": "^1.1.0", + "diff": "^4.0.1", + "make-error": "^1.1.1", + "v8-compile-cache-lib": "^3.0.1", + "yn": "3.1.1" + }, + "bin": { + "ts-node": "dist/bin.js", + "ts-node-cwd": "dist/bin-cwd.js", + "ts-node-esm": "dist/bin-esm.js", + "ts-node-script": "dist/bin-script.js", + "ts-node-transpile-only": "dist/bin-transpile.js", + "ts-script": "dist/bin-script-deprecated.js" + }, + "peerDependencies": { + "@swc/core": ">=1.2.50", + "@swc/wasm": ">=1.2.50", + "@types/node": "*", + "typescript": ">=2.7" + }, + "peerDependenciesMeta": { + "@swc/core": { + "optional": true + }, + "@swc/wasm": { + "optional": true + } + } + }, "node_modules/tslib": { "version": "1.14.1", "resolved": "https://registry.npmjs.org/tslib/-/tslib-1.14.1.tgz", @@ -769,12 +944,29 @@ "node": ">=4.2.0" } }, + "node_modules/v8-compile-cache-lib": { + "version": "3.0.1", + "resolved": "https://registry.npmjs.org/v8-compile-cache-lib/-/v8-compile-cache-lib-3.0.1.tgz", + "integrity": "sha512-wa7YjyUGfNZngI/vtK0UHAN+lgDCxBPCylVXGp0zu59Fz5aiGtNXaq3DhIov063MorB+VfufLh3JlF2KdTK3xg==", + "dev": true, + "license": "MIT" + }, "node_modules/wrappy": { "version": "1.0.2", "resolved": "https://registry.npmjs.org/wrappy/-/wrappy-1.0.2.tgz", "integrity": "sha512-l4Sp/DRseor9wL6EvV2+TuQn63dMkPjZ/sp9XkghTEbV9KlPS1xUsZ3u7/IQO4wxtcFB4bgpQPRcR3QCvezPcQ==", "dev": true, "license": "ISC" + }, + "node_modules/yn": { + "version": "3.1.1", + "resolved": "https://registry.npmjs.org/yn/-/yn-3.1.1.tgz", + "integrity": 
"sha512-Ux4ygGWsu2c7isFWe8Yu1YluJmqVhxqK2cLXNQA5AcC3QfbGNpM7fu0Y8b/z16pXLnFxZYvWhd3fhBY9DLmC6Q==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=6" + } } } } diff --git a/extensions/salesforce/package.json b/extensions/salesforce/package.json index 1207166fd..1eaa11b66 100644 --- a/extensions/salesforce/package.json +++ b/extensions/salesforce/package.json @@ -1,7 +1,8 @@ { "name": "salesforce", - "version": "4.5.0", + "version": "4.5.1", "description": "This Extension integrates with all Salesforce Clouds", + "authors": "Matt Muller ", "main": "build/module.js", "scripts": { "transpile": "tsc -p .", @@ -23,8 +24,10 @@ "author": "Cognigy GmbH", "license": "MIT", "devDependencies": { - "@types/node": "^10.12.5", + "@types/node": "^10.17.60", "@types/qs": "^6.9.17", + "dotenv": "^17.3.1", + "ts-node": "^10.9.2", "tslint": "^6.1.2", "typescript": "^4.9.5" }, diff --git a/extensions/salesforce/src/knowledge-connectors/cognigyManagementApi.ts b/extensions/salesforce/src/knowledge-connectors/cognigyManagementApi.ts new file mode 100644 index 000000000..4f74aba54 --- /dev/null +++ b/extensions/salesforce/src/knowledge-connectors/cognigyManagementApi.ts @@ -0,0 +1,74 @@ +/** + * Thin wrapper around the Cognigy Management REST API. + * + * Provides listing and deletion of knowledge sources — operations not + * exposed by the @cognigy/extension-tools SDK — for incremental sync. + * + * Auth: X-API-Key header (from Profile → API Keys in Cognigy.AI UI). + */ + +import axios from "axios"; + +export interface ManagedSource { + _id: string; + name: string; + description?: string; +} + +const REQUEST_TIMEOUT = 30_000; + +/** + * List all knowledge sources in a store, paginating automatically. 
+ * + * GET {apiUrl}/v2.0/knowledgestores/{storeId}/knowledgesources?limit=100&skip=0 + */ +export async function listKnowledgeSources( + apiUrl: string, + apiKey: string, + storeId: string, +): Promise { + const all: ManagedSource[] = []; + const limit = 100; + let skip = 0; + + while (true) { + const res = await axios.get<{ data: ManagedSource[] }>( + `${apiUrl}/v2.0/knowledgestores/${storeId}/knowledgesources`, + { + params: { limit, skip }, + headers: { "X-API-Key": apiKey }, + timeout: REQUEST_TIMEOUT, + validateStatus: (s) => s >= 200 && s < 300, + }, + ); + + const page = res.data?.data ?? []; + all.push(...page); + + if (page.length < limit) break; + skip += limit; + } + + return all; +} + +/** + * Delete a single knowledge source by its ID. + * + * DELETE {apiUrl}/v2.0/knowledgestores/{storeId}/knowledgesources/{sourceId} + */ +export async function deleteKnowledgeSourceById( + apiUrl: string, + apiKey: string, + storeId: string, + sourceId: string, +): Promise { + await axios.delete( + `${apiUrl}/v2.0/knowledgestores/${storeId}/knowledgesources/${sourceId}`, + { + headers: { "X-API-Key": apiKey }, + timeout: REQUEST_TIMEOUT, + validateStatus: (s) => s >= 200 && s < 300, + }, + ); +} diff --git a/extensions/salesforce/src/knowledge-connectors/salesforceKnowledgeConnector.ts b/extensions/salesforce/src/knowledge-connectors/salesforceKnowledgeConnector.ts index dcfdbd613..df596c05a 100644 --- a/extensions/salesforce/src/knowledge-connectors/salesforceKnowledgeConnector.ts +++ b/extensions/salesforce/src/knowledge-connectors/salesforceKnowledgeConnector.ts @@ -1,5 +1,9 @@ import { createKnowledgeConnector } from "@cognigy/extension-tools"; import { authenticate } from "../authenticate"; +import { + deleteKnowledgeSourceById, + listKnowledgeSources, +} from "./cognigyManagementApi"; interface IOAuthConnection { consumerKey: string; @@ -21,7 +25,7 @@ function stripHtml(html: string): string { .replace(/<\/tr>/gi, "\n") .replace(/<\/td>/gi, " | ") 
.replace(/<\/th>/gi, " | ") - .replace(/]*>/gi, "• ") + .replace(/]*>/gi, "- ") .replace(/<[^>]+>/g, "") .replace(/&/g, "&") .replace(/</g, "<") @@ -41,14 +45,19 @@ function stripHtml(html: string): string { */ function sanitizeText(text: string): string { if (!text) return ""; - // Remove null bytes and ASCII control characters except \t (9), \n (10), \r (13) - return text.replace(/[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]/g, "").trim(); + // Remove control characters, then strip any remaining non-ASCII (>127) to avoid + // vector store rejection of unusual Unicode characters from HTML stripping. + return text + .replace(/[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]/g, "") + .replace(/[^\x09\x0A\x0D\x20-\x7E]/g, "") + .replace(/[ \t]{2,}/g, " ") + .trim(); } /** * Split long text into chunks at natural boundaries (paragraphs → sentences → words). */ -function chunkContent(text: string, maxSize: number = 800): string[] { +function chunkContent(text: string, maxSize: number = 2000): string[] { if (!text || text.length === 0) return []; if (text.length <= maxSize) return [text]; @@ -75,6 +84,21 @@ function chunkContent(text: string, maxSize: number = 800): string[] { return chunks.filter(c => c.length > 0); } +/** + * Sanitize a string for use as a Cognigy knowledge source name. + * Cognigy enforces a resource-name format: replaces/removes characters + * outside of letters, numbers, spaces, hyphens, and square brackets. + */ +function sanitizeSourceName(name: string): string { + return name + .replace(/\u2013|\u2014/g, "-") // en dash, em dash → hyphen + .replace(/&/g, "and") // & → and + .replace(/[/?!:()#*+<>=^~%@\\]/g, " ") // special chars → space + .replace(/[ \t]{2,}/g, " ") // collapse multiple spaces + .replace(/-{2,}/g, "-") // collapse multiple hyphens + .trim(); +} + /** * Build readable plain text from a set of article fields. * Each non-empty field is prefixed with a human-readable label. 
@@ -173,6 +197,47 @@ export const salesforceKnowledgeConnector = createKnowledgeConnector({ type: "chipInput", defaultValue: ["supervisor"], description: "Tags applied to supervisor-only knowledge sources. Supervisor Copilot should filter by both this tag and the agent tag to see full article content including manager sections. Press ENTER to add a tag." + }, + { + key: "syncMode", + label: "Sync Mode", + type: "select", + defaultValue: "full", + description: "Full: import all published articles. Incremental: only import articles modified since Last Sync Date.", + params: { + options: [ + { label: "Full", value: "full" }, + { label: "Incremental (filter by Last Sync Date)", value: "incremental" } + ] + } + }, + { + key: "lastSyncDate", + label: "Last Sync Date", + type: "text", + description: "ISO 8601 date used as a filter in Incremental mode, e.g. 2026-01-01T00:00:00Z. Only articles modified after this date will be imported.", + params: { required: false } + }, + { + key: "cognigyApiUrl", + label: "Cognigy API URL", + type: "text", + description: "Base URL of your Cognigy.AI instance, e.g. 
https://app.cognigy.ai — required for stale article removal.", + params: { required: false } + }, + { + key: "cognigyApiKey", + label: "Cognigy API Key", + type: "text", + description: "API key from Profile → API Keys in Cognigy.AI — required for stale article removal.", + params: { required: false } + }, + { + key: "knowledgeStoreId", + label: "Knowledge Store ID", + type: "text", + description: "The ID of the target knowledge store (visible in the store URL in Cognigy.AI) — required for stale article removal.", + params: { required: false } } ] as const, sections: [ @@ -181,6 +246,12 @@ export const salesforceKnowledgeConnector = createKnowledgeConnector({ label: "Supervisor / Manager Access", defaultCollapsed: true, fields: ["supervisorFields", "supervisorTags"] + }, + { + key: "syncSettings", + label: "Sync Settings", + defaultCollapsed: true, + fields: ["syncMode", "lastSyncDate", "cognigyApiUrl", "cognigyApiKey", "knowledgeStoreId"] } ], form: [ @@ -189,7 +260,8 @@ export const salesforceKnowledgeConnector = createKnowledgeConnector({ { type: "field", key: "language" }, { type: "field", key: "agentFields" }, { type: "field", key: "agentTags" }, - { type: "section", key: "supervisorAccess" } + { type: "section", key: "supervisorAccess" }, + { type: "section", key: "syncSettings" } ], function: async ({ config, api }) => { const { @@ -199,7 +271,12 @@ export const salesforceKnowledgeConnector = createKnowledgeConnector({ agentFields, agentTags, supervisorFields, - supervisorTags + supervisorTags, + syncMode, + lastSyncDate, + cognigyApiUrl, + cognigyApiKey, + knowledgeStoreId } = config; const salesforceConnection = await authenticate(oauthConnection as IOAuthConnection); @@ -210,6 +287,11 @@ export const salesforceKnowledgeConnector = createKnowledgeConnector({ ...(supervisorFields as string[]) ])]; + // Incremental date filter + const dateFilter = (syncMode as string) === "incremental" && (lastSyncDate as string)?.trim() + ? 
`AND LastModifiedDate > ${(lastSyncDate as string).trim()}` + : ""; + const soql = [ `SELECT Id, KnowledgeArticleId, ArticleNumber, Title, Summary, UrlName,`, `Language, LastPublishedDate, ${allContentFields.join(", ")}`, @@ -217,11 +299,17 @@ export const salesforceKnowledgeConnector = createKnowledgeConnector({ `WHERE PublishStatus = 'Online'`, `AND Language = '${language}'`, `AND IsLatestVersion = true`, + dateFilter, `ORDER BY Title ASC` - ].join(" "); + ].filter(Boolean).join(" "); + + if (dateFilter) { + console.log(`[Salesforce KC] Incremental mode: fetching articles modified after ${(lastSyncDate as string).trim()}`); + } const result = await salesforceConnection.query(soql, { autoFetch: true }); const articles = result.records; + console.log(`[Salesforce KC] ${articles.length} article(s) to process`); for (const article of articles) { const title = article.Title || `Article ${article.ArticleNumber}`; @@ -243,7 +331,7 @@ export const salesforceKnowledgeConnector = createKnowledgeConnector({ if (agentChunks.length > 0) { console.log(`[Salesforce KC] Agent: "${title}" (${articleMeta.articleNumber}) — ${agentChunks.length} chunk(s)`); const { knowledgeSourceId: agentSourceId } = await api.createKnowledgeSource({ - name: `[${articleMeta.articleNumber}] ${title}`, + name: sanitizeSourceName(`[${articleMeta.articleNumber}] ${title}`), tags: agentTags as string[], chunkCount: agentChunks.length }); @@ -253,7 +341,11 @@ export const salesforceKnowledgeConnector = createKnowledgeConnector({ await api.createKnowledgeChunk({ knowledgeSourceId: agentSourceId, text: agentChunks[i], - data: { ...articleMeta, role: "agent" } + data: { + articleNumber: articleMeta.articleNumber, + role: "agent", + url: `${salesforceConnection.instanceUrl}/lightning/articles/${articleMeta.knowledgeArticleId}` + } }); } } @@ -272,7 +364,7 @@ export const salesforceKnowledgeConnector = createKnowledgeConnector({ if (supervisorChunks.length > 0) { console.log(`[Salesforce KC] Supervisor: 
"${title}" (${articleMeta.articleNumber}) — ${supervisorChunks.length} chunk(s)`); const { knowledgeSourceId: supervisorSourceId } = await api.createKnowledgeSource({ - name: `[${articleMeta.articleNumber}] ${title} — Manager`, + name: sanitizeSourceName(`[${articleMeta.articleNumber}] ${title} - Manager`), tags: supervisorTags as string[], chunkCount: supervisorChunks.length }); @@ -282,11 +374,84 @@ export const salesforceKnowledgeConnector = createKnowledgeConnector({ await api.createKnowledgeChunk({ knowledgeSourceId: supervisorSourceId, text: supervisorChunks[i], - data: { ...articleMeta, role: "supervisor" } + data: { + articleNumber: articleMeta.articleNumber, + role: "supervisor", + url: `${salesforceConnection.instanceUrl}/lightning/articles/${articleMeta.knowledgeArticleId}` + } }); } } } } + + // --- Stale article removal --- + // Requires Management API credentials; only meaningful when running a full sync. + const removalEnabled = + (cognigyApiUrl as string)?.trim() && + (cognigyApiKey as string)?.trim() && + (knowledgeStoreId as string)?.trim(); + + if (removalEnabled) { + console.log("[Salesforce KC] Checking for stale sources to remove…"); + try { + const existingSources = await listKnowledgeSources( + (cognigyApiUrl as string).trim(), + (cognigyApiKey as string).trim(), + (knowledgeStoreId as string).trim(), + ); + + // Filter to sources that were created by this KC (have [ArticleNumber] prefix) + const sfSources = existingSources.filter(s => /^\[([^\]]+)\]/.test(s.name)); + if (sfSources.length === 0) { + console.log("[Salesforce KC] No Salesforce-pattern sources found in store — skipping stale check"); + } else { + // Extract unique article numbers from source names + const articleNumbers = [ + ...new Set( + sfSources + .map(s => { + const m = s.name.match(/^\[([^\]]+)\]/); + return m ? 
m[1] : null; + }) + .filter(Boolean) as string[] + ) + ]; + + console.log(`[Salesforce KC] Verifying ${articleNumbers.length} unique article number(s) in Salesforce…`); + + // Batch query Salesforce to find still-active articles + const inClause = articleNumbers.map(n => `'${n}'`).join(", "); + const checkSoql = `SELECT ArticleNumber FROM ${knowledgeApiName} WHERE ArticleNumber IN (${inClause}) AND PublishStatus = 'Online' AND IsLatestVersion = true`; + const checkResult = await salesforceConnection.query(checkSoql, { autoFetch: true }); + const activeNumbers = new Set( + checkResult.records.map((r: any) => String(r.ArticleNumber)) + ); + + // Delete sources whose article numbers are no longer active + for (const src of sfSources) { + const m = src.name.match(/^\[([^\]]+)\]/); + const articleNumber = m ? m[1] : null; + if (articleNumber && !activeNumbers.has(articleNumber)) { + console.log(`[Salesforce KC] Removing stale source "${src.name}" (article ${articleNumber} no longer active)`); + try { + await deleteKnowledgeSourceById( + (cognigyApiUrl as string).trim(), + (cognigyApiKey as string).trim(), + (knowledgeStoreId as string).trim(), + src._id, + ); + } catch (err) { + const msg = err instanceof Error ? err.message : String(err); + console.warn(`[Salesforce KC] Could not remove stale source ${src._id}: ${msg}`); + } + } + } + } + } catch (err) { + const msg = err instanceof Error ? err.message : String(err); + console.warn(`[Salesforce KC] Stale removal skipped due to error: ${msg}`); + } + } } }); From 5770830a8b2a1508d728b488d8da840887544898 Mon Sep 17 00:00:00 2001 From: Matt Muller Date: Thu, 5 Mar 2026 07:33:23 +1100 Subject: [PATCH 4/8] Improve salesforce knowledge chunking MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit htmlFieldToMarkdown() (replaces stripHtml) Converts HTML to proper Markdown instead of stripping it flat. Headings inside a field are shifted down (e.g.
<h2>
inside an Overview field → ####) so they sit correctly below the ## Overview field label. --- extensions/salesforce/import-all.ts | 493 ------------------ extensions/salesforce/import-one.ts | 210 -------- .../salesforceKnowledgeConnector.ts | 297 ++++++++--- 3 files changed, 217 insertions(+), 783 deletions(-) delete mode 100644 extensions/salesforce/import-all.ts delete mode 100644 extensions/salesforce/import-one.ts diff --git a/extensions/salesforce/import-all.ts b/extensions/salesforce/import-all.ts deleted file mode 100644 index 0488ae275..000000000 --- a/extensions/salesforce/import-all.ts +++ /dev/null @@ -1,493 +0,0 @@ -/** - * Full import of all published Salesforce Knowledge articles into Cognigy Knowledge AI. - * Same logic as the connector — fetches from Salesforce, processes text, creates - * sources + chunks via the Cognigy REST API. - * Run: npx ts-node import-all.ts - * Resume: npx ts-node import-all.ts --from=235 - */ - -import * as dotenv from "dotenv"; -dotenv.config(); - -import axios, { AxiosInstance } from "axios"; -import * as fs from "fs"; -import * as path from "path"; -import { authenticate } from "./src/authenticate"; - -// ─── State file ─────────────────────────────────────────────────────────────── - -const STATE_FILE = path.join(__dirname, "import-state.json"); - -interface ArticleState { - agentSourceId?: string; - supervisorSourceId?: string; - lastPublishedDate: string; -} - -type ImportState = Record; // keyed by articleNumber - -function loadState(): ImportState { - try { - if (fs.existsSync(STATE_FILE)) { - return JSON.parse(fs.readFileSync(STATE_FILE, "utf8")); - } - } catch { - console.warn("Warning: Could not read import-state.json, starting fresh."); - } - return {}; -} - -function saveState(state: ImportState): void { - fs.writeFileSync(STATE_FILE, JSON.stringify(state, null, 2), "utf8"); -} - -// ─── Text helpers ───────────────────────────────────────────────────────────── - -function stripHtml(html: string): string { - 
if (!html) return ""; - return html - .replace(//gi, "\n") - .replace(/<\/p>/gi, "\n") - .replace(/<\/h[1-6]>/gi, "\n") - .replace(/<\/li>/gi, "\n") - .replace(/<\/tr>/gi, "\n") - .replace(/<\/td>/gi, " | ") - .replace(/<\/th>/gi, " | ") - .replace(/]*>/gi, "- ") - .replace(/<[^>]+>/g, "") - .replace(/&/g, "&") - .replace(/</g, "<") - .replace(/>/g, ">") - .replace(/ /g, " ") - .replace(/"/g, '"') - .replace(/'/g, "'") - .replace(/&[a-z]+;/gi, " ") - .replace(/[ \t]+/g, " ") - .replace(/\n{3,}/g, "\n\n") - .trim(); -} - -function sanitizeText(text: string): string { - if (!text) return ""; - return text - .replace(/[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]/g, "") - .replace(/[^\x09\x0A\x0D\x20-\x7E]/g, "") - .replace(/[ \t]{2,}/g, " ") - .trim(); -} - -function chunkContent(text: string, maxSize: number = 2000): string[] { - if (!text || text.length === 0) return []; - if (text.length <= maxSize) return [text]; - const chunks: string[] = []; - let remaining = text; - while (remaining.length > 0) { - if (remaining.length <= maxSize) { - chunks.push(remaining.trim()); - break; - } - let splitAt = remaining.lastIndexOf("\n\n", maxSize); - if (splitAt < maxSize * 0.5) splitAt = remaining.lastIndexOf("\n", maxSize); - if (splitAt < maxSize * 0.5) splitAt = remaining.lastIndexOf(". 
", maxSize); - if (splitAt < maxSize * 0.5) splitAt = remaining.lastIndexOf(" ", maxSize); - if (splitAt <= 0) splitAt = maxSize; - const chunk = remaining.substring(0, splitAt).trim(); - if (chunk.length > 0) chunks.push(chunk); - remaining = remaining.substring(splitAt).trim(); - } - return chunks.filter((c) => c.length > 0); -} - -function sanitizeSourceName(name: string): string { - return name - .replace(/\u2013|\u2014/g, "-") - .replace(/&/g, "and") - .replace(/[/?!:()#*+<>=^~%@\\]/g, " ") - .replace(/[ \t]{2,}/g, " ") - .replace(/-{2,}/g, "-") - .trim(); -} - -function buildArticleText( - title: string, - summary: string, - fields: string[], - article: any, -): string { - const sections: string[] = []; - if (title) sections.push(`# ${sanitizeText(title)}`); - if (summary) sections.push(sanitizeText(stripHtml(summary))); - for (const field of fields) { - const raw = article[field]; - if (!raw) continue; - const content = sanitizeText(stripHtml(String(raw))); - if (!content) continue; - const label = field.replace(/__c$/i, "").replace(/_/g, " "); - sections.push(`${label}:\n${content}`); - } - return sections.join("\n\n"); -} - -// ─── Cognigy REST API helpers ───────────────────────────────────────────────── - -const STORE_ID = process.env.COGNIGY_KNOWLEDGE_STORE_ID!; - -function makeCognigyApi(): AxiosInstance { - return axios.create({ - baseURL: process.env.COGNIGY_API_BASE, - headers: { "X-API-Key": process.env.COGNIGY_API_KEY }, - }); -} - -const sleep = (ms: number) => new Promise((r) => setTimeout(r, ms)); - -/** Detect AWS WAF / CAPTCHA HTML response instead of JSON */ -function isWafResponse(err: any): boolean { - const body = err.response?.data; - if (typeof body === "string" && body.includes("( - fn: () => Promise, - label: string, - maxRetries = 5, -): Promise { - let delay = 2000; - for (let attempt = 1; attempt <= maxRetries; attempt++) { - try { - return await fn(); - } catch (err: any) { - const status = err.response?.status; - - if 
(isWafResponse(err)) { - console.warn( - ` [WAF detected] ${label} — AWS WAF challenge returned. Waiting 60s before retry...`, - ); - await sleep(60000); - // Reset delay after WAF cooldown - delay = 2000; - continue; - } - - const retryable = - status === 405 || - status === 429 || - status === 503 || - status === 502 || - !status; - if (retryable && attempt < maxRetries) { - console.warn( - ` [retry ${attempt}/${maxRetries - 1}] ${label} — HTTP ${status ?? err.message}, waiting ${delay / 1000}s...`, - ); - await sleep(delay); - delay = Math.min(delay * 2, 30000); - } else { - throw err; - } - } - } - throw new Error(`${label} failed after ${maxRetries} attempts`); -} - -/** Returns true if the error message indicates a duplicate name conflict */ -function isDuplicateNameError(err: any): boolean { - const detail: string = err.response?.data?.detail || err.message || ""; - return detail.toLowerCase().includes("already exists"); -} - -async function createSource(api: AxiosInstance, name: string): Promise { - const resp = await withRetry( - () => - api.post(`/v2.0/knowledgestores/${STORE_ID}/sources`, { - name, - type: "manual", - }), - `createSource(${name})`, - ); - return resp.data.knowledgeSource._id as string; -} - -async function createChunk( - api: AxiosInstance, - sourceId: string, - text: string, - order: number, - data: Record, -): Promise { - await withRetry( - () => - api.post(`/v2.0/knowledgestores/${STORE_ID}/sources/${sourceId}/chunks`, { - text, - order, - data, - }), - `createChunk(order=${order})`, - ); -} - -async function deleteSource( - api: AxiosInstance, - sourceId: string, -): Promise { - try { - await api.delete(`/v2.0/knowledgestores/${STORE_ID}/sources/${sourceId}`); - } catch { - // best-effort cleanup - } -} - -// ─── Field config ───────────────────────────────────────────────────────────── - -const agentFields = [ - "Overview__c", - "Details__c", - "Inclusions__c", - "Exclusions__c", - "Information__c", - "Processing_Steps_Text__c", - 
"Questions__c", - "Answer__c", - "Scripting__c", - "Actions__c", -]; -const supervisorFields = [ - "Manager_Actions__c", - "Manager_information__c", - "Manager_processing_steps__c", - "Manager_scripting__c", -]; - -// ─── Per-article import ─────────────────────────────────────────────────────── - -type ImportResult = "imported" | "updated" | "skipped" | "failed" | "empty"; - -async function importArticle( - cognigy: AxiosInstance, - article: any, - instanceUrl: string, - index: number, - total: number, - state: ImportState, -): Promise { - const title = article.Title || `Article ${article.ArticleNumber}`; - const summary = article.Summary || ""; - const articleNum = String(article.ArticleNumber || ""); - const knowledgeArticleId = String(article.KnowledgeArticleId || ""); - const articleUrl = `${instanceUrl}/lightning/articles/${knowledgeArticleId}`; - const lastPublishedDate = String(article.LastPublishedDate || ""); - const prefix = `[${index}/${total}] [${articleNum}]`; - - const agentText = buildArticleText(title, summary, agentFields, article); - const agentChunks = chunkContent(agentText); - - const hasSupervisor = supervisorFields.some((f) => { - const raw = article[f]; - return raw && sanitizeText(stripHtml(String(raw))).length > 0; - }); - const supervisorChunks = hasSupervisor - ? 
chunkContent(buildArticleText(title, summary, supervisorFields, article)) - : []; - - if (agentChunks.length === 0 && supervisorChunks.length === 0) { - console.log(`${prefix} SKIP — no content`); - return "empty"; - } - - // ── Check if already imported and up-to-date ────────────────────────────── - const existing = state[articleNum]; - if (existing) { - if (existing.lastPublishedDate === lastPublishedDate) { - console.log(`${prefix} SKIP — up to date (${lastPublishedDate})`); - return "skipped"; - } - // Content may have changed — delete old sources and reimport - console.log( - `${prefix} UPDATE — date changed (${existing.lastPublishedDate} → ${lastPublishedDate})`, - ); - if (existing.agentSourceId) { - await deleteSource(cognigy, existing.agentSourceId); - } - if (existing.supervisorSourceId) { - await deleteSource(cognigy, existing.supervisorSourceId); - } - delete state[articleNum]; - saveState(state); - } - - let agentSourceId: string | undefined; - let supervisorSourceId: string | undefined; - let anyFailed = false; - - // ── Agent source ────────────────────────────────────────────────────────── - if (agentChunks.length > 0) { - const sourceName = sanitizeSourceName(`[${articleNum}] ${title}`); - try { - agentSourceId = await createSource(cognigy, sourceName); - for (let i = 0; i < agentChunks.length; i++) { - await createChunk(cognigy, agentSourceId, agentChunks[i], i, { - articleNumber: articleNum, - role: "agent", - url: articleUrl, - }); - } - console.log(`${prefix} Agent OK — ${agentChunks.length} chunk(s)`); - } catch (err: any) { - const msg = err.response?.data?.detail || err.message; - - // If duplicate error and we don't have a state entry, the source - // exists from a previous run that we have no record of — treat as - // up-to-date (can't retrieve the source ID without the list API). 
- if (isDuplicateNameError(err)) { - console.warn( - `${prefix} Agent SKIP — source already exists (no state record; treating as current)`, - ); - } else { - console.error(`${prefix} Agent FAILED — ${msg}`); - anyFailed = true; - } - if (agentSourceId) await deleteSource(cognigy, agentSourceId); - agentSourceId = undefined; - } - } - - // ── Supervisor source ───────────────────────────────────────────────────── - if (supervisorChunks.length > 0) { - const sourceName = sanitizeSourceName(`[${articleNum}] ${title} - Manager`); - try { - supervisorSourceId = await createSource(cognigy, sourceName); - for (let i = 0; i < supervisorChunks.length; i++) { - await createChunk(cognigy, supervisorSourceId, supervisorChunks[i], i, { - articleNumber: articleNum, - role: "supervisor", - url: articleUrl, - }); - } - console.log( - `${prefix} Supervisor OK — ${supervisorChunks.length} chunk(s)`, - ); - } catch (err: any) { - const msg = err.response?.data?.detail || err.message; - if (isDuplicateNameError(err)) { - console.warn( - `${prefix} Supervisor SKIP — source already exists (no state record; treating as current)`, - ); - } else { - console.error(`${prefix} Supervisor FAILED — ${msg}`); - anyFailed = true; - } - if (supervisorSourceId) await deleteSource(cognigy, supervisorSourceId); - supervisorSourceId = undefined; - } - } - - // ── Persist state ───────────────────────────────────────────────────────── - // Record even partial success so we can skip on re-run - const hadPreviousEntry = !!existing; - state[articleNum] = { - ...(agentSourceId ? { agentSourceId } : {}), - ...(supervisorSourceId ? { supervisorSourceId } : {}), - lastPublishedDate, - }; - saveState(state); - - if (anyFailed) return "failed"; - return hadPreviousEntry ? 
"updated" : "imported"; -} - -// ─── Main ───────────────────────────────────────────────────────────────────── - -async function main() { - console.log("Authenticating with Salesforce..."); - const sf = await authenticate({ - consumerKey: process.env.SF_CONSUMER_KEY!, - consumerSecret: process.env.SF_CONSUMER_SECRET!, - instanceUrl: process.env.SF_INSTANCE_URL!, - }); - console.log(`Authenticated. Instance: ${sf.instanceUrl}\n`); - - const allFields = [...new Set([...agentFields, ...supervisorFields])]; - const soql = [ - `SELECT Id, KnowledgeArticleId, ArticleNumber, Title, Summary, UrlName,`, - `Language, LastPublishedDate, ${allFields.join(", ")}`, - `FROM Knowledge_Article__kav`, - `WHERE PublishStatus = 'Online'`, - `AND Language = 'en_US'`, - `AND IsLatestVersion = true`, - `ORDER BY Title ASC`, - ].join(" "); - - console.log("Fetching all published en_US articles..."); - const result = await sf.query(soql, { autoFetch: true }); - const articles = result.records; - console.log(`Found ${articles.length} articles.\n`); - - // Support --from N to resume from a specific article index (1-based) - const fromArg = process.argv.find((a) => a.startsWith("--from=")); - const startFrom = fromArg - ? 
Math.max(0, parseInt(fromArg.split("=")[1], 10) - 1) - : 0; - if (startFrom > 0) - console.log(`Resuming from article ${startFrom + 1}/${articles.length}\n`); - - const state = loadState(); - const stateCount = Object.keys(state).length; - if (stateCount > 0) { - console.log( - `State file loaded: ${stateCount} article(s) already tracked.\n`, - ); - } - - console.log("=".repeat(70)); - - const cognigy = makeCognigyApi(); - const startTime = Date.now(); - - let imported = 0; - let updated = 0; - let skipped = 0; - let empty = 0; - let failed = 0; - - for (let i = startFrom; i < articles.length; i++) { - try { - const outcome = await importArticle( - cognigy, - articles[i], - sf.instanceUrl, - i + 1, - articles.length, - state, - ); - if (outcome === "imported") imported++; - else if (outcome === "updated") updated++; - else if (outcome === "skipped") skipped++; - else if (outcome === "empty") empty++; - else if (outcome === "failed") failed++; - } catch (err: any) { - failed++; - console.error( - `[${i + 1}/${articles.length}] Unexpected error: ${err.message}`, - ); - } - // Pause between every article to avoid WAF rate limiting - await sleep(1500); - } - - const elapsed = Math.round((Date.now() - startTime) / 1000); - - console.log("=".repeat(70)); - console.log(`\nDone in ${elapsed}s`); - console.log(` Imported (new): ${imported}`); - console.log(` Updated (changed): ${updated}`); - console.log(` Skipped (up to date): ${skipped}`); - console.log(` Skipped (no content): ${empty}`); - console.log(` Errors: ${failed}`); -} - -main().catch((err) => { - console.error("Fatal:", err.response?.data || err.message); - process.exit(1); -}); diff --git a/extensions/salesforce/import-one.ts b/extensions/salesforce/import-one.ts deleted file mode 100644 index 84a1ef383..000000000 --- a/extensions/salesforce/import-one.ts +++ /dev/null @@ -1,210 +0,0 @@ -/** - * Import one Salesforce Knowledge article directly into Cognigy Knowledge AI - * via REST API (bypasses the extension 
runtime). - * - * Run: npx ts-node import-one.ts - */ - -import * as dotenv from "dotenv"; -dotenv.config(); - -import axios from "axios"; -import { authenticate } from "./src/authenticate"; - -// ─── Same helpers as salesforceKnowledgeConnector.ts ───────────────────────── - -function stripHtml(html: string): string { - if (!html) return ""; - return html - .replace(//gi, "\n") - .replace(/<\/p>/gi, "\n") - .replace(/<\/h[1-6]>/gi, "\n") - .replace(/<\/li>/gi, "\n") - .replace(/<\/tr>/gi, "\n") - .replace(/<\/td>/gi, " | ") - .replace(/<\/th>/gi, " | ") - .replace(/]*>/gi, "- ") - .replace(/<[^>]+>/g, "") - .replace(/&/g, "&") - .replace(/</g, "<") - .replace(/>/g, ">") - .replace(/ /g, " ") - .replace(/"/g, '"') - .replace(/'/g, "'") - .replace(/&[a-z]+;/gi, " ") - .replace(/[ \t]+/g, " ") - .replace(/\n{3,}/g, "\n\n") - .trim(); -} - -function sanitizeText(text: string): string { - if (!text) return ""; - return text - .replace(/[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]/g, "") - .replace(/[^\x09\x0A\x0D\x20-\x7E]/g, "") - .replace(/[ \t]{2,}/g, " ") - .trim(); -} - -function chunkContent(text: string, maxSize: number = 2000): string[] { - if (!text || text.length === 0) return []; - if (text.length <= maxSize) return [text]; - - const chunks: string[] = []; - let remaining = text; - - while (remaining.length > 0) { - if (remaining.length <= maxSize) { - chunks.push(remaining.trim()); - break; - } - - let splitAt = remaining.lastIndexOf("\n\n", maxSize); - if (splitAt < maxSize * 0.5) splitAt = remaining.lastIndexOf("\n", maxSize); - if (splitAt < maxSize * 0.5) splitAt = remaining.lastIndexOf(". 
", maxSize); - if (splitAt < maxSize * 0.5) splitAt = remaining.lastIndexOf(" ", maxSize); - if (splitAt <= 0) splitAt = maxSize; - - const chunk = remaining.substring(0, splitAt).trim(); - if (chunk.length > 0) chunks.push(chunk); - remaining = remaining.substring(splitAt).trim(); - } - - return chunks.filter(c => c.length > 0); -} - -function buildArticleText(title: string, summary: string, fields: string[], article: any): string { - const sections: string[] = []; - if (title) sections.push(`# ${sanitizeText(title)}`); - if (summary) sections.push(sanitizeText(stripHtml(summary))); - for (const field of fields) { - const raw = article[field]; - if (!raw) continue; - const content = sanitizeText(stripHtml(String(raw))); - if (!content) continue; - const label = field.replace(/__c$/i, "").replace(/_/g, " "); - sections.push(`${label}:\n${content}`); - } - return sections.join("\n\n"); -} - -// ─── Cognigy Knowledge REST API ─────────────────────────────────────────────── - -const cognigyApi = axios.create({ - baseURL: process.env.COGNIGY_API_BASE, - headers: { "X-API-Key": process.env.COGNIGY_API_KEY } -}); - -const STORE_ID = process.env.COGNIGY_KNOWLEDGE_STORE_ID!; - -async function createKnowledgeSource(name: string, tags: string[]): Promise { - const resp = await cognigyApi.post(`/v2.0/knowledgestores/${STORE_ID}/sources`, { - name, - type: "manual" - }); - const sourceId: string = resp.data.knowledgeSource._id; - - // Add tags via PATCH if any - if (tags.length > 0) { - try { - await cognigyApi.patch(`/v2.0/knowledgestores/${STORE_ID}/sources/${sourceId}`, { - metaData: { tags } - }); - } catch { - console.warn(` Note: Could not set tags on source (non-critical)`); - } - } - return sourceId; -} - -async function createKnowledgeChunk(sourceId: string, text: string, order: number, metaData?: Record): Promise { - await cognigyApi.post(`/v2.0/knowledgestores/${STORE_ID}/sources/${sourceId}/chunks`, { - text, - order, - ...(metaData ? 
{ data: metaData } : {}) - }); -} - -async function deleteKnowledgeSource(sourceId: string): Promise { - try { - await cognigyApi.delete(`/v2.0/knowledgestores/${STORE_ID}/sources/${sourceId}`); - } catch (e: any) { - console.warn(` Warning: Could not delete source ${sourceId}: ${e.message}`); - } -} - -// ─── Main ───────────────────────────────────────────────────────────────────── - -async function main() { - const agentFields = [ - "Overview__c", "Details__c", "Inclusions__c", "Exclusions__c", - "Information__c", "Processing_Steps_Text__c", "Questions__c", - "Answer__c", "Scripting__c", "Actions__c" - ]; - - console.log("Authenticating with Salesforce..."); - const sf = await authenticate({ - consumerKey: process.env.SF_CONSUMER_KEY!, - consumerSecret: process.env.SF_CONSUMER_SECRET!, - instanceUrl: process.env.SF_INSTANCE_URL! - }); - console.log("Authenticated.\n"); - - // Fetch just ONE article (alphabetically first) - const soql = [ - `SELECT Id, KnowledgeArticleId, ArticleNumber, Title, Summary, UrlName,`, - `Language, LastPublishedDate, ${agentFields.join(", ")}`, - `FROM Knowledge_Article__kav`, - `WHERE PublishStatus = 'Online'`, - `AND Language = 'en_US'`, - `AND IsLatestVersion = true`, - `ORDER BY Title ASC`, - `LIMIT 1` - ].join(" "); - - console.log("Fetching one article from Salesforce..."); - const result = await sf.query(soql); - const article = result.records[0]; - if (!article) throw new Error("No article found"); - - const title = article.Title || `Article ${article.ArticleNumber}`; - const summary = article.Summary || ""; - const articleNum = String(article.ArticleNumber || ""); - const knowledgeArticleId = String(article.KnowledgeArticleId || ""); - const articleUrl = `${process.env.SF_INSTANCE_URL}/lightning/articles/${knowledgeArticleId}`; - - console.log(`Article: [${articleNum}] ${title}`); - - const agentText = buildArticleText(title, summary, agentFields, article); - const agentChunks = chunkContent(agentText); - - 
console.log(`Chunks: ${agentChunks.length}`); - agentChunks.forEach((c, i) => console.log(` Chunk ${i + 1}: ${c.length} chars`)); - - const sourceName = `[${articleNum}] ${title}`; - console.log(`\nCreating knowledge source: "${sourceName}"...`); - const sourceId = await createKnowledgeSource(sourceName, ["agent"]); - console.log(` Source ID: ${sourceId}`); - - console.log("Creating chunks..."); - for (let i = 0; i < agentChunks.length; i++) { - process.stdout.write(` Chunk ${i + 1}/${agentChunks.length}... `); - try { - await createKnowledgeChunk(sourceId, agentChunks[i], i, { articleNumber: articleNum, role: "agent", url: articleUrl }); - console.log("OK"); - } catch (err: any) { - console.log(`FAILED: ${err.response?.data?.detail || err.message}`); - console.log(" Cleaning up source..."); - await deleteKnowledgeSource(sourceId); - throw new Error(`Chunk ${i + 1} failed: ${err.response?.data?.detail || err.message}`); - } - } - - console.log(`\nDone! Article "[${articleNum}] ${title}" imported successfully.`); - console.log(`Check: https://jetstar-dev.cognigy.cloud/project/68e5bf2c1271c80086891c47/68e5bf2cd3f7785588c6af0b/knowledge/69a2c04eba85740ed7854db3`); -} - -main().catch(err => { - console.error("\nFatal error:", err.response?.data || err.message || err); - process.exit(1); -}); diff --git a/extensions/salesforce/src/knowledge-connectors/salesforceKnowledgeConnector.ts b/extensions/salesforce/src/knowledge-connectors/salesforceKnowledgeConnector.ts index df596c05a..d3840db2f 100644 --- a/extensions/salesforce/src/knowledge-connectors/salesforceKnowledgeConnector.ts +++ b/extensions/salesforce/src/knowledge-connectors/salesforceKnowledgeConnector.ts @@ -11,42 +11,12 @@ interface IOAuthConnection { instanceUrl: string; } -/** - * Strip HTML tags and decode entities into plain readable text. - * Preserves structure for tables (pipe-separated) and lists (bullet points). 
- */ -function stripHtml(html: string): string { - if (!html) return ""; - return html - .replace(//gi, "\n") - .replace(/<\/p>/gi, "\n") - .replace(/<\/h[1-6]>/gi, "\n") - .replace(/<\/li>/gi, "\n") - .replace(/<\/tr>/gi, "\n") - .replace(/<\/td>/gi, " | ") - .replace(/<\/th>/gi, " | ") - .replace(/]*>/gi, "- ") - .replace(/<[^>]+>/g, "") - .replace(/&/g, "&") - .replace(/</g, "<") - .replace(/>/g, ">") - .replace(/ /g, " ") - .replace(/"/g, '"') - .replace(/'/g, "'") - .replace(/&[a-z]+;/gi, " ") - .replace(/[ \t]+/g, " ") - .replace(/\n{3,}/g, "\n\n") - .trim(); -} - /** * Remove null bytes and non-printable control characters that can cause * embedding/store failures. Preserves standard whitespace (newline, tab). */ function sanitizeText(text: string): string { if (!text) return ""; - // Remove control characters, then strip any remaining non-ASCII (>127) to avoid - // vector store rejection of unusual Unicode characters from HTML stripping. return text .replace(/[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]/g, "") .replace(/[^\x09\x0A\x0D\x20-\x7E]/g, "") @@ -55,73 +25,242 @@ function sanitizeText(text: string): string { } /** - * Split long text into chunks at natural boundaries (paragraphs → sentences → words). + * Convert HTML from a Salesforce field to Markdown. + * Heading levels are shifted down by fieldDepth so the field-label heading + * (## FieldName) sits above any headings that were inside the HTML content. + * + * e.g. fieldDepth=2:

<h1> → ###, <h2> → ####, <h3>
→ ##### */ -function chunkContent(text: string, maxSize: number = 2000): string[] { - if (!text || text.length === 0) return []; - if (text.length <= maxSize) return [text]; - - const chunks: string[] = []; - let remaining = text; - - while (remaining.length > 0) { - if (remaining.length <= maxSize) { - chunks.push(remaining.trim()); - break; - } - - let splitAt = remaining.lastIndexOf("\n\n", maxSize); - if (splitAt < maxSize * 0.5) splitAt = remaining.lastIndexOf("\n", maxSize); - if (splitAt < maxSize * 0.5) splitAt = remaining.lastIndexOf(". ", maxSize); - if (splitAt < maxSize * 0.5) splitAt = remaining.lastIndexOf(" ", maxSize); - if (splitAt <= 0) splitAt = maxSize; +function htmlFieldToMarkdown(html: string, fieldDepth: number = 2): string { + if (!html) return ""; - const chunk = remaining.substring(0, splitAt).trim(); - if (chunk.length > 0) chunks.push(chunk); - remaining = remaining.substring(splitAt).trim(); - } + const shiftedH = (level: number) => "#".repeat(Math.min(level + fieldDepth, 6)); + const stripInner = (s: string) => s.replace(/<[^>]+>/g, "").trim(); + + const md = html + // Headings — shift down by fieldDepth + .replace(/]*>([\s\S]*?)<\/h1>/gi, (_, t) => `${shiftedH(1)} ${stripInner(t)}\n\n`) + .replace(/]*>([\s\S]*?)<\/h2>/gi, (_, t) => `${shiftedH(2)} ${stripInner(t)}\n\n`) + .replace(/]*>([\s\S]*?)<\/h3>/gi, (_, t) => `${shiftedH(3)} ${stripInner(t)}\n\n`) + .replace(/]*>([\s\S]*?)<\/h4>/gi, (_, t) => `${shiftedH(4)} ${stripInner(t)}\n\n`) + .replace(/]*>([\s\S]*?)<\/h5>/gi, (_, t) => `${shiftedH(5)} ${stripInner(t)}\n\n`) + .replace(/]*>([\s\S]*?)<\/h6>/gi, (_, t) => `${shiftedH(6)} ${stripInner(t)}\n\n`) + // Bold / italic + .replace(/<(strong|b)[^>]*>([\s\S]*?)<\/(strong|b)>/gi, "**$2**") + .replace(/<(em|i)[^>]*>([\s\S]*?)<\/(em|i)>/gi, "_$2_") + // Lists + .replace(/]*>/gi, "").replace(/<\/ul>/gi, "\n") + .replace(/]*>/gi, "").replace(/<\/ol>/gi, "\n") + .replace(/]*>([\s\S]*?)<\/li>/gi, (_, t) => `- ${stripInner(t)}\n`) + // 
Tables — simple pipe format + .replace(/]*>/gi, "\n").replace(/<\/table>/gi, "\n") + .replace(/]*>/gi, "").replace(/<\/t(?:head|body|foot)>/gi, "") + .replace(/]*>/gi, "").replace(/<\/tr>/gi, " |\n") + .replace(/]*>([\s\S]*?)<\/th>/gi, (_, t) => `| **${stripInner(t)}** `) + .replace(/]*>([\s\S]*?)<\/td>/gi, (_, t) => `| ${stripInner(t)} `) + // Block elements + .replace(/<\/p>/gi, "\n\n").replace(/]*>/gi, "") + .replace(/<\/div>/gi, "\n").replace(/]*>/gi, "") + .replace(//gi, "\n") + // Links — keep visible text only + .replace(/]*>([\s\S]*?)<\/a>/gi, "$1") + // Strip remaining tags + .replace(/<[^>]+>/g, "") + // HTML entities + .replace(/&/g, "&").replace(/</g, "<").replace(/>/g, ">") + .replace(/ /g, " ").replace(/"/g, '"').replace(/'/g, "'") + .replace(/&[a-z]+;/gi, " ") + // Normalise whitespace + .replace(/[ \t]+/g, " ") + .replace(/\n{3,}/g, "\n\n") + .trim(); - return chunks.filter(c => c.length > 0); + return sanitizeText(md); } /** * Sanitize a string for use as a Cognigy knowledge source name. - * Cognigy enforces a resource-name format: replaces/removes characters - * outside of letters, numbers, spaces, hyphens, and square brackets. */ function sanitizeSourceName(name: string): string { return name - .replace(/\u2013|\u2014/g, "-") // en dash, em dash → hyphen - .replace(/&/g, "and") // & → and - .replace(/[/?!:()#*+<>=^~%@\\]/g, " ") // special chars → space - .replace(/[ \t]{2,}/g, " ") // collapse multiple spaces - .replace(/-{2,}/g, "-") // collapse multiple hyphens + .replace(/\u2013|\u2014/g, "-") + .replace(/&/g, "and") + .replace(/[/?!:()#*+<>=^~%@\\]/g, " ") + .replace(/[ \t]{2,}/g, " ") + .replace(/-{2,}/g, "-") .trim(); } /** - * Build readable plain text from a set of article fields. - * Each non-empty field is prefixed with a human-readable label. + * Build a Markdown document from a Salesforce article's fields. + * + * Structure: + * # Article Title + * + * ## Field Label + * + * ## Next Field Label + * ... 
*/ function buildArticleText(title: string, summary: string, fields: string[], article: any): string { const sections: string[] = []; if (title) sections.push(`# ${sanitizeText(title)}`); - if (summary) sections.push(sanitizeText(stripHtml(summary))); + + if (summary) { + const summaryMd = htmlFieldToMarkdown(summary, 1); + if (summaryMd) sections.push(summaryMd); + } for (const field of fields) { const raw = article[field]; if (!raw) continue; - const content = sanitizeText(stripHtml(String(raw))); + const content = htmlFieldToMarkdown(String(raw), 2); if (!content) continue; - // Convert field API name to a readable label: Manager_Actions__c → Manager Actions + // Convert API name to a readable label: Manager_Actions__c → Manager Actions const label = field.replace(/__c$/i, "").replace(/_/g, " "); - sections.push(`${label}:\n${content}`); + sections.push(`## ${label}\n\n${content}`); } return sections.join("\n\n"); } +// --------------------------------------------------------------------------- +// Heading-aware chunker +// --------------------------------------------------------------------------- + +const HARD_MAX_CHARS = 1950; +const MIN_BODY_CHARS = 30; + +interface ArticleChunk { + /** Full chunk text sent to Cognigy (includes title prefix + section heading). */ + text: string; + /** Section heading for the chunk metadata field. */ + section: string; +} + +/** + * Split article Markdown into RAG-optimised chunks. + * + * Splits at H2 (field-level) boundaries only; every chunk is prefixed with + * the article's H1 title so the LLM always has article context. + * Long H2 sections are further split at paragraph → sentence boundaries. + */ +function chunkArticleMarkdown(markdown: string, maxChars: number = 1800): ArticleChunk[] { + const effective = Math.min(maxChars, HARD_MAX_CHARS); + + // Extract the H1 title to use as a prefix in every chunk + const titleMatch = markdown.match(/^#\s+(.+)$/m); + const titlePrefix = titleMatch ? 
`# ${titleMatch[1].trim()}\n\n` : ""; + + // --- Parse markdown into H1/H2 sections only --------------------------- + // H3+ headings (internal field structure) are kept as content, not split points. + const lines = markdown.split("\n"); + const sections: Array<{ level: number; heading: string; content: string }> = []; + let curLevel = 0; + let curHeading = ""; + let curLines: string[] = []; + let started = false; + + for (const line of lines) { + const m = line.match(/^(#{1,2})\s+(.+)$/); + if (m) { + if (started) { + sections.push({ level: curLevel, heading: curHeading, content: curLines.join("\n").trim() }); + } + curLevel = m[1].length; + curHeading = m[2].trim(); + curLines = []; + started = true; + } else if (started) { + curLines.push(line); + } + // Lines before the first heading are ignored (empty for well-formed docs) + } + if (started) { + sections.push({ level: curLevel, heading: curHeading, content: curLines.join("\n").trim() }); + } + + const chunks: ArticleChunk[] = []; + + for (const section of sections) { + // H1 section: title is already in titlePrefix; use content (summary) directly + if (section.level === 1) { + if (!section.content.trim()) continue; + const fullText = titlePrefix + section.content; + if (fullText.length <= effective) { + chunks.push({ text: fullText, section: "" }); + } else { + const subTexts = splitAtBoundaries(section.content, effective - titlePrefix.length); + for (const sub of subTexts) { + chunks.push({ text: titlePrefix + sub, section: "" }); + } + } + continue; + } + + // H2 section: field-level chunk + const headingLine = `## ${section.heading}\n\n`; + const fullText = titlePrefix + headingLine + section.content; + + if (fullText.length <= effective) { + chunks.push({ text: fullText, section: section.heading }); + } else { + const subMax = effective - titlePrefix.length - headingLine.length; + const subTexts = splitAtBoundaries(section.content, Math.max(subMax, 200)); + for (let i = 0; i < subTexts.length; i++) { + 
const cont = i > 0 ? " (continued)" : ""; + chunks.push({ + text: titlePrefix + `## ${section.heading}${cont}\n\n` + subTexts[i], + section: section.heading, + }); + } + } + } + + // If parsing produced nothing (article has no headings), fall back to plain split + if (chunks.length === 0 && markdown.trim()) { + return splitAtBoundaries(markdown, effective).map(t => ({ text: t, section: "" })); + } + + // Filter out chunks with no meaningful prose content + return chunks.filter(c => { + const body = c.text + .replace(/^#{1,6}[^\n]*\n/gm, "") + .replace(/\s/g, ""); + return body.length >= MIN_BODY_CHARS; + }); +} + +/** + * Split text at natural boundaries (paragraph → newline → sentence → word). + */ +function splitAtBoundaries(text: string, maxChars: number): string[] { + if (text.length <= maxChars) return [text]; + + const chunks: string[] = []; + let remaining = text; + + while (remaining.length > 0) { + if (remaining.length <= maxChars) { + chunks.push(remaining.trim()); + break; + } + + let splitAt = remaining.lastIndexOf("\n\n", maxChars); + if (splitAt < maxChars * 0.4) splitAt = remaining.lastIndexOf("\n", maxChars); + if (splitAt < maxChars * 0.4) splitAt = remaining.lastIndexOf(". 
", maxChars); + if (splitAt < maxChars * 0.4) splitAt = remaining.lastIndexOf(" ", maxChars); + if (splitAt <= 0) splitAt = maxChars; + + const chunk = remaining.substring(0, splitAt).trim(); + if (chunk.length > 0) chunks.push(chunk); + remaining = remaining.substring(splitAt).trim(); + } + + return chunks.filter(c => c.length > 0); +} + export const salesforceKnowledgeConnector = createKnowledgeConnector({ type: "salesforceKnowledgeConnector", label: "Salesforce Knowledge", @@ -324,9 +463,11 @@ export const salesforceKnowledgeConnector = createKnowledgeConnector({ lastPublishedDate: String(article.LastPublishedDate || "") }; + const articleUrl = `${salesforceConnection.instanceUrl}/lightning/articles/${articleMeta.knowledgeArticleId}`; + // --- Agent Knowledge Source --- const agentText = buildArticleText(title, summary, agentFields as string[], article); - const agentChunks = chunkContent(agentText); + const agentChunks = chunkArticleMarkdown(agentText); if (agentChunks.length > 0) { console.log(`[Salesforce KC] Agent: "${title}" (${articleMeta.articleNumber}) — ${agentChunks.length} chunk(s)`); @@ -340,26 +481,26 @@ export const salesforceKnowledgeConnector = createKnowledgeConnector({ console.log(`[Salesforce KC] Agent chunk ${i + 1}/${agentChunks.length} for "${title}"`); await api.createKnowledgeChunk({ knowledgeSourceId: agentSourceId, - text: agentChunks[i], + text: agentChunks[i].text, data: { articleNumber: articleMeta.articleNumber, role: "agent", - url: `${salesforceConnection.instanceUrl}/lightning/articles/${articleMeta.knowledgeArticleId}` + section: agentChunks[i].section || null, + url: articleUrl } }); } } // --- Supervisor Knowledge Source --- - // Only created if at least one supervisor field has actual content const hasSupervisorContent = (supervisorFields as string[]).some(field => { const raw = article[field]; - return raw && sanitizeText(stripHtml(String(raw))).length > 0; + return raw && htmlFieldToMarkdown(String(raw)).length > 0; }); if 
(hasSupervisorContent) { const supervisorText = buildArticleText(title, summary, supervisorFields as string[], article); - const supervisorChunks = chunkContent(supervisorText); + const supervisorChunks = chunkArticleMarkdown(supervisorText); if (supervisorChunks.length > 0) { console.log(`[Salesforce KC] Supervisor: "${title}" (${articleMeta.articleNumber}) — ${supervisorChunks.length} chunk(s)`); @@ -373,11 +514,12 @@ export const salesforceKnowledgeConnector = createKnowledgeConnector({ console.log(`[Salesforce KC] Supervisor chunk ${i + 1}/${supervisorChunks.length} for "${title}"`); await api.createKnowledgeChunk({ knowledgeSourceId: supervisorSourceId, - text: supervisorChunks[i], + text: supervisorChunks[i].text, data: { articleNumber: articleMeta.articleNumber, role: "supervisor", - url: `${salesforceConnection.instanceUrl}/lightning/articles/${articleMeta.knowledgeArticleId}` + section: supervisorChunks[i].section || null, + url: articleUrl } }); } @@ -386,7 +528,6 @@ export const salesforceKnowledgeConnector = createKnowledgeConnector({ } // --- Stale article removal --- - // Requires Management API credentials; only meaningful when running a full sync. 
const removalEnabled = (cognigyApiUrl as string)?.trim() && (cognigyApiKey as string)?.trim() && @@ -401,12 +542,10 @@ export const salesforceKnowledgeConnector = createKnowledgeConnector({ (knowledgeStoreId as string).trim(), ); - // Filter to sources that were created by this KC (have [ArticleNumber] prefix) const sfSources = existingSources.filter(s => /^\[([^\]]+)\]/.test(s.name)); if (sfSources.length === 0) { console.log("[Salesforce KC] No Salesforce-pattern sources found in store — skipping stale check"); } else { - // Extract unique article numbers from source names const articleNumbers = [ ...new Set( sfSources @@ -420,7 +559,6 @@ export const salesforceKnowledgeConnector = createKnowledgeConnector({ console.log(`[Salesforce KC] Verifying ${articleNumbers.length} unique article number(s) in Salesforce…`); - // Batch query Salesforce to find still-active articles const inClause = articleNumbers.map(n => `'${n}'`).join(", "); const checkSoql = `SELECT ArticleNumber FROM ${knowledgeApiName} WHERE ArticleNumber IN (${inClause}) AND PublishStatus = 'Online' AND IsLatestVersion = true`; const checkResult = await salesforceConnection.query(checkSoql, { autoFetch: true }); @@ -428,7 +566,6 @@ export const salesforceKnowledgeConnector = createKnowledgeConnector({ checkResult.records.map((r: any) => String(r.ArticleNumber)) ); - // Delete sources whose article numbers are no longer active for (const src of sfSources) { const m = src.name.match(/^\[([^\]]+)\]/); const articleNumber = m ? m[1] : null; From 1534740d2f1a6891006ed70371ee5ba25a264a46 Mon Sep 17 00:00:00 2001 From: Matt Muller Date: Thu, 5 Mar 2026 21:04:43 +1100 Subject: [PATCH 5/8] Address PR Review Comments MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - searchContact: escape single quotes in SOQL string literal to prevent injection - sanitizeText: preserve printable Unicode (accented chars, CJK, etc.) 
— only strip C0/C1 control characters, not all non-ASCII - salesforceKnowledgeConnector: guard agentFields/supervisorFields with Array.isArray so undefined supervisorFields doesn't throw at runtime - salesforceKnowledgeConnector: validate lastSyncDate as ISO 8601 before embedding in SOQL; throw a descriptive error on invalid input - salesforceKnowledgeConnector: prefix source names with [SF:] so stale-removal pattern is unambiguous in shared knowledge stores - cognigyManagementApi: strip trailing slashes from apiUrl before building request paths to prevent double-slash issues - package-lock.json: regenerate to sync version with package.json (4.5.1) --- extensions/salesforce/package-lock.json | 4 +- .../cognigyManagementApi.ts | 6 +- .../salesforceKnowledgeConnector.ts | 104 ++++++++++-------- .../salesforce/src/nodes/searchContact.ts | 5 +- 4 files changed, 68 insertions(+), 51 deletions(-) diff --git a/extensions/salesforce/package-lock.json b/extensions/salesforce/package-lock.json index b26efe03c..7980b0a77 100644 --- a/extensions/salesforce/package-lock.json +++ b/extensions/salesforce/package-lock.json @@ -1,12 +1,12 @@ { "name": "salesforce", - "version": "4.5.0", + "version": "4.5.1", "lockfileVersion": 3, "requires": true, "packages": { "": { "name": "salesforce", - "version": "4.5.0", + "version": "4.5.1", "license": "MIT", "dependencies": { "@cognigy/extension-tools": "^0.16.1", diff --git a/extensions/salesforce/src/knowledge-connectors/cognigyManagementApi.ts b/extensions/salesforce/src/knowledge-connectors/cognigyManagementApi.ts index 4f74aba54..5e1e27b4e 100644 --- a/extensions/salesforce/src/knowledge-connectors/cognigyManagementApi.ts +++ b/extensions/salesforce/src/knowledge-connectors/cognigyManagementApi.ts @@ -27,13 +27,14 @@ export async function listKnowledgeSources( apiKey: string, storeId: string, ): Promise { + const baseUrl = apiUrl.replace(/\/+$/, ""); const all: ManagedSource[] = []; const limit = 100; let skip = 0; while (true) { const 
res = await axios.get<{ data: ManagedSource[] }>( - `${apiUrl}/v2.0/knowledgestores/${storeId}/knowledgesources`, + `${baseUrl}/v2.0/knowledgestores/${storeId}/knowledgesources`, { params: { limit, skip }, headers: { "X-API-Key": apiKey }, @@ -63,8 +64,9 @@ export async function deleteKnowledgeSourceById( storeId: string, sourceId: string, ): Promise { + const baseUrl = apiUrl.replace(/\/+$/, ""); await axios.delete( - `${apiUrl}/v2.0/knowledgestores/${storeId}/knowledgesources/${sourceId}`, + `${baseUrl}/v2.0/knowledgestores/${storeId}/knowledgesources/${sourceId}`, { headers: { "X-API-Key": apiKey }, timeout: REQUEST_TIMEOUT, diff --git a/extensions/salesforce/src/knowledge-connectors/salesforceKnowledgeConnector.ts b/extensions/salesforce/src/knowledge-connectors/salesforceKnowledgeConnector.ts index d3840db2f..e56bb3a52 100644 --- a/extensions/salesforce/src/knowledge-connectors/salesforceKnowledgeConnector.ts +++ b/extensions/salesforce/src/knowledge-connectors/salesforceKnowledgeConnector.ts @@ -12,14 +12,15 @@ interface IOAuthConnection { } /** - * Remove null bytes and non-printable control characters that can cause - * embedding/store failures. Preserves standard whitespace (newline, tab). + * Remove null bytes, C0 and C1 control characters that can cause embedding/store + * failures. Preserves tab (\x09), LF (\x0A), CR (\x0D), and all printable + * Unicode so non-English article content (accented chars, CJK, etc.) is kept intact. */ function sanitizeText(text: string): string { if (!text) return ""; return text - .replace(/[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]/g, "") - .replace(/[^\x09\x0A\x0D\x20-\x7E]/g, "") + .replace(/[\x00-\x08\x0B\x0C\x0E-\x1F]/g, "") // C0 control chars (keep tab/LF/CR) + .replace(/[\x7F-\x9F]/g, "") // DEL + C1 control chars .replace(/[ \t]{2,}/g, " ") .trim(); } @@ -96,29 +97,27 @@ function sanitizeSourceName(name: string): string { * * Structure: * # Article Title - * * ## Field Label * * ## Next Field Label * ... 
+ * + * Note: the standard Salesforce Summary field is intentionally excluded here. + * Add "Summary" to agentFields if you want it included as a dedicated chunk. + * Including it automatically causes duplicate content when Summary == Overview__c. */ -function buildArticleText(title: string, summary: string, fields: string[], article: any): string { +function buildArticleText(title: string, fields: string[], article: any): string { const sections: string[] = []; if (title) sections.push(`# ${sanitizeText(title)}`); - if (summary) { - const summaryMd = htmlFieldToMarkdown(summary, 1); - if (summaryMd) sections.push(summaryMd); - } - for (const field of fields) { const raw = article[field]; if (!raw) continue; const content = htmlFieldToMarkdown(String(raw), 2); if (!content) continue; // Convert API name to a readable label: Manager_Actions__c → Manager Actions - const label = field.replace(/__c$/i, "").replace(/_/g, " "); + const label = field.replace(/__c$/i, "").replace(/_/g, " ").trim(); sections.push(`## ${label}\n\n${content}`); } @@ -201,12 +200,14 @@ function chunkArticleMarkdown(markdown: string, maxChars: number = 1800): Articl // H2 section: field-level chunk const headingLine = `## ${section.heading}\n\n`; + const headingLineCont = `## ${section.heading} (continued)\n\n`; // worst-case length const fullText = titlePrefix + headingLine + section.content; if (fullText.length <= effective) { chunks.push({ text: fullText, section: section.heading }); } else { - const subMax = effective - titlePrefix.length - headingLine.length; + // Use the longer (continued) heading so sub-chunks never overflow + const subMax = effective - titlePrefix.length - headingLineCont.length; const subTexts = splitAtBoundaries(section.content, Math.max(subMax, 200)); for (let i = 0; i < subTexts.length; i++) { const cont = i > 0 ? 
" (continued)" : ""; @@ -420,19 +421,26 @@ export const salesforceKnowledgeConnector = createKnowledgeConnector({ const salesforceConnection = await authenticate(oauthConnection as IOAuthConnection); + // Normalise to arrays so the connector doesn't throw if either field is + // omitted from the config (supervisorFields is marked required: false). + const agentFieldList = Array.isArray(agentFields) ? (agentFields as string[]) : []; + const supervisorFieldList = Array.isArray(supervisorFields) ? (supervisorFields as string[]) : []; + // Combine all content fields for a single SOQL query, deduplicated - const allContentFields = [...new Set([ - ...(agentFields as string[]), - ...(supervisorFields as string[]) - ])]; - - // Incremental date filter - const dateFilter = (syncMode as string) === "incremental" && (lastSyncDate as string)?.trim() - ? `AND LastModifiedDate > ${(lastSyncDate as string).trim()}` + const allContentFields = [...new Set([...agentFieldList, ...supervisorFieldList])]; + + // Incremental date filter — validate ISO 8601 format before embedding in SOQL + const rawDate = (lastSyncDate as string)?.trim() ?? ""; + const isValidIsoDate = /^\d{4}-\d{2}-\d{2}(T[\d:.Z+\-]+)?$/.test(rawDate); + if ((syncMode as string) === "incremental" && rawDate && !isValidIsoDate) { + throw new Error(`[Salesforce KC] Invalid Last Sync Date: "${rawDate}". Expected ISO 8601, e.g. 2026-01-01T00:00:00Z`); + } + const dateFilter = (syncMode as string) === "incremental" && isValidIsoDate + ? 
`AND LastModifiedDate > ${rawDate}` : ""; const soql = [ - `SELECT Id, KnowledgeArticleId, ArticleNumber, Title, Summary, UrlName,`, + `SELECT Id, KnowledgeArticleId, ArticleNumber, Title, UrlName,`, `Language, LastPublishedDate, ${allContentFields.join(", ")}`, `FROM ${knowledgeApiName}`, `WHERE PublishStatus = 'Online'`, @@ -452,7 +460,6 @@ export const salesforceKnowledgeConnector = createKnowledgeConnector({ for (const article of articles) { const title = article.Title || `Article ${article.ArticleNumber}`; - const summary = article.Summary || ""; const articleMeta = { articleId: String(article.Id || ""), @@ -466,61 +473,63 @@ export const salesforceKnowledgeConnector = createKnowledgeConnector({ const articleUrl = `${salesforceConnection.instanceUrl}/lightning/articles/${articleMeta.knowledgeArticleId}`; // --- Agent Knowledge Source --- - const agentText = buildArticleText(title, summary, agentFields as string[], article); + const agentText = buildArticleText(title, agentFieldList, article); const agentChunks = chunkArticleMarkdown(agentText); if (agentChunks.length > 0) { console.log(`[Salesforce KC] Agent: "${title}" (${articleMeta.articleNumber}) — ${agentChunks.length} chunk(s)`); const { knowledgeSourceId: agentSourceId } = await api.createKnowledgeSource({ - name: sanitizeSourceName(`[${articleMeta.articleNumber}] ${title}`), + name: sanitizeSourceName(`[SF:${articleMeta.articleNumber}] ${title}`), tags: agentTags as string[], chunkCount: agentChunks.length }); for (let i = 0; i < agentChunks.length; i++) { - console.log(`[Salesforce KC] Agent chunk ${i + 1}/${agentChunks.length} for "${title}"`); + const agentChunkData: Record = { + articleNumber: articleMeta.articleNumber, + role: "agent", + url: articleUrl + }; + if (agentChunks[i].section) agentChunkData.section = agentChunks[i].section; + console.log(`[Salesforce KC] Agent chunk ${i + 1} text length: ${agentChunks[i].text.length}`); await api.createKnowledgeChunk({ knowledgeSourceId: agentSourceId, 
text: agentChunks[i].text, - data: { - articleNumber: articleMeta.articleNumber, - role: "agent", - section: agentChunks[i].section || null, - url: articleUrl - } + data: agentChunkData }); } } // --- Supervisor Knowledge Source --- - const hasSupervisorContent = (supervisorFields as string[]).some(field => { + const hasSupervisorContent = supervisorFieldList.some(field => { const raw = article[field]; return raw && htmlFieldToMarkdown(String(raw)).length > 0; }); if (hasSupervisorContent) { - const supervisorText = buildArticleText(title, summary, supervisorFields as string[], article); + const supervisorText = buildArticleText(title, supervisorFieldList, article); const supervisorChunks = chunkArticleMarkdown(supervisorText); if (supervisorChunks.length > 0) { console.log(`[Salesforce KC] Supervisor: "${title}" (${articleMeta.articleNumber}) — ${supervisorChunks.length} chunk(s)`); const { knowledgeSourceId: supervisorSourceId } = await api.createKnowledgeSource({ - name: sanitizeSourceName(`[${articleMeta.articleNumber}] ${title} - Manager`), + name: sanitizeSourceName(`[SF:${articleMeta.articleNumber}] ${title} - Manager`), tags: supervisorTags as string[], chunkCount: supervisorChunks.length }); for (let i = 0; i < supervisorChunks.length; i++) { - console.log(`[Salesforce KC] Supervisor chunk ${i + 1}/${supervisorChunks.length} for "${title}"`); + const supChunkData: Record = { + articleNumber: articleMeta.articleNumber, + role: "supervisor", + url: articleUrl + }; + if (supervisorChunks[i].section) supChunkData.section = supervisorChunks[i].section; + console.log(`[Salesforce KC] Supervisor chunk ${i + 1} text length: ${supervisorChunks[i].text.length}`); await api.createKnowledgeChunk({ knowledgeSourceId: supervisorSourceId, text: supervisorChunks[i].text, - data: { - articleNumber: articleMeta.articleNumber, - role: "supervisor", - section: supervisorChunks[i].section || null, - url: articleUrl - } + data: supChunkData }); } } @@ -542,7 +551,9 @@ export 
const salesforceKnowledgeConnector = createKnowledgeConnector({ (knowledgeStoreId as string).trim(), ); - const sfSources = existingSources.filter(s => /^\[([^\]]+)\]/.test(s.name)); + // Sources created by this connector are prefixed with [SF:] + // so the pattern is unambiguous even in a shared knowledge store. + const sfSources = existingSources.filter(s => /^\[SF:[^\]]+\]/.test(s.name)); if (sfSources.length === 0) { console.log("[Salesforce KC] No Salesforce-pattern sources found in store — skipping stale check"); } else { @@ -550,7 +561,7 @@ export const salesforceKnowledgeConnector = createKnowledgeConnector({ ...new Set( sfSources .map(s => { - const m = s.name.match(/^\[([^\]]+)\]/); + const m = s.name.match(/^\[SF:([^\]]+)\]/); return m ? m[1] : null; }) .filter(Boolean) as string[] @@ -559,7 +570,8 @@ export const salesforceKnowledgeConnector = createKnowledgeConnector({ console.log(`[Salesforce KC] Verifying ${articleNumbers.length} unique article number(s) in Salesforce…`); - const inClause = articleNumbers.map(n => `'${n}'`).join(", "); + // Escape article numbers before embedding in SOQL IN clause + const inClause = articleNumbers.map(n => `'${n.replace(/'/g, "\\'")}'`).join(", "); const checkSoql = `SELECT ArticleNumber FROM ${knowledgeApiName} WHERE ArticleNumber IN (${inClause}) AND PublishStatus = 'Online' AND IsLatestVersion = true`; const checkResult = await salesforceConnection.query(checkSoql, { autoFetch: true }); const activeNumbers = new Set( @@ -567,7 +579,7 @@ export const salesforceKnowledgeConnector = createKnowledgeConnector({ ); for (const src of sfSources) { - const m = src.name.match(/^\[([^\]]+)\]/); + const m = src.name.match(/^\[SF:([^\]]+)\]/); const articleNumber = m ? 
m[1] : null; if (articleNumber && !activeNumbers.has(articleNumber)) { console.log(`[Salesforce KC] Removing stale source "${src.name}" (article ${articleNumber} no longer active)`); diff --git a/extensions/salesforce/src/nodes/searchContact.ts b/extensions/salesforce/src/nodes/searchContact.ts index d05b53320..6568cbf54 100644 --- a/extensions/salesforce/src/nodes/searchContact.ts +++ b/extensions/salesforce/src/nodes/searchContact.ts @@ -211,11 +211,14 @@ export const searchContactNode = createNodeDescriptor({ try { const salesforceConnection = await authenticate(oauthConnection); + // Escape single quotes to prevent SOQL injection. + const escapedValue = contactFieldValue.replace(/\\/g, "\\\\").replace(/'/g, "\\'"); + // Step 1: Find the contact ID using the specified field // Note: LIMIT 1 is used here because only one record is ever stored (records[0]). // sobject.retrieve() is used in Step 2 to return all standard and custom fields // without requiring the "View All Data" permission that FIELDS(All) demands. 
- const soql: string = `SELECT Id FROM Contact WHERE ${contactField} = '${contactFieldValue}' ORDER BY CreatedDate DESC LIMIT 1`; + const soql: string = `SELECT Id FROM Contact WHERE ${contactField} = '${escapedValue}' ORDER BY CreatedDate DESC LIMIT 1`; const result = await salesforceConnection.query(soql); if (result.records.length === 0) { From 7906b6b9665563ea578a529830284cbb7b3f1f56 Mon Sep 17 00:00:00 2001 From: Matt Muller Date: Thu, 5 Mar 2026 21:30:19 +1100 Subject: [PATCH 6/8] Add SOQL validation and hash-based dedup --- .../salesforceKnowledgeConnector.ts | 245 ++++++++++++++---- 1 file changed, 192 insertions(+), 53 deletions(-) diff --git a/extensions/salesforce/src/knowledge-connectors/salesforceKnowledgeConnector.ts b/extensions/salesforce/src/knowledge-connectors/salesforceKnowledgeConnector.ts index e56bb3a52..c86290b61 100644 --- a/extensions/salesforce/src/knowledge-connectors/salesforceKnowledgeConnector.ts +++ b/extensions/salesforce/src/knowledge-connectors/salesforceKnowledgeConnector.ts @@ -1,3 +1,4 @@ +import * as crypto from "crypto"; import { createKnowledgeConnector } from "@cognigy/extension-tools"; import { authenticate } from "../authenticate"; import { @@ -92,6 +93,38 @@ function sanitizeSourceName(name: string): string { .trim(); } +/** Return true if the string is a valid Salesforce API identifier. */ +function isValidSfApiName(name: string): boolean { + return /^[A-Za-z_][A-Za-z0-9_]*$/.test(name); +} + +/** Return true if the string looks like a valid BCP 47 / Salesforce language code. */ +function isValidLanguageCode(lang: string): boolean { + return /^[a-z]{2,3}(_[A-Za-z0-9]{2,8})*$/.test(lang); +} + +/** First 12 hex chars of SHA-256 — used to detect content changes. */ +function shortHash(text: string): string { + return crypto.createHash("sha256").update(text).digest("hex").slice(0, 12); +} + +/** JSON stored in a source description for hash-based dedup. 
*/ +function buildSourceDescription(articleNumber: string, role: string, hash: string): string { + return JSON.stringify({ articleNumber, role, hash, synced: new Date().toISOString() }); +} + +interface SourceMeta { sourceId: string; hash: string; } + +/** Parse a source description written by buildSourceDescription; returns null on failure. */ +function parseSourceMeta(description?: string): SourceMeta | null { + if (!description) return null; + try { + const obj = JSON.parse(description); + if (obj.hash && typeof obj.hash === "string") return { sourceId: "", hash: obj.hash }; + } catch { /* fall through */ } + return null; +} + /** * Build a Markdown document from a Salesforce article's fields. * @@ -419,6 +452,16 @@ export const salesforceKnowledgeConnector = createKnowledgeConnector({ knowledgeStoreId } = config; + // --- Input validation (prevent SOQL injection) --------------------- + const apiNameRaw = (knowledgeApiName as string)?.trim() ?? ""; + if (!isValidSfApiName(apiNameRaw)) { + throw new Error(`[Salesforce KC] Invalid Knowledge Article Object API Name: "${apiNameRaw}". Must match [A-Za-z_][A-Za-z0-9_]*.`); + } + const langRaw = (language as string)?.trim() ?? ""; + if (!isValidLanguageCode(langRaw)) { + throw new Error(`[Salesforce KC] Invalid Language code: "${langRaw}". Expected format e.g. en_US or de.`); + } + const salesforceConnection = await authenticate(oauthConnection as IOAuthConnection); // Normalise to arrays so the connector doesn't throw if either field is @@ -426,9 +469,19 @@ export const salesforceKnowledgeConnector = createKnowledgeConnector({ const agentFieldList = Array.isArray(agentFields) ? (agentFields as string[]) : []; const supervisorFieldList = Array.isArray(supervisorFields) ? 
(supervisorFields as string[]) : []; + // Validate all field API names before embedding them in SOQL + const invalidFields = [...agentFieldList, ...supervisorFieldList].filter(f => !isValidSfApiName(f)); + if (invalidFields.length > 0) { + throw new Error(`[Salesforce KC] Invalid field API name(s): ${invalidFields.join(", ")}. Field names must match [A-Za-z_][A-Za-z0-9_]*.`); + } + // Combine all content fields for a single SOQL query, deduplicated const allContentFields = [...new Set([...agentFieldList, ...supervisorFieldList])]; + if (agentFieldList.length === 0) { + throw new Error("[Salesforce KC] At least one Agent Content Field is required."); + } + // Incremental date filter — validate ISO 8601 format before embedding in SOQL const rawDate = (lastSyncDate as string)?.trim() ?? ""; const isValidIsoDate = /^\d{4}-\d{2}-\d{2}(T[\d:.Z+\-]+)?$/.test(rawDate); @@ -439,12 +492,17 @@ export const salesforceKnowledgeConnector = createKnowledgeConnector({ ? `AND LastModifiedDate > ${rawDate}` : ""; + // Build SELECT — content fields are only appended when non-empty (avoids trailing comma) + const fixedFields = "Id, KnowledgeArticleId, ArticleNumber, Title, UrlName, Language, LastPublishedDate"; + const selectClause = allContentFields.length > 0 + ? 
`${fixedFields}, ${allContentFields.join(", ")}` + : fixedFields; + const soql = [ - `SELECT Id, KnowledgeArticleId, ArticleNumber, Title, UrlName,`, - `Language, LastPublishedDate, ${allContentFields.join(", ")}`, - `FROM ${knowledgeApiName}`, + `SELECT ${selectClause}`, + `FROM ${apiNameRaw}`, `WHERE PublishStatus = 'Online'`, - `AND Language = '${language}'`, + `AND Language = '${langRaw}'`, `AND IsLatestVersion = true`, dateFilter, `ORDER BY Title ASC` @@ -458,6 +516,48 @@ export const salesforceKnowledgeConnector = createKnowledgeConnector({ const articles = result.records; console.log(`[Salesforce KC] ${articles.length} article(s) to process`); + // --- Hash-based dedup: build map of existing sources ---------------- + // Key: ":" Value: { sourceId, hash } + const dedupEnabled = + (cognigyApiUrl as string)?.trim() && + (cognigyApiKey as string)?.trim() && + (knowledgeStoreId as string)?.trim(); + + const existingSourceMap = new Map(); + + if (dedupEnabled) { + try { + const existing = await listKnowledgeSources( + (cognigyApiUrl as string).trim(), + (cognigyApiKey as string).trim(), + (knowledgeStoreId as string).trim(), + ); + for (const src of existing) { + const m = src.name.match(/^\[SF:([^\]]+)\]/); + if (!m) continue; + const meta = parseSourceMeta(src.description); + if (meta) { + existingSourceMap.set(`${m[1]}:${meta.hash ? (src.name.includes("Manager") ? 
"supervisor" : "agent") : ""}`, { sourceId: src._id, hash: meta.hash }); + } + } + // Rebuild with role from description (more reliable) + existingSourceMap.clear(); + for (const src of existing) { + if (!/^\[SF:[^\]]+\]/.test(src.name)) continue; + try { + const obj = JSON.parse(src.description || "{}"); + if (obj.articleNumber && obj.role && obj.hash) { + existingSourceMap.set(`${obj.articleNumber}:${obj.role}`, { sourceId: src._id, hash: obj.hash }); + } + } catch { /* skip unparseable */ } + } + console.log(`[Salesforce KC] Dedup: ${existingSourceMap.size} tracked source(s) found`); + } catch (err) { + const msg = err instanceof Error ? err.message : String(err); + console.warn(`[Salesforce KC] Could not load existing sources for dedup (will recreate all): ${msg}`); + } + } + for (const article of articles) { const title = article.Title || `Article ${article.ArticleNumber}`; @@ -466,7 +566,7 @@ export const salesforceKnowledgeConnector = createKnowledgeConnector({ articleNumber: String(article.ArticleNumber || ""), knowledgeArticleId: String(article.KnowledgeArticleId || ""), urlName: String(article.UrlName || ""), - language: String(article.Language || language), + language: String(article.Language || langRaw), lastPublishedDate: String(article.LastPublishedDate || "") }; @@ -474,29 +574,51 @@ export const salesforceKnowledgeConnector = createKnowledgeConnector({ // --- Agent Knowledge Source --- const agentText = buildArticleText(title, agentFieldList, article); - const agentChunks = chunkArticleMarkdown(agentText); - - if (agentChunks.length > 0) { - console.log(`[Salesforce KC] Agent: "${title}" (${articleMeta.articleNumber}) — ${agentChunks.length} chunk(s)`); - const { knowledgeSourceId: agentSourceId } = await api.createKnowledgeSource({ - name: sanitizeSourceName(`[SF:${articleMeta.articleNumber}] ${title}`), - tags: agentTags as string[], - chunkCount: agentChunks.length - }); + const agentHash = shortHash(agentText); + const agentKey = 
`${articleMeta.articleNumber}:agent`; + const agentExisting = existingSourceMap.get(agentKey); + + if (agentExisting && agentExisting.hash === agentHash) { + console.log(`[Salesforce KC] Agent unchanged — skipping: ${articleMeta.articleNumber}`); + } else { + if (agentExisting) { + console.log(`[Salesforce KC] Agent changed — replacing: ${articleMeta.articleNumber}`); + try { + await deleteKnowledgeSourceById( + (cognigyApiUrl as string).trim(), + (cognigyApiKey as string).trim(), + (knowledgeStoreId as string).trim(), + agentExisting.sourceId, + ); + } catch (e) { + console.warn(`[Salesforce KC] Could not delete old agent source: ${e instanceof Error ? e.message : e}`); + } + } - for (let i = 0; i < agentChunks.length; i++) { - const agentChunkData: Record = { - articleNumber: articleMeta.articleNumber, - role: "agent", - url: articleUrl - }; - if (agentChunks[i].section) agentChunkData.section = agentChunks[i].section; - console.log(`[Salesforce KC] Agent chunk ${i + 1} text length: ${agentChunks[i].text.length}`); - await api.createKnowledgeChunk({ - knowledgeSourceId: agentSourceId, - text: agentChunks[i].text, - data: agentChunkData + const agentChunks = chunkArticleMarkdown(agentText); + + if (agentChunks.length > 0) { + console.log(`[Salesforce KC] Agent: "${title}" (${articleMeta.articleNumber}) — ${agentChunks.length} chunk(s)`); + const { knowledgeSourceId: agentSourceId } = await api.createKnowledgeSource({ + name: sanitizeSourceName(`[SF:${articleMeta.articleNumber}] ${title}`), + description: buildSourceDescription(articleMeta.articleNumber, "agent", agentHash), + tags: agentTags as string[], + chunkCount: agentChunks.length }); + + for (const chunk of agentChunks) { + const agentChunkData: Record = { + articleNumber: articleMeta.articleNumber, + role: "agent", + url: articleUrl + }; + if (chunk.section) agentChunkData.section = chunk.section; + await api.createKnowledgeChunk({ + knowledgeSourceId: agentSourceId, + text: chunk.text, + data: 
agentChunkData + }); + } } } @@ -508,41 +630,58 @@ export const salesforceKnowledgeConnector = createKnowledgeConnector({ if (hasSupervisorContent) { const supervisorText = buildArticleText(title, supervisorFieldList, article); - const supervisorChunks = chunkArticleMarkdown(supervisorText); - - if (supervisorChunks.length > 0) { - console.log(`[Salesforce KC] Supervisor: "${title}" (${articleMeta.articleNumber}) — ${supervisorChunks.length} chunk(s)`); - const { knowledgeSourceId: supervisorSourceId } = await api.createKnowledgeSource({ - name: sanitizeSourceName(`[SF:${articleMeta.articleNumber}] ${title} - Manager`), - tags: supervisorTags as string[], - chunkCount: supervisorChunks.length - }); + const supervisorHash = shortHash(supervisorText); + const supervisorKey = `${articleMeta.articleNumber}:supervisor`; + const supervisorExisting = existingSourceMap.get(supervisorKey); - for (let i = 0; i < supervisorChunks.length; i++) { - const supChunkData: Record = { - articleNumber: articleMeta.articleNumber, - role: "supervisor", - url: articleUrl - }; - if (supervisorChunks[i].section) supChunkData.section = supervisorChunks[i].section; - console.log(`[Salesforce KC] Supervisor chunk ${i + 1} text length: ${supervisorChunks[i].text.length}`); - await api.createKnowledgeChunk({ - knowledgeSourceId: supervisorSourceId, - text: supervisorChunks[i].text, - data: supChunkData + if (supervisorExisting && supervisorExisting.hash === supervisorHash) { + console.log(`[Salesforce KC] Supervisor unchanged — skipping: ${articleMeta.articleNumber}`); + } else { + if (supervisorExisting) { + console.log(`[Salesforce KC] Supervisor changed — replacing: ${articleMeta.articleNumber}`); + try { + await deleteKnowledgeSourceById( + (cognigyApiUrl as string).trim(), + (cognigyApiKey as string).trim(), + (knowledgeStoreId as string).trim(), + supervisorExisting.sourceId, + ); + } catch (e) { + console.warn(`[Salesforce KC] Could not delete old supervisor source: ${e instanceof Error 
? e.message : e}`); + } + } + + const supervisorChunks = chunkArticleMarkdown(supervisorText); + + if (supervisorChunks.length > 0) { + console.log(`[Salesforce KC] Supervisor: "${title}" (${articleMeta.articleNumber}) — ${supervisorChunks.length} chunk(s)`); + const { knowledgeSourceId: supervisorSourceId } = await api.createKnowledgeSource({ + name: sanitizeSourceName(`[SF:${articleMeta.articleNumber}] ${title} - Manager`), + description: buildSourceDescription(articleMeta.articleNumber, "supervisor", supervisorHash), + tags: supervisorTags as string[], + chunkCount: supervisorChunks.length }); + + for (const chunk of supervisorChunks) { + const supChunkData: Record = { + articleNumber: articleMeta.articleNumber, + role: "supervisor", + url: articleUrl + }; + if (chunk.section) supChunkData.section = chunk.section; + await api.createKnowledgeChunk({ + knowledgeSourceId: supervisorSourceId, + text: chunk.text, + data: supChunkData + }); + } } } } } // --- Stale article removal --- - const removalEnabled = - (cognigyApiUrl as string)?.trim() && - (cognigyApiKey as string)?.trim() && - (knowledgeStoreId as string)?.trim(); - - if (removalEnabled) { + if (dedupEnabled) { console.log("[Salesforce KC] Checking for stale sources to remove…"); try { const existingSources = await listKnowledgeSources( From 98388ac7593b58375109996fcb3d4041b5604553 Mon Sep 17 00:00:00 2001 From: Matt Muller Date: Thu, 5 Mar 2026 22:02:52 +1100 Subject: [PATCH 7/8] Update salesforceKnowledgeConnector.ts --- .../salesforceKnowledgeConnector.ts | 34 ++++++------------- 1 file changed, 10 insertions(+), 24 deletions(-) diff --git a/extensions/salesforce/src/knowledge-connectors/salesforceKnowledgeConnector.ts b/extensions/salesforce/src/knowledge-connectors/salesforceKnowledgeConnector.ts index c86290b61..ec5051bdf 100644 --- a/extensions/salesforce/src/knowledge-connectors/salesforceKnowledgeConnector.ts +++ b/extensions/salesforce/src/knowledge-connectors/salesforceKnowledgeConnector.ts @@ 
-87,7 +87,7 @@ function sanitizeSourceName(name: string): string { return name .replace(/\u2013|\u2014/g, "-") .replace(/&/g, "and") - .replace(/[/?!:()#*+<>=^~%@\\]/g, " ") + .replace(/[/?!()#*+<>=^~%@\\]/g, " ") .replace(/[ \t]{2,}/g, " ") .replace(/-{2,}/g, "-") .trim(); @@ -113,18 +113,6 @@ function buildSourceDescription(articleNumber: string, role: string, hash: strin return JSON.stringify({ articleNumber, role, hash, synced: new Date().toISOString() }); } -interface SourceMeta { sourceId: string; hash: string; } - -/** Parse a source description written by buildSourceDescription; returns null on failure. */ -function parseSourceMeta(description?: string): SourceMeta | null { - if (!description) return null; - try { - const obj = JSON.parse(description); - if (obj.hash && typeof obj.hash === "string") return { sourceId: "", hash: obj.hash }; - } catch { /* fall through */ } - return null; -} - /** * Build a Markdown document from a Salesforce article's fields. * @@ -532,16 +520,6 @@ export const salesforceKnowledgeConnector = createKnowledgeConnector({ (cognigyApiKey as string).trim(), (knowledgeStoreId as string).trim(), ); - for (const src of existing) { - const m = src.name.match(/^\[SF:([^\]]+)\]/); - if (!m) continue; - const meta = parseSourceMeta(src.description); - if (meta) { - existingSourceMap.set(`${m[1]}:${meta.hash ? (src.name.includes("Manager") ? 
"supervisor" : "agent") : ""}`, { sourceId: src._id, hash: meta.hash }); - } - } - // Rebuild with role from description (more reliable) - existingSourceMap.clear(); for (const src of existing) { if (!/^\[SF:[^\]]+\]/.test(src.name)) continue; try { @@ -574,6 +552,14 @@ export const salesforceKnowledgeConnector = createKnowledgeConnector({ // --- Agent Knowledge Source --- const agentText = buildArticleText(title, agentFieldList, article); + + // Skip if article has no field body content (only the H1 title line) + const agentBody = agentText.replace(/^#[^\n]*\n?/m, "").trim(); + if (!agentBody) { + console.log(`[Salesforce KC] Agent has no body content — skipping: ${articleMeta.articleNumber}`); + continue; + } + const agentHash = shortHash(agentText); const agentKey = `${articleMeta.articleNumber}:agent`; const agentExisting = existingSourceMap.get(agentKey); @@ -711,7 +697,7 @@ export const salesforceKnowledgeConnector = createKnowledgeConnector({ // Escape article numbers before embedding in SOQL IN clause const inClause = articleNumbers.map(n => `'${n.replace(/'/g, "\\'")}'`).join(", "); - const checkSoql = `SELECT ArticleNumber FROM ${knowledgeApiName} WHERE ArticleNumber IN (${inClause}) AND PublishStatus = 'Online' AND IsLatestVersion = true`; + const checkSoql = `SELECT ArticleNumber FROM ${apiNameRaw} WHERE ArticleNumber IN (${inClause}) AND PublishStatus = 'Online' AND IsLatestVersion = true`; const checkResult = await salesforceConnection.query(checkSoql, { autoFetch: true }); const activeNumbers = new Set( checkResult.records.map((r: any) => String(r.ArticleNumber)) From 6f999f6bf3d251671a8ea02ce8231f08f90873d7 Mon Sep 17 00:00:00 2001 From: Matt Muller Date: Sat, 7 Mar 2026 14:09:21 +1100 Subject: [PATCH 8/8] fixes --- extensions/salesforce/package-lock.json | 8 +- extensions/salesforce/package.json | 4 +- .../salesforceKnowledgeConnector.ts | 414 +++++------------- 3 files changed, 111 insertions(+), 315 deletions(-) diff --git 
a/extensions/salesforce/package-lock.json b/extensions/salesforce/package-lock.json index 7980b0a77..4692caa5e 100644 --- a/extensions/salesforce/package-lock.json +++ b/extensions/salesforce/package-lock.json @@ -9,7 +9,7 @@ "version": "4.5.1", "license": "MIT", "dependencies": { - "@cognigy/extension-tools": "^0.16.1", + "@cognigy/extension-tools": "^0.17.0", "axios": "^1.13.5" }, "devDependencies": { @@ -47,9 +47,9 @@ } }, "node_modules/@cognigy/extension-tools": { - "version": "0.16.6", - "resolved": "https://registry.npmjs.org/@cognigy/extension-tools/-/extension-tools-0.16.6.tgz", - "integrity": "sha512-hWvUZsdDnsfsncIryMolrij2SVMdiKQC5d5zQdF7snNPBiz3Bb2Pg3PD0mlWRMAQtxwXbvRt03jpH3QMiXS38w==", + "version": "0.17.0", + "resolved": "https://registry.npmjs.org/@cognigy/extension-tools/-/extension-tools-0.17.0.tgz", + "integrity": "sha512-JTSQI+1Xm1IBZVGYwJK1lTpIBlJlrrqFpNQJYWZPczRF7ceX11I+aYUm7XY7bnHnFCQxyHKJSDXNXjvG6Jr9Aw==", "license": "SEE LICENSE IN LICENSE" }, "node_modules/@cspotcode/source-map-support": { diff --git a/extensions/salesforce/package.json b/extensions/salesforce/package.json index 1eaa11b66..34ff717c3 100644 --- a/extensions/salesforce/package.json +++ b/extensions/salesforce/package.json @@ -32,7 +32,7 @@ "typescript": "^4.9.5" }, "dependencies": { - "@cognigy/extension-tools": "^0.16.1", + "@cognigy/extension-tools": "^0.17.0", "axios": "^1.13.5" } -} \ No newline at end of file +} diff --git a/extensions/salesforce/src/knowledge-connectors/salesforceKnowledgeConnector.ts b/extensions/salesforce/src/knowledge-connectors/salesforceKnowledgeConnector.ts index ec5051bdf..a3c043693 100644 --- a/extensions/salesforce/src/knowledge-connectors/salesforceKnowledgeConnector.ts +++ b/extensions/salesforce/src/knowledge-connectors/salesforceKnowledgeConnector.ts @@ -1,10 +1,6 @@ import * as crypto from "crypto"; import { createKnowledgeConnector } from "@cognigy/extension-tools"; import { authenticate } from "../authenticate"; -import { - 
deleteKnowledgeSourceById, - listKnowledgeSources, -} from "./cognigyManagementApi"; interface IOAuthConnection { consumerKey: string; @@ -13,15 +9,14 @@ interface IOAuthConnection { } /** - * Remove null bytes, C0 and C1 control characters that can cause embedding/store - * failures. Preserves tab (\x09), LF (\x0A), CR (\x0D), and all printable - * Unicode so non-English article content (accented chars, CJK, etc.) is kept intact. + * Remove C0/C1 control characters. Preserves tab, LF, CR, and all printable + * Unicode so non-English article content (accented chars, CJK) is kept intact. */ function sanitizeText(text: string): string { if (!text) return ""; return text - .replace(/[\x00-\x08\x0B\x0C\x0E-\x1F]/g, "") // C0 control chars (keep tab/LF/CR) - .replace(/[\x7F-\x9F]/g, "") // DEL + C1 control chars + .replace(/[\x00-\x08\x0B\x0C\x0E-\x1F]/g, "") + .replace(/[\x7F-\x9F]/g, "") .replace(/[ \t]{2,}/g, " ") .trim(); } @@ -29,9 +24,9 @@ function sanitizeText(text: string): string { /** * Convert HTML from a Salesforce field to Markdown. * Heading levels are shifted down by fieldDepth so the field-label heading - * (## FieldName) sits above any headings that were inside the HTML content. + * (## FieldName) sits above any headings inside the HTML content. * - * e.g. fieldDepth=2:

→ ###,

→ ####,

→ ##### + * e.g. fieldDepth=2:

→ ###,

→ #### */ function htmlFieldToMarkdown(html: string, fieldDepth: number = 2): string { if (!html) return ""; @@ -40,39 +35,30 @@ function htmlFieldToMarkdown(html: string, fieldDepth: number = 2): string { const stripInner = (s: string) => s.replace(/<[^>]+>/g, "").trim(); const md = html - // Headings — shift down by fieldDepth .replace(/]*>([\s\S]*?)<\/h1>/gi, (_, t) => `${shiftedH(1)} ${stripInner(t)}\n\n`) .replace(/]*>([\s\S]*?)<\/h2>/gi, (_, t) => `${shiftedH(2)} ${stripInner(t)}\n\n`) .replace(/]*>([\s\S]*?)<\/h3>/gi, (_, t) => `${shiftedH(3)} ${stripInner(t)}\n\n`) .replace(/]*>([\s\S]*?)<\/h4>/gi, (_, t) => `${shiftedH(4)} ${stripInner(t)}\n\n`) .replace(/]*>([\s\S]*?)<\/h5>/gi, (_, t) => `${shiftedH(5)} ${stripInner(t)}\n\n`) .replace(/]*>([\s\S]*?)<\/h6>/gi, (_, t) => `${shiftedH(6)} ${stripInner(t)}\n\n`) - // Bold / italic .replace(/<(strong|b)[^>]*>([\s\S]*?)<\/(strong|b)>/gi, "**$2**") .replace(/<(em|i)[^>]*>([\s\S]*?)<\/(em|i)>/gi, "_$2_") - // Lists .replace(/]*>/gi, "").replace(/<\/ul>/gi, "\n") .replace(/]*>/gi, "").replace(/<\/ol>/gi, "\n") .replace(/]*>([\s\S]*?)<\/li>/gi, (_, t) => `- ${stripInner(t)}\n`) - // Tables — simple pipe format .replace(/]*>/gi, "\n").replace(/<\/table>/gi, "\n") .replace(/]*>/gi, "").replace(/<\/t(?:head|body|foot)>/gi, "") .replace(/]*>/gi, "").replace(/<\/tr>/gi, " |\n") .replace(/]*>([\s\S]*?)<\/th>/gi, (_, t) => `| **${stripInner(t)}** `) .replace(/]*>([\s\S]*?)<\/td>/gi, (_, t) => `| ${stripInner(t)} `) - // Block elements .replace(/<\/p>/gi, "\n\n").replace(/]*>/gi, "") .replace(/<\/div>/gi, "\n").replace(/]*>/gi, "") .replace(//gi, "\n") - // Links — keep visible text only .replace(/]*>([\s\S]*?)<\/a>/gi, "$1") - // Strip remaining tags .replace(/<[^>]+>/g, "") - // HTML entities .replace(/&/g, "&").replace(/</g, "<").replace(/>/g, ">") .replace(/ /g, " ").replace(/"/g, '"').replace(/'/g, "'") .replace(/&[a-z]+;/gi, " ") - // Normalise whitespace .replace(/[ \t]+/g, " ") .replace(/\n{3,}/g, "\n\n") .trim(); 
@@ -80,9 +66,7 @@ function htmlFieldToMarkdown(html: string, fieldDepth: number = 2): string { return sanitizeText(md); } -/** - * Sanitize a string for use as a Cognigy knowledge source name. - */ +/** Sanitize a string for use as a Cognigy knowledge source name. */ function sanitizeSourceName(name: string): string { return name .replace(/\u2013|\u2014/g, "-") @@ -98,34 +82,28 @@ function isValidSfApiName(name: string): boolean { return /^[A-Za-z_][A-Za-z0-9_]*$/.test(name); } -/** Return true if the string looks like a valid BCP 47 / Salesforce language code. */ +/** Return true if the string looks like a valid Salesforce language code. */ function isValidLanguageCode(lang: string): boolean { return /^[a-z]{2,3}(_[A-Za-z0-9]{2,8})*$/.test(lang); } -/** First 12 hex chars of SHA-256 — used to detect content changes. */ +/** First 12 hex chars of SHA-256 — used as contentHashOrTimestamp. */ function shortHash(text: string): string { return crypto.createHash("sha256").update(text).digest("hex").slice(0, 12); } -/** JSON stored in a source description for hash-based dedup. */ -function buildSourceDescription(articleNumber: string, role: string, hash: string): string { - return JSON.stringify({ articleNumber, role, hash, synced: new Date().toISOString() }); -} - /** * Build a Markdown document from a Salesforce article's fields. * - * Structure: * # Article Title * ## Field Label * * ## Next Field Label * ... * - * Note: the standard Salesforce Summary field is intentionally excluded here. - * Add "Summary" to agentFields if you want it included as a dedicated chunk. - * Including it automatically causes duplicate content when Summary == Overview__c. + * Note: the standard Salesforce Summary field is intentionally excluded. + * Add "Summary" to agentFields if you want it as a dedicated chunk, since + * including it automatically duplicates content when Summary == Overview__c. 
*/ function buildArticleText(title: string, fields: string[], article: any): string { const sections: string[] = []; @@ -137,7 +115,6 @@ function buildArticleText(title: string, fields: string[], article: any): string if (!raw) continue; const content = htmlFieldToMarkdown(String(raw), 2); if (!content) continue; - // Convert API name to a readable label: Manager_Actions__c → Manager Actions const label = field.replace(/__c$/i, "").replace(/_/g, " ").trim(); sections.push(`## ${label}\n\n${content}`); } @@ -153,28 +130,21 @@ const HARD_MAX_CHARS = 1950; const MIN_BODY_CHARS = 30; interface ArticleChunk { - /** Full chunk text sent to Cognigy (includes title prefix + section heading). */ text: string; - /** Section heading for the chunk metadata field. */ section: string; } /** * Split article Markdown into RAG-optimised chunks. - * * Splits at H2 (field-level) boundaries only; every chunk is prefixed with - * the article's H1 title so the LLM always has article context. - * Long H2 sections are further split at paragraph → sentence boundaries. + * the article's H1 title for context. */ function chunkArticleMarkdown(markdown: string, maxChars: number = 1800): ArticleChunk[] { const effective = Math.min(maxChars, HARD_MAX_CHARS); - // Extract the H1 title to use as a prefix in every chunk const titleMatch = markdown.match(/^#\s+(.+)$/m); const titlePrefix = titleMatch ? `# ${titleMatch[1].trim()}\n\n` : ""; - // --- Parse markdown into H1/H2 sections only --------------------------- - // H3+ headings (internal field structure) are kept as content, not split points. 
const lines = markdown.split("\n"); const sections: Array<{ level: number; heading: string; content: string }> = []; let curLevel = 0; @@ -195,7 +165,6 @@ function chunkArticleMarkdown(markdown: string, maxChars: number = 1800): Articl } else if (started) { curLines.push(line); } - // Lines before the first heading are ignored (empty for well-formed docs) } if (started) { sections.push({ level: curLevel, heading: curHeading, content: curLines.join("\n").trim() }); @@ -204,30 +173,26 @@ function chunkArticleMarkdown(markdown: string, maxChars: number = 1800): Articl const chunks: ArticleChunk[] = []; for (const section of sections) { - // H1 section: title is already in titlePrefix; use content (summary) directly if (section.level === 1) { if (!section.content.trim()) continue; const fullText = titlePrefix + section.content; if (fullText.length <= effective) { chunks.push({ text: fullText, section: "" }); } else { - const subTexts = splitAtBoundaries(section.content, effective - titlePrefix.length); - for (const sub of subTexts) { + for (const sub of splitAtBoundaries(section.content, effective - titlePrefix.length)) { chunks.push({ text: titlePrefix + sub, section: "" }); } } continue; } - // H2 section: field-level chunk const headingLine = `## ${section.heading}\n\n`; - const headingLineCont = `## ${section.heading} (continued)\n\n`; // worst-case length + const headingLineCont = `## ${section.heading} (continued)\n\n`; const fullText = titlePrefix + headingLine + section.content; if (fullText.length <= effective) { chunks.push({ text: fullText, section: section.heading }); } else { - // Use the longer (continued) heading so sub-chunks never overflow const subMax = effective - titlePrefix.length - headingLineCont.length; const subTexts = splitAtBoundaries(section.content, Math.max(subMax, 200)); for (let i = 0; i < subTexts.length; i++) { @@ -240,23 +205,16 @@ function chunkArticleMarkdown(markdown: string, maxChars: number = 1800): Articl } } - // If parsing 
produced nothing (article has no headings), fall back to plain split if (chunks.length === 0 && markdown.trim()) { return splitAtBoundaries(markdown, effective).map(t => ({ text: t, section: "" })); } - // Filter out chunks with no meaningful prose content return chunks.filter(c => { - const body = c.text - .replace(/^#{1,6}[^\n]*\n/gm, "") - .replace(/\s/g, ""); + const body = c.text.replace(/^#{1,6}[^\n]*\n/gm, "").replace(/\s/g, ""); return body.length >= MIN_BODY_CHARS; }); } -/** - * Split text at natural boundaries (paragraph → newline → sentence → word). - */ function splitAtBoundaries(text: string, maxChars: number): string[] { if (text.length <= maxChars) return [text]; @@ -292,10 +250,7 @@ export const salesforceKnowledgeConnector = createKnowledgeConnector({ key: "oauthConnection", label: "Salesforce Connected App", type: "connection", - params: { - connectionType: "oauth", - required: true - } + params: { connectionType: "oauth", required: true } }, { key: "knowledgeApiName", @@ -318,18 +273,11 @@ export const salesforceKnowledgeConnector = createKnowledgeConnector({ label: "Agent Content Fields", type: "textArray", defaultValue: [ - "Overview__c", - "Details__c", - "Inclusions__c", - "Exclusions__c", - "Information__c", - "Processing_Steps_Text__c", - "Questions__c", - "Answer__c", - "Scripting__c", - "Actions__c" + "Overview__c", "Details__c", "Inclusions__c", "Exclusions__c", + "Information__c", "Processing_Steps_Text__c", "Questions__c", + "Answer__c", "Scripting__c", "Actions__c" ], - description: "API names of article fields to include in agent-accessible knowledge. 
Content from these fields will be indexed for all agent flows.", + description: "API names of article fields to include in agent-accessible knowledge.", params: { required: true } }, { @@ -337,19 +285,17 @@ export const salesforceKnowledgeConnector = createKnowledgeConnector({ label: "Agent Knowledge Tags", type: "chipInput", defaultValue: ["agent"], - description: "Tags applied to agent knowledge sources. Agent Copilot should filter by this tag. Supervisor Copilot should filter by both this tag and the supervisor tag to see all content. Press ENTER to add a tag." + description: "Tags applied to agent knowledge sources. Press ENTER to add a tag." }, { key: "supervisorFields", label: "Supervisor / Manager Content Fields", type: "textArray", defaultValue: [ - "Manager_Actions__c", - "Manager_information__c", - "Manager_processing_steps__c", - "Manager_scripting__c" + "Manager_Actions__c", "Manager_information__c", + "Manager_processing_steps__c", "Manager_scripting__c" ], - description: "API names of fields containing manager or supervisor-only content. A separate knowledge source tagged for supervisors will be created for each article that has content in these fields.", + description: "Fields with manager-only content. A separate supervisor-tagged source is created per article.", params: { required: false } }, { @@ -357,14 +303,14 @@ export const salesforceKnowledgeConnector = createKnowledgeConnector({ label: "Supervisor Knowledge Tags", type: "chipInput", defaultValue: ["supervisor"], - description: "Tags applied to supervisor-only knowledge sources. Supervisor Copilot should filter by both this tag and the agent tag to see full article content including manager sections. Press ENTER to add a tag." + description: "Tags applied to supervisor-only knowledge sources. Press ENTER to add a tag." }, { key: "syncMode", label: "Sync Mode", type: "select", defaultValue: "full", - description: "Full: import all published articles. 
Incremental: only import articles modified since Last Sync Date.", + description: "Full: fetch all published articles (recommended — unchanged articles are skipped via content hash). Incremental: only fetch articles modified since Last Sync Date.", params: { options: [ { label: "Full", value: "full" }, @@ -376,28 +322,7 @@ export const salesforceKnowledgeConnector = createKnowledgeConnector({ key: "lastSyncDate", label: "Last Sync Date", type: "text", - description: "ISO 8601 date used as a filter in Incremental mode, e.g. 2026-01-01T00:00:00Z. Only articles modified after this date will be imported.", - params: { required: false } - }, - { - key: "cognigyApiUrl", - label: "Cognigy API URL", - type: "text", - description: "Base URL of your Cognigy.AI instance, e.g. https://app.cognigy.ai — required for stale article removal.", - params: { required: false } - }, - { - key: "cognigyApiKey", - label: "Cognigy API Key", - type: "text", - description: "API key from Profile → API Keys in Cognigy.AI — required for stale article removal.", - params: { required: false } - }, - { - key: "knowledgeStoreId", - label: "Knowledge Store ID", - type: "text", - description: "The ID of the target knowledge store (visible in the store URL in Cognigy.AI) — required for stale article removal.", + description: "ISO 8601 date for Incremental mode, e.g. 2026-01-01T00:00:00Z. 
Stale removal is skipped in Incremental mode.", params: { required: false } } ] as const, @@ -412,7 +337,7 @@ export const salesforceKnowledgeConnector = createKnowledgeConnector({ key: "syncSettings", label: "Sync Settings", defaultCollapsed: true, - fields: ["syncMode", "lastSyncDate", "cognigyApiUrl", "cognigyApiKey", "knowledgeStoreId"] + fields: ["syncMode", "lastSyncDate"] } ], form: [ @@ -424,23 +349,14 @@ export const salesforceKnowledgeConnector = createKnowledgeConnector({ { type: "section", key: "supervisorAccess" }, { type: "section", key: "syncSettings" } ], - function: async ({ config, api }) => { + function: async ({ config, api, sources }) => { const { - oauthConnection, - knowledgeApiName, - language, - agentFields, - agentTags, - supervisorFields, - supervisorTags, - syncMode, - lastSyncDate, - cognigyApiUrl, - cognigyApiKey, - knowledgeStoreId + oauthConnection, knowledgeApiName, language, + agentFields, agentTags, supervisorFields, supervisorTags, + syncMode, lastSyncDate } = config; - // --- Input validation (prevent SOQL injection) --------------------- + // --- Input validation (prevent SOQL injection) ---------------------- const apiNameRaw = (knowledgeApiName as string)?.trim() ?? ""; if (!isValidSfApiName(apiNameRaw)) { throw new Error(`[Salesforce KC] Invalid Knowledge Article Object API Name: "${apiNameRaw}". Must match [A-Za-z_][A-Za-z0-9_]*.`); @@ -452,25 +368,20 @@ export const salesforceKnowledgeConnector = createKnowledgeConnector({ const salesforceConnection = await authenticate(oauthConnection as IOAuthConnection); - // Normalise to arrays so the connector doesn't throw if either field is - // omitted from the config (supervisorFields is marked required: false). const agentFieldList = Array.isArray(agentFields) ? (agentFields as string[]) : []; const supervisorFieldList = Array.isArray(supervisorFields) ? 
(supervisorFields as string[]) : []; - // Validate all field API names before embedding them in SOQL const invalidFields = [...agentFieldList, ...supervisorFieldList].filter(f => !isValidSfApiName(f)); if (invalidFields.length > 0) { - throw new Error(`[Salesforce KC] Invalid field API name(s): ${invalidFields.join(", ")}. Field names must match [A-Za-z_][A-Za-z0-9_]*.`); + throw new Error(`[Salesforce KC] Invalid field API name(s): ${invalidFields.join(", ")}.`); } - - // Combine all content fields for a single SOQL query, deduplicated - const allContentFields = [...new Set([...agentFieldList, ...supervisorFieldList])]; - if (agentFieldList.length === 0) { throw new Error("[Salesforce KC] At least one Agent Content Field is required."); } - // Incremental date filter — validate ISO 8601 format before embedding in SOQL + const allContentFields = [...new Set([...agentFieldList, ...supervisorFieldList])]; + + // Incremental date filter — validate ISO 8601 before embedding in SOQL const rawDate = (lastSyncDate as string)?.trim() ?? ""; const isValidIsoDate = /^\d{4}-\d{2}-\d{2}(T[\d:.Z+\-]+)?$/.test(rawDate); if ((syncMode as string) === "incremental" && rawDate && !isValidIsoDate) { @@ -480,7 +391,6 @@ export const salesforceKnowledgeConnector = createKnowledgeConnector({ ? `AND LastModifiedDate > ${rawDate}` : ""; - // Build SELECT — content fields are only appended when non-empty (avoids trailing comma) const fixedFields = "Id, KnowledgeArticleId, ArticleNumber, Title, UrlName, Language, LastPublishedDate"; const selectClause = allContentFields.length > 0 ? 
`${fixedFields}, ${allContentFields.join(", ")}` @@ -497,114 +407,59 @@ export const salesforceKnowledgeConnector = createKnowledgeConnector({ ].filter(Boolean).join(" "); if (dateFilter) { - console.log(`[Salesforce KC] Incremental mode: fetching articles modified after ${(lastSyncDate as string).trim()}`); + console.log(`[Salesforce KC] Incremental mode: articles modified after ${rawDate}`); } const result = await salesforceConnection.query(soql, { autoFetch: true }); const articles = result.records; console.log(`[Salesforce KC] ${articles.length} article(s) to process`); - // --- Hash-based dedup: build map of existing sources ---------------- - // Key: ":" Value: { sourceId, hash } - const dedupEnabled = - (cognigyApiUrl as string)?.trim() && - (cognigyApiKey as string)?.trim() && - (knowledgeStoreId as string)?.trim(); - - const existingSourceMap = new Map(); - - if (dedupEnabled) { - try { - const existing = await listKnowledgeSources( - (cognigyApiUrl as string).trim(), - (cognigyApiKey as string).trim(), - (knowledgeStoreId as string).trim(), - ); - for (const src of existing) { - if (!/^\[SF:[^\]]+\]/.test(src.name)) continue; - try { - const obj = JSON.parse(src.description || "{}"); - if (obj.articleNumber && obj.role && obj.hash) { - existingSourceMap.set(`${obj.articleNumber}:${obj.role}`, { sourceId: src._id, hash: obj.hash }); - } - } catch { /* skip unparseable */ } - } - console.log(`[Salesforce KC] Dedup: ${existingSourceMap.size} tracked source(s) found`); - } catch (err) { - const msg = err instanceof Error ? 
err.message : String(err); - console.warn(`[Salesforce KC] Could not load existing sources for dedup (will recreate all): ${msg}`); - } - } + // Track externalIdentifiers created this run — used for stale removal + const processedIds = new Set(); for (const article of articles) { const title = article.Title || `Article ${article.ArticleNumber}`; - - const articleMeta = { - articleId: String(article.Id || ""), - articleNumber: String(article.ArticleNumber || ""), - knowledgeArticleId: String(article.KnowledgeArticleId || ""), - urlName: String(article.UrlName || ""), - language: String(article.Language || langRaw), - lastPublishedDate: String(article.LastPublishedDate || "") - }; - - const articleUrl = `${salesforceConnection.instanceUrl}/lightning/articles/${articleMeta.knowledgeArticleId}`; + const articleNumber = String(article.ArticleNumber || ""); + const knowledgeArticleId = String(article.KnowledgeArticleId || ""); + const articleUrl = `${salesforceConnection.instanceUrl}/lightning/articles/${knowledgeArticleId}`; // --- Agent Knowledge Source --- const agentText = buildArticleText(title, agentFieldList, article); - - // Skip if article has no field body content (only the H1 title line) const agentBody = agentText.replace(/^#[^\n]*\n?/m, "").trim(); if (!agentBody) { - console.log(`[Salesforce KC] Agent has no body content — skipping: ${articleMeta.articleNumber}`); - continue; - } - - const agentHash = shortHash(agentText); - const agentKey = `${articleMeta.articleNumber}:agent`; - const agentExisting = existingSourceMap.get(agentKey); - - if (agentExisting && agentExisting.hash === agentHash) { - console.log(`[Salesforce KC] Agent unchanged — skipping: ${articleMeta.articleNumber}`); + console.log(`[Salesforce KC] Agent has no body content — skipping: ${articleNumber}`); } else { - if (agentExisting) { - console.log(`[Salesforce KC] Agent changed — replacing: ${articleMeta.articleNumber}`); - try { - await deleteKnowledgeSourceById( - (cognigyApiUrl as 
string).trim(), - (cognigyApiKey as string).trim(), - (knowledgeStoreId as string).trim(), - agentExisting.sourceId, - ); - } catch (e) { - console.warn(`[Salesforce KC] Could not delete old agent source: ${e instanceof Error ? e.message : e}`); - } - } - + const agentExternalId = `${articleNumber}:agent`; + processedIds.add(agentExternalId); const agentChunks = chunkArticleMarkdown(agentText); + const agentHash = shortHash(agentText); + + const agentSource = await api.upsertKnowledgeSource({ + name: sanitizeSourceName(`[SF:${articleNumber}] ${title}`), + description: `Salesforce article ${articleNumber} - agent`, + tags: agentTags as string[], + chunkCount: agentChunks.length, + contentHashOrTimestamp: agentHash, + externalIdentifier: agentExternalId + }); - if (agentChunks.length > 0) { - console.log(`[Salesforce KC] Agent: "${title}" (${articleMeta.articleNumber}) — ${agentChunks.length} chunk(s)`); - const { knowledgeSourceId: agentSourceId } = await api.createKnowledgeSource({ - name: sanitizeSourceName(`[SF:${articleMeta.articleNumber}] ${title}`), - description: buildSourceDescription(articleMeta.articleNumber, "agent", agentHash), - tags: agentTags as string[], - chunkCount: agentChunks.length - }); - + if (agentSource) { + console.log(`[Salesforce KC] Agent: "${title}" (${articleNumber}) — ${agentChunks.length} chunk(s)`); for (const chunk of agentChunks) { - const agentChunkData: Record = { - articleNumber: articleMeta.articleNumber, + const data: Record = { + articleNumber, role: "agent", url: articleUrl }; - if (chunk.section) agentChunkData.section = chunk.section; + if (chunk.section) data.section = chunk.section; await api.createKnowledgeChunk({ - knowledgeSourceId: agentSourceId, + knowledgeSourceId: agentSource.knowledgeSourceId, text: chunk.text, - data: agentChunkData + data }); } + } else { + console.log(`[Salesforce KC] Agent unchanged — skipping: ${articleNumber}`); } } @@ -616,115 +471,56 @@ export const salesforceKnowledgeConnector = 
createKnowledgeConnector({ if (hasSupervisorContent) { const supervisorText = buildArticleText(title, supervisorFieldList, article); + const supervisorExternalId = `${articleNumber}:supervisor`; + processedIds.add(supervisorExternalId); + const supervisorChunks = chunkArticleMarkdown(supervisorText); const supervisorHash = shortHash(supervisorText); - const supervisorKey = `${articleMeta.articleNumber}:supervisor`; - const supervisorExisting = existingSourceMap.get(supervisorKey); - - if (supervisorExisting && supervisorExisting.hash === supervisorHash) { - console.log(`[Salesforce KC] Supervisor unchanged — skipping: ${articleMeta.articleNumber}`); - } else { - if (supervisorExisting) { - console.log(`[Salesforce KC] Supervisor changed — replacing: ${articleMeta.articleNumber}`); - try { - await deleteKnowledgeSourceById( - (cognigyApiUrl as string).trim(), - (cognigyApiKey as string).trim(), - (knowledgeStoreId as string).trim(), - supervisorExisting.sourceId, - ); - } catch (e) { - console.warn(`[Salesforce KC] Could not delete old supervisor source: ${e instanceof Error ? 
e.message : e}`); - } - } - const supervisorChunks = chunkArticleMarkdown(supervisorText); + const supervisorSource = await api.upsertKnowledgeSource({ + name: sanitizeSourceName(`[SF:${articleNumber}] ${title} - Manager`), + description: `Salesforce article ${articleNumber} - supervisor`, + tags: supervisorTags as string[], + chunkCount: supervisorChunks.length, + contentHashOrTimestamp: supervisorHash, + externalIdentifier: supervisorExternalId + }); - if (supervisorChunks.length > 0) { - console.log(`[Salesforce KC] Supervisor: "${title}" (${articleMeta.articleNumber}) — ${supervisorChunks.length} chunk(s)`); - const { knowledgeSourceId: supervisorSourceId } = await api.createKnowledgeSource({ - name: sanitizeSourceName(`[SF:${articleMeta.articleNumber}] ${title} - Manager`), - description: buildSourceDescription(articleMeta.articleNumber, "supervisor", supervisorHash), - tags: supervisorTags as string[], - chunkCount: supervisorChunks.length + if (supervisorSource) { + console.log(`[Salesforce KC] Supervisor: "${title}" (${articleNumber}) — ${supervisorChunks.length} chunk(s)`); + for (const chunk of supervisorChunks) { + const data: Record = { + articleNumber, + role: "supervisor", + url: articleUrl + }; + if (chunk.section) data.section = chunk.section; + await api.createKnowledgeChunk({ + knowledgeSourceId: supervisorSource.knowledgeSourceId, + text: chunk.text, + data }); - - for (const chunk of supervisorChunks) { - const supChunkData: Record = { - articleNumber: articleMeta.articleNumber, - role: "supervisor", - url: articleUrl - }; - if (chunk.section) supChunkData.section = chunk.section; - await api.createKnowledgeChunk({ - knowledgeSourceId: supervisorSourceId, - text: chunk.text, - data: supChunkData - }); - } } + } else { + console.log(`[Salesforce KC] Supervisor unchanged — skipping: ${articleNumber}`); } } } - // --- Stale article removal --- - if (dedupEnabled) { - console.log("[Salesforce KC] Checking for stale sources to remove…"); - try { - 
const existingSources = await listKnowledgeSources( - (cognigyApiUrl as string).trim(), - (cognigyApiKey as string).trim(), - (knowledgeStoreId as string).trim(), - ); - - // Sources created by this connector are prefixed with [SF:] - // so the pattern is unambiguous even in a shared knowledge store. - const sfSources = existingSources.filter(s => /^\[SF:[^\]]+\]/.test(s.name)); - if (sfSources.length === 0) { - console.log("[Salesforce KC] No Salesforce-pattern sources found in store — skipping stale check"); - } else { - const articleNumbers = [ - ...new Set( - sfSources - .map(s => { - const m = s.name.match(/^\[SF:([^\]]+)\]/); - return m ? m[1] : null; - }) - .filter(Boolean) as string[] - ) - ]; - - console.log(`[Salesforce KC] Verifying ${articleNumbers.length} unique article number(s) in Salesforce…`); - - // Escape article numbers before embedding in SOQL IN clause - const inClause = articleNumbers.map(n => `'${n.replace(/'/g, "\\'")}'`).join(", "); - const checkSoql = `SELECT ArticleNumber FROM ${apiNameRaw} WHERE ArticleNumber IN (${inClause}) AND PublishStatus = 'Online' AND IsLatestVersion = true`; - const checkResult = await salesforceConnection.query(checkSoql, { autoFetch: true }); - const activeNumbers = new Set( - checkResult.records.map((r: any) => String(r.ArticleNumber)) - ); - - for (const src of sfSources) { - const m = src.name.match(/^\[SF:([^\]]+)\]/); - const articleNumber = m ? m[1] : null; - if (articleNumber && !activeNumbers.has(articleNumber)) { - console.log(`[Salesforce KC] Removing stale source "${src.name}" (article ${articleNumber} no longer active)`); - try { - await deleteKnowledgeSourceById( - (cognigyApiUrl as string).trim(), - (cognigyApiKey as string).trim(), - (knowledgeStoreId as string).trim(), - src._id, - ); - } catch (err) { - const msg = err instanceof Error ? 
err.message : String(err); - console.warn(`[Salesforce KC] Could not remove stale source ${src._id}: ${msg}`); - } - } + // --- Stale source removal --- + // Only run in full sync — in incremental mode we don't have a complete + // picture of all current articles so can't safely identify stale sources. + if ((syncMode as string) !== "incremental") { + for (const source of sources) { + const extId = source.externalIdentifier || source.name; + if (!processedIds.has(extId)) { + console.log(`[Salesforce KC] Removing stale source: ${source.name}`); + try { + await api.deleteKnowledgeSource({ knowledgeSourceId: source.knowledgeSourceId }); + } catch (err) { + const msg = err instanceof Error ? err.message : String(err); + console.warn(`[Salesforce KC] Could not remove stale source ${source.knowledgeSourceId}: ${msg}`); } } - } catch (err) { - const msg = err instanceof Error ? err.message : String(err); - console.warn(`[Salesforce KC] Stale removal skipped due to error: ${msg}`); } } }