diff --git a/lib/emails/inbound/__tests__/extractRoomIdFromHtml.test.ts b/lib/emails/inbound/__tests__/extractRoomIdFromHtml.test.ts new file mode 100644 index 00000000..5fcf968b --- /dev/null +++ b/lib/emails/inbound/__tests__/extractRoomIdFromHtml.test.ts @@ -0,0 +1,168 @@ +import { describe, it, expect } from "vitest"; +import { extractRoomIdFromHtml } from "../extractRoomIdFromHtml"; + +describe("extractRoomIdFromHtml", () => { + describe("Superhuman reply with conversation link in quoted content", () => { + it("extracts roomId from Superhuman reply with wbr tags in link text", () => { + // This is the actual HTML from a Superhuman reply where the link text + // contains tags for word breaking + const html = ` + + + + +
+
+
+
Send a picture of him
+

+
+
+

+
+
Sent via Superhuman

+
+

+
+
On Fri, Jan 09, 2026 at 11:59 AM, Agent by Recoup <agent@recoupable.com> wrote:
+
+
+
+

Short answer: Brian Kernighan.

+

Details: the earliest known use in computing appears in Kernighan's 1972 tutorial for the B language (the "hello, world!" example). It was then popularized by Kernighan & Ritchie's 1978 book The C Programming Language. (There are older claims—BCPL examples from the late 1960s and the exact phrase appeared as a radio catchphrase in the 1950s—but Kernighan is usually credited for putting it into programming tradition.)

+

Want the sources/links?

+ + +
+

+ Note: you can reply directly to this email to continue the conversation. +

+

+ Or continue the conversation on Recoup: + + https://chat.recoupable.com/chat/d5c473ec-04cf-4a23-a577-e0dc71542392 + +

+
+
+
+
+

+
+
+ + +`; + + const result = extractRoomIdFromHtml(html); + + expect(result).toBe("d5c473ec-04cf-4a23-a577-e0dc71542392"); + }); + }); + + describe("Gmail reply with proper threading", () => { + it("extracts roomId from Gmail reply with quoted content", () => { + const html = ` + + +

Thanks for the info!

+
+
+

Original message here

+

Continue the conversation: https://chat.recoupable.com/chat/a1b2c3d4-e5f6-7890-abcd-ef1234567890

+
+
+ + + `; + + const result = extractRoomIdFromHtml(html); + + expect(result).toBe("a1b2c3d4-e5f6-7890-abcd-ef1234567890"); + }); + }); + + describe("no conversation ID", () => { + it("returns undefined for undefined input", () => { + const result = extractRoomIdFromHtml(undefined); + + expect(result).toBeUndefined(); + }); + + it("returns undefined for empty string", () => { + const result = extractRoomIdFromHtml(""); + + expect(result).toBeUndefined(); + }); + + it("returns undefined when no chat link present", () => { + const html = "

This email has no Recoup chat link.

"; + + const result = extractRoomIdFromHtml(html); + + expect(result).toBeUndefined(); + }); + + it("returns undefined for invalid UUID format in link", () => { + const html = + 'link'; + + const result = extractRoomIdFromHtml(html); + + expect(result).toBeUndefined(); + }); + + it("returns undefined for wrong domain", () => { + const html = + 'link'; + + const result = extractRoomIdFromHtml(html); + + expect(result).toBeUndefined(); + }); + }); + + describe("edge cases", () => { + it("handles URL-encoded link in href attribute", () => { + // Resend tracking redirects URL-encode the destination + const html = + 'Click here'; + + const result = extractRoomIdFromHtml(html); + + expect(result).toBe("12345678-1234-1234-1234-123456789abc"); + }); + + it("extracts first roomId when multiple links present", () => { + const html = ` + First + Second + `; + + const result = extractRoomIdFromHtml(html); + + expect(result).toBe("aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeeeee"); + }); + + it("handles link text with wbr tags breaking up the URL", () => { + const html = ` + + https://chat.recoupable.com/chat/abcdef12-3456-7890-abcd-ef1234567890 + + `; + + const result = extractRoomIdFromHtml(html); + + expect(result).toBe("abcdef12-3456-7890-abcd-ef1234567890"); + }); + + it("handles mixed case in URL", () => { + const html = + 'link'; + + const result = extractRoomIdFromHtml(html); + + expect(result).toBe("12345678-1234-1234-1234-123456789abc"); + }); + }); +}); diff --git a/lib/emails/inbound/__tests__/getEmailRoomId.test.ts b/lib/emails/inbound/__tests__/getEmailRoomId.test.ts index 690beb59..2850f7c3 100644 --- a/lib/emails/inbound/__tests__/getEmailRoomId.test.ts +++ b/lib/emails/inbound/__tests__/getEmailRoomId.test.ts @@ -45,14 +45,54 @@ describe("getEmailRoomId", () => { }); }); + describe("secondary: extracting from email HTML", () => { + it("returns roomId from HTML when text has no chat link", async () => { + const emailContent = { + text: "No chat link in text", + html: 'link', + headers: { references: "" }, + } as GetReceivingEmailResponseSuccess; + + const result = await getEmailRoomId(emailContent); + + expect(result).toBe("abcdef12-3456-7890-abcd-ef1234567890"); + expect(mockSelectMemoryEmails).not.toHaveBeenCalled(); + }); + + it("handles Superhuman wbr tags in HTML link text", async () => { + const emailContent = { + text: undefined, + html: 'https://chat.recoupable.com/chat/d5c473ec-04cf-4a23-a577-e0dc71542392', + headers: {}, + } as GetReceivingEmailResponseSuccess; + + const result = await getEmailRoomId(emailContent); + + expect(result).toBe("d5c473ec-04cf-4a23-a577-e0dc71542392"); + }); + + it("prioritizes text over HTML", async () => { + const emailContent = { + text: "https://chat.recoupable.com/chat/11111111-1111-1111-1111-111111111111", + html: 'link', + headers: {}, + } as GetReceivingEmailResponseSuccess; + + const result = await getEmailRoomId(emailContent); + + expect(result).toBe("11111111-1111-1111-1111-111111111111"); + }); + }); + describe("fallback: checking references header", () => { - it("falls back to references header when no chat link in text", async () => { + it("falls back to references header when no chat link in text or html", async () => { mockSelectMemoryEmails.mockResolvedValue([ { memories: { room_id: "22222222-3333-4444-5555-666666666666" } }, ] as Awaited>); const emailContent = { text: "No chat link here", + html: "

No chat link in HTML either

", headers: { references: "" }, } as GetReceivingEmailResponseSuccess; diff --git a/lib/emails/inbound/extractRoomIdFromHtml.ts b/lib/emails/inbound/extractRoomIdFromHtml.ts new file mode 100644 index 00000000..f637b17e --- /dev/null +++ b/lib/emails/inbound/extractRoomIdFromHtml.ts @@ -0,0 +1,48 @@ +const UUID_PATTERN = "[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}"; + +// Matches chat.recoupable.com/chat/{uuid} in various formats: +// - Direct URL: https://chat.recoupable.com/chat/uuid +// - URL-encoded (in tracking redirects): chat.recoupable.com%2Fchat%2Fuuid +const CHAT_LINK_PATTERNS = [ + new RegExp(`https?://chat\\.recoupable\\.com/chat/(${UUID_PATTERN})`, "i"), + new RegExp(`chat\\.recoupable\\.com%2Fchat%2F(${UUID_PATTERN})`, "i"), +]; + +// Pattern to find UUID after /chat/ or %2Fchat%2F in link text that may contain tags +// The link text version: "https:///chat.recoupable.com/chat/uuid" +const WBR_STRIPPED_PATTERN = new RegExp( + `chat\\.recoupable\\.com/chat/(${UUID_PATTERN})`, + "i", +); + +/** + * Extracts the roomId from email HTML by looking for a Recoup chat link. + * Handles various formats including: + * - Direct URLs in href attributes + * - URL-encoded URLs in tracking redirect links + * - Link text with tags inserted for word breaking (common in Superhuman) + * + * @param html - The email HTML body + * @returns The roomId if found, undefined otherwise + */ +export function extractRoomIdFromHtml(html: string | undefined): string | undefined { + if (!html) return undefined; + + // Try direct URL patterns first (most common case) + for (const pattern of CHAT_LINK_PATTERNS) { + const match = html.match(pattern); + if (match?.[1]) { + return match[1]; + } + } + + // Fallback: strip tags and try again + // This handles Superhuman's link text formatting: "https://chat...." + const strippedHtml = html.replace(//gi, ""); + const strippedMatch = strippedHtml.match(WBR_STRIPPED_PATTERN); + if (strippedMatch?.[1]) { + return strippedMatch[1]; + } + + return undefined; +} diff --git a/lib/emails/inbound/getEmailRoomId.ts b/lib/emails/inbound/getEmailRoomId.ts index ef889381..f12db939 100644 --- a/lib/emails/inbound/getEmailRoomId.ts +++ b/lib/emails/inbound/getEmailRoomId.ts @@ -1,10 +1,13 @@ import type { GetReceivingEmailResponseSuccess } from "resend"; import selectMemoryEmails from "@/lib/supabase/memory_emails/selectMemoryEmails"; import { extractRoomIdFromText } from "./extractRoomIdFromText"; +import { extractRoomIdFromHtml } from "./extractRoomIdFromHtml"; /** - * Extracts the roomId from an email. First checks the email text for a Recoup chat link, - * then falls back to looking up existing memory_emails via the references header. + * Extracts the roomId from an email. Checks multiple sources in order: + * 1. Email text body for a Recoup chat link + * 2. Email HTML body for a Recoup chat link (handles Superhuman's wbr tags) + * 3. References header to look up existing memory_emails * * @param emailContent - The email content from Resend's Receiving API * @returns The roomId if found, undefined otherwise @@ -18,6 +21,13 @@ export async function getEmailRoomId( return roomIdFromText; } + // Secondary: check email HTML for Recoup chat link + // This handles clients like Superhuman that insert tags in link text + const roomIdFromHtml = extractRoomIdFromHtml(emailContent.html); + if (roomIdFromHtml) { + return roomIdFromHtml; + } + // Fallback: check references header for existing memory_emails const references = emailContent.headers?.references; if (!references) {