Skip to content
Merged

Test #108

168 changes: 168 additions & 0 deletions lib/emails/inbound/__tests__/extractRoomIdFromHtml.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,168 @@
import { describe, it, expect } from "vitest";
import { extractRoomIdFromHtml } from "../extractRoomIdFromHtml";

describe("extractRoomIdFromHtml", () => {
describe("Superhuman reply with conversation link in quoted content", () => {
  // Regression fixture for replies sent from Superhuman. The quoted footer link
  // has two representations of the conversation URL:
  //   - the href, which is a Resend tracking redirect with the destination
  //     URL-encoded (https:%2F%2Fchat.recoupable.com%2Fchat%2F<uuid>)
  //   - the link text, where every URL segment is split by <wbr /> tags
  // Either one must be enough for extractRoomIdFromHtml to recover the roomId.
  it("extracts roomId from Superhuman reply with wbr tags in link text", () => {
    // This is the actual HTML from a Superhuman reply where the link text
    // contains <wbr /> tags for word breaking
    const html = `<html>

<head></head>

<body>
<div>
<div>
<div>
<div class="">Send a picture of him <br /></div>
<div class=""><br /></div>
</div>
<div>
<div style="display: none; border: 0px; width: 0px; height: 0px; overflow: hidden; visibility: hidden;"><img src="https://r.superhuman.com/4640qXWivTiaNi_anz1bstqoUbWlYj8nnSM0Y-NWmoL_OZdXZ1Zq-_DSPSu7r6M_NMQJAgHCnrKL5OisY6deh83uz8MfXoijSTOwhFcnM5Ya0RU8q8kZDoD0MVTLFtwDxERoN1wu0T-LgI8TDjcWI8K1HEns5_8ETb2EF1fetEenZgrj73FE6Q.gif" alt=" " width="1" height="0" style="display: none; border: 0px; width: 0px; height: 0px; overflow: hidden; visibility: hidden;" /><!-- --></div><br />
<div class="gmail_signature">
<div style="clear:both">Sent via <a href="https://sprh.mn/?vip=sidney@recoupable.com" target="_blank">Superhuman</a></div><br />
</div>
</div><br />
<div>
<div class="gmail_quote">On Fri, Jan 09, 2026 at 11:59 AM, Agent by Recoup <span dir="ltr">&lt;<a href="mailto:agent@recoupable.com" target="_blank">agent@recoupable.com</a>&gt;</span> wrote:<br />
<blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">
<div class="gmail_extra">
<div class="gmail_quote sh-color-black sh-color">
<p class="sh-color-black sh-color">Short answer: Brian Kernighan.</p>
<p class="sh-color-black sh-color">Details: the earliest known use in computing appears in Kernighan's 1972 tutorial for the B language (the "hello, world!" example). It was then popularized by Kernighan &amp; Ritchie's 1978 book The C Programming Language. (There are older claims—BCPL examples from the late 1960s and the exact phrase appeared as a radio catchphrase in the 1950s—but Kernighan is usually credited for putting it into programming tradition.)</p>
<p class="sh-color-black sh-color">Want the sources/links?</p>


<hr style="margin-top:24px;margin-bottom:16px;border:none;border-top:1px solid #e5e7eb;" class="sh-color-grey sh-color" />
<p style="font-size:12px;color:#6b7280;margin:0 0 4px;" class="sh-color-grey sh-color">
Note: you can reply directly to this email to continue the conversation.
</p>
<p style="font-size:12px;color:#6b7280;margin:0;" class="sh-color-grey sh-color">
Or continue the conversation on Recoup:
<a href="https://14158f8b1cbe93481ac078c1f43f3792.us-east-1.resend-links.com/CL0/https:%2F%2Fchat.recoupable.com%2Fchat%2Fd5c473ec-04cf-4a23-a577-e0dc71542392/1/0100019ba3b2dbec-832401f0-a3c6-4478-b6bf-3b0b06b7251a-000000/OomH25B53Pym0ykT2YYxbKx0c_NEhvJ3oFfBzpKKdVk=439" rel="noopener noreferrer" target="_blank" class="sh-color-blue sh-color">
https:/<wbr />/<wbr />chat.<wbr />recoupable.<wbr />com/<wbr />chat/<wbr />d5c473ec-04cf-4a23-a577-e0dc71542392
</a>
</p>
</div>
</div>
</blockquote>
</div>
</div><br />
</div>
</div>
</body>

</html>`;

    const result = extractRoomIdFromHtml(html);

    // The UUID embedded in both the tracking href and the <wbr />-split text.
    expect(result).toBe("d5c473ec-04cf-4a23-a577-e0dc71542392");
  });
});

describe("Gmail reply with proper threading", () => {
  // Gmail keeps the original message inside a gmail_quote blockquote; the
  // conversation link there is a plain, un-encoded href.
  it("extracts roomId from Gmail reply with quoted content", () => {
    const expectedRoomId = "a1b2c3d4-e5f6-7890-abcd-ef1234567890";
    const gmailReplyHtml = `
<html>
<body>
<p>Thanks for the info!</p>
<div class="gmail_quote">
<blockquote>
<p>Original message here</p>
<p>Continue the conversation: <a href="https://chat.recoupable.com/chat/a1b2c3d4-e5f6-7890-abcd-ef1234567890">https://chat.recoupable.com/chat/a1b2c3d4-e5f6-7890-abcd-ef1234567890</a></p>
</blockquote>
</div>
</body>
</html>
`;

    expect(extractRoomIdFromHtml(gmailReplyHtml)).toBe(expectedRoomId);
  });
});

describe("no conversation ID", () => {
  // Every case here must yield undefined: there is either no input at all or
  // no well-formed chat.recoupable.com conversation link in it.

  it("returns undefined for undefined input", () => {
    expect(extractRoomIdFromHtml(undefined)).toBeUndefined();
  });

  it("returns undefined for empty string", () => {
    expect(extractRoomIdFromHtml("")).toBeUndefined();
  });

  it("returns undefined when no chat link present", () => {
    const plainHtml = "<html><body><p>This email has no Recoup chat link.</p></body></html>";

    expect(extractRoomIdFromHtml(plainHtml)).toBeUndefined();
  });

  it("returns undefined for invalid UUID format in link", () => {
    // Right host and path, but the trailing segment is not a UUID.
    const badUuidHtml = '<a href="https://chat.recoupable.com/chat/not-a-valid-uuid">link</a>';

    expect(extractRoomIdFromHtml(badUuidHtml)).toBeUndefined();
  });

  it("returns undefined for wrong domain", () => {
    // Well-formed UUID, but the link points at a different host.
    const foreignHostHtml =
      '<a href="https://chat.otherdomain.com/chat/550e8400-e29b-41d4-a716-446655440000">link</a>';

    expect(extractRoomIdFromHtml(foreignHostHtml)).toBeUndefined();
  });
});

describe("edge cases", () => {
  it("handles URL-encoded link in href attribute", () => {
    // Resend tracking redirects URL-encode the destination
    const trackedLinkHtml =
      '<a href="https://tracking.example.com/redirect/https:%2F%2Fchat.recoupable.com%2Fchat%2F12345678-1234-1234-1234-123456789abc">Click here</a>';

    expect(extractRoomIdFromHtml(trackedLinkHtml)).toBe("12345678-1234-1234-1234-123456789abc");
  });

  it("extracts first roomId when multiple links present", () => {
    // Two valid conversation links: the one appearing first in the markup wins.
    const twoLinksHtml = `
<a href="https://chat.recoupable.com/chat/aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeeeee">First</a>
<a href="https://chat.recoupable.com/chat/11111111-2222-3333-4444-555555555555">Second</a>
`;

    expect(extractRoomIdFromHtml(twoLinksHtml)).toBe("aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeeeee");
  });

  it("handles link text with wbr tags breaking up the URL", () => {
    // The href carries no usable URL, so the id can only come from the
    // <wbr />-split link text.
    const wbrLinkTextHtml = `
<a href="#">
https:/<wbr />/<wbr />chat.<wbr />recoupable.<wbr />com/<wbr />chat/<wbr />abcdef12-3456-7890-abcd-ef1234567890
</a>
`;

    expect(extractRoomIdFromHtml(wbrLinkTextHtml)).toBe("abcdef12-3456-7890-abcd-ef1234567890");
  });

  it("handles mixed case in URL", () => {
    // Scheme, host and path are upper-cased; matching must be case-insensitive.
    const upperCaseUrlHtml =
      '<a href="HTTPS://CHAT.RECOUPABLE.COM/CHAT/12345678-1234-1234-1234-123456789abc">link</a>';

    expect(extractRoomIdFromHtml(upperCaseUrlHtml)).toBe("12345678-1234-1234-1234-123456789abc");
  });
});
});
42 changes: 41 additions & 1 deletion lib/emails/inbound/__tests__/getEmailRoomId.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -45,14 +45,54 @@ describe("getEmailRoomId", () => {
});
});

describe("secondary: extracting from email HTML", () => {
  it("returns roomId from HTML when text has no chat link", async () => {
    // Text body has no link and a references header is present, yet the HTML
    // link must be used and the memory_emails lookup must not run.
    const email = {
      text: "No chat link in text",
      html: '<a href="https://chat.recoupable.com/chat/abcdef12-3456-7890-abcd-ef1234567890">link</a>',
      headers: { references: "<old-message-id@example.com>" },
    } as GetReceivingEmailResponseSuccess;

    const roomId = await getEmailRoomId(email);

    expect(roomId).toBe("abcdef12-3456-7890-abcd-ef1234567890");
    expect(mockSelectMemoryEmails).not.toHaveBeenCalled();
  });

  it("handles Superhuman wbr tags in HTML link text", async () => {
    // No text body at all; the id is only present in <wbr />-split link text.
    const email = {
      text: undefined,
      html: '<a href="#">https:/<wbr />/<wbr />chat.<wbr />recoupable.<wbr />com/<wbr />chat/<wbr />d5c473ec-04cf-4a23-a577-e0dc71542392</a>',
      headers: {},
    } as GetReceivingEmailResponseSuccess;

    expect(await getEmailRoomId(email)).toBe("d5c473ec-04cf-4a23-a577-e0dc71542392");
  });

  it("prioritizes text over HTML", async () => {
    // Text and HTML carry different ids; the one from the text body wins.
    const email = {
      text: "https://chat.recoupable.com/chat/11111111-1111-1111-1111-111111111111",
      html: '<a href="https://chat.recoupable.com/chat/22222222-2222-2222-2222-222222222222">link</a>',
      headers: {},
    } as GetReceivingEmailResponseSuccess;

    expect(await getEmailRoomId(email)).toBe("11111111-1111-1111-1111-111111111111");
  });
});

describe("fallback: checking references header", () => {
it("falls back to references header when no chat link in text", async () => {
it("falls back to references header when no chat link in text or html", async () => {
mockSelectMemoryEmails.mockResolvedValue([
{ memories: { room_id: "22222222-3333-4444-5555-666666666666" } },
] as Awaited<ReturnType<typeof selectMemoryEmails>>);

const emailContent = {
text: "No chat link here",
html: "<p>No chat link in HTML either</p>",
headers: { references: "<message-id@example.com>" },
} as GetReceivingEmailResponseSuccess;

Expand Down
48 changes: 48 additions & 0 deletions lib/emails/inbound/extractRoomIdFromHtml.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
// Canonical 8-4-4-4-12 hexadecimal UUID (matched case-insensitively via the "i" flag).
const UUID_PATTERN = "[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}";

// The UUID must not be immediately followed by another alphanumeric character.
// Without this, an over-long id such as ".../chat/<uuid>def" would be silently
// truncated to its first 36 characters and returned as a (wrong) roomId.
const UUID_END = "(?![0-9a-z])";

// Matches chat.recoupable.com/chat/{uuid} in various formats:
// - Direct URL: https://chat.recoupable.com/chat/uuid
// - URL-encoded (in tracking redirects): chat.recoupable.com%2Fchat%2Fuuid
const CHAT_LINK_PATTERNS = [
  new RegExp(`https?://chat\\.recoupable\\.com/chat/(${UUID_PATTERN})${UUID_END}`, "i"),
  new RegExp(`chat\\.recoupable\\.com%2Fchat%2F(${UUID_PATTERN})${UUID_END}`, "i"),
];

// Pattern applied after <wbr> tags have been removed from the HTML. It has no
// scheme prefix, so it also matches a bare "chat.recoupable.com/chat/uuid".
// The link text version: "https:/<wbr />/<wbr />chat.<wbr />recoupable.<wbr />com/<wbr />chat/<wbr />uuid"
const WBR_STRIPPED_PATTERN = new RegExp(
  `chat\\.recoupable\\.com/chat/(${UUID_PATTERN})${UUID_END}`,
  "i",
);

// Any form of the word-break element: <wbr>, <wbr/>, <wbr />, <wbr class="x">, </wbr>.
const WBR_TAG_PATTERN = /<\/?wbr\b[^>]*>/gi;

/**
 * Extracts the roomId from email HTML by looking for a Recoup chat link.
 * Handles various formats including:
 * - Direct URLs in href attributes
 * - URL-encoded URLs in tracking redirect links
 * - Link text with <wbr /> tags inserted for word breaking (common in Superhuman)
 *
 * Patterns are tried in that order; within one pattern the first occurrence in
 * the HTML wins. Matching is case-insensitive, and the returned id is always
 * lower-cased so callers receive the canonical UUID spelling regardless of how
 * the mail client cased the link.
 *
 * @param html - The email HTML body
 * @returns The lower-cased roomId if found, undefined otherwise (including for
 *   undefined or empty input)
 */
export function extractRoomIdFromHtml(html: string | undefined): string | undefined {
  if (!html) return undefined;

  // Try direct URL patterns first (most common case)
  for (const pattern of CHAT_LINK_PATTERNS) {
    const roomId = html.match(pattern)?.[1];
    if (roomId) {
      return roomId.toLowerCase();
    }
  }

  // Fallback: strip <wbr> tags and try again
  // This handles Superhuman's link text formatting: "https:/<wbr />/<wbr />chat.<wbr />..."
  const strippedHtml = html.replace(WBR_TAG_PATTERN, "");
  const strippedRoomId = strippedHtml.match(WBR_STRIPPED_PATTERN)?.[1];

  return strippedRoomId?.toLowerCase();
}
14 changes: 12 additions & 2 deletions lib/emails/inbound/getEmailRoomId.ts
Original file line number Diff line number Diff line change
@@ -1,10 +1,13 @@
import type { GetReceivingEmailResponseSuccess } from "resend";
import selectMemoryEmails from "@/lib/supabase/memory_emails/selectMemoryEmails";
import { extractRoomIdFromText } from "./extractRoomIdFromText";
import { extractRoomIdFromHtml } from "./extractRoomIdFromHtml";

/**
* Extracts the roomId from an email. First checks the email text for a Recoup chat link,
* then falls back to looking up existing memory_emails via the references header.
* Extracts the roomId from an email. Checks multiple sources in order:
* 1. Email text body for a Recoup chat link
* 2. Email HTML body for a Recoup chat link (handles Superhuman's wbr tags)
* 3. References header to look up existing memory_emails
*
* @param emailContent - The email content from Resend's Receiving API
* @returns The roomId if found, undefined otherwise
Expand All @@ -18,6 +21,13 @@ export async function getEmailRoomId(
return roomIdFromText;
}

// Secondary: check email HTML for Recoup chat link
// This handles clients like Superhuman that insert <wbr /> tags in link text
const roomIdFromHtml = extractRoomIdFromHtml(emailContent.html);
if (roomIdFromHtml) {
return roomIdFromHtml;
}

// Fallback: check references header for existing memory_emails
const references = emailContent.headers?.references;
if (!references) {
Expand Down