Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
168 changes: 168 additions & 0 deletions lib/emails/inbound/__tests__/extractRoomIdFromHtml.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,168 @@
import { describe, it, expect } from "vitest";
import { extractRoomIdFromHtml } from "../extractRoomIdFromHtml";

describe("extractRoomIdFromHtml", () => {
// Regression fixture: a full reply captured from the Superhuman mail client, where the
// original Recoup message (and its "continue the conversation" link) sits in the quoted block.
describe("Superhuman reply with conversation link in quoted content", () => {
it("extracts roomId from Superhuman reply with wbr tags in link text", () => {
// This is the actual HTML from a Superhuman reply where the link text
// contains <wbr /> tags for word breaking.
//
// In this fixture the chat URL never appears as one plain, unbroken string:
// - the anchor's href is a resend-links.com tracking redirect whose destination is
//   URL-encoded (https:%2F%2Fchat.recoupable.com%2Fchat%2F<uuid>), and
// - the visible link text is split into pieces by <wbr /> tags.
// The extractor therefore has to recover the ID from one of those two forms.
//
// NOTE(review): the `<p cor-black sh-color">` paragraph in the quoted body looks truncated
// (presumably `class="sh-color-black sh-color"`). It does not affect extraction, but confirm
// against the captured email before relying on the fixture being byte-exact.
const html = `<html>

<head></head>

<body>
<div>
<div>
<div>
<div class="">Send a picture of him <br /></div>
<div class=""><br /></div>
</div>
<div>
<div style="display: none; border: 0px; width: 0px; height: 0px; overflow: hidden; visibility: hidden;"><img src="https://r.superhuman.com/4640qXWivTiaNi_anz1bstqoUbWlYj8nnSM0Y-NWmoL_OZdXZ1Zq-_DSPSu7r6M_NMQJAgHCnrKL5OisY6deh83uz8MfXoijSTOwhFcnM5Ya0RU8q8kZDoD0MVTLFtwDxERoN1wu0T-LgI8TDjcWI8K1HEns5_8ETb2EF1fetEenZgrj73FE6Q.gif" alt=" " width="1" height="0" style="display: none; border: 0px; width: 0px; height: 0px; overflow: hidden; visibility: hidden;" /><!-- --></div><br />
<div class="gmail_signature">
<div style="clear:both">Sent via <a href="https://sprh.mn/[email protected]" target="_blank">Superhuman</a></div><br />
</div>
</div><br />
<div>
<div class="gmail_quote">On Fri, Jan 09, 2026 at 11:59 AM, Agent by Recoup <span dir="ltr">&lt;<a href="mailto:[email protected]" target="_blank">[email protected]</a>&gt;</span> wrote:<br />
<blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">
<div class="gmail_extra">
<div class="gmail_quote sh-color-black sh-color">
<p class="sh-color-black sh-color">Short answer: Brian Kernighan.</p>
<p class="sh-color-black sh-color">Details: the earliest known use in computing appears in Kernighan's 1972 tutorial for the B language (the "hello, world!" example). It was then popularized by Kernighan &amp; Ritchie's 1978 book The C Programming Language. (There are older claims—BCPL examples from the late 1960s and the exact phrase appeared as a radio catchphrase in the 1950s—but Kernighan is usually credited for putting it into programming tradition.)</p>
<p cor-black sh-color">Want the sources/links?</p>


<hr style="margin-top:24px;margin-bottom:16px;border:none;border-top:1px solid #e5e7eb;" class="sh-color-grey sh-color" />
<p style="font-size:12px;color:#6b7280;margin:0 0 4px;" class="sh-color-grey sh-color">
Note: you can reply directly to this email to continue the conversation.
</p>
<p style="font-size:12px;color:#6b7280;margin:0;" class="sh-color-grey sh-color">
Or continue the conversation on Recoup:
<a href="https://14158f8b1cbe93481ac078c1f43f3792.us-east-1.resend-links.com/CL0/https:%2F%2Fchat.recoupable.com%2Fchat%2Fd5c473ec-04cf-4a23-a577-e0dc71542392/1/0100019ba3b2dbec-832401f0-a3c6-4478-b6bf-3b0b06b7251a-000000/OomH25B53Pym0ykT2YYxbKx0c_NEhvJ3oFfBzpKKdVk=439" rel="noopener noreferrer" target="_blank" class="sh-color-blue sh-color">
https:/<wbr />/<wbr />chat.<wbr />recoupable.<wbr />com/<wbr />chat/<wbr />d5c473ec-04cf-4a23-a577-e0dc71542392
</a>
</p>
</div>
</div>
</blockquote>
</div>
</div><br />
</div>
</div>
</body>

</html>`;

const result = extractRoomIdFromHtml(html);

// The ID embedded in both the tracking href and the wbr-split link text.
expect(result).toBe("d5c473ec-04cf-4a23-a577-e0dc71542392");
});
});

// Gmail keeps the original message inside a gmail_quote blockquote; the chat link there is a
// plain, unbroken URL in both the href and the link text.
describe("Gmail reply with proper threading", () => {
  it("extracts roomId from Gmail reply with quoted content", () => {
    const gmailReply = `
<html>
<body>
<p>Thanks for the info!</p>
<div class="gmail_quote">
<blockquote>
<p>Original message here</p>
<p>Continue the conversation: <a href="https://chat.recoupable.com/chat/a1b2c3d4-e5f6-7890-abcd-ef1234567890">https://chat.recoupable.com/chat/a1b2c3d4-e5f6-7890-abcd-ef1234567890</a></p>
</blockquote>
</div>
</body>
</html>
`;

    expect(extractRoomIdFromHtml(gmailReply)).toBe("a1b2c3d4-e5f6-7890-abcd-ef1234567890");
  });
});

// Inputs that carry no usable Recoup chat link must yield undefined rather than a partial match.
describe("no conversation ID", () => {
  it("returns undefined for undefined input", () => {
    expect(extractRoomIdFromHtml(undefined)).toBeUndefined();
  });

  it("returns undefined for empty string", () => {
    expect(extractRoomIdFromHtml("")).toBeUndefined();
  });

  it("returns undefined when no chat link present", () => {
    const plainEmail = "<html><body><p>This email has no Recoup chat link.</p></body></html>";

    expect(extractRoomIdFromHtml(plainEmail)).toBeUndefined();
  });

  it("returns undefined for invalid UUID format in link", () => {
    // Right host and path, but the trailing segment is not a UUID.
    const malformedLink = '<a href="https://chat.recoupable.com/chat/not-a-valid-uuid">link</a>';

    expect(extractRoomIdFromHtml(malformedLink)).toBeUndefined();
  });

  it("returns undefined for wrong domain", () => {
    // Well-formed UUID, but served from a host other than chat.recoupable.com.
    const foreignLink =
      '<a href="https://chat.otherdomain.com/chat/550e8400-e29b-41d4-a716-446655440000">link</a>';

    expect(extractRoomIdFromHtml(foreignLink)).toBeUndefined();
  });
});

// Less common link shapes: encoded redirects, several links, wbr-split text, upper-case URLs.
describe("edge cases", () => {
  it("handles URL-encoded link in href attribute", () => {
    // Resend tracking redirects URL-encode the destination
    const trackedLink =
      '<a href="https://tracking.example.com/redirect/https:%2F%2Fchat.recoupable.com%2Fchat%2F12345678-1234-1234-1234-123456789abc">Click here</a>';

    expect(extractRoomIdFromHtml(trackedLink)).toBe("12345678-1234-1234-1234-123456789abc");
  });

  it("extracts first roomId when multiple links present", () => {
    const twoLinks = `
<a href="https://chat.recoupable.com/chat/aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeeeee">First</a>
<a href="https://chat.recoupable.com/chat/11111111-2222-3333-4444-555555555555">Second</a>
`;

    expect(extractRoomIdFromHtml(twoLinks)).toBe("aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeeeee");
  });

  it("handles link text with wbr tags breaking up the URL", () => {
    // The href is useless here ("#"), so the ID can only come from the wbr-split link text.
    const wbrSplitText = `
<a href="#">
https:/<wbr />/<wbr />chat.<wbr />recoupable.<wbr />com/<wbr />chat/<wbr />abcdef12-3456-7890-abcd-ef1234567890
</a>
`;

    expect(extractRoomIdFromHtml(wbrSplitText)).toBe("abcdef12-3456-7890-abcd-ef1234567890");
  });

  it("handles mixed case in URL", () => {
    const upperCaseLink =
      '<a href="HTTPS://CHAT.RECOUPABLE.COM/CHAT/12345678-1234-1234-1234-123456789abc">link</a>';

    expect(extractRoomIdFromHtml(upperCaseLink)).toBe("12345678-1234-1234-1234-123456789abc");
  });
});
});
42 changes: 41 additions & 1 deletion lib/emails/inbound/__tests__/getEmailRoomId.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -45,14 +45,54 @@ describe("getEmailRoomId", () => {
});
});

// Second lookup stage of getEmailRoomId: the HTML body is consulted only after the text body
// produced no roomId, and before the references-header lookup.
describe("secondary: extracting from email HTML", () => {
  it("returns roomId from HTML when text has no chat link", async () => {
    const email = {
      text: "No chat link in text",
      html: '<a href="https://chat.recoupable.com/chat/abcdef12-3456-7890-abcd-ef1234567890">link</a>',
      headers: { references: "<[email protected]>" },
    } as GetReceivingEmailResponseSuccess;

    expect(await getEmailRoomId(email)).toBe("abcdef12-3456-7890-abcd-ef1234567890");
    // A hit in the HTML must short-circuit the memory_emails lookup.
    expect(mockSelectMemoryEmails).not.toHaveBeenCalled();
  });

  it("handles Superhuman wbr tags in HTML link text", async () => {
    const email = {
      text: undefined,
      html: '<a href="#">https:/<wbr />/<wbr />chat.<wbr />recoupable.<wbr />com/<wbr />chat/<wbr />d5c473ec-04cf-4a23-a577-e0dc71542392</a>',
      headers: {},
    } as GetReceivingEmailResponseSuccess;

    expect(await getEmailRoomId(email)).toBe("d5c473ec-04cf-4a23-a577-e0dc71542392");
  });

  it("prioritizes text over HTML", async () => {
    // Text and HTML deliberately carry different IDs so the winner is unambiguous.
    const email = {
      text: "https://chat.recoupable.com/chat/11111111-1111-1111-1111-111111111111",
      html: '<a href="https://chat.recoupable.com/chat/22222222-2222-2222-2222-222222222222">link</a>',
      headers: {},
    } as GetReceivingEmailResponseSuccess;

    expect(await getEmailRoomId(email)).toBe("11111111-1111-1111-1111-111111111111");
  });
});

describe("fallback: checking references header", () => {
it("falls back to references header when no chat link in text", async () => {
it("falls back to references header when no chat link in text or html", async () => {
mockSelectMemoryEmails.mockResolvedValue([
{ memories: { room_id: "22222222-3333-4444-5555-666666666666" } },
] as Awaited<ReturnType<typeof selectMemoryEmails>>);

const emailContent = {
text: "No chat link here",
html: "<p>No chat link in HTML either</p>",
headers: { references: "<[email protected]>" },
} as GetReceivingEmailResponseSuccess;

Expand Down
48 changes: 48 additions & 0 deletions lib/emails/inbound/extractRoomIdFromHtml.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
const UUID_PATTERN = "[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}";

// Rejects a candidate whose 36 characters are directly followed by another hex digit, so an
// over-long token is treated as "not a UUID" instead of being truncated into a valid-looking ID.
const UUID_END = "(?![0-9a-f])";

// Requires the host to begin at a hostname boundary: start of input, an encoded slash (%2F),
// or any character that cannot be part of a hostname. This keeps hosts that merely END in
// "chat.recoupable.com" (e.g. "notchat.recoupable.com") from matching the protocol-less patterns.
const HOST_START = "(?:^|%2F|[^a-z0-9.-])";

// Matches chat.recoupable.com/chat/{uuid} in the formats that appear verbatim in the HTML:
// - Direct URL: https://chat.recoupable.com/chat/uuid
// - URL-encoded (in tracking redirects): chat.recoupable.com%2Fchat%2Fuuid
const CHAT_LINK_PATTERNS = [
  new RegExp(`https?://chat\\.recoupable\\.com/chat/(${UUID_PATTERN})${UUID_END}`, "i"),
  new RegExp(`${HOST_START}chat\\.recoupable\\.com%2Fchat%2F(${UUID_PATTERN})${UUID_END}`, "i"),
];

// Applied AFTER <wbr> tags have been removed from the HTML. Link text such as
// "https:/<wbr />/<wbr />chat.<wbr />recoupable.<wbr />com/<wbr />chat/<wbr />uuid"
// collapses to a plain URL, which this pattern then matches (protocol optional).
const WBR_STRIPPED_PATTERN = new RegExp(
  `${HOST_START}chat\\.recoupable\\.com/chat/(${UUID_PATTERN})${UUID_END}`,
  "i",
);

// Every spelling of the word-break tag: <wbr>, <wbr/>, <wbr />, <wbr class="…">, </wbr>.
const WBR_TAG_PATTERN = /<\/?wbr\b[^>]*>/gi;

/**
 * Extracts the roomId from email HTML by looking for a Recoup chat link.
 *
 * Lookup order (first hit wins):
 * 1. A direct `https://chat.recoupable.com/chat/{uuid}` URL anywhere in the raw HTML.
 * 2. A URL-encoded `chat.recoupable.com%2Fchat%2F{uuid}` destination, as found inside
 *    tracking redirect links.
 * 3. The plain URL after removing `<wbr>` tags, which some clients (e.g. Superhuman)
 *    insert into link text for word breaking.
 *
 * Matching is case-insensitive and the ID is returned exactly as it appears in the HTML.
 * A candidate is ignored when the UUID is immediately followed by further hex digits, or
 * when the host is only a suffix match for `chat.recoupable.com`.
 *
 * @param html - The email HTML body
 * @returns The roomId if found, undefined otherwise
 */
export function extractRoomIdFromHtml(html: string | undefined): string | undefined {
  if (!html) return undefined;

  // Try the forms that can appear verbatim first (most common case).
  for (const pattern of CHAT_LINK_PATTERNS) {
    const roomId = pattern.exec(html)?.[1];
    if (roomId) return roomId;
  }

  // Fallback: drop <wbr> tags so a URL that was split for word breaking becomes contiguous.
  const withoutWordBreaks = html.replace(WBR_TAG_PATTERN, "");
  return WBR_STRIPPED_PATTERN.exec(withoutWordBreaks)?.[1];
}
14 changes: 12 additions & 2 deletions lib/emails/inbound/getEmailRoomId.ts
Original file line number Diff line number Diff line change
@@ -1,10 +1,13 @@
import type { GetReceivingEmailResponseSuccess } from "resend";
import selectMemoryEmails from "@/lib/supabase/memory_emails/selectMemoryEmails";
import { extractRoomIdFromText } from "./extractRoomIdFromText";
import { extractRoomIdFromHtml } from "./extractRoomIdFromHtml";

/**
* Extracts the roomId from an email. First checks the email text for a Recoup chat link,
* then falls back to looking up existing memory_emails via the references header.
* Extracts the roomId from an email. Checks multiple sources in order:
* 1. Email text body for a Recoup chat link
* 2. Email HTML body for a Recoup chat link (handles Superhuman's wbr tags)
* 3. References header to look up existing memory_emails
*
* @param emailContent - The email content from Resend's Receiving API
* @returns The roomId if found, undefined otherwise
Expand All @@ -18,6 +21,13 @@ export async function getEmailRoomId(
return roomIdFromText;
}

// Secondary: check email HTML for Recoup chat link
// This handles clients like Superhuman that insert <wbr /> tags in link text
const roomIdFromHtml = extractRoomIdFromHtml(emailContent.html);
if (roomIdFromHtml) {
return roomIdFromHtml;
}

// Fallback: check references header for existing memory_emails
const references = emailContent.headers?.references;
if (!references) {
Expand Down