
Commit

fix: Simple validations for crawled URLs
MohamedBassem committed Mar 21, 2024
1 parent 7d7d375 commit 7170750
Showing 1 changed file with 17 additions and 1 deletion.
apps/workers/crawlerWorker.ts (18 changes: 17 additions & 1 deletion)
@@ -113,6 +113,22 @@ async function getBookmarkUrl(bookmarkId: string) {
return bookmark.url;
}

+/**
+ * This provides some "basic" protection from malicious URLs. However, all of these
+ * checks can easily be circumvented by pointing the origin's DNS at localhost, or
+ * with redirects.
+ */
+function validateUrl(url: string) {
+  const urlParsed = new URL(url);
+  if (urlParsed.protocol != "http:" && urlParsed.protocol != "https:") {
+    throw new Error(`Unsupported URL protocol: ${urlParsed.protocol}`);
+  }
+
+  if (["localhost", "127.0.0.1", "0.0.0.0"].includes(urlParsed.hostname)) {
+    throw new Error(`Link hostname rejected: ${urlParsed.hostname}`);
+  }
+}
+
async function crawlPage(url: string) {
assert(browser);
const context = await browser.createBrowserContext();
@@ -158,7 +174,7 @@ async function runCrawler(job: Job<ZCrawlLinkRequest, void>) {
logger.info(
`[Crawler][${jobId}] Will crawl "${url}" for link with id "${bookmarkId}"`,
);
-// TODO(IMPORTANT): Run security validations on the input URL (e.g. deny localhost, etc)
+validateUrl(url);

const htmlContent = await crawlPage(url);

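The doc comment in the diff points out that these string checks are easy to bypass: a DNS record can point a public-looking hostname at localhost or an internal address, and a server can redirect to one after the initial check. A common hardening step is to resolve the hostname and reject private, loopback, and link-local addresses before crawling. The sketch below is not part of this commit; the helper name `validateUrlStrict`, the address ranges, and the use of Node's `dns/promises` lookup are illustrative assumptions.

```typescript
// Sketch only, not part of this commit: resolve the hostname and reject
// private / loopback / link-local addresses, so a DNS record pointing at an
// internal address is caught, not just the literal strings "localhost" etc.
import { lookup } from "node:dns/promises";

const PRIVATE_IPV4_PREFIXES = ["0.", "10.", "127.", "169.254.", "192.168."];

function isPrivateIpv4(address: string): boolean {
  if (PRIVATE_IPV4_PREFIXES.some((prefix) => address.startsWith(prefix))) {
    return true;
  }
  // 172.16.0.0/12
  const [a, b] = address.split(".").map(Number);
  return a === 172 && b >= 16 && b <= 31;
}

function isPrivateIpv6(address: string): boolean {
  const lower = address.toLowerCase();
  // loopback (::1), unique-local (fc00::/7), link-local (fe80::/10)
  return (
    lower === "::1" ||
    lower.startsWith("fc") ||
    lower.startsWith("fd") ||
    lower.startsWith("fe80")
  );
}

async function validateUrlStrict(url: string): Promise<void> {
  const urlParsed = new URL(url);
  if (urlParsed.protocol !== "http:" && urlParsed.protocol !== "https:") {
    throw new Error(`Unsupported URL protocol: ${urlParsed.protocol}`);
  }
  // Resolve the hostname before crawling; reject anything that lands on a
  // private or loopback address.
  const { address, family } = await lookup(urlParsed.hostname);
  if (
    (family === 4 && isPrivateIpv4(address)) ||
    (family === 6 && isPrivateIpv6(address))
  ) {
    throw new Error(`Link hostname resolves to a private address: ${address}`);
  }
}
```

Even with resolution in place, a crawled page can still redirect to an internal address after the initial check, so the same validation would need to be reapplied to every redirect hop (or redirects disabled) for full coverage, as the commit's own comment notes.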
