Skip to content

Commit

Permalink
feat(scrapeUrl/fire-engine): add blockAds flag (FIR-692) (#1106)
Browse files Browse the repository at this point in the history
* feat(scrapeUrl/fire-engine): add blockAds flag

* feat(v1/scrape): blockAds test
mogery authored Jan 29, 2025
1 parent 5733b82 commit d09e060
Showing 5 changed files with 28 additions and 2 deletions.
25 changes: 23 additions & 2 deletions apps/api/src/__tests__/snips/scrape.test.ts
Original file line number Diff line number Diff line change
@@ -35,8 +35,29 @@ describe("Scrape tests", () => {
"this is fake data coming from the mocking system!",
);
});

// Exercises the `blockAds` scrape option against a public ad-blocker test page.
// NOTE(review): "f-e" presumably stands for fire-engine, i.e. these cases only
// pass when that backend serves the request — confirm against the test setup.
describe("Ad blocking (f-e dependant)", () => {
  // Page used by both cases, and the ad-host fragment whose presence in the
  // resulting markdown is checked.
  const adTestPageUrl = "https://canyoublockit.com/testing/";
  const adHostFragment = ".g.doubleclick.net/";
  // Per-test timeout in milliseconds.
  const testTimeoutMs = 10000;

  // `blockAds` is omitted from the request; the ad host must be absent.
  it.concurrent("blocks ads by default", async () => {
    const result = await scrape({ url: adTestPageUrl });

    expectScrapeToSucceed(result);
    expect(result.body.data.markdown).not.toContain(adHostFragment);
  }, testTimeoutMs);

  // `blockAds: false` is sent explicitly; the ad host must be present.
  it.concurrent("doesn't block ads if explicitly disabled", async () => {
    const result = await scrape({ url: adTestPageUrl, blockAds: false });

    expectScrapeToSucceed(result);
    expect(result.body.data.markdown).toContain(adHostFragment);
  }, testTimeoutMs);
});

describe("Location API", () => {
describe("Location API (f-e dependant)", () => {
it.concurrent("works without specifying an explicit location", async () => {
const response = await scrape({
url: "https://iplocation.com",
@@ -54,5 +75,5 @@ describe("Scrape tests", () => {
expectScrapeToSucceed(response);
expect(response.body.data.markdown).toContain("| Country | United States |");
});
})
});
});
1 change: 1 addition & 0 deletions apps/api/src/controllers/v1/types.ts
Original file line number Diff line number Diff line change
@@ -186,6 +186,7 @@ export const scrapeOptions = z
removeBase64Images: z.boolean().default(true),
fastMode: z.boolean().default(false),
useMock: z.string().optional(),
blockAds: z.boolean().default(true),
})
.strict(strictMessage);

2 changes: 2 additions & 0 deletions apps/api/src/scraper/scrapeURL/engines/fire-engine/index.ts
Original file line number Diff line number Diff line change
@@ -197,6 +197,7 @@ export async function scrapeURLWithFireEngineChromeCDP(
mobile: meta.options.mobile,
timeout, // TODO: better timeout logic
disableSmartWaitCache: meta.internalOptions.disableSmartWaitCache,
blockAds: meta.options.blockAds,
// TODO: scrollXPaths
};

@@ -271,6 +272,7 @@ export async function scrapeURLWithFireEnginePlaywright(
fullPageScreenshot: meta.options.formats.includes("screenshot@fullPage"),
wait: meta.options.waitFor,
geolocation: meta.options.geolocation ?? meta.options.location,
blockAds: meta.options.blockAds,

timeout,
};
Original file line number Diff line number Diff line change
@@ -37,6 +37,7 @@ export type FireEngineScrapeRequestChromeCDP = {
blockMedia?: true; // cannot be false
mobile?: boolean;
disableSmartWaitCache?: boolean;
blockAds?: boolean; // default: true
};

export type FireEngineScrapeRequestPlaywright = {
1 change: 1 addition & 0 deletions apps/js-sdk/firecrawl/src/index.ts
Original file line number Diff line number Diff line change
@@ -93,6 +93,7 @@ export interface CrawlScrapeOptions {
mobile?: boolean;
skipTlsVerification?: boolean;
removeBase64Images?: boolean;
blockAds?: boolean;
}

export type Action = {

0 comments on commit d09e060

Please sign in to comment.