diff --git a/src/background/background.js b/src/background/background.js index c35dab9..ed6142b 100644 --- a/src/background/background.js +++ b/src/background/background.js @@ -8,7 +8,21 @@ */ import { initDB } from './initdb.js'; -import handlers from './handlers/index.js'; +// Use named imports for handlers +import { + fetchAndStoreHistory, + getMostVisitedSites, + getLabelCounts, + getCOCounts, + getVisitsPerHour, + getDailyVisitCounts, + getCategoryTrends, + getTransitionPatterns, + getRecencyFrequency, + getUniqueWebsites + // Note: getTimeSpentPerSite is not exported from handlers/index.js, remove if not needed or add it there. +} from './handlers/index.js'; +import handlers from './handlers/index.js'; // Keep default import for window assignment if needed /** * Initialize the extension’s database on startup. @@ -54,10 +68,11 @@ browser.runtime.onMessage.addListener((message, sender, sendResponse) => { return true; } - if (action === "getTimeSpentPerSite") { - getTimeSpentPerSite(days, limit).then(sendResponse); - return true; - } + // Note: getTimeSpentPerSite is not defined/imported. Commenting out or implement handler. + // if (action === "getTimeSpentPerSite") { + // getTimeSpentPerSite(days, limit).then(sendResponse); + // return true; + // } if (action === "getCategoryTrends") { getCategoryTrends(days).then(sendResponse); diff --git a/src/background/services/blacklist.js b/src/background/services/blacklist.js index ad34764..e9e1e3d 100644 --- a/src/background/services/blacklist.js +++ b/src/background/services/blacklist.js @@ -1,76 +1,150 @@ -// heavily ai generated this file can be optimized a lot +/** + * @fileoverview Manages fetching, processing, and checking against domain blocklists. + * Supports multiple blocklist sources (e.g., OISD full, OISD NSFW) and combines them. + * Provides functionality to check if a given URL's domain should be blocked based on the loaded lists. 
+ */ + import { parse } from 'tldts'; -import { OISD_BLOCKLIST_URL } from '../../config'; +import { + OISD_BLOCKLIST_URL, + NSFW_OISD_BLOCKLIST_URL, + BLOCKLIST_ENABLED, + NSFW_BLOCKLIST_ENABLED +} from '../../config'; // Internal state -let oisdBlocklist = new Set(); -let oisdRegexList = []; +/** + * Combined set of all domains from enabled blocklists. + * @type {Set} + */ +let combinedBlocklist = new Set(); +/** + * Flag indicating whether the blocklist(s) have been successfully loaded. + * @type {boolean} + */ let isBlocklistLoaded = false; +/** + * Promise that resolves when the blocklist loading process is complete. + * Used to prevent multiple concurrent load attempts. + * @type {Promise<{ domains: Set }> | null} + */ let blocklistLoadPromise = null; /** - * Load the OISD blocklist (domains) exactly once. - * @returns {Promise<{ domains: Set, regexes: RegExp[] }>} + * Fetches and processes a single blocklist URL. + * Downloads the list, parses it, filters comments/invalid lines, + * normalizes domains, and returns a Set of valid domains. + * @param {string} url The URL to fetch the blocklist from. + * @returns {Promise<Set<string>>} A promise resolving to a Set of domains from the list. + * Returns an empty Set if fetching or processing fails. + */ +async function fetchAndProcessList(url) { + try { + const response = await fetch(url); + if (!response.ok) { + throw new Error(`HTTP Error: ${response.status} for ${url}`); + } + const text = await response.text(); + const lines = text.split('\n').map((l) => l.trim()); + + return new Set( + lines + // Filter out comments, empty lines, etc. 
+ .filter((l) => l && !/^[#!/\s]/.test(l)) + // Normalize: lowercase, remove leading wildcards/dots + .map((l) => l.toLowerCase().replace(/^[*.]+/, '')) + // Filter for valid domain-like strings, excluding IPs + .filter( + (domain) => + /^[a-z0-9.-]+$/.test(domain) && !/^\d+(\.\d+){3}$/.test(domain), + ), + ); + } catch (err) { + console.error(`[blacklist] Failed to load list from ${url}:`, err); + return new Set(); // Return empty set on error for this list + } +} + + +/** + * Loads the configured blocklist(s) based on environment variables. + * Ensures lists are fetched and processed only once. + * Merges domains from multiple enabled lists into `combinedBlocklist`. + * @returns {Promise<{ domains: Set }>} A promise that resolves with an object containing the combined set of blocked domains. + * The promise ensures that the loading process completes before resolving. */ export function loadBlocklist() { if (blocklistLoadPromise) return blocklistLoadPromise; - if (isBlocklistLoaded) return Promise.resolve({ domains: oisdBlocklist, regexes: oisdRegexList }); + if (isBlocklistLoaded) return Promise.resolve({ domains: combinedBlocklist }); blocklistLoadPromise = (async () => { + const listsToFetch = []; + if (BLOCKLIST_ENABLED) { + listsToFetch.push(fetchAndProcessList(OISD_BLOCKLIST_URL)); + } + if (NSFW_BLOCKLIST_ENABLED) { + listsToFetch.push(fetchAndProcessList(NSFW_OISD_BLOCKLIST_URL)); + } + + // If no lists are enabled, resolve immediately with empty set + if (listsToFetch.length === 0) { + console.warn('[blacklist] No blocklists enabled.'); + isBlocklistLoaded = true; + combinedBlocklist = new Set(); + return { domains: combinedBlocklist }; + } + try { - const response = await fetch(OISD_BLOCKLIST_URL); - if (!response.ok) { - throw new Error(`HTTP Error: ${response.status}`); - } - const text = await response.text(); - const lines = text.split('\n').map((l) => l.trim()); - - oisdBlocklist = new Set( - lines - .filter((l) => l && !/^[#!/\s]/.test(l)) - 
.map((l) => l.toLowerCase().replace(/^[*.]+/, '')) - .filter( - (domain) => - /^[a-z0-9.-]+$/.test(domain) && !/^\d+(\.\d+){3}$/.test(domain), - ), - ); + const results = await Promise.all(listsToFetch); + // Combine all fetched sets into one + combinedBlocklist = new Set(results.flatMap(domainSet => [...domainSet])); + console.log(`[blacklist] Loaded ${combinedBlocklist.size} domains.`); isBlocklistLoaded = true; - return { domains: oisdBlocklist, regexes: oisdRegexList }; + return { domains: combinedBlocklist }; } catch (err) { - console.error('[blacklist] load failed', err); - isBlocklistLoaded = false; - oisdBlocklist.clear(); - oisdRegexList = []; - blocklistLoadPromise = null; - return { domains: oisdBlocklist, regexes: oisdRegexList }; + // This catch might be redundant if fetchAndProcessList handles errors, + // but kept for safety during Promise.all failure. + console.error('[blacklist] load failed during Promise.all:', err); + isBlocklistLoaded = false; // Reset state on major failure + combinedBlocklist.clear(); + blocklistLoadPromise = null; // Allow retry on next call + // Return empty set on major failure, but ensure promise resolves + return { domains: new Set() }; + } })(); return blocklistLoadPromise; } -// Kick off initial load in the background -loadBlocklist(); - /** - * Return true if the given URL’s hostname is on the blocklist. - * Waits until the list is loaded before checking. - * @param {string} url - * @returns {Promise} + * Checks if the domain (or its parent domains) of a given URL is present in the loaded blocklist. + * It ensures the blocklist is loaded before performing the check. + * Checks exact hostname, root domain, and parent domains. + * Logs blocked domains and the matched rule. + * @param {string} url The URL string to check. + * @returns {Promise} A promise resolving to `true` if the URL's domain should be blocked, `false` otherwise. + * Returns `true` if the URL is malformed and cannot be parsed. 
*/ export async function shouldBlockDomain(url) { - const { domains, regexes } = await loadBlocklist(); + // Ensure the list is loaded (or load attempt finished) + const { domains } = await loadBlocklist(); + + // If loading failed or no lists enabled, domains set might be empty. + // An empty set means nothing will be blocked. try { const { hostname } = new URL(url); const host = hostname.toLowerCase(); + const { domain: root } = parse(host); // 1) Match against regex rules (currently unused) - for (const rx of regexes) { - if (rx.test(host)) return true; - } + // Commenting out as 'regexes' is not defined + // for (const rx of regexes) { + // if (rx.test(host)) return true; + // } // 2) Exact hostname match if (domains.has(host)) return true; diff --git a/src/config.js b/src/config.js index 7e4ed55..ad83c56 100644 --- a/src/config.js +++ b/src/config.js @@ -23,11 +23,17 @@ export const DAYS_MAP = { }; /** - * URL of the OISD blocklist (domains or regex patterns). + * URL of the OISD blocklist wildcard domains list. * @constant {string} */ export const OISD_BLOCKLIST_URL = 'https://big.oisd.nl/domainswild'; +/** + * URL of the NSFW OISD blocklist wildcard domains list. + * @constant {string} + */ +export const NSFW_OISD_BLOCKLIST_URL = 'https://nsfw.oisd.nl/domainswild'; + /** * List of category labels used by the ML classifier and reporting. * @constant {string[]} @@ -64,6 +70,18 @@ export const ML_CACHE_SIMILARITY_THRESHOLD = 0.75; */ export const CONCURRENCY_ENABLED = true; +/** + * Enable the main OISD blocklist. + * @constant {boolean} + */ +export const BLOCKLIST_ENABLED = true; + +/** + * Enable the NSFW OISD blocklist. + * @constant {boolean} + */ +export const NSFW_BLOCKLIST_ENABLED = false; // Set to true to enable + /** * Configuration object passed to the ML engine at initialization. 
look at this link for more information: https://huggingface.co/firefoxrecap/URL-TITLE-classifier * @constant {Object} diff --git a/src/manifest.json b/src/manifest.json index 4a7d369..07e0fb4 100644 --- a/src/manifest.json +++ b/src/manifest.json @@ -9,7 +9,8 @@ "tabs", "webNavigation", "downloads", - "https://big.oisd.nl/*" + "https://big.oisd.nl/*", + "https://nsfw.oisd.nl/*" ], "optional_permissions": [ "trialML" @@ -34,5 +35,5 @@ "web_accessible_resources": [ "recap.html" ], - "content_security_policy": "script-src 'self' 'unsafe-eval'; object-src 'self'; connect-src https://big.oisd.nl https://nsfw.oisd.nl blob:;" } \ No newline at end of file