diff --git a/.bumpy/audit-skip-nested-projects.md b/.bumpy/audit-skip-nested-projects.md new file mode 100644 index 000000000..8c313f19f --- /dev/null +++ b/.bumpy/audit-skip-nested-projects.md @@ -0,0 +1,8 @@ +--- +varlock: patch +--- + +Improved `audit` and `init` env var scanning in monorepos: + +- Scanning no longer descends into child packages — any subdirectory with its own `package.json` or `.env.schema` is treated as a separate package and skipped. This fixes spurious results and makes scanning much faster. +- Pure execution-environment plumbing (`PATH`, `HOME`, `SHELL`, `NODE_OPTIONS`, `npm_*`, etc.) is no longer reported as "missing in schema" by `audit`, nor added to inferred schemas by `init`. App-meaningful vars like `NODE_ENV` and CI variables are still reported. diff --git a/packages/varlock-website/src/content/docs/reference/cli-commands.mdx b/packages/varlock-website/src/content/docs/reference/cli-commands.mdx index 1c74a7821..fa4c56560 100644 --- a/packages/varlock-website/src/content/docs/reference/cli-commands.mdx +++ b/packages/varlock-website/src/content/docs/reference/cli-commands.mdx @@ -497,6 +497,8 @@ This command reports two drift categories: - **Missing in schema**: key is used in code but not declared in schema - **Unused in schema**: key is declared in schema but not referenced in code +Pure execution-environment plumbing — variables that reflect *where/how* the process runs (e.g. `PATH`, `HOME`, `SHELL`, `NODE_OPTIONS`, `npm_*`) — is read from `process.env` in normal code but is never part of your schema, so it is **not** reported as missing. Semantically meaningful variables your app or CI may depend on (e.g. `NODE_ENV`, `CI`, GitHub Actions vars) are still reported, so you can decide whether to declare them or suppress them with [`@auditIgnore`](/reference/item-decorators/#auditignore). 
+ Exit codes: - `0` when schema and code are in sync - `1` when drift is detected @@ -537,6 +539,10 @@ varlock audit -i vendor -i generated When `--path` points to a directory, code scanning is scoped to that directory tree. When it points to a file, scanning is scoped to that file's parent directory. ::: +:::note[Monorepos] +Code scanning does not descend into nested projects — any subdirectory that contains its own `package.json` or `.env.schema` is treated as a separate package and skipped. This keeps a parent package's audit from picking up env var references that belong to child packages, and works even in a fresh monorepo where the child packages haven't run `varlock init` yet. Run `varlock audit` inside each package to audit it against its own schema. +::: + :::tip[Suppressing false positives] - Use [`@auditIgnore`](/reference/item-decorators/#auditignore) on individual schema items that are only consumed by external tools and won't appear in your application code. - Use [`@auditIgnorePaths()`](/reference/root-decorators/#auditignorepaths) to exclude directories (e.g., vendored code, generated files) from the code scan. diff --git a/packages/varlock/src/cli/commands/audit.command.ts b/packages/varlock/src/cli/commands/audit.command.ts index e9428651a..114dc836e 100644 --- a/packages/varlock/src/cli/commands/audit.command.ts +++ b/packages/varlock/src/cli/commands/audit.command.ts @@ -14,6 +14,7 @@ import { } from '../helpers/env-var-scanner'; import { gracefulExit } from 'exit-hook'; import { diffSchemaAndCodeKeys } from '../helpers/audit-diff'; +import { isWellKnownEnvKey } from '../helpers/well-known-env-keys'; export const commandSpec = define({ name: 'audit', @@ -182,6 +183,9 @@ export const commandFn: TypedGunshiCommandFn = async (ctx) = const schemaKeys = Object.keys(envGraph.configSchema); const diff = diffSchemaAndCodeKeys(schemaKeys, scanResult.keys); + // Don't report execution-environment plumbing (PATH, NODE_OPTIONS, npm_*, ...) 
as + // missing - it's read from process.env in real code but never declared in a schema. + const missingInSchema = diff.missingInSchema.filter((key) => !isWellKnownEnvKey(key)); const internallyReferenced = getInternallyReferencedKeys(envGraph); const unusedInSchema: Array = []; for (const key of diff.unusedInSchema) { @@ -196,7 +200,7 @@ export const commandFn: TypedGunshiCommandFn = async (ctx) = unusedInSchema.push(key); } - if (diff.missingInSchema.length === 0 && unusedInSchema.length === 0) { + if (missingInSchema.length === 0 && unusedInSchema.length === 0) { console.log(ansis.green(`✅ Schema and code references are in sync. (scanned ${scanResult.scannedFilesCount} file${scanResult.scannedFilesCount === 1 ? '' : 's'})`)); gracefulExit(0); return; @@ -204,9 +208,9 @@ export const commandFn: TypedGunshiCommandFn = async (ctx) = console.error(ansis.red('\n🚨 Schema/code mismatch detected:\n')); - if (diff.missingInSchema.length > 0) { - console.error(ansis.red(`Missing in schema (${diff.missingInSchema.length}):`)); - for (const key of diff.missingInSchema) { + if (missingInSchema.length > 0) { + console.error(ansis.red(`Missing in schema (${missingInSchema.length}):`)); + for (const key of missingInSchema) { const refs = scanResult.references.filter((r) => r.key === key).slice(0, 3); const refPreview = refs.map((r) => formatReference(finalScanRoot, r)).join(', '); console.error(` - ${ansis.bold(key)}${refPreview ? 
ansis.dim(` (seen at ${refPreview})`) : ''}`); diff --git a/packages/varlock/src/cli/commands/init.command.ts b/packages/varlock/src/cli/commands/init.command.ts index 8b7f37e2b..b18093158 100644 --- a/packages/varlock/src/cli/commands/init.command.ts +++ b/packages/varlock/src/cli/commands/init.command.ts @@ -22,6 +22,7 @@ import { type TypedGunshiCommandFn } from '../helpers/gunshi-type-utils'; import { findEnvFiles } from '../helpers/find-env-files'; import { tryCatch } from '@env-spec/utils/try-catch'; import { scanCodeForEnvVars } from '../helpers/env-var-scanner'; +import { isWellKnownEnvKey } from '../helpers/well-known-env-keys'; export const commandSpec = define({ name: 'init', @@ -164,7 +165,12 @@ export const commandFn: TypedGunshiCommandFn = async (ctx) = ensureAllItemsExist(parsedEnvSchemaFile, Object.values(parsedEnvFiles)); const scannedCodeKeysToAdd = !exampleFileToConvert - ? scannedCodeEnvKeys.filter((key) => !parsedEnvSchemaFile.configItems.find((i) => i.key === key)) + ? scannedCodeEnvKeys.filter((key) => { + // skip execution-environment plumbing (PATH, NODE_OPTIONS, npm_*, ...) 
- it's read + // from process.env but isn't app config the user should declare + if (isWellKnownEnvKey(key)) return false; + return !parsedEnvSchemaFile.configItems.find((i) => i.key === key); + }) : []; // add items we detect in source code if no sample/example file was provided diff --git a/packages/varlock/src/cli/commands/test/audit.command.test.ts b/packages/varlock/src/cli/commands/test/audit.command.test.ts index 4af7be0f7..774d0c0ff 100644 --- a/packages/varlock/src/cli/commands/test/audit.command.test.ts +++ b/packages/varlock/src/cli/commands/test/audit.command.test.ts @@ -100,6 +100,35 @@ describe('audit command', () => { expect(gracefulExitMock).toHaveBeenCalledWith(1); }); + test('does not report execution-environment plumbing as missing in schema', async () => { + scanCodeForEnvVarsMock.mockResolvedValue({ + // all code keys are either in the schema or pure plumbing (shell / node flags / npm_*) + keys: ['API_KEY', 'DATABASE_URL', 'PATH', 'HOME', 'NODE_OPTIONS', 'npm_config_user_agent'], + references: [], + scannedFilesCount: 3, + }); + + await commandFn({ values: {} } as any); + + expect(gracefulExitMock).toHaveBeenCalledWith(0); + const errorOutput = consoleErrorSpy.mock.calls.flat().join('\n'); + expect(errorOutput).not.toContain('Missing in schema'); + }); + + test('still reports app-meaningful vars like NODE_ENV as missing in schema', async () => { + scanCodeForEnvVarsMock.mockResolvedValue({ + keys: ['API_KEY', 'DATABASE_URL', 'NODE_ENV'], + references: [], + scannedFilesCount: 1, + }); + + await commandFn({ values: {} } as any); + + expect(gracefulExitMock).toHaveBeenCalledWith(1); + const errorOutput = consoleErrorSpy.mock.calls.flat().join('\n'); + expect(errorOutput).toContain('NODE_ENV'); + }); + test('exits with code 0 when schema and code match', async () => { scanCodeForEnvVarsMock.mockResolvedValue({ keys: ['API_KEY', 'DATABASE_URL'], diff --git a/packages/varlock/src/cli/helpers/env-var-scanner.ts 
// A subdirectory containing one of these files is treated as a separate project /
// workspace package, so we don't descend into it while scanning the parent. This keeps
// a monorepo root's `audit`/`init` from pulling in env var references that belong to
// child packages. `package.json` covers JS workspace packages (even before they've run
// `varlock init`, and regardless of package manager); `.env.schema` covers
// already-initialized and non-JS projects.
const NESTED_PROJECT_MARKERS = new Set(['package.json', '.env.schema']);

/**
 * Recursively collects the source files under `cwd` that should be scanned for env var
 * references.
 *
 * - Directories whose name is in `ignoredDirs` are skipped entirely.
 * - Any subdirectory that directly contains a file listed in `NESTED_PROJECT_MARKERS` is
 *   treated as a separate project and skipped. The scan root itself is exempt.
 * - Only files whose lower-cased extension is a key of `LANGUAGE_BY_EXTENSION` are kept.
 * - Directories that cannot be read are skipped silently (best-effort scan).
 *
 * @param cwd - scan root; may be relative, in which case it is resolved against the
 *   process working directory
 * @param ignoredDirs - directory names (not paths) that must not be descended into
 * @returns absolute file paths, sorted so the result is stable between runs
 */
async function discoverSourceFiles(cwd: string, ignoredDirs: Set<string>): Promise<Array<string>> {
  const filePaths: Array<string> = [];

  async function walk(dir: string, isRoot: boolean): Promise<void> {
    let entries;
    try {
      entries = await fs.readdir(dir, { withFileTypes: true });
    } catch {
      // unreadable directory (permissions, removed mid-scan, ...) - skip it, keep scanning
      return;
    }

    // Don't descend into nested projects / workspace packages (the scan root is exempt).
    // We detect the boundary from the directory listing we already have - no extra stat.
    if (!isRoot && entries.some((entry) => entry.isFile() && NESTED_PROJECT_MARKERS.has(entry.name))) {
      return;
    }

    const subdirWalks: Array<Promise<void>> = [];
    for (const entry of entries) {
      if (entry.isDirectory()) {
        if (ignoredDirs.has(entry.name)) continue;
        subdirWalks.push(walk(path.join(dir, entry.name), false));
      } else if (entry.isFile()) {
        const extension = path.extname(entry.name).toLowerCase();
        if (!(extension in LANGUAGE_BY_EXTENSION)) continue;
        filePaths.push(path.join(dir, entry.name));
      }
    }
    await Promise.all(subdirWalks);
  }

  // Resolve the root once so every collected path is absolute, even for a relative `cwd`.
  await walk(path.resolve(cwd), true);

  // Sibling directories are walked concurrently, so insertion order depends on I/O timing;
  // sort to keep the result (and anything derived from it) deterministic.
  return filePaths.sort();
}
expect(result.keys).toContain('ROOT_KEY'); + expect(result.keys).toContain('INNER_KEY'); + expect(result.keys).not.toContain('CHILD_ONLY_KEY'); + }); + + test('does not descend into workspace packages that have a package.json but no schema yet', async () => { + // a fresh monorepo: child package exists but hasn't run `varlock init` yet + const childPkg = path.join(tempDir, 'packages', 'child'); + fs.mkdirSync(childPkg, { recursive: true }); + fs.writeFileSync(path.join(childPkg, 'package.json'), '{ "name": "child" }'); + fs.writeFileSync(path.join(childPkg, 'index.ts'), 'process.env.CHILD_ONLY_KEY'); + + fs.writeFileSync(path.join(tempDir, 'app.ts'), 'process.env.ROOT_KEY'); + + const result = await scanCodeForEnvVars({ cwd: tempDir }); + + expect(result.keys).toContain('ROOT_KEY'); + expect(result.keys).not.toContain('CHILD_ONLY_KEY'); + }); + + test('still scans the root project even though the root has its own package.json/schema', async () => { + fs.writeFileSync(path.join(tempDir, 'package.json'), '{ "name": "root" }'); + fs.writeFileSync(path.join(tempDir, '.env.schema'), 'ROOT_KEY='); + fs.writeFileSync(path.join(tempDir, 'app.ts'), 'process.env.ROOT_KEY'); + + const result = await scanCodeForEnvVars({ cwd: tempDir }); + + expect(result.keys).toContain('ROOT_KEY'); + }); }); diff --git a/packages/varlock/src/cli/helpers/test/well-known-env-keys.test.ts b/packages/varlock/src/cli/helpers/test/well-known-env-keys.test.ts new file mode 100644 index 000000000..0cc9a8b8b --- /dev/null +++ b/packages/varlock/src/cli/helpers/test/well-known-env-keys.test.ts @@ -0,0 +1,35 @@ +import { describe, expect, test } from 'vitest'; + +import { isWellKnownEnvKey } from '../well-known-env-keys'; + +describe('isWellKnownEnvKey', () => { + test('matches OS/shell and node launch-flag plumbing', () => { + for (const key of ['PATH', 'HOME', 'SHELL', 'NODE_OPTIONS', 'NODE_PATH', 'XDG_CONFIG_HOME', 'NO_COLOR']) { + expect(isWellKnownEnvKey(key)).toBe(true); + } + }); + + 
test('matches case-insensitively (e.g. Windows ComSpec)', () => { + expect(isWellKnownEnvKey('comspec')).toBe(true); + expect(isWellKnownEnvKey('ComSpec')).toBe(true); + }); + + test('matches npm_ prefixed lifecycle vars', () => { + expect(isWellKnownEnvKey('npm_config_user_agent')).toBe(true); + expect(isWellKnownEnvKey('npm_lifecycle_event')).toBe(true); + expect(isWellKnownEnvKey('npm_package_name')).toBe(true); + }); + + test('does NOT match app-meaningful vars (NODE_ENV, CI, GitHub Actions, hosting)', () => { + // these are intentionally still auditable - they affect app/CI behavior + for (const key of ['NODE_ENV', 'CI', 'GITHUB_ACTIONS', 'GITHUB_BASE_REF', 'VERCEL', 'DEBUG']) { + expect(isWellKnownEnvKey(key)).toBe(false); + } + }); + + test('does NOT match application config or secrets', () => { + for (const key of ['PORT', 'HOST', 'DATABASE_URL', 'API_KEY', 'GITHUB_TOKEN', 'STRIPE_SECRET', 'MY_APP_URL']) { + expect(isWellKnownEnvKey(key)).toBe(false); + } + }); +}); diff --git a/packages/varlock/src/cli/helpers/well-known-env-keys.ts b/packages/varlock/src/cli/helpers/well-known-env-keys.ts new file mode 100644 index 000000000..1650e2ff9 --- /dev/null +++ b/packages/varlock/src/cli/helpers/well-known-env-keys.ts @@ -0,0 +1,86 @@ +/** + * Environment variables that are pure *execution-environment plumbing* — an artifact of + * where/how the process was launched (operating system, shell, node runtime flags, package + * manager lifecycle). They are routinely read from `process.env` but are never something an + * application author declares as config, so `varlock audit` should not flag them as "missing + * in schema" and `varlock init` should not add them to a freshly inferred schema. + * + * This list is intentionally NARROW. It deliberately does NOT include semantically + * meaningful variables that an app or its CI may legitimately depend on and may want to + * track — e.g. 
/**
 * Environment variables that are pure *execution-environment plumbing* — an artifact of
 * where/how the process was launched (operating system, shell, node runtime flags, package
 * manager lifecycle). They are routinely read from `process.env` but are never something an
 * application author declares as config, so `varlock audit` should not flag them as "missing
 * in schema" and `varlock init` should not add them to a freshly inferred schema.
 *
 * This list is intentionally NARROW. It deliberately does NOT include semantically
 * meaningful variables that an app or its CI may legitimately depend on and may want to
 * track — e.g. `NODE_ENV`, the `CI` flag, GitHub Actions / GitLab context vars, or
 * hosting-platform markers like `VERCEL`. Those should keep showing up in audit so you can
 * decide whether to declare them (or suppress them with `@auditIgnore`).
 *
 * Entries are stored upper-cased; lookups upper-case the candidate key first.
 */
const WELL_KNOWN_ENV_KEYS: ReadonlySet<string> = new Set([
  // --- OS / shell ---
  'PATH',
  'PATHEXT',
  'HOME',
  'PWD',
  'OLDPWD',
  'TMPDIR',
  'TMP',
  'TEMP',
  'USER',
  'USERNAME',
  'LOGNAME',
  'SHELL',
  'SHLVL',
  'TERM',
  'TERM_PROGRAM',
  'LANG',
  'LANGUAGE',
  'LC_ALL',
  'LC_CTYPE',
  'HOSTNAME',
  'COLUMNS',
  'LINES',
  'EDITOR',
  'VISUAL',
  'PAGER',
  'DISPLAY',
  'COMSPEC',
  'WINDIR',
  'SYSTEMROOT',
  'APPDATA',
  'LOCALAPPDATA',
  'PROGRAMDATA',
  'XDG_CONFIG_HOME',
  'XDG_CACHE_HOME',
  'XDG_DATA_HOME',
  'XDG_STATE_HOME',
  'XDG_RUNTIME_DIR',

  // --- node.js launch flags (NOT NODE_ENV - that's an app-level mode worth declaring) ---
  'NODE_OPTIONS',
  'NODE_PATH',
  'NODE_DEBUG',
  'NODE_EXTRA_CA_CERTS',
  'NODE_NO_WARNINGS',
  'NODE_TLS_REJECT_UNAUTHORIZED',

  // --- terminal / color output ---
  'NO_COLOR',
  'FORCE_COLOR',
  'COLORTERM',
].map((key) => key.toUpperCase()));

/**
 * Prefixes for families of injected vars. Kept extremely narrow - only families that are
 * exclusively set by tooling and never by an application author.
 *
 * Unlike the exact-name list, prefixes are matched CASE-SENSITIVELY. The lifecycle family
 * is lowercase (`npm_config_*`, `npm_package_*`, `npm_lifecycle_*`), whereas upper-case
 * `NPM_*` names such as `NPM_TOKEN` are author-declared secrets/config that must stay
 * visible to `audit` and `init`.
 */
const WELL_KNOWN_ENV_KEY_PREFIXES: ReadonlyArray<string> = [
  // npm / yarn / pnpm / bun lifecycle: npm_config_*, npm_package_*, npm_lifecycle_*
  'npm_',
];

/**
 * Returns true if `key` is pure execution-environment plumbing that should be excluded from
 * audit drift reporting and from inferred schemas.
 *
 * Exact names match case-insensitively (handles e.g. `comspec` vs `ComSpec`); prefix
 * families match case-sensitively so that `NPM_TOKEN` and friends are NOT swallowed.
 *
 * @param key - env var name as it appears in code
 * @returns whether the key should be hidden from audit / init
 */
export function isWellKnownEnvKey(key: string): boolean {
  if (WELL_KNOWN_ENV_KEYS.has(key.toUpperCase())) return true;
  return WELL_KNOWN_ENV_KEY_PREFIXES.some((prefix) => key.startsWith(prefix));
}