/**
 * AutoResearch Dashboard — visualizes optimization progress.
 *
 * Reads `results.tsv` (tab-separated; columns: commit, total_ms, parse_ms,
 * graph_ms, status, description; first line is a header), prints a summary
 * and an ASCII bar chart to the terminal, and writes a static HTML report to
 * `autoresearch-progress.html`.
 *
 * Usage: bun autoresearch-dashboard.ts
 *
 * NOTE(review): the HTML markup below is a reconstruction — keep the visible
 * text, restyle freely.
 */
import { readFileSync, writeFileSync } from "fs";

const RESULTS_PATH = "results.tsv";
const REPORT_PATH = "autoresearch-progress.html";
const CHART_WIDTH = 50;

/** One benchmark round as recorded in results.tsv. */
interface ResultRow {
  commit: string;
  total_ms: number;
  parse_ms: number;
  graph_ms: number;
  status: string;
  description: string;
}

/** Aggregate numbers shown in both the terminal and the HTML report. */
interface Summary {
  baseline: number;
  best: number;
  /** Percentage with one decimal, without the `%` sign. */
  improvement: string;
  kept: number;
  discarded: number;
  crashed: number;
}

const HTML_ENTITIES: Record<string, string> = {
  "&": "&amp;",
  "<": "&lt;",
  ">": "&gt;",
  '"': "&quot;",
  "'": "&#39;",
};

/** Escapes text for safe interpolation into HTML content or attributes. */
function escapeHtml(text: string): string {
  return text.replace(/[&<>"']/g, (ch) => HTML_ENTITIES[ch] ?? ch);
}

/** Formats a timing; non-finite values (missing/crashed rounds) become "n/a". */
function formatMs(ms: number): string {
  return Number.isFinite(ms) ? `${ms.toFixed(1)}ms` : "n/a";
}

/**
 * Parses the TSV text into rows. The first line is treated as the header.
 * Tolerates CRLF line endings and blank lines; missing numeric columns
 * become NaN and missing text columns become "".
 */
function parseResults(text: string): ResultRow[] {
  return text
    .trim()
    .split(/\r?\n/)
    .slice(1)
    .filter((line) => line.trim() !== "")
    .map((line) => {
      const cols = line.split("\t");
      return {
        commit: cols[0] ?? "",
        total_ms: parseFloat(cols[1] ?? ""),
        parse_ms: parseFloat(cols[2] ?? ""),
        graph_ms: parseFloat(cols[3] ?? ""),
        status: (cols[4] ?? "").trim(),
        description: cols[5] ?? "",
      };
    });
}

/**
 * Computes baseline (first row), best kept total, and status counts.
 * When no kept row has a finite total, `best` falls back to the baseline
 * instead of `Infinity` (which is what `Math.min()` yields for no arguments).
 */
function summarize(rows: readonly ResultRow[]): Summary {
  const first = rows[0]?.total_ms;
  const baseline = first !== undefined && Number.isFinite(first) ? first : 0;

  let best = Infinity;
  let kept = 0;
  let discarded = 0;
  let crashed = 0;
  for (const row of rows) {
    if (row.status === "keep") {
      kept += 1;
      if (Number.isFinite(row.total_ms) && row.total_ms < best) best = row.total_ms;
    } else if (row.status === "discard") {
      discarded += 1;
    } else if (row.status === "crash") {
      crashed += 1;
    }
  }
  if (!Number.isFinite(best)) best = baseline;

  const improvement =
    baseline > 0 ? (((baseline - best) / baseline) * 100).toFixed(1) : "0.0";
  return { baseline, best, improvement, kept, discarded, crashed };
}

/** Largest finite total_ms across rows, or 0 when there is none. */
function maxFiniteTotal(rows: readonly ResultRow[]): number {
  let max = 0;
  for (const row of rows) {
    if (Number.isFinite(row.total_ms) && row.total_ms > max) max = row.total_ms;
  }
  return max;
}

/** Fraction (0..1) of the longest bar that a row's total should occupy. */
function barFraction(totalMs: number, maxMs: number): number {
  return Number.isFinite(totalMs) && maxMs > 0 ? totalMs / maxMs : 0;
}

/** Builds the lines printed to the terminal (summary + ASCII chart). */
function renderTerminal(rows: readonly ResultRow[], summary: Summary): string[] {
  const rule = "  " + "─".repeat(CHART_WIDTH + 20);
  const maxMs = maxFiniteTotal(rows);
  const lines = [
    "\n\x1b[1m═══ AutoResearch Dashboard ══════════════════════════════════\x1b[0m\n",
    "  Target:      supertag-cli JSON parse + graph build",
    `  Baseline:    \x1b[33m${formatMs(summary.baseline)}\x1b[0m`,
    `  Best:        \x1b[32m${formatMs(summary.best)}\x1b[0m`,
    `  Improvement: \x1b[36m${summary.improvement}%\x1b[0m`,
    `  Rounds:      ${rows.length} (${summary.kept} kept, ${summary.discarded} discarded, ${summary.crashed} crashed)\n`,
    "  \x1b[1mProgress Chart\x1b[0m",
    rule,
  ];

  for (const row of rows) {
    // Every row gets at least one cell so it stays visible in the chart.
    const barLen = Math.max(1, Math.round(barFraction(row.total_ms, maxMs) * CHART_WIDTH));
    const color =
      row.status === "keep" ? "\x1b[32m" : row.status === "discard" ? "\x1b[31m" : "\x1b[33m";
    const mark = row.status === "keep" ? "✓" : row.status === "discard" ? "✗" : "!";
    const bar = color + "█".repeat(barLen) + "\x1b[0m";
    lines.push(
      `  ${row.commit}  ${formatMs(row.total_ms).padStart(8)} ${bar} ${mark} ${row.description}`,
    );
  }

  lines.push(rule);
  return lines;
}

/**
 * Node count shown in the report subtitle.
 * NOTE(review): hard-coded; results.tsv describes a 1.68M-node export, so
 * this figure looks stale — confirm or derive it from the benchmark output.
 */
function dump_docs_count(): string {
  return "5,243";
}

/** Maps a status to a CSS class, never echoing untrusted text into markup. */
function statusClass(status: string): string {
  return status === "keep" || status === "discard" || status === "crash" ? status : "other";
}

/** Renders the standalone HTML report. All row text is HTML-escaped. */
function renderHtml(rows: readonly ResultRow[], summary: Summary): string {
  const maxMs = maxFiniteTotal(rows);
  const nodeCount = rows.length > 0 ? dump_docs_count() : "?";

  const chartHeight = 160;
  const slot = 40;
  const chartBars = rows
    .map((r, i) => {
      const h = Math.max(1, Math.round(barFraction(r.total_ms, maxMs) * chartHeight));
      return `<rect class="${statusClass(r.status)}" x="${i * slot + 5}" y="${chartHeight - h}" width="${slot - 10}" height="${h}"><title>${escapeHtml(r.commit)}: ${formatMs(r.total_ms)}</title></rect>`;
    })
    .join("\n    ");

  const tableRows = rows
    .map((r, i) => {
      const pct = (barFraction(r.total_ms, maxMs) * 100).toFixed(1);
      return `<tr class="${statusClass(r.status)}">
  <td>${i + 1}</td>
  <td><code>${escapeHtml(r.commit)}</code></td>
  <td>${formatMs(r.total_ms)}</td>
  <td>${formatMs(r.parse_ms)}</td>
  <td>${formatMs(r.graph_ms)}</td>
  <td>${escapeHtml(r.status)}</td>
  <td><div class="bar" style="width:${pct}%"></div></td>
  <td>${escapeHtml(r.description)}</td>
</tr>`;
    })
    .join("\n");

  return `<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="utf-8">
<title>AutoResearch: supertag-cli parse optimization</title>
<style>
  body { font-family: system-ui, sans-serif; margin: 2rem; }
  .stats { display: flex; gap: 2rem; margin: 1rem 0; }
  .value { font-size: 1.5rem; font-weight: bold; }
  table { border-collapse: collapse; width: 100%; }
  th, td { padding: 0.25rem 0.5rem; text-align: left; border-bottom: 1px solid #ddd; }
  .bar { height: 0.8rem; background: #999; }
  .keep .bar, rect.keep { background: #2a2; fill: #2a2; }
  .discard .bar, rect.discard { background: #c33; fill: #c33; }
  .crash .bar, rect.crash, .other .bar, rect.other { background: #ca2; fill: #ca2; }
</style>
</head>
<body>
<h1>AutoResearch: Parse Optimization</h1>
<p class="subtitle">Target: supertag-cli JSON parse + graph build (${nodeCount} nodes)</p>

<div class="stats">
  <div class="stat"><div class="value">${formatMs(summary.baseline)}</div><div class="label">Baseline</div></div>
  <div class="stat"><div class="value">${formatMs(summary.best)}</div><div class="label">Best</div></div>
  <div class="stat"><div class="value">${summary.improvement}%</div><div class="label">Improvement</div></div>
  <div class="stat"><div class="value">${rows.length}</div><div class="label">Rounds</div></div>
</div>

<svg width="${Math.max(1, rows.length) * slot}" height="${chartHeight}" role="img" aria-label="Total time per round">
    ${chartBars}
</svg>

<table>
<thead>
<tr><th>#</th><th>Commit</th><th>Total</th><th>Parse</th><th>Graph</th><th>Status</th><th>Bar</th><th>Description</th></tr>
</thead>
<tbody>
${tableRows}
</tbody>
</table>
</body>
</html>
`;
}

/** Entry point: load results, print the terminal view, write the report. */
function main(): void {
  let text: string;
  try {
    text = readFileSync(RESULTS_PATH, "utf-8");
  } catch (err: unknown) {
    // Report a missing/unreadable input plainly instead of a raw stack trace.
    const reason = err instanceof Error ? err.message : String(err);
    console.error(`Cannot read ${RESULTS_PATH}: ${reason}`);
    process.exitCode = 1;
    return;
  }

  const rows = parseResults(text);
  const summary = summarize(rows);

  for (const line of renderTerminal(rows, summary)) console.log(line);

  writeFileSync(REPORT_PATH, renderHtml(rows, summary));
  console.log("\n  \x1b[2mHTML dashboard: autoresearch-progress.html\x1b[0m\n");
}

main();
/**
 * AutoResearch benchmark — times TanaExportParser.parseFile and buildGraph.
 * This is the fixed evaluation harness (the "prepare.py" equivalent).
 * DO NOT MODIFY during optimization.
 *
 * Prints dataset facts after the first run, then `parse_ms`, `graph_ms` and
 * `total_ms` of the median run (ranked by total time).
 */
import { existsSync } from "fs";
import { TanaExportParser } from "./src/parsers/tana-export";

/** Wall-clock milliseconds for one parse + graph-build round. */
interface Timing {
  parse: number;
  graph: number;
  total: number;
}

const FIXTURE = "./tests/fixtures/sample-workspace.json";
// Prefer the real 360MB export for realistic numbers; otherwise use the fixture.
const REAL_EXPORT = `${process.env.HOME}/Documents/Tana-Export/main/M9rkJkwuED@2026-03-16.json`;
const RUNS = 3; // Fewer runs for large files

const filePath = existsSync(REAL_EXPORT) ? REAL_EXPORT : FIXTURE;
const parser = new TanaExportParser();
const results: Timing[] = [];

for (let run = 0; run < RUNS; run += 1) {
  const started = performance.now();
  const dump = await parser.parseFile(filePath);
  const parsed = performance.now();
  const graph = parser.buildGraph(dump);
  const finished = performance.now();

  results.push({
    parse: parsed - started,
    graph: finished - parsed,
    total: finished - started,
  });

  // Dataset facts are reported once, right after the first round.
  if (run !== 0) continue;
  const facts: [string, string | number | undefined][] = [
    ["file", filePath.split("/").pop()],
    ["file_size_mb", (Bun.file(filePath).size / 1024 / 1024).toFixed(1)],
    ["nodes", dump.docs.length],
    ["supertags", graph.supertags.size],
    ["fields", graph.fields.size],
    ["tag_applications", graph.tagApplications.length],
  ];
  for (const [label, value] of facts) {
    console.log(`${label}: ${value}`);
  }
}

// Use median: rank rounds by total time and take the middle one.
results.sort((left, right) => left.total - right.total);
const median = results[Math.floor(results.length / 2)];

console.log("---");
for (const key of ["parse", "graph", "total"] as const) {
  console.log(`${key}_ms: ${median[key].toFixed(1)}`);
}
/**
 * Fast structural validation — checks the dump's shape without running the
 * full Zod schema over every node.
 *
 * The fast path is taken only when:
 *  - `data` is a non-null, non-array object,
 *  - `formatVersion` is a number,
 *  - `docs` and `editors` are arrays,
 *  - `workspaces` is a non-null, non-array object
 *    (`typeof null === "object"`, so a bare `typeof` check is not enough),
 *  - every element of `docs` is an object with a string `id` and an object
 *    `props` — the two members graph building dereferences unguarded.
 *
 * Anything else falls back to `TanaDumpSchema.parse`, which either accepts
 * the data (applying its defaults) or throws a descriptive error.
 *
 * Zod defaults (inbound_refs=[], outbound_refs=[], editMode=false) are NOT
 * applied on the fast path.
 * NOTE(review): this assumes no consumer relies on those defaults — confirm.
 *
 * @param data - Parsed JSON of unknown shape.
 * @returns The same object typed as a TanaDump, or the Zod-parsed result.
 * @throws Whatever `TanaDumpSchema.parse` throws for invalid input.
 */
function fastValidate(data: unknown): TanaDump {
  const isRecord = (value: unknown): value is Record<string, unknown> =>
    typeof value === "object" && value !== null && !Array.isArray(value);

  const hasExpectedShape = (): boolean => {
    if (!isRecord(data)) return false;
    if (typeof data.formatVersion !== "number") return false;
    if (!Array.isArray(data.docs) || !Array.isArray(data.editors)) return false;
    if (!isRecord(data.workspaces)) return false;

    // Cheap per-node sanity check: a few property reads per node, no allocation.
    const docs: unknown[] = data.docs;
    for (let i = 0; i < docs.length; i++) {
      const doc = docs[i];
      if (!isRecord(doc) || typeof doc.id !== "string" || !isRecord(doc.props)) {
        return false;
      }
    }
    return true;
  };

  if (!hasExpectedShape()) {
    // Fall back to full Zod validation for better error messages.
    return TanaDumpSchema.parse(data);
  }

  // Shape verified at runtime above; the compiler cannot follow that check.
  return data as TanaDump;
}
export class TanaExportParser {
  /**
   * Parse a Tana JSON export file.
   *
   * Accepts either the dump object itself or the API wrapper format
   * `{ storeData: { formatVersion, docs, editors, ... } }`.
   *
   * @param filePath - Path to the JSON export.
   * @returns The validated dump (see `fastValidate`).
   * @throws If the file cannot be read or parsed as JSON, or validation fails.
   */
  async parseFile(filePath: string): Promise<TanaDump> {
    const json: unknown = await Bun.file(filePath).json();

    // Unwrap the API export wrapper when present. Guarding against `null`
    // lets a file containing just `null` reach validation instead of
    // throwing a TypeError on property access.
    const wrapped =
      typeof json === "object" && json !== null
        ? (json as { storeData?: unknown }).storeData
        : undefined;

    return fastValidate(wrapped ?? json);
  }

  /**
   * Build the complete graph: node index, trash, supertags, fields, inline
   * references, tag colors and tag applications.
   *
   * One pass over `dump.docs` builds the index and collects two small
   * deferred sets — inline-ref matches and tuple candidates (nodes whose
   * children contain `SYS_A13`). Both need lookups into the finished index,
   * so they are resolved afterwards.
   *
   * Tuple kinds (owner chain: tuple → meta node → target node):
   *  - `SYS_A13` + `SYS_T01`: supertag definition; non-SYS children are
   *    superclasses. Skipped when the tuple's own id contains "SYS".
   *  - `SYS_A13` + `SYS_T02`: field definition. Not subject to the SYS-id
   *    skip.
   *  - `SYS_A13` only: tag application; non-SYS children are the tag ids.
   *    Skipped when the tuple's id contains "SYS" or the tuple is trashed.
   * A tuple carrying both `SYS_T01` and `SYS_T02` is registered as both a
   * supertag and a field.
   *
   * @param dump - Parsed export.
   * @returns The assembled graph. Nodes listed under the trash node stay in
   *          `nodes` and are additionally recorded in `trash`.
   */
  buildGraph(dump: TanaDump): TanaGraph {
    type TupleCandidate = {
      node: NodeDump;
      children: string[];
      definesSupertag: boolean;
      definesField: boolean;
      hasSystemId: boolean;
    };

    const docs = dump.docs;
    const index = new Map<string, NodeDump>();
    const trash = new Map<string, NodeDump>();
    const supertags: TanaGraph["supertags"] = new Map();
    const fields: TanaGraph["fields"] = new Map();
    const inlineRefs: InlineReference[] = [];
    const tagColors: TanaGraph["tagColors"] = new Map();
    const tagApplications: TagApplication[] = [];

    const candidates: TupleCandidate[] = [];
    // [sourceNodeId, referenced ids as written in the name]
    const pendingInlineRefs: [string, string[]][] = [];
    let trashChildIds: string[] | undefined;

    // NOTE(review): pattern reconstructed — verify against the repository
    // before merging.
    const inlineRefPattern = /<span data-inlineref-node="([^"]*)"><\/span>/g;

    // === SINGLE PASS: build index + collect deferred work ===
    for (let i = 0; i < docs.length; i++) {
      const node = docs[i];
      const id = node.id;

      // The trash container is kept out of the index; its children are
      // marked as trashed once the index is complete.
      if (id.includes("TRASH")) {
        trash.set(id, node);
        if (node.children) trashChildIds = node.children;
        continue;
      }

      index.set(id, node);

      // Cheap substring test first so the regex only runs on likely names.
      const name = node.props.name;
      if (name && name.includes("data-inlineref-node")) {
        const targets: string[] = [];
        for (const match of name.matchAll(inlineRefPattern)) {
          targets.push(match[1]);
        }
        if (targets.length > 0) pendingInlineRefs.push([id, targets]);
      }

      const children = node.children;
      if (!children) continue;

      let hasAssociation = false;
      let definesSupertag = false;
      let definesField = false;
      for (let j = 0; j < children.length; j++) {
        const child = children[j];
        if (child === "SYS_A13") hasAssociation = true;
        else if (child === "SYS_T01") definesSupertag = true;
        else if (child === "SYS_T02") definesField = true;
      }

      if (hasAssociation) {
        candidates.push({
          node,
          children,
          definesSupertag,
          definesField,
          hasSystemId: id.includes("SYS"),
        });
      }
    }

    // Mark trashed children (they remain in the index).
    if (trashChildIds) {
      for (const nodeId of trashChildIds) {
        const node = index.get(nodeId);
        if (node) trash.set(nodeId, node);
      }
    }

    // === RESOLVE: inline refs (keep only targets that exist) ===
    for (const [sourceNodeId, targets] of pendingInlineRefs) {
      const targetNodeIds = targets.filter((targetId) => index.has(targetId));
      if (targetNodeIds.length > 0) {
        inlineRefs.push({ sourceNodeId, targetNodeIds, type: "inline_ref" });
      }
    }

    // === RESOLVE: tuple candidates ===
    for (const { node, children, definesSupertag, definesField, hasSystemId } of candidates) {
      // Shared owner chain: tuple -> meta node -> target node.
      const ownerId = node.props._ownerId;
      if (!ownerId || trash.has(ownerId)) continue;
      const metaNode = index.get(ownerId);
      if (!metaNode) continue;

      const targetId = metaNode.props._ownerId;
      if (!targetId || trash.has(targetId)) continue;
      const targetNode = index.get(targetId);
      if (!targetNode) continue;
      const targetName = targetNode.props.name;

      if (definesSupertag && !hasSystemId && targetName) {
        const superclasses: string[] = [];
        for (const childId of children) {
          if (childId.includes("SYS") || trash.has(childId)) continue;
          const superName = index.get(childId)?.props.name;
          if (superName) superclasses.push(superName);
        }
        supertags.set(targetName, {
          nodeId: node.id,
          tagName: targetName,
          tagId: targetId,
          superclasses,
          color: node.color,
        });
        if (node.color) tagColors.set(targetName, node.color);
      }

      if (definesField && targetName) {
        fields.set(targetName, {
          nodeId: node.id,
          fieldName: targetName,
          fieldId: targetId,
        });
      }

      if (!definesSupertag && !definesField && !hasSystemId && !trash.has(node.id)) {
        for (const childId of children) {
          if (childId.includes("SYS") || trash.has(childId)) continue;
          const tagName = index.get(childId)?.props.name;
          if (tagName) {
            tagApplications.push({
              tupleNodeId: node.id,
              dataNodeId: targetId,
              tagId: childId,
              tagName,
            });
          }
        }
      }
    }

    return { nodes: index, trash, supertags, fields, inlineRefs, tagColors, tagApplications };
  }
}