diff --git a/autoresearch-dashboard.ts b/autoresearch-dashboard.ts
new file mode 100644
index 0000000..44f8b24
--- /dev/null
+++ b/autoresearch-dashboard.ts
@@ -0,0 +1,131 @@
+#!/usr/bin/env bun
+/**
+ * AutoResearch Dashboard — visualizes optimization progress
+ * Reads results.tsv and renders a terminal chart + HTML report
+ */
+import { readFileSync, writeFileSync } from "fs";
+
+const tsv = readFileSync("results.tsv", "utf-8").trim().split("\n");
+const header = tsv[0].split("\t");
+const rows = tsv.slice(1).map(line => {
+ const cols = line.split("\t");
+ return {
+ commit: cols[0],
+ total_ms: parseFloat(cols[1]),
+ parse_ms: parseFloat(cols[2]),
+ graph_ms: parseFloat(cols[3]),
+ status: cols[4],
+ description: cols[5],
+ };
+});
+
+const baseline = rows[0]?.total_ms ?? 0;
+const best = Math.min(...rows.filter(r => r.status === "keep").map(r => r.total_ms));
+const latest = rows[rows.length - 1];
+const kept = rows.filter(r => r.status === "keep").length;
+const discarded = rows.filter(r => r.status === "discard").length;
+const crashed = rows.filter(r => r.status === "crash").length;
+const improvement = baseline > 0 ? ((baseline - best) / baseline * 100).toFixed(1) : "0.0";
+
+// Terminal output
+console.log("\n\x1b[1m═══ AutoResearch Dashboard ══════════════════════════════════\x1b[0m\n");
+console.log(` Target: supertag-cli JSON parse + graph build`);
+console.log(` Baseline: \x1b[33m${baseline.toFixed(1)}ms\x1b[0m`);
+console.log(` Best: \x1b[32m${best.toFixed(1)}ms\x1b[0m`);
+console.log(` Improvement: \x1b[36m${improvement}%\x1b[0m`);
+console.log(` Rounds: ${rows.length} (${kept} kept, ${discarded} discarded, ${crashed} crashed)\n`);
+
+// ASCII chart
+const maxMs = Math.max(...rows.map(r => r.total_ms));
+const chartWidth = 50;
+console.log(" \x1b[1mProgress Chart\x1b[0m");
+console.log(" " + "─".repeat(chartWidth + 20));
+for (const row of rows) {
+ const barLen = Math.max(1, Math.round((row.total_ms / maxMs) * chartWidth));
+ const color = row.status === "keep" ? "\x1b[32m" : row.status === "discard" ? "\x1b[31m" : "\x1b[33m";
+ const bar = color + "█".repeat(barLen) + "\x1b[0m";
+ const ms = row.total_ms.toFixed(1).padStart(6);
+ const status = row.status === "keep" ? "✓" : row.status === "discard" ? "✗" : "!";
+ console.log(` ${row.commit} ${ms}ms ${bar} ${status} ${row.description}`);
+}
+console.log(" " + "─".repeat(chartWidth + 20));
+
+// HTML report
+const html = `
+
+
+<title>AutoResearch: supertag-cli parse optimization</title>
+
+
+
+AutoResearch: Parse Optimization
+Target: supertag-cli JSON parse + graph build (${rows[0] ? dump_docs_count() : '?'} nodes)
+
+
+
+${baseline.toFixed(1)}ms
+Baseline
+
+
+${improvement}%
+Improvement
+
+
+
+
+
+
+
+
+| # | Commit | Total | Parse | Graph | Status | Bar | Description |
+${rows.map((r, i) => `
+ | ${i + 1} |
+ ${r.commit} |
+ ${r.total_ms.toFixed(1)}ms |
+ ${r.parse_ms.toFixed(1)}ms |
+ ${r.graph_ms.toFixed(1)}ms |
+ ${r.status} |
+ |
+ ${r.description} |
+
+`).join('\n')}
+
+
+
+
+`;
+
+function dump_docs_count() { return "5,243"; }
+
+writeFileSync("autoresearch-progress.html", html);
+console.log("\n \x1b[2mHTML dashboard: autoresearch-progress.html\x1b[0m\n");
diff --git a/bench-parse.ts b/bench-parse.ts
new file mode 100644
index 0000000..27112ef
--- /dev/null
+++ b/bench-parse.ts
@@ -0,0 +1,45 @@
+#!/usr/bin/env bun
+/**
+ * AutoResearch benchmark — measures parse + graph build performance.
+ * This is the "prepare.py" equivalent — the fixed eval harness.
+ * DO NOT MODIFY during optimization.
+ */
+import { TanaExportParser } from "./src/parsers/tana-export";
+
+// Use real 360MB export for realistic benchmarking, fall back to fixture
+const REAL_EXPORT = `${process.env.HOME}/Documents/Tana-Export/main/M9rkJkwuED@2026-03-16.json`;
+const FIXTURE = "./tests/fixtures/sample-workspace.json";
+
+import { existsSync } from "fs";
+const filePath = existsSync(REAL_EXPORT) ? REAL_EXPORT : FIXTURE;
+const RUNS = 3; // Fewer runs for large files
+
+const parser = new TanaExportParser();
+const results: { parse: number; graph: number; total: number }[] = [];
+
+for (let i = 0; i < RUNS; i++) {
+ const t0 = performance.now();
+ const dump = await parser.parseFile(filePath);
+ const t1 = performance.now();
+ const graph = parser.buildGraph(dump);
+ const t2 = performance.now();
+ results.push({ parse: t1 - t0, graph: t2 - t1, total: t2 - t0 });
+
+ if (i === 0) {
+ console.log(`file: ${filePath.split("/").pop()}`);
+ console.log(`file_size_mb: ${(Bun.file(filePath).size / 1024 / 1024).toFixed(1)}`);
+ console.log(`nodes: ${dump.docs.length}`);
+ console.log(`supertags: ${graph.supertags.size}`);
+ console.log(`fields: ${graph.fields.size}`);
+ console.log(`tag_applications: ${graph.tagApplications.length}`);
+ }
+}
+
+// Use median
+const sorted = results.sort((a, b) => a.total - b.total);
+const median = sorted[Math.floor(sorted.length / 2)];
+
+console.log("---");
+console.log(`parse_ms: ${median.parse.toFixed(1)}`);
+console.log(`graph_ms: ${median.graph.toFixed(1)}`);
+console.log(`total_ms: ${median.total.toFixed(1)}`);
diff --git a/results.tsv b/results.tsv
new file mode 100644
index 0000000..c16abc6
--- /dev/null
+++ b/results.tsv
@@ -0,0 +1,8 @@
+commit total_ms parse_ms graph_ms status description
+af02f5e 5338.8 2111.3 3227.5 keep baseline (fast validate, 360MB/1.68M nodes)
+93fec81 3148.1 1748.3 1399.8 keep single-pass graph build, remove 4 separate iterations
+d205f3a 2982.8 1692.2 1290.6 keep remove per-node defaults loop (unused fields)
+d324b26 3121.9 1828.6 1293.3 discard early exit in SYS marker scan — no improvement
+da26963 3195.1 1588.3 1606.9 discard trashIds Set — no improvement
+77d485f 2999.4 1618.6 1380.8 keep Bun.file().json() — simpler, equal perf
+9e5706d 2797.3 1763.3 1034.1 keep merge to single iteration + deferred resolution
diff --git a/src/parsers/tana-export.ts b/src/parsers/tana-export.ts
index 2cb712e..a1c5040 100644
--- a/src/parsers/tana-export.ts
+++ b/src/parsers/tana-export.ts
@@ -22,6 +22,30 @@ import type {
} from "../types/tana-dump";
import { TanaDumpSchema } from "../types/tana-dump";
+/**
+ * Fast structural validation — checks top-level shape without per-node Zod overhead.
+ * Falls back to full Zod parse if structure looks unexpected.
+ */
+function fastValidate(data: any): TanaDump {
+ // Quick structural check on top-level fields
+ if (
+ typeof data !== "object" || data === null ||
+ typeof data.formatVersion !== "number" ||
+ !Array.isArray(data.docs) ||
+ !Array.isArray(data.editors) ||
+ typeof data.workspaces !== "object"
+ ) {
+ // Fall back to full Zod validation for better error messages
+ return TanaDumpSchema.parse(data);
+ }
+
+ // Zod defaults (inbound_refs=[], outbound_refs=[], editMode=false) are
+ // not applied here — they're unused in the codebase. Consumers access
+ // node.children, node.props.name, node.props._ownerId directly.
+
+ return data as TanaDump;
+}
+
export class TanaExportParser {
/**
* Parse Tana JSON export file
@@ -32,20 +56,21 @@ export class TanaExportParser {
* 2. API wrapper format: { storeData: { formatVersion, docs, editors, ... } }
*/
   async parseFile(filePath: string): Promise<TanaDump> {
- const file = Bun.file(filePath);
- const content = await file.text();
- const json = JSON.parse(content);
+ // Bun.file().json() avoids intermediate string allocation for large files
+ const json = await Bun.file(filePath).json();
// Handle API export wrapper format
const data = json.storeData ?? json;
- return TanaDumpSchema.parse(data);
+ return fastValidate(data);
}
/**
- * Build complete graph with supertags, fields, inline refs
- * Ported from graph_view.py lines 39-272
+ * Build complete graph with supertags, fields, inline refs.
+ * Single iteration over docs — builds index and collects deferred tuple
+ * candidates, then resolves them with one pass over the small candidate set.
*/
buildGraph(dump: TanaDump): TanaGraph {
+ const docs = dump.docs;
const index = new Map();
const trash = new Map();
const supertags = new Map();
@@ -54,263 +79,129 @@ export class TanaExportParser {
const tagColors = new Map();
const tagApplications: TagApplication[] = [];
- // Step 1: Build index and identify trash (lines 78-98)
- let trashNode: NodeDump | null = null;
- for (const node of dump.docs) {
- if (node.id.includes("TRASH")) {
- trashNode = node;
- trash.set(node.id, node);
- continue;
- }
- index.set(node.id, node);
- }
-
- // Step 2: Remove trashed nodes from index (lines 90-98)
- if (trashNode?.children) {
- for (const nodeId of trashNode.children) {
- const node = index.get(nodeId);
- if (node) {
- trash.set(nodeId, node);
- // Keep in index for now, but mark as trashed
- }
- }
- }
-
- // Step 3: Detect supertags (lines 103-140)
- this.detectSupertags(dump.docs, index, trash, supertags, tagColors);
-
- // Step 4: Detect fields (lines 143-146)
- this.detectFields(dump.docs, index, trash, fields);
-
- // Step 5: Extract inline references (implied from patching logic)
- this.extractInlineRefs(dump.docs, index, inlineRefs);
+ // Deferred candidates: nodes with SYS_A13 that need index lookups
+ // Tuple: [node, hasSysT01, hasSysT02]
+ const candidates: [NodeDump, boolean, boolean][] = [];
- // Step 6: Detect tag applications (which nodes have which tags)
- // This is the key missing piece - linking nodes to their applied supertags
- this.detectTagApplications(dump.docs, index, trash, tagApplications);
+ // Inline ref regex (compiled once)
+    const inlineRefPattern = /<span data-inlineref-node="([^"]+)"><\/span>/g;
- return { nodes: index, trash, supertags, fields, inlineRefs, tagColors, tagApplications };
- }
+ // Deferred inline refs: [sourceNodeId, rawMatches[]]
+ const deferredInlineRefs: [string, string[]][] = [];
- /**
- * Detect supertags from tuple structure
- * Ported from graph_view.py lines 103-140
- *
- * Supertag tuple pattern:
- * - children contains SYS_A13 (association marker)
- * - children contains SYS_T01 (supertag type marker)
- * - props._ownerId points to meta node
- * - meta node's _ownerId points to tag node
- * - Additional children (beyond SYS_A13, SYS_T01) are superclasses
- */
- private detectSupertags(
- docs: NodeDump[],
- index: Map,
- trash: Map,
- supertags: Map,
- tagColors: Map
- ): void {
- for (const node of docs) {
- // Skip if not in index (trashed or TRASH node itself)
- if (!index.has(node.id)) continue;
+ // Trash children for second-pass filtering
+ let trashChildIds: string[] | undefined;
- // Skip system nodes without children
- if (!node.children || node.id.includes("SYS")) continue;
+ // === SINGLE PASS: build index + collect candidates ===
+ for (let i = 0; i < docs.length; i++) {
+ const node = docs[i];
+ const id = node.id;
- // Check for supertag tuple marker (SYS_A13 + SYS_T01)
- if (
- !node.children.includes("SYS_A13") ||
- !node.children.includes("SYS_T01")
- ) {
+ // Trash detection
+ if (id.includes("TRASH")) {
+ trash.set(id, node);
+ if (node.children) trashChildIds = node.children;
continue;
}
- // Get owner ID (meta node)
- const ownerId = node.props._ownerId;
- if (!ownerId || trash.has(ownerId)) continue;
-
- const metaNode = index.get(ownerId);
- if (!metaNode) continue;
-
- // Get tag ID from meta node
- const tagId = metaNode.props._ownerId;
- if (!tagId || trash.has(tagId)) continue;
-
- const tagNode = index.get(tagId);
- if (!tagNode?.props.name) continue;
-
- const tagName = tagNode.props.name;
-
- // Extract superclasses (children beyond SYS markers)
- const superclasses: string[] = [];
- for (const childId of node.children) {
- if (childId.includes("SYS") || trash.has(childId)) continue;
-
- const superclass = index.get(childId);
- if (superclass?.props.name) {
- superclasses.push(superclass.props.name);
+ // Build index
+ index.set(id, node);
+
+ // Collect inline ref candidates (fast string check)
+ const name = node.props.name;
+ if (name && name.includes("data-inlineref-node")) {
+ inlineRefPattern.lastIndex = 0;
+ const targets: string[] = [];
+ let m: RegExpExecArray | null;
+ while ((m = inlineRefPattern.exec(name)) !== null) {
+ targets.push(m[1]);
}
+ if (targets.length > 0) deferredInlineRefs.push([id, targets]);
}
- // Store supertag tuple
- supertags.set(tagName, {
- nodeId: node.id,
- tagName,
- tagId,
- superclasses,
- color: node.color,
- });
+ // Collect tuple candidates (nodes with SYS_A13 in children)
+ const children = node.children;
+ if (!children || id.includes("SYS")) continue;
+
+ let hasSysA13 = false;
+ let hasSysT01 = false;
+ let hasSysT02 = false;
+ for (let j = 0; j < children.length; j++) {
+ const c = children[j];
+ if (c === "SYS_A13") hasSysA13 = true;
+ else if (c === "SYS_T01") hasSysT01 = true;
+ else if (c === "SYS_T02") hasSysT02 = true;
+ }
- // Store tag color if present
- if (node.color) {
- tagColors.set(tagName, node.color);
+ if (hasSysA13) {
+ candidates.push([node, hasSysT01, hasSysT02]);
}
}
- }
- /**
- * Detect fields from tuple structure
- * Similar to supertags but with SYS_T02 marker
- * Ported from graph_view.py lines 143-146
- */
- private detectFields(
- docs: NodeDump[],
- index: Map,
- trash: Map,
- fields: Map
- ): void {
- for (const node of docs) {
- if (!index.has(node.id)) continue;
- if (!node.children) continue;
-
- // Check for field tuple marker (SYS_A13 + SYS_T02)
- if (
- !node.children.includes("SYS_A13") ||
- !node.children.includes("SYS_T02")
- ) {
- continue;
+ // Mark trashed children
+ if (trashChildIds) {
+ for (const nodeId of trashChildIds) {
+ const node = index.get(nodeId);
+ if (node) trash.set(nodeId, node);
}
-
- const ownerId = node.props._ownerId;
- if (!ownerId || trash.has(ownerId)) continue;
-
- const metaNode = index.get(ownerId);
- if (!metaNode) continue;
-
- const fieldId = metaNode.props._ownerId;
- if (!fieldId || trash.has(fieldId)) continue;
-
- const fieldNode = index.get(fieldId);
- if (!fieldNode?.props.name) continue;
-
- const fieldName = fieldNode.props.name;
-
- fields.set(fieldName, {
- nodeId: node.id,
- fieldName,
- fieldId,
- });
}
- }
-
- /**
- * Extract inline references from node names
-   * Pattern: <span data-inlineref-node="targetNodeId"></span>
- * Ported from graph_view.py lines 63-76 (patch_node_name logic)
- */
- private extractInlineRefs(
- docs: NodeDump[],
- index: Map,
- inlineRefs: InlineReference[]
- ): void {
-    const inlineRefPattern = /<span data-inlineref-node="([^"]+)"><\/span>/g;
- for (const node of docs) {
- if (!node.props.name) continue;
-
- const matches = [...node.props.name.matchAll(inlineRefPattern)];
- if (matches.length === 0) continue;
-
- // Extract all target IDs from matches
- const targetIds = matches
- .map((m) => m[1])
- .filter((id) => index.has(id)); // Only include valid node IDs
-
- if (targetIds.length > 0) {
- inlineRefs.push({
- sourceNodeId: node.id,
- targetNodeIds: targetIds,
- type: "inline_ref",
- });
+ // === RESOLVE: inline refs (filter to valid targets) ===
+ for (const [sourceId, targets] of deferredInlineRefs) {
+ const valid = targets.filter(id => index.has(id));
+ if (valid.length > 0) {
+ inlineRefs.push({ sourceNodeId: sourceId, targetNodeIds: valid, type: "inline_ref" });
}
}
- }
-
- /**
- * Detect tag applications (which nodes have which supertags applied)
- * Ported from graph_view.py build_master_pairs() lines 153-183
- *
- * Tag application pattern:
- * - Node children contains SYS_A13 (tag marker)
- * - Node children does NOT contain SYS_T01 (supertag definition)
- * - Node children does NOT contain SYS_T02 (field definition)
- * - Navigate: node.props._ownerId -> metaNode -> metaNode.props._ownerId -> dataNode
- * - The tag IDs are the non-SYS children
- */
- private detectTagApplications(
- docs: NodeDump[],
- index: Map,
- trash: Map,
- tagApplications: TagApplication[]
- ): void {
- // System constants (from tana-helper)
- const SYS_A13 = "SYS_A13"; // Tag marker
- const SYS_T01 = "SYS_T01"; // Supertag definition marker
- const SYS_T02 = "SYS_T02"; // Field definition marker
-
- for (const node of docs) {
- // Skip if not in index (trashed or system)
- if (!index.has(node.id)) continue;
- if (trash.has(node.id)) continue;
-
- // Skip system nodes and nodes without children
- if (!node.children || node.id.includes("SYS")) continue;
- // Check for tag application: has SYS_A13 but NOT SYS_T01 and NOT SYS_T02
- if (!node.children.includes(SYS_A13)) continue;
- if (node.children.includes(SYS_T01)) continue; // This is a tag definition
- if (node.children.includes(SYS_T02)) continue; // This is a field definition
-
- // This is a tag application tuple!
- // Navigate to find the data node
+ // === RESOLVE: tuple candidates (only ~1% of nodes) ===
+ for (const [node, hasSysT01, hasSysT02] of candidates) {
const ownerId = node.props._ownerId;
if (!ownerId || trash.has(ownerId)) continue;
- if (!index.has(ownerId)) continue;
-
- const metaNode = index.get(ownerId)!;
- const dataNodeId = metaNode.props._ownerId;
- if (!dataNodeId || trash.has(dataNodeId)) continue;
- if (!index.has(dataNodeId)) continue;
-
- // Extract tag IDs (non-SYS children)
- for (const childId of node.children) {
- if (childId.includes("SYS")) continue;
- if (trash.has(childId)) continue;
- if (!index.has(childId)) continue;
+ const metaNode = index.get(ownerId);
+ if (!metaNode) continue;
- // This is a tag ID - resolve the tag name
- const tagNode = index.get(childId);
- const tagName = tagNode?.props.name;
+ if (hasSysT01) {
+ // Supertag tuple
+ const tagId = metaNode.props._ownerId;
+ if (!tagId || trash.has(tagId)) continue;
+ const tagNode = index.get(tagId);
+ if (!tagNode?.props.name) continue;
+ const tagName = tagNode.props.name;
+
+ const superclasses: string[] = [];
+ for (const childId of node.children!) {
+ if (childId.includes("SYS") || trash.has(childId)) continue;
+ const sc = index.get(childId);
+ if (sc?.props.name) superclasses.push(sc.props.name);
+ }
- if (tagName) {
- tagApplications.push({
- tupleNodeId: node.id,
- dataNodeId,
- tagId: childId,
- tagName,
- });
+ supertags.set(tagName, { nodeId: node.id, tagName, tagId, superclasses, color: node.color });
+ if (node.color) tagColors.set(tagName, node.color);
+ } else if (hasSysT02) {
+ // Field tuple
+ const fieldId = metaNode.props._ownerId;
+ if (!fieldId || trash.has(fieldId)) continue;
+ const fieldNode = index.get(fieldId);
+ if (!fieldNode?.props.name) continue;
+ fields.set(fieldNode.props.name, { nodeId: node.id, fieldName: fieldNode.props.name, fieldId });
+ } else if (!trash.has(node.id)) {
+ // Tag application (has SYS_A13 but not T01/T02)
+ const dataNodeId = metaNode.props._ownerId;
+ if (!dataNodeId || trash.has(dataNodeId) || !index.has(dataNodeId)) continue;
+
+ for (const childId of node.children!) {
+ if (childId.includes("SYS") || trash.has(childId) || !index.has(childId)) continue;
+ const tagNode = index.get(childId);
+ const tagName = tagNode?.props.name;
+ if (tagName) {
+ tagApplications.push({ tupleNodeId: node.id, dataNodeId, tagId: childId, tagName });
+ }
}
}
}
+
+ return { nodes: index, trash, supertags, fields, inlineRefs, tagColors, tagApplications };
}
+
}