Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
89 changes: 89 additions & 0 deletions scripts/enrich-linkedin.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
import fs from 'fs';
import path from 'path';
import matter from 'gray-matter';

/** Substrings marking the `## State` lines this script owns and regenerates on every run. */
const MANAGED_STATE_MARKERS = ['- LinkedIn:', '- Current Role:', '- Location:', '- Education:'];

/**
 * Renders a payload value as one line of text suitable for a State bullet.
 * Arrays are joined with ", ", plain objects are emitted as compact JSON, and
 * embedded newlines are collapsed so the bullet never spills onto a second line.
 */
function renderInline(value: unknown): string {
  if (value === null || value === undefined) return '';
  if (Array.isArray(value)) return value.map(renderInline).filter(Boolean).join(', ');
  if (typeof value === 'object') return JSON.stringify(value);
  return String(value).replace(/\s*\r?\n\s*/g, ' ').trim();
}

/**
 * Renders a payload value for the raw-extraction page. Strings pass through
 * untouched; objects and arrays become a fenced JSON block instead of the
 * `[object Object]` that plain template interpolation would produce.
 */
function renderBlock(value: unknown): string {
  if (typeof value === 'string') return value;
  if (value !== null && typeof value === 'object') {
    return '```json\n' + JSON.stringify(value, null, 2) + '\n```';
  }
  return String(value);
}

/**
 * Inserts or refreshes the managed LinkedIn bullets inside the `## State`
 * section of a page body.
 *
 * @param body - Markdown body of the person page (frontmatter already stripped).
 * @param managedBlock - Newline-terminated bullet lines to place at the end of the section.
 * @returns The body with the State section updated, or with a new State section appended.
 */
function upsertStateSection(body: string, managedBlock: string): string {
  // Anchored to line starts so `### State` is not mistaken for the section, and
  // so an empty section directly followed by another `## ` heading stops there.
  // `(?![\s\S])` is end-of-input (plain `$` would match every line end under /m).
  const stateSection = /^## State[ \t]*$\r?\n?([\s\S]*?)(?=^## |(?![\s\S]))/m;
  const found = stateSection.exec(body);

  if (!found) {
    const base = body.length === 0 || body.endsWith('\n') ? body : `${body}\n`;
    return `${base}\n## State\n${managedBlock}`;
  }

  // Drop lines written by a previous run so repeated enrichment does not duplicate them.
  const kept = found[1]
    .split(/\r?\n/)
    .filter(line => !MANAGED_STATE_MARKERS.some(marker => line.includes(marker)))
    .join('\n')
    .trim();
  const merged = kept ? `${kept}\n${managedBlock}` : managedBlock;

  // A replacer function is used on purpose: with a replacement *string*, any
  // `$1`, `$&` or `$$` in existing page text (e.g. "raised $1M") would be
  // expanded as a replacement pattern and corrupt the section.
  return body.replace(stateSection, () => `## State\n${merged}\n`);
}

/**
 * CLI entry point: enriches a person page with LinkedIn data.
 *
 * Reads `<path-to-markdown> <linkedin-url> <json-payload>` from `process.argv`, then
 * 1. writes a raw extraction page to `linkedin-profiles/<slug>-linkedin.md` next to the person page, and
 * 2. adds the LinkedIn URL to the person page's `aliases` frontmatter and refreshes
 *    the LinkedIn / Current Role / Location / Education bullets in its `## State` section.
 *
 * Exits with status 1 on missing arguments, a missing target file, or a payload
 * that is not a JSON object.
 */
async function main(): Promise<void> {
  const [, , targetPath, linkedinUrl, rawJsonPayload] = process.argv;

  if (!targetPath || !linkedinUrl || !rawJsonPayload) {
    console.error('Usage: bun run scripts/enrich-linkedin.ts <path-to-markdown> <linkedin-url> <json-payload>');
    process.exit(1);
  }

  const fullPath = path.resolve(targetPath);
  if (!fs.existsSync(fullPath)) {
    console.error(`File not found: ${fullPath}`);
    process.exit(1);
  }

  // Validate the payload up front so a malformed argument never leaves a half-written page.
  let payload: Record<string, unknown>;
  try {
    const decoded: unknown = JSON.parse(rawJsonPayload);
    if (decoded === null || typeof decoded !== 'object' || Array.isArray(decoded)) {
      throw new Error('payload must be a JSON object');
    }
    payload = decoded as Record<string, unknown>;
  } catch (err: unknown) {
    console.error(`Invalid JSON payload: ${err instanceof Error ? err.message : String(err)}`);
    process.exit(1);
  }

  const parsed = matter(fs.readFileSync(fullPath, 'utf8'));
  const data = parsed.data;

  // Frontmatter may carry `aliases` as a single scalar; normalise to a list before appending.
  const existingAliases: unknown = data.aliases;
  const aliases: unknown[] = Array.isArray(existingAliases)
    ? existingAliases
    : existingAliases
      ? [existingAliases]
      : [];
  if (!aliases.includes(linkedinUrl)) aliases.push(linkedinUrl);
  data.aliases = aliases;

  // The raw profile lives in a `linkedin-profiles/` folder beside the person page.
  const slug = path.basename(fullPath, '.md');
  const profileFileName = `${slug}-linkedin.md`;
  const profileFilePath = path.join(path.dirname(fullPath), 'linkedin-profiles', profileFileName);
  const displayName = data.title || slug;

  // Build the managed State bullets.
  const rolesSummary =
    renderInline(payload.currentRole) || renderInline(payload.headline) || 'Role info unavailable';
  const locationText = renderInline(payload.location);
  const educationText = renderInline(payload.education);
  const location = locationText ? `\n- Location: ${locationText}` : '';
  const education = educationText ? `\n- Education: ${educationText}` : '';
  const newStateContent = `- LinkedIn: [Profile](${linkedinUrl}) | [Raw Extraction](linkedin-profiles/${profileFileName})\n- Current Role: ${rolesSummary}${location}${education}\n`;

  // 1. Create the raw profile file first, so the link added to the main page
  //    below never points at a file that failed to be written.
  fs.mkdirSync(path.dirname(profileFilePath), { recursive: true });

  const profileFrontmatter = {
    type: 'linkedin-profile',
    title: `LinkedIn: ${displayName}`,
    person: `../${slug}.md`,
    url: linkedinUrl,
    extracted_at: new Date().toISOString(),
  };

  const profileSections = Object.entries(payload)
    .map(([key, value]) => `### ${key}\n${renderBlock(value)}\n\n`)
    .join('');
  const profileContent =
    `# LinkedIn Profile: ${displayName}\n\n` +
    `This is the raw LinkedIn extraction for [${displayName}](../${slug}.md).\n\n` +
    `## Extracted Data\n\n` +
    profileSections;

  fs.writeFileSync(profileFilePath, matter.stringify(profileContent, profileFrontmatter));

  // 2. Update the main person page (aliases + State section).
  const updatedBody = upsertStateSection(parsed.content, newStateContent);
  fs.writeFileSync(fullPath, matter.stringify(updatedBody, data));

  console.log(`Successfully updated ${fullPath} and created ${profileFilePath}`);
}

// Run the CLI. On an unexpected failure, log it AND mark the process as failed
// so a calling shell or agent sees a non-zero exit status instead of a silent 0.
main().catch((err: unknown) => {
  console.error(err);
  process.exitCode = 1;
});
1 change: 1 addition & 0 deletions skills/RESOLVER.md
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ This is the dispatcher. Skills are the implementation. **Read the skill file bef
| "What do we know about", "tell me about", "search for", "who is", "background on", "notes on" | `skills/query/SKILL.md` |
| "Who knows who", "relationship between", "connections", "graph query" | `skills/query/SKILL.md` (use graph-query) |
| Creating/enriching a person or company page | `skills/enrich/SKILL.md` |
| "find their linkedin", "enrich linkedin for" | `skills/linkedin-enrichment/SKILL.md` |
| Where does a new file go? Filing rules | `skills/repo-architecture/SKILL.md` |
| Fix broken citations in brain pages | `skills/citation-fixer/SKILL.md` |
| "citation audit", "check citations", "fix citations" | `skills/citation-fixer/SKILL.md` (focused fix). For broader brain health, chain into `skills/maintain/SKILL.md` |
Expand Down
2 changes: 1 addition & 1 deletion skills/enrich/SKILL.md
Original file line number Diff line number Diff line change
Expand Up @@ -131,7 +131,7 @@ Priority order -- stop when you have enough signal for the entity's tier.
- Social media is the highest-texture signal for what someone actually thinks

**4d. People enrichment APIs (Tier 1)**
- LinkedIn data, career history, connections, education
- LinkedIn data, career history, connections, education. For finding and appending LinkedIn profiles specifically, use the `linkedin-enrichment` skill (`skills/linkedin-enrichment/SKILL.md`).

**4e. Company enrichment APIs (Tier 1)**
- Company data, financials, headcount, key hires, recent news
Expand Down
47 changes: 47 additions & 0 deletions skills/linkedin-enrichment/SKILL.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
---
name: linkedin-enrichment
version: 1.1.0
description: |
Finds a person's LinkedIn profile via web search, extracts full profile data
(roles, education, location, summaries), creates a raw extraction record,
and safely updates their brain page frontmatter (aliases) and State section.
triggers:
- "find their linkedin"
- "enrich linkedin for"
- "get linkedin profile"
tools:
- web_search
- read
- exec
mutating: true
---

# LinkedIn Enrichment

This skill handles one job: finding a person's LinkedIn profile, appending the extracted data to their brain page, and retaining the raw extracted data in a separate file.

## Contract
- Modifies a person's markdown file to include their LinkedIn URL.
- Appends `https://www.linkedin.com/in/<handle>` to the `aliases:` block in YAML frontmatter.
- Updates the `State` section with the URL, current roles, location, and education.
- Creates a dedicated raw extraction markdown file at `people/linkedin-profiles/<slug>-linkedin.md`.
- Links the raw profile back to the main person page, and links the main page to the raw profile.

## Phases

### Phase 1: Search
- Use `web_search` with the query `"<Name>" LinkedIn` or `"<Name>" "<Company>" LinkedIn`.
- Extract ALL available structured data from the search snippets (current role, headline, past roles, location, education, summary, connection count, etc.).

### Phase 2: Package Data
- Structure the extracted data into a single JSON object. The script accepts it as one JSON string argument, so quote it for the shell as shown in Phase 3.
- Recommended keys: `currentRole`, `location`, `education`, `headline`, `summary`, `experience`.

### Phase 3: Patch Brain Page & Create Raw Profile
- Execute the patching script:
`bun run scripts/enrich-linkedin.ts <path-to-person.md> <linkedin-url> '<json-string>'`
- The script automatically writes the main page updates and creates the `linkedin-profiles/` file.

## See Also
- [Enrich Skill](../enrich/SKILL.md) - This skill is typically chained from the master enrich skill.
- [Filing Rules](../_brain-filing-rules.md) - Contains standard directory layout.
7 changes: 6 additions & 1 deletion skills/manifest.json
Original file line number Diff line number Diff line change
Expand Up @@ -198,6 +198,11 @@
"name": "voice-note-ingest",
"path": "voice-note-ingest/SKILL.md",
"description": "Ingest voice notes with exact-phrasing preservation (never paraphrased); routes content based on a decision tree across originals/concepts/people/companies/ideas/personal/voice-notes."
},
{
"name": "linkedin-enrichment",
"path": "linkedin-enrichment/SKILL.md",
"description": "Finds a person's LinkedIn profile via web search, extracts current roles, and safely updates their brain page."
}
],
"dependencies": {
Expand All @@ -212,4 +217,4 @@
"resolver": "RESOLVER.md",
"conventions_dir": "conventions/",
"templates_dir": "../templates/"
}
}
Empty file modified src/cli.ts
100644 → 100755
Empty file.
14 changes: 8 additions & 6 deletions src/commands/doctor.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ import * as db from '../core/db.ts';
import { LATEST_VERSION, getIdleBlockers } from '../core/migrate.ts';
import { checkResolvable } from '../core/check-resolvable.ts';
import { autoFixDryViolations, type AutoFixReport, type FixOutcome } from '../core/dry-fix.ts';
import { findRepoRoot } from '../core/repo-root.ts';
import { autoDetectSkillsDir } from '../core/repo-root.ts';
import { loadCompletedMigrations } from '../core/preferences.ts';
import { compareVersions } from './migrations/index.ts';
import { createProgress, startHeartbeat, type ProgressReporter } from '../core/progress.ts';
Expand Down Expand Up @@ -59,9 +59,12 @@ export async function runDoctor(engine: BrainEngine | null, args: string[], dbSo
// --- Filesystem checks (always run, no DB needed) ---

// 1. Resolver health
const repoRoot = findRepoRoot();
if (repoRoot) {
const skillsDir = join(repoRoot, 'skills');
// Use the same auto-detect as `check-resolvable` so doctor sees a
// workspace/skills dir reachable via $OPENCLAW_WORKSPACE or
// ~/.openclaw/workspace, not just a `skills/` walked up from cwd.
const detected = autoDetectSkillsDir();
const skillsDir = detected.dir;
if (skillsDir) {

// --fix: run auto-repair BEFORE checkResolvable so the post-fix scan
// reflects the new state. Auto-fix only targets DRY violations today;
Expand Down Expand Up @@ -99,8 +102,7 @@ export async function runDoctor(engine: BrainEngine | null, args: string[], dbSo
}

// 2. Skill conformance
if (repoRoot) {
const skillsDir = join(repoRoot, 'skills');
if (skillsDir) {
const conformanceResult = checkSkillConformance(skillsDir);
checks.push(conformanceResult);
}
Expand Down
16 changes: 16 additions & 0 deletions src/commands/sync.ts
Original file line number Diff line number Diff line change
Expand Up @@ -934,6 +934,22 @@ export async function runSync(engine: BrainEngine, args: string[]) {
process.exit(1);
}

// --skip-failed: acknowledge pre-existing unacked failures BEFORE the sync
// runs, not only ones the current run produces. Without this, the common
// recovery flow — fix the YAML, re-run sync, then run --skip-failed to
// clear the log — fails to clear anything: when there are no NEW failures
// (because the files are now fixed), the inner ack path in performSync is
// never reached, and "Already up to date." leaves the log untouched. Both
// doctor and printSyncResult instruct users to run --skip-failed in
// exactly this case, so the flag has to handle stale entries up-front.
if (skipFailed) {
const stale = unacknowledgedSyncFailures();
if (stale.length > 0) {
const acked = acknowledgeSyncFailures();
console.log(`Acknowledged ${acked.count} pre-existing failure(s).`);
}
}

// v0.18.0 Step 5: --source resolves to a sources(id) row. Falls back
// to pre-v0.17 global config (sync.repo_path + sync.last_commit) when
// no flag, no env, no dotfile is present.
Expand Down
50 changes: 42 additions & 8 deletions src/core/pglite-engine.ts
Original file line number Diff line number Diff line change
Expand Up @@ -217,7 +217,12 @@ export class PGLiteEngine implements BrainEngine {
* - `links.origin_page_id` column (indexed by `idx_links_origin`) — v0.13
* - `content_chunks.symbol_name` column (indexed by `idx_chunks_symbol_name`) — v0.19
* - `content_chunks.language` column (indexed by `idx_chunks_language`) — v0.19
* - `content_chunks.search_vector` + `parent_symbol_path` + `doc_comment`
* + `symbol_name_qualified` columns (indexed by `idx_chunks_search_vector`
* and `idx_chunks_symbol_qualified`) — v0.20 Cathedral II
* - `pages.deleted_at` column (indexed by `pages_deleted_at_purge_idx`) — v0.26.5
* - `mcp_request_log.agent_name` + `params` + `error_message` columns
* (indexed by `idx_mcp_log_agent_time`) — v0.26.3
*
* **Maintenance contract:** when a future migration adds a column-with-index
* or new-table-with-FK referenced by PGLITE_SCHEMA_SQL, extend this method
Expand Down Expand Up @@ -245,7 +250,13 @@ export class PGLiteEngine implements BrainEngine {
EXISTS (SELECT 1 FROM information_schema.columns
WHERE table_schema='public' AND table_name='content_chunks' AND column_name='symbol_name') AS symbol_name_exists,
EXISTS (SELECT 1 FROM information_schema.columns
WHERE table_schema='public' AND table_name='content_chunks' AND column_name='language') AS language_exists
WHERE table_schema='public' AND table_name='content_chunks' AND column_name='language') AS language_exists,
EXISTS (SELECT 1 FROM information_schema.columns
WHERE table_schema='public' AND table_name='content_chunks' AND column_name='search_vector') AS search_vector_exists,
EXISTS (SELECT 1 FROM information_schema.tables
WHERE table_schema='public' AND table_name='mcp_request_log') AS mcp_log_exists,
EXISTS (SELECT 1 FROM information_schema.columns
WHERE table_schema='public' AND table_name='mcp_request_log' AND column_name='agent_name') AS agent_name_exists
`);
const probe = rows[0] as {
pages_exists: boolean;
Expand All @@ -257,17 +268,22 @@ export class PGLiteEngine implements BrainEngine {
chunks_exists: boolean;
symbol_name_exists: boolean;
language_exists: boolean;
search_vector_exists: boolean;
mcp_log_exists: boolean;
agent_name_exists: boolean;
};

const needsPagesBootstrap = probe.pages_exists && !probe.source_id_exists;
const needsLinksBootstrap = probe.links_exists
&& (!probe.link_source_exists || !probe.origin_page_id_exists);
const needsChunksBootstrap = probe.chunks_exists
&& (!probe.symbol_name_exists || !probe.language_exists);
&& (!probe.symbol_name_exists || !probe.language_exists || !probe.search_vector_exists);
const needsPagesDeletedAt = probe.pages_exists && !probe.deleted_at_exists;
// v0.26.3 (v33): idx_mcp_log_agent_time in PGLITE_SCHEMA_SQL needs agent_name col.
const needsMcpLogBootstrap = probe.mcp_log_exists && !probe.agent_name_exists;

// Fresh installs (no tables yet) and modern brains both no-op.
if (!needsPagesBootstrap && !needsLinksBootstrap && !needsChunksBootstrap && !needsPagesDeletedAt) return;
if (!needsPagesBootstrap && !needsLinksBootstrap && !needsChunksBootstrap && !needsPagesDeletedAt && !needsMcpLogBootstrap) return;

console.log(' Pre-v0.21 brain detected, applying forward-reference bootstrap');

Expand Down Expand Up @@ -305,14 +321,19 @@ export class PGLiteEngine implements BrainEngine {
}

if (needsChunksBootstrap) {
// v26 (content_chunks_code_metadata) adds the full code-chunk metadata
// surface (language, symbol_name, symbol_type, start_line, end_line).
// The bootstrap only adds the two columns the schema blob's partial
// indexes reference (idx_chunks_symbol_name, idx_chunks_language).
// v26 runs later via runMigrations and adds the rest idempotently.
// v26 (content_chunks_code_metadata) adds symbol_name + language; v27
// (Cathedral II) adds parent_symbol_path + doc_comment +
// symbol_name_qualified + search_vector. PGLITE_SCHEMA_SQL has indexes
// (idx_chunks_search_vector, idx_chunks_symbol_qualified) that need the
// v27 columns to exist before they run. v26 + v27 run later via
// runMigrations and are idempotent.
await this.db.exec(`
ALTER TABLE content_chunks ADD COLUMN IF NOT EXISTS language TEXT;
ALTER TABLE content_chunks ADD COLUMN IF NOT EXISTS symbol_name TEXT;
ALTER TABLE content_chunks ADD COLUMN IF NOT EXISTS parent_symbol_path TEXT[];
ALTER TABLE content_chunks ADD COLUMN IF NOT EXISTS doc_comment TEXT;
ALTER TABLE content_chunks ADD COLUMN IF NOT EXISTS symbol_name_qualified TEXT;
ALTER TABLE content_chunks ADD COLUMN IF NOT EXISTS search_vector TSVECTOR;
`);
}

Expand All @@ -325,6 +346,19 @@ export class PGLiteEngine implements BrainEngine {
ALTER TABLE pages ADD COLUMN IF NOT EXISTS deleted_at TIMESTAMPTZ;
`);
}

if (needsMcpLogBootstrap) {
// v33 (admin_dashboard_columns_v0_26_3) adds agent_name + params +
// error_message to mcp_request_log. PGLITE_SCHEMA_SQL's
// `CREATE INDEX idx_mcp_log_agent_time ON mcp_request_log(agent_name,...)`
// crashes without agent_name. v33 runs later via runMigrations and is
// idempotent (and also handles backfill).
await this.db.exec(`
ALTER TABLE mcp_request_log ADD COLUMN IF NOT EXISTS agent_name TEXT;
ALTER TABLE mcp_request_log ADD COLUMN IF NOT EXISTS params JSONB;
ALTER TABLE mcp_request_log ADD COLUMN IF NOT EXISTS error_message TEXT;
`);
}
}

async withReservedConnection<T>(fn: (conn: ReservedConnection) => Promise<T>): Promise<T> {
Expand Down
Loading