diff --git a/clis/web/read.js b/clis/web/read.js index d2a3dfeb..8e8f46f2 100644 --- a/clis/web/read.js +++ b/clis/web/read.js @@ -27,7 +27,7 @@ cli({ { name: 'download-images', type: 'boolean', default: true, help: 'Download images locally' }, { name: 'wait', type: 'int', default: 3, help: 'Seconds to wait after page load' }, ], - columns: ['title', 'author', 'publish_time', 'status', 'size'], + columns: ['title', 'author', 'publish_time', 'status', 'size', 'saved'], func: async (page, kwargs) => { const url = kwargs.url; const waitSeconds = kwargs.wait ?? 3; diff --git a/clis/weixin/download.js b/clis/weixin/download.js index 2a502c0c..3cf64f73 100644 --- a/clis/weixin/download.js +++ b/clis/weixin/download.js @@ -179,12 +179,12 @@ cli({ { name: 'output', default: './weixin-articles', help: 'Output directory' }, { name: 'download-images', type: 'boolean', default: true, help: 'Download images locally' }, ], - columns: ['title', 'author', 'publish_time', 'status', 'size'], + columns: ['title', 'author', 'publish_time', 'status', 'size', 'saved'], func: async (page, kwargs) => { const rawUrl = kwargs.url; const url = normalizeWechatUrl(rawUrl); if (!url.startsWith('https://mp.weixin.qq.com/')) { - return [{ title: 'Error', author: '-', publish_time: '-', status: 'invalid URL', size: '-' }]; + return [{ title: 'Error', author: '-', publish_time: '-', status: 'invalid URL', size: '-', saved: '-' }]; } // Navigate and wait for content to load await page.goto(url); @@ -297,6 +297,7 @@ cli({ publish_time: '-', status: 'failed — verification required in WeChat browser page', size: '-', + saved: '-', }]; } return downloadArticle({ diff --git a/skills/opencli-usage/SKILL.md b/skills/opencli-usage/SKILL.md index 14a700c3..ac5b07a8 100644 --- a/skills/opencli-usage/SKILL.md +++ b/skills/opencli-usage/SKILL.md @@ -78,7 +78,7 @@ opencli browser tab close 91F4D22A7C10... 
| **post/create** | Twitter, Jike, Douyin, Weibo | | **AI chat** | Grok, Doubao, ChatGPT, Gemini, Cursor, Codex, NotebookLM | | **finance/stock** | Xueqiu, Yahoo Finance, Barchart, Sina Finance, Bloomberg | -| **web scraping** | `opencli web read --url <url>` — any URL to Markdown | +| **web scraping** | `opencli web read --url <url>` — any URL to Markdown (saves to local file, check `saved` field in output) | | **GitHub/DevOps** | `opencli gh`, `opencli docker`, `opencli vercel` — external CLI passthrough | | **collaboration** | `opencli lark-cli`, `opencli dws`, `opencli wecom-cli` — external CLI passthrough | @@ -153,9 +153,9 @@ Type legend: 🌐 = Browser (needs Chrome login) · ✅ = Public API (no browser | **tiktok** | 🌐 | `explore` `search` `profile` `user` `following` `follow` `unfollow` `like` `unlike` `comment` `save` `unsave` `live` `notifications` `friends` | | **twitter** | 🌐 | `trending` `bookmarks` `search` `profile` `timeline` `lists` `list-tweets` `list-add` `list-remove` `thread` `article` `follow` `unfollow` `bookmark` `unbookmark` `post` `like` `likes` `reply` `delete` `block` `unblock` `followers` `following` `notifications` `hide-reply` `download` `accept` `reply-dm` | | **v2ex** | ✅🌐 | Public: `hot` `latest` `topic` `node` `nodes` `member` `user` `replies` · Browser: `daily` `me` `notifications` | -| **web** | 🌐 | `read` — any URL to Markdown | +| **web** | 🌐 | `read` — any URL to Markdown (saves to local file, not stdout) | | **weibo** | 🌐 | `hot` `search` `feed` `user` `me` `post` `comments` | -| **weixin** | 🌐 | `download` — 公众号 article to Markdown | +| **weixin** | 🌐 | `download` — 公众号 article to Markdown (saves to local file, not stdout) | | **wanfang** | 🌐 | `search` | | **weread** | 🌐 | `shelf` `search` `book` `highlights` `notes` `notebooks` `ranking` | | **wikipedia** | ✅ | `search` `summary` `random` `trending` | diff --git a/skills/opencli-usage/commands.md b/skills/opencli-usage/commands.md index e969a596..62c05db0 100644 --- 
a/skills/opencli-usage/commands.md +++ b/skills/opencli-usage/commands.md @@ -713,7 +713,7 @@ opencli v2ex notifications --limit 10 # 通知 ## Web 🌐 ```bash -opencli web read --url "https://..." # 抓取任意网页并导出为 Markdown +opencli web read --url "https://..." # 抓取任意网页并导出为 Markdown(默认保存到 ./web-articles/,可用 --output 修改) ``` ## Wanfang 🌐 @@ -737,7 +737,7 @@ opencli weibo comments # 微博评论 ## Weixin (微信公众号) 🌐 ```bash -opencli weixin download --url "https://mp.weixin.qq.com/s/xxx" # 下载公众号文章为 Markdown +opencli weixin download --url "https://mp.weixin.qq.com/s/xxx" # 下载公众号文章为 Markdown(默认保存到 ./weixin-articles/,可用 --output 修改) ``` ## WeRead (微信读书) 🌐 diff --git a/src/download/article-download.test.ts b/src/download/article-download.test.ts new file mode 100644 index 00000000..6c6bff02 --- /dev/null +++ b/src/download/article-download.test.ts @@ -0,0 +1,43 @@ +import * as fs from 'node:fs'; +import * as os from 'node:os'; +import * as path from 'node:path'; +import { afterEach, describe, expect, it } from 'vitest'; +import { downloadArticle } from './article-download.js'; + +const tempDirs: string[] = []; + +afterEach(() => { + for (const dir of tempDirs) { + try { + fs.rmSync(dir, { recursive: true, force: true }); + } catch { + // Ignore cleanup errors in tests. + } + } + tempDirs.length = 0; +}); + +describe('downloadArticle', () => { + it('returns the saved markdown file path on success', async () => { + const tempDir = await fs.promises.mkdtemp(path.join(os.tmpdir(), 'opencli-article-')); + tempDirs.push(tempDir); + + const result = await downloadArticle({ + title: 'Test Article', + author: 'Author', + publishTime: '2026-04-20 12:00:00', + sourceUrl: 'https://example.com/article', + contentHtml: '<p>Hello world</p>', + }, { + output: tempDir, + downloadImages: false, + }); + + expect(result).toHaveLength(1); + expect(result[0].status).toBe('success'); + expect(result[0].saved).toMatch(new RegExp(`^${tempDir.replace(/[.*+?^${}()|[\]\\]/g, '\\$&')}`)); + expect(path.extname(result[0].saved)).toBe('.md'); + expect(fs.existsSync(result[0].saved)).toBe(true); + expect(fs.readFileSync(result[0].saved, 'utf8')).toContain('Hello world'); + }); +}); diff --git a/src/download/article-download.ts b/src/download/article-download.ts index dda6af7c..1b30304f 100644 --- a/src/download/article-download.ts +++ b/src/download/article-download.ts @@ -56,6 +56,7 @@ export interface ArticleDownloadResult { publish_time: string; status: string; size: string; + saved: string; } const DEFAULT_LABELS: Required = { @@ -212,6 +213,7 @@ export async function downloadArticle( publish_time: '-', status: 'failed — no title', size: '-', + saved: '-', }]; } @@ -222,6 +224,7 @@ export async function downloadArticle( publish_time: data.publishTime || '-', status: 'failed — no content', size: '-', + saved: '-', }]; } @@ -268,5 +271,6 @@ export async function downloadArticle( publish_time: data.publishTime || '-', status: 'success', size: formatBytes(size), + saved: filePath, }]; }