diff --git a/.gitignore b/.gitignore index cda71d9..86d98da 100644 --- a/.gitignore +++ b/.gitignore @@ -14,6 +14,7 @@ # Dependency directories (remove the comment below to include it) # vendor/ bin/ +dist/ feishu2md feishu2md4web .env @@ -25,4 +26,6 @@ app node_modules .next .vercel -.vscode/** \ No newline at end of file +.vscode/** + +.claude/ \ No newline at end of file diff --git a/CLAUDE.md b/CLAUDE.md new file mode 100644 index 0000000..cdc72db --- /dev/null +++ b/CLAUDE.md @@ -0,0 +1,143 @@ +# CLAUDE.md + +本文件为 Claude Code (claude.ai/code) 在处理此仓库代码时提供指导。 + +## 项目概述 + +**feishu2md** 是一个基于 Go 的工具,用于将飞书(LarkSuite)文档转换为 Markdown 格式。它支持 CLI 和 Web 服务两种部署模式。该项目使用飞书开放 API(通过 `chyroc/lark` SDK)获取文档内容,并将飞书基于块的文档结构转换为 Markdown。 + +## 开发命令 + +### 构建 +```bash +make build # 构建 CLI 二进制文件(输出: feishu2md) +make server # 构建 Web 服务二进制文件(输出: feishu2md4web) +make all # 构建 CLI 和 Web 二进制文件 +``` + +CLI 二进制文件通过 `-ldflags="-X main.version=v2-"` 在版本字符串中嵌入 git commit hash。 + +### 测试 +```bash +make test # 运行所有测试: go test ./... +make format # 格式化代码: gofmt -l -w . +``` + +测试需要飞书凭证(FEISHU_APP_ID 和 FEISHU_APP_SECRET 环境变量)。测试固件位于 `testdata/` 目录,包含配对的 JSON(API 响应)和 MD(预期输出)文件。 + +### Docker +```bash +make image # 构建 Docker 镜像(标签: feishu2md) +make docker # 在端口 8080 上运行 Docker 容器 +``` + +Web 服务需要环境变量: `FEISHU_APP_ID`、`FEISHU_APP_SECRET`、`GIN_MODE`。 + +## 架构 + +代码库遵循三层架构: + +### 1. 接口层(CLI 或 Web) +- **cmd/** - 使用 `urfave/cli/v2` 框架的 CLI 应用 + - `cmd/main.go`: 入口点,包含 config 和 download 命令 + - `cmd/config.go`: 读写配置到 `~/.config/feishu2md/config.json` + - `cmd/download.go`: 协调单个/批量/wiki 下载,支持并发 +- **web/** - 使用 `gin-gonic/gin` 的 Web 服务 + - `web/main.go`: 端口 8080 上的 HTTP 服务器 + - `web/download.go`: `/download?url=` 处理器,返回 ZIP 或 MD 文件 + +### 2. 
核心业务逻辑 +- **core/client.go**: 通过 `chyroc/lark` SDK 封装飞书 API 调用 + - `GetDocxContent()`: 获取文档元数据和所有块(处理分页) + - `GetWikiNodeInfo()`: 检索 wiki 节点元数据 + - `GetWikiNodeList()`: 递归列出所有子节点(跟踪 `previousPageToken` 以防止无限循环) + - `GetDriveFolderFileList()`: 分页列出文件夹内容 + - `DownloadImage()`: 通过 token 下载图片到文件系统 +- **core/parser.go**: 将飞书块结构转换为 Markdown + - `Parse()`: 主入口点,构建块映射并递归处理 + - 处理嵌套块(标题、列表、表格、代码、图片等) + - 在 `parser.ImgTokens` 中收集图片 token 以供批量下载 + - 使用 `github.com/88250/lute` 进行最终的 Markdown 格式化 +- **core/config.go**: 配置结构和文件持久化 + +### 3. 数据流 + +``` +用户输入(URL) → 验证(utils/url.go) + ↓ + 客户端(飞书 API 调用,带速率限制) + ↓ + 解析器(飞书块 → Markdown) + ↓ + 输出(文件或带图片的 ZIP) +``` + +### 关键架构模式 + +**分页处理**: 所有返回列表的 API 调用都使用 `HasMore` 和 `PageToken` 字段处理分页。Wiki 节点列表跟踪 `previousPageToken` 以防止无限循环(最近的 bug 修复)。 + +**速率限制**: 客户端使用 `lark_rate_limiter.Wait(4, 4)` 中间件,限制为每 4 秒 4 个请求。 + +**并发**: +- 批量文件夹下载: 使用 `sync.WaitGroup` 和 goroutine 并行下载文档 +- Wiki 下载: 使用信号量模式,最大并发数为 10,以防止 API 限流 + +**块解析**: 飞书文档是块的树结构。解析器: +1. 构建 `blockMap`(ID → block)以实现 O(1) 子块查找 +2. 递归处理每种块类型(Page → Heading → Text → List 等) +3. 处理文本块内的内联元素(粗体、斜体、链接、公式、提及) +4. 将表格转换为 HTML(支持 colspan/rowspan) +5. 
在解析过程中收集图片 token,之后下载 + +## 重要实现细节 + +### 配置 +- **CLI**: 配置存储在 `~/.config/feishu2md/config.json`(XDG 配置目录) +- **Web**: 环境变量(`FEISHU_APP_ID`、`FEISHU_APP_SECRET`) +- 配置结构包括 `feishu`(凭证)和 `output`(image_dir、title_as_filename、use_html_tags、skip_img_download) + +### URL 模式 +工具通过 `utils/url.go` 中的正则表达式验证三种 URL 类型: +- 文档: `https://domain.feishu.cn/docx/` +- 文件夹: `https://domain.feishu.cn/drive/folder/` +- Wiki: `https://domain.feishu.cn/wiki/settings/` + +### 文档块类型 +解析器处理以下飞书块类型(参见 `core/parser.go`): +- Page、Heading(h1-h9)、Text、Code、Quote、Todo、Callout +- Bullet/Ordered/Numbered 列表(带嵌套子项) +- Table(渲染为 HTML,使用 tablewriter) +- Image(基于 token,单独下载) +- Divider、Grid(列布局) + +内联元素: TextRun(样式文本)、Mention(用户/文档)、Equation(LaTeX) + +### 文件名清理 +当使用 `title_as_filename` 选项时,`utils.SanitizeFileName()` 会从文档标题中删除无效字符(`/\:*?"<>|`)。 + +## CI/CD + +`.github/workflows/` 中的 GitHub Actions 工作流: +- **unittest.yaml**(PR 时): gofmt 检查、测试、构建验证 +- **release.yaml**(发布时): 跨平台构建(Linux/Windows/Darwin amd64+arm64)、Docker Hub 推送(`wwwsine/feishu2md`) + +使用 `wangyoucao577/go-release-action` 进行矩阵构建,支持 UPX 压缩。 + +## 依赖项 + +主要外部依赖: +- `github.com/chyroc/lark@v0.0.98` - 飞书 SDK(API 封装) +- `github.com/chyroc/lark_rate_limiter@v0.1.0` - 速率限制中间件 +- `github.com/urfave/cli/v2@v2.6.0` - CLI 框架 +- `github.com/gin-gonic/gin@v1.9.0` - Web 框架 +- `github.com/88250/lute@v1.7.3` - Markdown 格式化器 +- `github.com/olekukonko/tablewriter@v0.0.5` - ASCII 表格渲染 + +## 项目状态 + +社区维护项目(原作者不再使用飞书)。最近的修复包括: +- Wiki 分页无限循环防护(#139) +- 速率限制增加(#137) +- 文件名清理(#117) + +欢迎提交 PR,活跃的维护者可能成为协调者。 diff --git a/Makefile b/Makefile index 9ec32ce..9866437 100644 --- a/Makefile +++ b/Makefile @@ -35,3 +35,45 @@ format: .PHONY: all all: build server @echo "Build all done" + +# 跨平台编译配置 +VERSION := $(shell git describe --tags --always --dirty 2>/dev/null || echo "v2-dev") +LDFLAGS := -s -w -X main.version=$(VERSION) +OUTPUT_DIR := dist + +.PHONY: cross-build +cross-build: clean-dist + @echo "Building for all platforms..." 
+ @mkdir -p $(OUTPUT_DIR) + # Linux amd64 + CGO_ENABLED=0 GOOS=linux GOARCH=amd64 go build -ldflags="$(LDFLAGS)" -o $(OUTPUT_DIR)/feishu2md-linux-amd64 cmd/*.go + # Linux arm64 + CGO_ENABLED=0 GOOS=linux GOARCH=arm64 go build -ldflags="$(LDFLAGS)" -o $(OUTPUT_DIR)/feishu2md-linux-arm64 cmd/*.go + # Windows amd64 + CGO_ENABLED=0 GOOS=windows GOARCH=amd64 go build -ldflags="$(LDFLAGS)" -o $(OUTPUT_DIR)/feishu2md-windows-amd64.exe cmd/*.go + # Darwin amd64 + CGO_ENABLED=0 GOOS=darwin GOARCH=amd64 go build -ldflags="$(LDFLAGS)" -o $(OUTPUT_DIR)/feishu2md-darwin-amd64 cmd/*.go + # Darwin arm64 + CGO_ENABLED=0 GOOS=darwin GOARCH=arm64 go build -ldflags="$(LDFLAGS)" -o $(OUTPUT_DIR)/feishu2md-darwin-arm64 cmd/*.go + @echo "Cross-build completed! Binaries in $(OUTPUT_DIR)/" + +.PHONY: cross-build-linux +cross-build-linux: clean-dist + @mkdir -p $(OUTPUT_DIR) + CGO_ENABLED=0 GOOS=linux GOARCH=amd64 go build -ldflags="$(LDFLAGS)" -o $(OUTPUT_DIR)/feishu2md-linux-amd64 cmd/*.go + CGO_ENABLED=0 GOOS=linux GOARCH=arm64 go build -ldflags="$(LDFLAGS)" -o $(OUTPUT_DIR)/feishu2md-linux-arm64 cmd/*.go + +.PHONY: cross-build-windows +cross-build-windows: clean-dist + @mkdir -p $(OUTPUT_DIR) + CGO_ENABLED=0 GOOS=windows GOARCH=amd64 go build -ldflags="$(LDFLAGS)" -o $(OUTPUT_DIR)/feishu2md-windows-amd64.exe cmd/*.go + +.PHONY: cross-build-darwin +cross-build-darwin: clean-dist + @mkdir -p $(OUTPUT_DIR) + CGO_ENABLED=0 GOOS=darwin GOARCH=amd64 go build -ldflags="$(LDFLAGS)" -o $(OUTPUT_DIR)/feishu2md-darwin-amd64 cmd/*.go + CGO_ENABLED=0 GOOS=darwin GOARCH=arm64 go build -ldflags="$(LDFLAGS)" -o $(OUTPUT_DIR)/feishu2md-darwin-arm64 cmd/*.go + +.PHONY: clean-dist +clean-dist: + rm -rf $(OUTPUT_DIR) diff --git a/README.md b/README.md index ce87f5c..0258c65 100644 --- a/README.md +++ b/README.md @@ -29,6 +29,100 @@ - [获取知识空间节点信息](https://open.feishu.cn/document/server-docs/docs/wiki-v2/space-node/get_node),「查看知识库」权限 `wiki:wiki:readonly` - 打开凭证与基础信息,获取 App ID 和 App Secret +## 鉴权方式 + 
+feishu2md 支持两种鉴权方式:**应用鉴权**和**用户鉴权**。 + +### 应用鉴权(默认) + +使用飞书应用的 App ID 和 App Secret 进行鉴权。 + +**适用场景**: +- 批量下载多个文档 +- 长期使用的自动化任务 +- 访问应用有权限的文档 + +**配置方法**: +```bash +feishu2md config --appId "cli_xxxxx" --appSecret "xxxxx" --authType "app" +``` + +**配置文件示例**: +```json +{ + "feishu": { + "app_id": "cli_xxxxx", + "app_secret": "xxxxx", + "auth_type": "app" + }, + "output": { + "image_dir": "static", + "title_as_filename": false, + "use_html_tags": false, + "skip_img_download": false + } +} +``` + +### 用户鉴权 + +使用个人用户访问令牌(User Access Token)进行鉴权。 + +**适用场景**: +- 访问个人私有文档 +- 机器人权限不足时的替代方案 +- 临时下载任务 + +**配置方法**: +```bash +# 设置用户访问令牌和鉴权类型 +feishu2md config --userAccessToken "u-xxxxx" --authType "user" + +# 或使用简写 +feishu2md config --uat "u-xxxxx" --authType "user" +``` + +**配置文件示例**: +```json +{ + "feishu": { + "user_access_token": "u-xxxxx", + "auth_type": "user" + }, + "output": { + "image_dir": "static", + "title_as_filename": false, + "use_html_tags": false, + "skip_img_download": false + } +} +``` + +**获取用户访问令牌**: + +用户访问令牌需要通过飞书开放平台的 OAuth 2.0 授权流程获取。详细步骤请参考: +- [飞书开放平台 - 获取 user_access_token](https://open.feishu.cn/document/server-docs/api-call-guide/calling-process/get-access-token) + +**注意事项**: +- 用户访问令牌有效期较短(通常为 2 小时),过期后需要重新获取 +- 令牌过期时,使用 `feishu2md config --uat "new-token"` 更新配置 +- 不要将包含令牌的配置文件提交到版本控制系统 + +### 切换鉴权方式 + +配置文件可以同时保存两种鉴权方式的凭证,通过 `auth_type` 字段灵活切换: + +```bash +# 切换到用户鉴权 +feishu2md config --authType "user" + +# 切换回应用鉴权 +feishu2md config --authType "app" + +# 查看当前配置 +feishu2md config +``` + ## 如何使用 注意:飞书旧版文档的下载工具已决定不再维护,但分支 [v1_support](https://github.com/Wsine/feishu2md/tree/v1_support) 仍可使用,对应的归档为 [v1.4.0](https://github.com/Wsine/feishu2md/releases/tag/v1.4.0),请知悉。 @@ -75,10 +169,10 @@ $ feishu2md dl -h NAME: feishu2md download - Download feishu/larksuite document to markdown file - + USAGE: feishu2md download [command options] - + OPTIONS: --output value, -o value Specify the output directory for the markdown files (default: "./") --dump Dump 
json response of the OPEN API (default: false) @@ -86,6 +180,23 @@ --wiki Download all documents within the wiki. (default: false) --help, -h show help (default: false) + $ feishu2md sync -h + NAME: + feishu2md sync - Sync feishu/larksuite folder or wiki to local directory + + USAGE: + feishu2md sync [command options] [url] + + OPTIONS: + --output value, -o value Specify the output directory (default: "./") + --incremental, -i Enable incremental sync (default: true) + --force, -f Force re-download all documents (default: false) + --include value Only sync directories matching patterns (comma-separated) + --exclude value Exclude directories matching patterns (comma-separated) + --concurrency value, -c Maximum concurrent downloads (default: 5) + --dump Dump json response of the OPEN API (default: false) + --help, -h show help (default: false) + ``` **生成配置文件** @@ -130,6 +241,80 @@ +
+ sync 命令(推荐) + + `sync` 命令是推荐的批量同步方式,支持增量下载、目录过滤等高级功能。 + + **基本用法** + + ```bash + # 同步知识库(自动检测 URL 类型) + $ feishu2md sync "https://domain.feishu.cn/wiki/settings/123456789101112" + + # 同步云盘文件夹 + $ feishu2md sync "https://domain.feishu.cn/drive/folder/foldertoken" + + # 指定输出目录 + $ feishu2md sync -o ./output "https://domain.feishu.cn/wiki/settings/xxx" + ``` + + **增量同步** + + sync 命令默认开启增量模式,只下载有更新的文档: + + ```bash + # 增量同步(默认行为,跳过未修改的文档) + $ feishu2md sync "https://domain.feishu.cn/wiki/settings/xxx" + + # 强制重新下载所有文档 + $ feishu2md sync -f "https://domain.feishu.cn/wiki/settings/xxx" + ``` + + **目录过滤** + + 支持通过 `--include` 和 `--exclude` 参数过滤目录: + + ```bash + # 仅同步包含"文档"的目录 + $ feishu2md sync --include "*文档*" "https://domain.feishu.cn/wiki/settings/xxx" + + # 排除草稿和测试目录 + $ feishu2md sync --exclude "*草稿*,*测试*" "https://domain.feishu.cn/wiki/settings/xxx" + + # 组合使用:仅同步技术相关目录,但排除草稿 + $ feishu2md sync --include "技术*,API*" --exclude "*草稿*" "https://domain.feishu.cn/wiki/settings/xxx" + ``` + + 通配符语法: + - `*` 匹配任意字符 + - `?` 匹配单个字符 + - `[abc]` 匹配指定字符 + + **并发控制** + + ```bash + # 设置并发数为 10(默认为 5) + $ feishu2md sync -c 10 "https://domain.feishu.cn/wiki/settings/xxx" + ``` + + **同步配置持久化** + + sync 命令会自动在输出目录保存同步配置(`.feishu2md.sync.json`),后续可以省略 URL: + + ```bash + # 首次同步 + $ feishu2md sync -o ./docs "https://domain.feishu.cn/wiki/settings/xxx" + + # 后续同步(使用已保存的配置) + $ feishu2md sync -o ./docs + + # 或者在输出目录下直接运行 + $ cd ./docs && feishu2md sync + ``` + +
+
Docker版本 @@ -162,3 +347,97 @@ - [chyroc/lark](https://github.com/chyroc/lark) - [chyroc/lark_docs_md](https://github.com/chyroc/lark_docs_md) + +## 开发指南 + +### 环境要求 + +- Go 1.21+ + +### 构建 + +```bash +# 构建 CLI +make build + +# 构建 Web 服务 +make server + +# 构建全部 +make all +``` + +### 测试 + +```bash +# 运行所有测试 +make test + +# 代码格式化 +make format +``` + +注意:部分测试需要有效的飞书凭证(环境变量 `FEISHU_APP_ID` 和 `FEISHU_APP_SECRET`)。 + +### 跨平台编译 + +```bash +# 编译所有平台 +make cross-build + +# 仅编译 Linux +make cross-build-linux + +# 仅编译 Windows +make cross-build-windows + +# 仅编译 macOS +make cross-build-darwin +``` + +支持的平台: +| 平台 | 架构 | +|------|------| +| Linux | amd64, arm64 | +| Windows | amd64 | +| macOS | amd64, arm64 | + +### Docker + +```bash +# 构建镜像 +make image + +# 运行容器 +make docker +``` + +### 项目结构 + +``` +feishu2md/ +├── cmd/ # CLI 入口和命令 +│ ├── main.go # 命令行入口 +│ ├── download.go # 下载命令 +│ ├── sync.go # 同步命令 +│ └── config.go # 配置命令 +├── core/ # 核心业务逻辑 +│ ├── client.go # 飞书 API 客户端 +│ ├── parser.go # 文档解析器 +│ ├── filter.go # 目录过滤器 +│ ├── cache.go # 缓存管理 +│ ├── config.go # 全局配置 +│ └── sync_config.go # 同步配置 +├── utils/ # 工具函数 +├── web/ # Web 服务 +└── testdata/ # 测试数据 +``` + +### 贡献 + +欢迎提交 PR!请确保: + +1. 代码通过 `make format` 格式化 +2. 所有测试通过 `make test` +3. 
新功能包含相应的测试用例 + diff --git a/cmd/config.go b/cmd/config.go index ff35665..3b809ea 100644 --- a/cmd/config.go +++ b/cmd/config.go @@ -9,8 +9,10 @@ import ( ) type ConfigOpts struct { - appId string - appSecret string + appId string + appSecret string + userAccessToken string + authType string } var configOpts = ConfigOpts{} @@ -23,23 +25,55 @@ func handleConfigCommand() error { fmt.Println("Configuration file on: " + configPath) if _, err := os.Stat(configPath); os.IsNotExist(err) { + // 创建新配置 config := core.NewConfig(configOpts.appId, configOpts.appSecret) + + // 设置用户鉴权相关字段 + if configOpts.userAccessToken != "" { + config.Feishu.UserAccessToken = configOpts.userAccessToken + } + if configOpts.authType != "" { + config.Feishu.AuthType = configOpts.authType + } + + // 验证配置 + if err = config.Feishu.Validate(); err != nil { + return err + } + if err = config.WriteConfig2File(configPath); err != nil { return err } fmt.Println(utils.PrettyPrint(config)) } else { + // 更新现有配置 config, err := core.ReadConfigFromFile(configPath) if err != nil { return err } + + // 更新字段 if configOpts.appId != "" { config.Feishu.AppId = configOpts.appId } if configOpts.appSecret != "" { config.Feishu.AppSecret = configOpts.appSecret } - if configOpts.appId != "" || configOpts.appSecret != "" { + if configOpts.userAccessToken != "" { + config.Feishu.UserAccessToken = configOpts.userAccessToken + } + if configOpts.authType != "" { + config.Feishu.AuthType = configOpts.authType + } + + // 验证配置 + if err = config.Feishu.Validate(); err != nil { + return err + } + + // 如果有任何字段被修改,保存配置 + if configOpts.appId != "" || configOpts.appSecret != "" || + configOpts.userAccessToken != "" || configOpts.authType != "" { if err = config.WriteConfig2File(configPath); err != nil { return err } diff --git a/cmd/download.go b/cmd/download.go index a73064f..16c9e9f 100644 --- a/cmd/download.go +++ b/cmd/download.go @@ -53,9 +53,20 @@ func downloadDocument(ctx context.Context, client *core.Client, url string, opts 
docx, blocks, err := client.GetDocxContent(ctx, docToken) utils.CheckErr(err) + title := docx.Title + + // 确定输出文件名 + var mdName string + if dlConfig.Output.TitleAsFilename { + mdName = fmt.Sprintf("%s.md", utils.SanitizeFileName(title)) + } else { + mdName = fmt.Sprintf("%s.md", docToken) + } + outputPath := filepath.Join(opts.outputDir, mdName) + + // 继续执行下载流程 parser := core.NewParser(dlConfig.Output) - title := docx.Title markdown := parser.ParseDocxContent(docx, blocks) if !dlConfig.Output.SkipImgDownload { @@ -85,7 +96,7 @@ func downloadDocument(ctx context.Context, client *core.Client, url string, opts if dlOpts.dump { jsonName := fmt.Sprintf("%s.json", docToken) - outputPath := filepath.Join(opts.outputDir, jsonName) + jsonOutputPath := filepath.Join(opts.outputDir, jsonName) data := struct { Document *lark.DocxDocument `json:"document"` Blocks []*lark.DocxBlock `json:"blocks"` @@ -95,22 +106,17 @@ func downloadDocument(ctx context.Context, client *core.Client, url string, opts } pdata := utils.PrettyPrint(data) - if err = os.WriteFile(outputPath, []byte(pdata), 0o644); err != nil { + if err = os.WriteFile(jsonOutputPath, []byte(pdata), 0o644); err != nil { return err } - fmt.Printf("Dumped json response to %s\n", outputPath) + fmt.Printf("Dumped json response to %s\n", jsonOutputPath) } // Write to markdown file - mdName := fmt.Sprintf("%s.md", docToken) - if dlConfig.Output.TitleAsFilename { - mdName = fmt.Sprintf("%s.md", utils.SanitizeFileName(title)) - } - outputPath := filepath.Join(opts.outputDir, mdName) if err = os.WriteFile(outputPath, []byte(result), 0o644); err != nil { return err } - fmt.Printf("Downloaded markdown file to %s\n", outputPath) + fmt.Printf("✓ Downloaded markdown file to %s\n", outputPath) return nil } @@ -134,7 +140,11 @@ func downloadDocuments(ctx context.Context, client *core.Client, url string) err if err != nil { return err } - opts := DownloadOpts{outputDir: folderPath, dump: dlOpts.dump, batch: false} + opts := DownloadOpts{ 
+ outputDir: folderPath, + dump: dlOpts.dump, + batch: false, + } for _, file := range files { if file.Type == "folder" { _folderPath := filepath.Join(folderPath, file.Name) @@ -213,7 +223,11 @@ func downloadWiki(ctx context.Context, client *core.Client, url string) error { } } if n.ObjType == "docx" { - opts := DownloadOpts{outputDir: folderPath, dump: dlOpts.dump, batch: false} + opts := DownloadOpts{ + outputDir: folderPath, + dump: dlOpts.dump, + batch: false, + } wg.Add(1) semaphore <- struct{}{} go func(_url string) { @@ -223,7 +237,6 @@ func downloadWiki(ctx context.Context, client *core.Client, url string) error { wg.Done() <-semaphore }(prefixURL + "/wiki/" + n.NodeToken) - // downloadDocument(ctx, client, prefixURL+"/wiki/"+n.NodeToken, &opts) } } return nil @@ -257,18 +270,15 @@ func handleDownloadCommand(url string) error { dlConfig = *config // Instantiate the client - client := core.NewClient( - dlConfig.Feishu.AppId, dlConfig.Feishu.AppSecret, - ) + client := core.NewClient(dlConfig.Feishu) ctx := context.Background() + // 执行下载 if dlOpts.batch { return downloadDocuments(ctx, client, url) - } - - if dlOpts.wiki { + } else if dlOpts.wiki { return downloadWiki(ctx, client, url) + } else { + return downloadDocument(ctx, client, url, &dlOpts) } - - return downloadDocument(ctx, client, url, &dlOpts) } diff --git a/cmd/main.go b/cmd/main.go index f045038..243253e 100644 --- a/cmd/main.go +++ b/cmd/main.go @@ -27,15 +27,28 @@ func main() { &cli.StringFlag{ Name: "appId", Value: "", - Usage: "Set app id for the OPEN API", + Usage: "Set app id for the OPEN API (app auth)", Destination: &configOpts.appId, }, &cli.StringFlag{ Name: "appSecret", Value: "", - Usage: "Set app secret for the OPEN API", + Usage: "Set app secret for the OPEN API (app auth)", Destination: &configOpts.appSecret, }, + &cli.StringFlag{ + Name: "userAccessToken", + Aliases: []string{"uat"}, + Value: "", + Usage: "Set user access token for the OPEN API (user auth)", + Destination: 
&configOpts.userAccessToken, + }, + &cli.StringFlag{ + Name: "authType", + Value: "", + Usage: "Set authentication type: 'app' or 'user'", + Destination: &configOpts.authType, + }, }, Action: func(ctx *cli.Context) error { return handleConfigCommand() @@ -82,6 +95,71 @@ func main() { } }, }, + { + Name: "sync", + Usage: "Sync feishu/larksuite folder or wiki to local directory (with incremental download, filtering)", + Flags: []cli.Flag{ + &cli.StringFlag{ + Name: "output", + Aliases: []string{"o"}, + Value: "./", + Usage: "Specify the output directory for the markdown files", + Destination: &syncOpts.outputDir, + }, + &cli.BoolFlag{ + Name: "incremental", + Aliases: []string{"i"}, + Value: true, + Usage: "Enable incremental sync (skip unchanged documents, enabled by default)", + Destination: &syncOpts.incremental, + }, + &cli.BoolFlag{ + Name: "force", + Aliases: []string{"f"}, + Value: false, + Usage: "Force re-download all documents (ignore cache)", + Destination: &syncOpts.force, + }, + &cli.StringFlag{ + Name: "include", + Value: "", + Usage: "Only sync directories matching patterns (comma-separated, supports wildcards like *test*)", + Destination: &syncOpts.include, + }, + &cli.StringFlag{ + Name: "exclude", + Value: "", + Usage: "Exclude directories matching patterns (comma-separated, supports wildcards like *draft*)", + Destination: &syncOpts.exclude, + }, + &cli.IntFlag{ + Name: "concurrency", + Aliases: []string{"c"}, + Value: 5, + Usage: "Maximum number of concurrent downloads", + Destination: &syncOpts.concurrency, + }, + &cli.BoolFlag{ + Name: "dump", + Value: false, + Usage: "Dump json response of the OPEN API", + Destination: &syncOpts.dump, + }, + }, + ArgsUsage: "[url]", + Action: func(ctx *cli.Context) error { + // 参数验证:force 和 incremental 互斥 + if syncOpts.force && syncOpts.incremental { + // force 模式下自动禁用 incremental + syncOpts.incremental = false + } + url := "" + if ctx.NArg() > 0 { + url = ctx.Args().First() + } + return handleSyncCommand(url) 
+ }, + }, }, } diff --git a/cmd/sync.go b/cmd/sync.go new file mode 100644 index 0000000..9530e49 --- /dev/null +++ b/cmd/sync.go @@ -0,0 +1,475 @@ +package main + +import ( + "context" + "fmt" + "os" + "path/filepath" + "strings" + "sync" + + "github.com/88250/lute" + "github.com/Wsine/feishu2md/core" + "github.com/Wsine/feishu2md/utils" + "github.com/chyroc/lark" +) + +type SyncOpts struct { + outputDir string + incremental bool // 增量同步,默认开启 + force bool // 强制重新下载 + include string // 仅下载匹配的目录(白名单,逗号分隔) + exclude string // 排除匹配的目录(黑名单,逗号分隔) + concurrency int // 并发数 + dump bool // 导出 JSON 响应 +} + +var syncOpts = SyncOpts{} +var syncConfig core.Config + +// syncDocument 同步单个文档 +func syncDocument(ctx context.Context, client *core.Client, url string, opts *SyncOpts, cacheManager *core.CacheManager) error { + // Validate the url to download + docType, docToken, err := utils.ValidateDocumentURL(url) + if err != nil { + return err + } + + // for a wiki page, we need to renew docType and docToken first + if docType == "wiki" { + node, err := client.GetWikiNodeInfo(ctx, docToken) + if err != nil { + err = fmt.Errorf("GetWikiNodeInfo err: %v for %v", err, url) + } + utils.CheckErr(err) + docType = node.ObjType + docToken = node.ObjToken + } + + // Process the download + docx, blocks, err := client.GetDocxContent(ctx, docToken) + utils.CheckErr(err) + + title := docx.Title + revisionID := docx.RevisionID + + // 确定输出文件名 + var mdName string + if syncConfig.Output.TitleAsFilename { + mdName = fmt.Sprintf("%s.md", utils.SanitizeFileName(title)) + } else { + mdName = fmt.Sprintf("%s.md", docToken) + } + outputPath := filepath.Join(opts.outputDir, mdName) + + // 增量下载逻辑:检查是否需要下载 + if opts.incremental && !opts.force && cacheManager != nil { + shouldDownload, skipReason := cacheManager.ShouldDownload( + docToken, + revisionID, + outputPath, + ) + + if !shouldDownload { + fmt.Printf("⊘ 跳过: %s - %s\n", title, skipReason) + // 即使跳过下载,也要更新缓存(用于建立缓存映射) + cacheManager.UpdateDocument( + 
docToken, + revisionID, + title, + mdName, + docType, + ) + return nil + } + } + + // 继续执行下载流程 + parser := core.NewParser(syncConfig.Output) + + markdown := parser.ParseDocxContent(docx, blocks) + + if !syncConfig.Output.SkipImgDownload { + for _, imgToken := range parser.ImgTokens { + localLink, err := client.DownloadImage( + ctx, imgToken, filepath.Join(opts.outputDir, syncConfig.Output.ImageDir), + ) + if err != nil { + return err + } + markdown = strings.Replace(markdown, imgToken, localLink, 1) + } + } + + // Format the markdown document + engine := lute.New(func(l *lute.Lute) { + l.RenderOptions.AutoSpace = true + }) + result := engine.FormatStr("md", markdown) + + // Handle the output directory and name + if _, err := os.Stat(opts.outputDir); os.IsNotExist(err) { + if err := os.MkdirAll(opts.outputDir, 0o755); err != nil { + return err + } + } + + if opts.dump { + jsonName := fmt.Sprintf("%s.json", docToken) + jsonOutputPath := filepath.Join(opts.outputDir, jsonName) + data := struct { + Document *lark.DocxDocument `json:"document"` + Blocks []*lark.DocxBlock `json:"blocks"` + }{ + Document: docx, + Blocks: blocks, + } + pdata := utils.PrettyPrint(data) + + if err = os.WriteFile(jsonOutputPath, []byte(pdata), 0o644); err != nil { + return err + } + fmt.Printf("Dumped json response to %s\n", jsonOutputPath) + } + + // Write to markdown file + if err = os.WriteFile(outputPath, []byte(result), 0o644); err != nil { + return err + } + fmt.Printf("✓ 已同步: %s\n", outputPath) + + // 更新缓存 + if cacheManager != nil { + cacheManager.UpdateDocument( + docToken, + revisionID, + title, + mdName, + docType, + ) + } + + return nil +} + +// syncFolder 同步云盘文件夹 +func syncFolder(ctx context.Context, client *core.Client, url string, opts *SyncOpts, cacheManager *core.CacheManager, filter *core.NodeFilter) error { + // Validate the url to download + folderToken, err := utils.ValidateFolderURL(url) + if err != nil { + return err + } + fmt.Println("Captured folder token:", 
folderToken) + + // Error channel and wait group + errChan := make(chan error) + wg := sync.WaitGroup{} + semaphore := make(chan struct{}, opts.concurrency) + + // Recursively go through the folder and download the documents + var processFolder func(ctx context.Context, folderPath, folderToken string) error + processFolder = func(ctx context.Context, folderPath, folderToken string) error { + files, err := client.GetDriveFolderFileList(ctx, nil, &folderToken) + if err != nil { + return err + } + docOpts := &SyncOpts{ + outputDir: folderPath, + dump: opts.dump, + incremental: opts.incremental, + force: opts.force, + concurrency: opts.concurrency, + } + for _, file := range files { + if file.Type == "folder" { + // 检查文件夹是否应该被下载 + if filter != nil && !filter.ShouldDownloadFolder(folderPath, file.Name) { + fmt.Printf("⊘ 跳过文件夹: %s\n", file.Name) + continue + } + _folderPath := filepath.Join(folderPath, file.Name) + if err := processFolder(ctx, _folderPath, file.Token); err != nil { + return err + } + } else if file.Type == "docx" { + // 检查文档的父目录是否被排除 + if filter != nil && !filter.ShouldDownloadDocument(folderPath) { + continue + } + // concurrently download the document + wg.Add(1) + semaphore <- struct{}{} + go func(_url string) { + defer func() { + wg.Done() + <-semaphore + }() + if err := syncDocument(ctx, client, _url, docOpts, cacheManager); err != nil { + errChan <- err + } + }(file.URL) + } + } + return nil + } + if err := processFolder(ctx, opts.outputDir, folderToken); err != nil { + return err + } + + // Wait for all the downloads to finish + go func() { + wg.Wait() + close(errChan) + }() + for err := range errChan { + return err + } + return nil +} + +// syncWiki 同步知识库 +func syncWiki(ctx context.Context, client *core.Client, url string, opts *SyncOpts, cacheManager *core.CacheManager, filter *core.NodeFilter) error { + prefixURL, spaceID, err := utils.ValidateWikiURL(url) + if err != nil { + return err + } + + folderPath, err := client.GetWikiName(ctx, 
spaceID) + if err != nil { + return err + } + if folderPath == "" { + return fmt.Errorf("failed to GetWikiName") + } + + errChan := make(chan error) + + wg := sync.WaitGroup{} + semaphore := make(chan struct{}, opts.concurrency) + + var downloadWikiNode func(ctx context.Context, + client *core.Client, + spaceID string, + parentPath string, + parentNodeToken *string) error + + downloadWikiNode = func(ctx context.Context, + client *core.Client, + spaceID string, + folderPath string, + parentNodeToken *string) error { + nodes, err := client.GetWikiNodeList(ctx, spaceID, parentNodeToken) + if err != nil { + return err + } + for _, n := range nodes { + if n.HasChild { + // 检查目录是否应该被下载 + if filter != nil { + include, skippedByParent := filter.ShouldIncludeNode(folderPath, n.Title) + if !include { + if skippedByParent { + fmt.Printf("⊘ 跳过目录(父目录已排除): %s\n", n.Title) + } else { + fmt.Printf("⊘ 跳过目录: %s\n", n.Title) + } + continue + } + } + _folderPath := filepath.Join(folderPath, n.Title) + if err := downloadWikiNode(ctx, client, + spaceID, _folderPath, &n.NodeToken); err != nil { + return err + } + } + if n.ObjType == "docx" { + // 检查文档的父目录是否被排除 + if filter != nil && !filter.ShouldDownloadDocument(folderPath) { + continue + } + docOpts := &SyncOpts{ + outputDir: folderPath, + dump: opts.dump, + incremental: opts.incremental, + force: opts.force, + concurrency: opts.concurrency, + } + wg.Add(1) + semaphore <- struct{}{} + go func(_url string) { + defer func() { + wg.Done() + <-semaphore + }() + if err := syncDocument(ctx, client, _url, docOpts, cacheManager); err != nil { + errChan <- err + } + }(prefixURL + "/wiki/" + n.NodeToken) + } + } + return nil + } + + if err = downloadWikiNode(ctx, client, spaceID, folderPath, nil); err != nil { + return err + } + + // Wait for all the downloads to finish + go func() { + wg.Wait() + close(errChan) + }() + for err := range errChan { + return err + } + return nil +} + +// detectURLType 检测 URL 类型 +func detectURLType(url string) 
(string, error) { + // 尝试作为 Wiki URL + if _, _, err := utils.ValidateWikiURL(url); err == nil { + return core.SourceTypeWiki, nil + } + + // 尝试作为文件夹 URL + if _, err := utils.ValidateFolderURL(url); err == nil { + return core.SourceTypeFolder, nil + } + + return "", fmt.Errorf("URL 格式不正确,sync 命令仅支持文件夹或知识库 URL") +} + +func handleSyncCommand(url string) error { + // Load config + configPath, err := core.GetConfigFilePath() + if err != nil { + return err + } + config, err := core.ReadConfigFromFile(configPath) + if err != nil { + return err + } + syncConfig = *config + + // 尝试加载已有的同步配置 + existingSyncConfig, err := core.LoadSyncConfig(syncOpts.outputDir) + if err != nil { + fmt.Fprintf(os.Stderr, "警告: 加载同步配置失败: %v\n", err) + } + + // 如果没有提供 URL,尝试从同步配置中读取 + if url == "" { + if existingSyncConfig != nil && existingSyncConfig.SourceURL != "" { + url = existingSyncConfig.SourceURL + fmt.Printf("使用已保存的同步配置: %s\n", url) + } else { + return fmt.Errorf("请提供要同步的 URL,或在已有同步配置的目录中运行") + } + } + + // 检测 URL 类型 + sourceType, err := detectURLType(url) + if err != nil { + return err + } + fmt.Printf("检测到源类型: %s\n", sourceType) + + // 创建或更新同步配置 + var currentSyncConfig *core.SyncConfig + if existingSyncConfig != nil { + currentSyncConfig = existingSyncConfig + currentSyncConfig.SourceURL = url + currentSyncConfig.SourceType = sourceType + } else { + currentSyncConfig = core.NewSyncConfig(url, sourceType) + } + + // 更新同步配置(合并命令行参数) + currentSyncConfig.Update( + core.ParsePatterns(syncOpts.include), + core.ParsePatterns(syncOpts.exclude), + syncOpts.concurrency, + ) + + // Instantiate the client + client := core.NewClient(syncConfig.Feishu) + ctx := context.Background() + + // 初始化缓存管理器(sync 命令总是启用缓存) + cacheManager, err := core.NewCacheManager(syncOpts.outputDir) + if err != nil { + fmt.Fprintf(os.Stderr, "警告: 无法初始化缓存管理器: %v\n", err) + cacheManager = nil + } + + if syncOpts.force { + fmt.Println("强制模式: 将重新下载所有文档并更新缓存") + } else if syncOpts.incremental { + fmt.Println("增量模式: 将跳过未修改的文档") + 
} + + // 初始化目录过滤器 + var nodeFilter *core.NodeFilter + includePatterns := currentSyncConfig.Include + excludePatterns := currentSyncConfig.Exclude + + // 命令行参数优先 + if syncOpts.include != "" { + includePatterns = core.ParsePatterns(syncOpts.include) + } + if syncOpts.exclude != "" { + excludePatterns = core.ParsePatterns(syncOpts.exclude) + } + + if len(includePatterns) > 0 || len(excludePatterns) > 0 { + filterConfig := core.FilterConfig{ + IncludePatterns: includePatterns, + ExcludePatterns: excludePatterns, + } + nodeFilter = core.NewNodeFilter(filterConfig) + + fmt.Println("目录过滤已启用:") + if len(filterConfig.IncludePatterns) > 0 { + fmt.Printf(" 包含: %v\n", filterConfig.IncludePatterns) + } + if len(filterConfig.ExcludePatterns) > 0 { + fmt.Printf(" 排除: %v\n", filterConfig.ExcludePatterns) + } + } + + // 设置并发数 + if syncOpts.concurrency <= 0 { + syncOpts.concurrency = currentSyncConfig.Concurrency + } + fmt.Printf("并发数: %d\n", syncOpts.concurrency) + + // 执行同步 + var syncErr error + switch sourceType { + case core.SourceTypeFolder: + syncErr = syncFolder(ctx, client, url, &syncOpts, cacheManager, nodeFilter) + case core.SourceTypeWiki: + syncErr = syncWiki(ctx, client, url, &syncOpts, cacheManager, nodeFilter) + } + + // 保存缓存 + if cacheManager != nil { + if err := cacheManager.Save(); err != nil { + fmt.Fprintf(os.Stderr, "警告: 缓存保存失败: %v\n", err) + } else { + fmt.Println("✓ 缓存已更新") + } + } + + // 保存同步配置 + if syncErr == nil { + if err := currentSyncConfig.Save(syncOpts.outputDir); err != nil { + fmt.Fprintf(os.Stderr, "警告: 同步配置保存失败: %v\n", err) + } else { + fmt.Printf("✓ 同步配置已保存到 %s\n", core.GetSyncConfigPath(syncOpts.outputDir)) + } + } + + return syncErr +} diff --git a/core/cache.go b/core/cache.go new file mode 100644 index 0000000..f819e93 --- /dev/null +++ b/core/cache.go @@ -0,0 +1,211 @@ +package core + +import ( + "encoding/json" + "fmt" + "os" + "path/filepath" + "sync" + "time" +) + +// CacheVersion 缓存文件格式版本 +const CacheVersion = "1.0" + +// DocumentCache 
单个文档的缓存信息 +type DocumentCache struct { + RevisionID int64 `json:"revision_id"` // 文档版本号 + Title string `json:"title"` // 文档标题 + FileName string `json:"file_name"` // 实际保存的文件名 + LastDownload time.Time `json:"last_download"` // 上次下载时间 + DocType string `json:"doc_type"` // 文档类型 (docx/wiki) +} + +// CacheManager 缓存管理器 +type CacheManager struct { + Version string `json:"version"` // 缓存格式版本 + UpdatedAt time.Time `json:"updated_at"` // 缓存更新时间 + Documents map[string]*DocumentCache `json:"documents"` // 文档token -> 缓存信息映射 + + filePath string // 缓存文件路径 + mutex sync.RWMutex // 读写锁保护并发访问 + dirty bool // 标记是否有修改未保存 +} + +// NewCacheManager 创建新的缓存管理器 +func NewCacheManager(outputDir string) (*CacheManager, error) { + cachePath := filepath.Join(outputDir, ".feishu2md.cache.json") + + cm := &CacheManager{ + Version: CacheVersion, + UpdatedAt: time.Now(), + Documents: make(map[string]*DocumentCache), + filePath: cachePath, + dirty: false, + } + + // 尝试加载现有缓存 + if err := cm.Load(); err != nil && !os.IsNotExist(err) { + // 缓存文件损坏或无法读取,返回空缓存但不报错 + return cm, nil + } + + return cm, nil +} + +// Load 从文件加载缓存 +func (cm *CacheManager) Load() error { + cm.mutex.Lock() + defer cm.mutex.Unlock() + + data, err := os.ReadFile(cm.filePath) + if err != nil { + return err + } + + // 尝试解析JSON + if err := json.Unmarshal(data, cm); err != nil { + return err + } + + // 版本兼容性检查 + if cm.Version != CacheVersion { + // 未来可能需要处理版本迁移 + cm.Version = CacheVersion + cm.dirty = true + } + + cm.dirty = false + return nil +} + +// Save 保存缓存到文件 +func (cm *CacheManager) Save() error { + cm.mutex.Lock() + defer cm.mutex.Unlock() + + if !cm.dirty { + return nil // 没有修改不需要保存 + } + + cm.UpdatedAt = time.Now() + + // 确保输出目录存在 + if err := os.MkdirAll(filepath.Dir(cm.filePath), 0o755); err != nil { + return err + } + + data, err := json.MarshalIndent(cm, "", " ") + if err != nil { + return err + } + + // 原子写入:先写临时文件再重命名 + tmpPath := cm.filePath + ".tmp" + if err := os.WriteFile(tmpPath, data, 0o644); err != nil { + return 
err + } + + if err := os.Rename(tmpPath, cm.filePath); err != nil { + os.Remove(tmpPath) // 清理临时文件 + return err + } + + cm.dirty = false + return nil +} + +// ShouldDownload 判断文档是否需要下载 +// 返回: (需要下载, 跳过原因) +func (cm *CacheManager) ShouldDownload( + docToken string, + remoteRevisionID int64, + fileName string, +) (bool, string) { + cm.mutex.RLock() + defer cm.mutex.RUnlock() + + cache, exists := cm.Documents[docToken] + + // 1. 缓存中不存在 + if !exists { + // 检查文件是否已存在(可能是旧版本下载的) + if _, err := os.Stat(fileName); err == nil { + // 文件存在,跳过下载,但外层会更新缓存建立映射 + return false, fmt.Sprintf("文件已存在,建立缓存映射 (版本: %d)", remoteRevisionID) + } + // 文件也不存在,需要下载 + return true, "" + } + + // 2. 版本号不同,需要下载 + if cache.RevisionID != remoteRevisionID { + return true, "" + } + + // 3. 版本号相同,检查文件是否存在 + if _, err := os.Stat(fileName); os.IsNotExist(err) { + return true, "" + } + + // 4. 版本相同且文件存在,跳过 + return false, fmt.Sprintf("文档未修改 (版本: %d)", remoteRevisionID) +} + +// UpdateDocument 更新文档缓存信息 +func (cm *CacheManager) UpdateDocument( + docToken string, + revisionID int64, + title string, + fileName string, + docType string, +) { + cm.mutex.Lock() + defer cm.mutex.Unlock() + + cm.Documents[docToken] = &DocumentCache{ + RevisionID: revisionID, + Title: title, + FileName: fileName, + LastDownload: time.Now(), + DocType: docType, + } + cm.dirty = true +} + +// GetDocumentCache 获取文档缓存信息(只读) +func (cm *CacheManager) GetDocumentCache(docToken string) (*DocumentCache, bool) { + cm.mutex.RLock() + defer cm.mutex.RUnlock() + + cache, exists := cm.Documents[docToken] + return cache, exists +} + +// RemoveDocument 从缓存中移除文档 +func (cm *CacheManager) RemoveDocument(docToken string) { + cm.mutex.Lock() + defer cm.mutex.Unlock() + + if _, exists := cm.Documents[docToken]; exists { + delete(cm.Documents, docToken) + cm.dirty = true + } +} + +// GetStats 获取缓存统计信息 +func (cm *CacheManager) GetStats() (totalDocs int, oldestDownload time.Time) { + cm.mutex.RLock() + defer cm.mutex.RUnlock() + + totalDocs = 
len(cm.Documents) + oldestDownload = time.Now() + + for _, doc := range cm.Documents { + if doc.LastDownload.Before(oldestDownload) { + oldestDownload = doc.LastDownload + } + } + + return +} diff --git a/core/client.go b/core/client.go index 7120b9b..15d4097 100644 --- a/core/client.go +++ b/core/client.go @@ -14,23 +14,48 @@ import ( ) type Client struct { - larkClient *lark.Lark + larkClient *lark.Lark + authType string + userAccessToken string } -func NewClient(appID, appSecret string) *Client { - return &Client{ - larkClient: lark.New( - lark.WithAppCredential(appID, appSecret), +func NewClient(config FeishuConfig) *Client { + var larkClient *lark.Lark + + if config.AuthType == AuthTypeUser { + // 用户鉴权:不需要应用凭证 + larkClient = lark.New( lark.WithTimeout(60*time.Second), lark.WithApiMiddleware(lark_rate_limiter.Wait(4, 4)), - ), + ) + } else { + // 应用鉴权(默认) + larkClient = lark.New( + lark.WithAppCredential(config.AppId, config.AppSecret), + lark.WithTimeout(60*time.Second), + lark.WithApiMiddleware(lark_rate_limiter.Wait(4, 4)), + ) + } + + return &Client{ + larkClient: larkClient, + authType: config.AuthType, + userAccessToken: config.UserAccessToken, + } +} + +// getMethodOptions 返回 API 调用时需要的鉴权选项 +func (c *Client) getMethodOptions() []lark.MethodOptionFunc { + if c.authType == AuthTypeUser && c.userAccessToken != "" { + return []lark.MethodOptionFunc{lark.WithUserAccessToken(c.userAccessToken)} } + return nil } func (c *Client) DownloadImage(ctx context.Context, imgToken, outDir string) (string, error) { resp, _, err := c.larkClient.Drive.DownloadDriveMedia(ctx, &lark.DownloadDriveMediaReq{ FileToken: imgToken, - }) + }, c.getMethodOptions()...) 
if err != nil { return imgToken, err } @@ -55,7 +80,7 @@ func (c *Client) DownloadImage(ctx context.Context, imgToken, outDir string) (st func (c *Client) DownloadImageRaw(ctx context.Context, imgToken, imgDir string) (string, []byte, error) { resp, _, err := c.larkClient.Drive.DownloadDriveMedia(ctx, &lark.DownloadDriveMediaReq{ FileToken: imgToken, - }) + }, c.getMethodOptions()...) if err != nil { return imgToken, nil, err } @@ -69,7 +94,7 @@ func (c *Client) DownloadImageRaw(ctx context.Context, imgToken, imgDir string) func (c *Client) GetDocxContent(ctx context.Context, docToken string) (*lark.DocxDocument, []*lark.DocxBlock, error) { resp, _, err := c.larkClient.Drive.GetDocxDocument(ctx, &lark.GetDocxDocumentReq{ DocumentID: docToken, - }) + }, c.getMethodOptions()...) if err != nil { return nil, nil, err } @@ -84,7 +109,7 @@ func (c *Client) GetDocxContent(ctx context.Context, docToken string) (*lark.Doc resp2, _, err := c.larkClient.Drive.GetDocxBlockListOfDocument(ctx, &lark.GetDocxBlockListOfDocumentReq{ DocumentID: docx.DocumentID, PageToken: pageToken, - }) + }, c.getMethodOptions()...) if err != nil { return docx, nil, err } @@ -100,7 +125,7 @@ func (c *Client) GetDocxContent(ctx context.Context, docToken string) (*lark.Doc func (c *Client) GetWikiNodeInfo(ctx context.Context, token string) (*lark.GetWikiNodeRespNode, error) { resp, _, err := c.larkClient.Drive.GetWikiNode(ctx, &lark.GetWikiNodeReq{ Token: token, - }) + }, c.getMethodOptions()...) if err != nil { return nil, err } @@ -112,7 +137,7 @@ func (c *Client) GetDriveFolderFileList(ctx context.Context, pageToken *string, PageSize: nil, PageToken: pageToken, FolderToken: folderToken, - }) + }, c.getMethodOptions()...) if err != nil { return nil, err } @@ -122,7 +147,7 @@ func (c *Client) GetDriveFolderFileList(ctx context.Context, pageToken *string, PageSize: nil, PageToken: &resp.NextPageToken, FolderToken: folderToken, - }) + }, c.getMethodOptions()...) 
if err != nil { return nil, err } @@ -134,7 +159,7 @@ func (c *Client) GetDriveFolderFileList(ctx context.Context, pageToken *string, func (c *Client) GetWikiName(ctx context.Context, spaceID string) (string, error) { resp, _, err := c.larkClient.Drive.GetWikiSpace(ctx, &lark.GetWikiSpaceReq{ SpaceID: spaceID, - }) + }, c.getMethodOptions()...) if err != nil { return "", err @@ -149,7 +174,7 @@ func (c *Client) GetWikiNodeList(ctx context.Context, spaceID string, parentNode PageSize: nil, PageToken: nil, ParentNodeToken: parentNodeToken, - }) + }, c.getMethodOptions()...) if err != nil { return nil, err @@ -165,7 +190,7 @@ func (c *Client) GetWikiNodeList(ctx context.Context, spaceID string, parentNode PageSize: nil, PageToken: &resp.PageToken, ParentNodeToken: parentNodeToken, - }) + }, c.getMethodOptions()...) if err != nil { return nil, err diff --git a/core/client_test.go b/core/client_test.go index b9bc4ac..f786ed0 100644 --- a/core/client_test.go +++ b/core/client_test.go @@ -9,40 +9,37 @@ import ( "github.com/Wsine/feishu2md/core" ) -func getIdAndSecretFromEnv(t *testing.T) (string, string) { - appID := "" - appSecret := "" - +func getConfigFromEnv(t *testing.T) core.FeishuConfig { configPath, err := core.GetConfigFilePath() if err != nil { t.Error(err) } if _, err := os.Stat(configPath); os.IsNotExist(err) { - appID = os.Getenv("FEISHU_APP_ID") - appSecret = os.Getenv("FEISHU_APP_SECRET") + return core.FeishuConfig{ + AppId: os.Getenv("FEISHU_APP_ID"), + AppSecret: os.Getenv("FEISHU_APP_SECRET"), + AuthType: core.AuthTypeApp, + } } else { config, err := core.ReadConfigFromFile(configPath) if err != nil { t.Error(err) } - appID = config.Feishu.AppId - appSecret = config.Feishu.AppSecret + return config.Feishu } - - return appID, appSecret } func TestNewClient(t *testing.T) { - appID, appSecret := getIdAndSecretFromEnv(t) - c := core.NewClient(appID, appSecret) + config := getConfigFromEnv(t) + c := core.NewClient(config) if c == nil { t.Errorf("Error 
creating DocClient") } } func TestDownloadImage(t *testing.T) { - appID, appSecret := getIdAndSecretFromEnv(t) - c := core.NewClient(appID, appSecret) + config := getConfigFromEnv(t) + c := core.NewClient(config) imgToken := "boxcnA1QKPanfMhLxzF1eMhoArM" filename, err := c.DownloadImage( context.Background(), @@ -62,8 +59,8 @@ func TestDownloadImage(t *testing.T) { } func TestGetDocxContent(t *testing.T) { - appID, appSecret := getIdAndSecretFromEnv(t) - c := core.NewClient(appID, appSecret) + config := getConfigFromEnv(t) + c := core.NewClient(config) docx, blocks, err := c.GetDocxContent( context.Background(), "doxcnXhd93zqoLnmVPGIPTy7AFe", @@ -82,8 +79,8 @@ func TestGetDocxContent(t *testing.T) { } func TestGetWikiNodeInfo(t *testing.T) { - appID, appSecret := getIdAndSecretFromEnv(t) - c := core.NewClient(appID, appSecret) + config := getConfigFromEnv(t) + c := core.NewClient(config) const token = "wikcnLgRX9AMtvaB5x1cl57Yuah" node, err := c.GetWikiNodeInfo(context.Background(), token) if err != nil { @@ -95,8 +92,8 @@ func TestGetWikiNodeInfo(t *testing.T) { } func TestGetDriveFolderFileList(t *testing.T) { - appID, appSecret := getIdAndSecretFromEnv(t) - c := core.NewClient(appID, appSecret) + config := getConfigFromEnv(t) + c := core.NewClient(config) folderToken := "G15mfSfIHlyquudfhq5cg9kdnjg" files, err := c.GetDriveFolderFileList( context.Background(), nil, &folderToken) @@ -109,8 +106,8 @@ func TestGetDriveFolderFileList(t *testing.T) { } func TestGetWikiNodeList(t *testing.T) { - appID, appSecret := getIdAndSecretFromEnv(t) - c := core.NewClient(appID, appSecret) + config := getConfigFromEnv(t) + c := core.NewClient(config) wikiToken := "7376995595006787612" nodes, err := c.GetWikiNodeList(context.Background(), wikiToken, nil) if err != nil { diff --git a/core/config.go b/core/config.go index c0aea08..d6dfd22 100644 --- a/core/config.go +++ b/core/config.go @@ -2,19 +2,37 @@ package core import ( "encoding/json" + "fmt" "os" "path" "path/filepath" ) 
+// 鉴权类型常量 +const ( + AuthTypeApp = "app" + AuthTypeUser = "user" +) + +// 配置版本 +const ConfigVersion = "2.0" + type Config struct { - Feishu FeishuConfig `json:"feishu"` - Output OutputConfig `json:"output"` + Version string `json:"version,omitempty"` + Feishu FeishuConfig `json:"feishu"` + Output OutputConfig `json:"output"` } type FeishuConfig struct { AppId string `json:"app_id"` AppSecret string `json:"app_secret"` + + // 用户鉴权相关字段 + UserAccessToken string `json:"user_access_token,omitempty"` + + // 鉴权类型选择: "app" 或 "user" + // 默认为 "app",保持向后兼容 + AuthType string `json:"auth_type,omitempty"` } type OutputConfig struct { @@ -26,9 +44,11 @@ type OutputConfig struct { func NewConfig(appId, appSecret string) *Config { return &Config{ + Version: ConfigVersion, Feishu: FeishuConfig{ AppId: appId, AppSecret: appSecret, + AuthType: AuthTypeApp, // 默认应用鉴权 }, Output: OutputConfig{ ImageDir: "static", @@ -39,6 +59,32 @@ func NewConfig(appId, appSecret string) *Config { } } +// Validate 验证配置的有效性 +func (fc *FeishuConfig) Validate() error { + // 设置默认值 + if fc.AuthType == "" { + fc.AuthType = AuthTypeApp + } + + // 验证 AuthType 的有效性 + if fc.AuthType != AuthTypeApp && fc.AuthType != AuthTypeUser { + return fmt.Errorf("invalid auth_type: %s, must be 'app' or 'user'", fc.AuthType) + } + + // 验证必需字段 + if fc.AuthType == AuthTypeApp { + if fc.AppId == "" || fc.AppSecret == "" { + return fmt.Errorf("app_id and app_secret are required for app authentication") + } + } else if fc.AuthType == AuthTypeUser { + if fc.UserAccessToken == "" { + return fmt.Errorf("user_access_token is required for user authentication") + } + } + + return nil +} + func GetConfigFilePath() (string, error) { configPath, err := os.UserConfigDir() if err != nil { @@ -58,9 +104,45 @@ func ReadConfigFromFile(configPath string) (*Config, error) { if err != nil { return nil, err } + + // 迁移旧配置 + migrated := config.Migrate() + + // 验证配置 + if err = config.Feishu.Validate(); err != nil { + return nil, err + } + + // 
如果发生迁移,自动保存新配置 + if migrated { + fmt.Println("配置文件已自动迁移到 v2.0 格式") + if err = config.WriteConfig2File(configPath); err != nil { + fmt.Printf("警告:保存迁移后的配置失败: %v\n", err) + } + } + return config, nil } +// Migrate 迁移旧版本配置到新版本,返回是否发生迁移 +func (c *Config) Migrate() bool { + migrated := false + + // 检测旧配置(无 version 字段) + if c.Version == "" { + c.Version = ConfigVersion + migrated = true + } + + // 确保 AuthType 有默认值 + if c.Feishu.AuthType == "" { + c.Feishu.AuthType = AuthTypeApp + migrated = true + } + + return migrated +} + func (conf *Config) WriteConfig2File(configPath string) error { err := os.MkdirAll(filepath.Dir(configPath), 0o755) if err != nil { diff --git a/core/config_test.go b/core/config_test.go new file mode 100644 index 0000000..aca1dc4 --- /dev/null +++ b/core/config_test.go @@ -0,0 +1,207 @@ +package core + +import ( + "path/filepath" + "testing" + + "github.com/stretchr/testify/assert" +) + +func TestFeishuConfigValidate(t *testing.T) { + tests := []struct { + name string + config FeishuConfig + wantErr bool + errMsg string + }{ + { + name: "有效的应用鉴权配置", + config: FeishuConfig{ + AppId: "cli_test123", + AppSecret: "secret123", + AuthType: AuthTypeApp, + }, + wantErr: false, + }, + { + name: "有效的用户鉴权配置", + config: FeishuConfig{ + UserAccessToken: "u-test-token", + AuthType: AuthTypeUser, + }, + wantErr: false, + }, + { + name: "AuthType 默认值", + config: FeishuConfig{ + AppId: "cli_test123", + AppSecret: "secret123", + AuthType: "", // 空值应默认为 "app" + }, + wantErr: false, + }, + { + name: "无效的 AuthType", + config: FeishuConfig{ + AuthType: "invalid", + }, + wantErr: true, + errMsg: "invalid auth_type", + }, + { + name: "应用鉴权缺少 AppId", + config: FeishuConfig{ + AppSecret: "secret123", + AuthType: AuthTypeApp, + }, + wantErr: true, + errMsg: "app_id and app_secret are required", + }, + { + name: "用户鉴权缺少 UserAccessToken", + config: FeishuConfig{ + AuthType: AuthTypeUser, + }, + wantErr: true, + errMsg: "user_access_token is required", + }, + } + + for _, tt := 
range tests { + t.Run(tt.name, func(t *testing.T) { + err := tt.config.Validate() + if tt.wantErr { + assert.Error(t, err) + if tt.errMsg != "" { + assert.Contains(t, err.Error(), tt.errMsg) + } + } else { + assert.NoError(t, err) + } + }) + } +} + +func TestConfigReadWrite(t *testing.T) { + // 创建临时目录 + tmpDir := t.TempDir() + configPath := filepath.Join(tmpDir, "config.json") + + // 测试应用鉴权配置 + appConfig := NewConfig("test_app_id", "test_app_secret") + err := appConfig.WriteConfig2File(configPath) + assert.NoError(t, err) + + readConfig, err := ReadConfigFromFile(configPath) + assert.NoError(t, err) + assert.Equal(t, "test_app_id", readConfig.Feishu.AppId) + assert.Equal(t, "test_app_secret", readConfig.Feishu.AppSecret) + assert.Equal(t, AuthTypeApp, readConfig.Feishu.AuthType) + + // 测试用户鉴权配置 + userConfig := NewConfig("", "") + userConfig.Feishu.UserAccessToken = "u-test-token" + userConfig.Feishu.AuthType = AuthTypeUser + err = userConfig.WriteConfig2File(configPath) + assert.NoError(t, err) + + readConfig2, err := ReadConfigFromFile(configPath) + assert.NoError(t, err) + assert.Equal(t, "u-test-token", readConfig2.Feishu.UserAccessToken) + assert.Equal(t, AuthTypeUser, readConfig2.Feishu.AuthType) +} + +func TestConfigMigrate(t *testing.T) { + tests := []struct { + name string + config Config + wantMigrated bool + wantVersion string + wantAuthType string + }{ + { + name: "新配置无需迁移", + config: Config{ + Version: ConfigVersion, + Feishu: FeishuConfig{ + AppId: "test_id", + AppSecret: "test_secret", + AuthType: AuthTypeApp, + }, + }, + wantMigrated: false, + wantVersion: ConfigVersion, + wantAuthType: AuthTypeApp, + }, + { + name: "旧配置需要迁移版本号", + config: Config{ + Version: "", + Feishu: FeishuConfig{ + AppId: "test_id", + AppSecret: "test_secret", + AuthType: AuthTypeApp, + }, + }, + wantMigrated: true, + wantVersion: ConfigVersion, + wantAuthType: AuthTypeApp, + }, + { + name: "旧配置需要迁移AuthType", + config: Config{ + Version: ConfigVersion, + Feishu: FeishuConfig{ + 
AppId: "test_id", + AppSecret: "test_secret", + AuthType: "", + }, + }, + wantMigrated: true, + wantVersion: ConfigVersion, + wantAuthType: AuthTypeApp, + }, + { + name: "旧配置需要迁移版本号和AuthType", + config: Config{ + Version: "", + Feishu: FeishuConfig{ + AppId: "test_id", + AppSecret: "test_secret", + AuthType: "", + }, + }, + wantMigrated: true, + wantVersion: ConfigVersion, + wantAuthType: AuthTypeApp, + }, + { + name: "用户鉴权配置无需迁移AuthType", + config: Config{ + Version: "", + Feishu: FeishuConfig{ + UserAccessToken: "u-test", + AuthType: AuthTypeUser, + }, + }, + wantMigrated: true, + wantVersion: ConfigVersion, + wantAuthType: AuthTypeUser, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + migrated := tt.config.Migrate() + assert.Equal(t, tt.wantMigrated, migrated) + assert.Equal(t, tt.wantVersion, tt.config.Version) + assert.Equal(t, tt.wantAuthType, tt.config.Feishu.AuthType) + }) + } +} + +func TestNewConfigHasVersion(t *testing.T) { + config := NewConfig("test_id", "test_secret") + assert.Equal(t, ConfigVersion, config.Version) + assert.Equal(t, AuthTypeApp, config.Feishu.AuthType) +} diff --git a/core/filter.go b/core/filter.go new file mode 100644 index 0000000..87d0a57 --- /dev/null +++ b/core/filter.go @@ -0,0 +1,192 @@ +package core + +import ( + "path/filepath" + "strings" +) + +// FilterConfig 目录过滤配置 +type FilterConfig struct { + IncludePatterns []string // 包含模式列表(白名单) + ExcludePatterns []string // 排除模式列表(黑名单) +} + +// NodeFilter 节点过滤器 +type NodeFilter struct { + config FilterConfig + excludedPaths map[string]bool // 已排除的路径缓存(用于跟踪父目录) + includedPaths map[string]bool // 已包含的路径缓存(用于 Include 白名单检查) +} + +// NewNodeFilter 创建节点过滤器 +func NewNodeFilter(config FilterConfig) *NodeFilter { + return &NodeFilter{ + config: config, + excludedPaths: make(map[string]bool), + includedPaths: make(map[string]bool), + } +} + +// ParsePatterns 解析逗号分隔的模式字符串 +func ParsePatterns(patterns string) []string { + if patterns == "" { + return nil + } + 
parts := strings.Split(patterns, ",") + result := make([]string, 0, len(parts)) + for _, p := range parts { + p = strings.TrimSpace(p) + if p != "" { + result = append(result, p) + } + } + return result +} + +// matchPattern 检查名称是否匹配单个模式 +// 支持 Go 的 filepath.Match 语法:*, ?, [abc], [a-z] +func matchPattern(name, pattern string) bool { + // 使用 filepath.Match 支持通配符 + matched, err := filepath.Match(pattern, name) + if err != nil { + return false + } + return matched +} + +// matchAnyPattern 检查名称是否匹配任一模式 +func matchAnyPattern(name string, patterns []string) bool { + for _, pattern := range patterns { + if matchPattern(name, pattern) { + return true + } + } + return false +} + +// ShouldIncludeNode 判断节点是否应该被包含 +// parentPath: 父目录的完整路径(用于检查父目录是否已被排除) +// nodeName: 当前节点名称 +// 返回: (是否包含, 是否为因父目录被排除而跳过) +func (f *NodeFilter) ShouldIncludeNode(parentPath, nodeName string) (include bool, skippedByParent bool) { + // 规范化路径,确保路径分隔符一致 + parentPath = filepath.Clean(parentPath) + currentPath := filepath.Join(parentPath, nodeName) + + // 检查父目录是否已被排除 + if f.isParentExcluded(parentPath) { + f.excludedPaths[currentPath] = true + return false, true + } + + // 无过滤条件时,默认包含 + if len(f.config.IncludePatterns) == 0 && len(f.config.ExcludePatterns) == 0 { + return true, false + } + + // 如果父目录已经被包含,子目录自动被包含(不再检查 include 模式) + // 但仍然检查 exclude 模式 + parentIncluded := f.isParentIncluded(parentPath) + + // 检查 include(白名单) + if len(f.config.IncludePatterns) > 0 { + // 如果父目录已包含,则子目录自动包含;否则检查当前节点是否匹配 + if !parentIncluded && !matchAnyPattern(nodeName, f.config.IncludePatterns) { + f.excludedPaths[currentPath] = true + return false, false + } + } + + // 再检查 exclude(黑名单)- 即使父目录被包含,也要检查排除规则 + if len(f.config.ExcludePatterns) > 0 { + if matchAnyPattern(nodeName, f.config.ExcludePatterns) { + f.excludedPaths[currentPath] = true + return false, false + } + } + + // 记录已包含的路径 + f.includedPaths[currentPath] = true + return true, false +} + +// isParentExcluded 检查父路径是否已被排除 +func (f *NodeFilter) 
isParentExcluded(path string) bool { + if path == "" || path == "." { + return false + } + + // 规范化路径 + path = filepath.Clean(path) + + // 递归检查所有父路径 + current := path + for current != "" && current != "." { + if f.excludedPaths[current] { + return true + } + parent := filepath.Dir(current) + if parent == current { + break + } + current = parent + } + return false +} + +// isParentIncluded 检查路径或其祖先路径是否被包含 +func (f *NodeFilter) isParentIncluded(path string) bool { + if path == "" || path == "." { + return false + } + + path = filepath.Clean(path) + current := path + for current != "" && current != "." { + if f.includedPaths[current] { + return true + } + parent := filepath.Dir(current) + if parent == current { + break + } + current = parent + } + return false +} + +// ShouldDownloadFolder 判断文件夹是否应该下载(用于 batch 模式) +func (f *NodeFilter) ShouldDownloadFolder(parentPath, folderName string) bool { + include, _ := f.ShouldIncludeNode(parentPath, folderName) + return include +} + +// ShouldDownloadDocument 判断文档是否应该下载 +// 文档本身不受过滤影响,只有其父目录被排除时才跳过 +// 当配置了 Include 白名单时,只有父目录在已包含路径中的文档才会被下载 +func (f *NodeFilter) ShouldDownloadDocument(parentPath string) bool { + parentPath = filepath.Clean(parentPath) + + // 检查父目录是否被排除 + if f.isParentExcluded(parentPath) || f.excludedPaths[parentPath] { + return false + } + + // 如果配置了 Include 白名单,检查父目录是否在已包含的路径中 + if len(f.config.IncludePatterns) > 0 { + return f.isParentIncluded(parentPath) + } + + return true +} + +// HasFilters 检查是否配置了过滤条件 +func (f *NodeFilter) HasFilters() bool { + return len(f.config.IncludePatterns) > 0 || len(f.config.ExcludePatterns) > 0 +} + +// Reset 重置过滤器状态(用于新的下载任务) +func (f *NodeFilter) Reset() { + f.excludedPaths = make(map[string]bool) + f.includedPaths = make(map[string]bool) +} diff --git a/core/filter_test.go b/core/filter_test.go new file mode 100644 index 0000000..d7ebea1 --- /dev/null +++ b/core/filter_test.go @@ -0,0 +1,453 @@ +package core + +import ( + "path/filepath" + "testing" +) + +func 
TestParsePatterns(t *testing.T) { + tests := []struct { + input string + expected []string + }{ + {"", nil}, + {"test", []string{"test"}}, + {"test,demo", []string{"test", "demo"}}, + {"test, demo, ", []string{"test", "demo"}}, + {"*测试*,*草稿*", []string{"*测试*", "*草稿*"}}, + {" spaced , patterns ", []string{"spaced", "patterns"}}, + } + + for _, tt := range tests { + result := ParsePatterns(tt.input) + if len(result) != len(tt.expected) { + t.Errorf("ParsePatterns(%q) = %v, want %v", tt.input, result, tt.expected) + continue + } + for i, v := range result { + if v != tt.expected[i] { + t.Errorf("ParsePatterns(%q)[%d] = %q, want %q", tt.input, i, v, tt.expected[i]) + } + } + } +} + +func TestMatchPattern(t *testing.T) { + tests := []struct { + name string + pattern string + want bool + }{ + {"test", "test", true}, + {"testing", "test*", true}, + {"mytest", "*test", true}, + {"mytesting", "*test*", true}, + {"测试文档", "*测试*", true}, + {"文档", "*测试*", false}, + {"draft-v1", "draft-*", true}, + {"abc", "???", true}, + {"ab", "???", false}, + {"file.txt", "*.txt", true}, + {"file.md", "*.txt", false}, + {"doc", "doc", true}, + {"docs", "doc", false}, + } + + for _, tt := range tests { + got := matchPattern(tt.name, tt.pattern) + if got != tt.want { + t.Errorf("matchPattern(%q, %q) = %v, want %v", + tt.name, tt.pattern, got, tt.want) + } + } +} + +func TestMatchAnyPattern(t *testing.T) { + tests := []struct { + name string + patterns []string + want bool + }{ + {"test", []string{"test", "demo"}, true}, + {"demo", []string{"test", "demo"}, true}, + {"other", []string{"test", "demo"}, false}, + {"testing", []string{"test*", "demo*"}, true}, + {"文档", []string{"*测试*", "*文档*"}, true}, + {"any", []string{}, false}, + } + + for _, tt := range tests { + got := matchAnyPattern(tt.name, tt.patterns) + if got != tt.want { + t.Errorf("matchAnyPattern(%q, %v) = %v, want %v", + tt.name, tt.patterns, got, tt.want) + } + } +} + +func TestNodeFilter_ShouldIncludeNode_IncludeOnly(t *testing.T) 
{ + filter := NewNodeFilter(FilterConfig{ + IncludePatterns: []string{"*文档*", "API*"}, + }) + + tests := []struct { + parent string + name string + include bool + }{ + {"", "技术文档", true}, + {"", "API指南", true}, + {"", "草稿", false}, + {"", "其他资料", false}, + } + + for _, tt := range tests { + include, _ := filter.ShouldIncludeNode(tt.parent, tt.name) + if include != tt.include { + t.Errorf("ShouldIncludeNode(%q, %q) = %v, want %v", + tt.parent, tt.name, include, tt.include) + } + } +} + +func TestNodeFilter_ShouldIncludeNode_ExcludeOnly(t *testing.T) { + filter := NewNodeFilter(FilterConfig{ + ExcludePatterns: []string{"*草稿*", "*测试*"}, + }) + + tests := []struct { + parent string + name string + include bool + }{ + {"", "正式文档", true}, + {"", "测试文档", false}, + {"", "草稿", false}, + {"", "产品草稿", false}, + } + + for _, tt := range tests { + include, _ := filter.ShouldIncludeNode(tt.parent, tt.name) + if include != tt.include { + t.Errorf("ShouldIncludeNode(%q, %q) = %v, want %v", + tt.parent, tt.name, include, tt.include) + } + } +} + +func TestNodeFilter_ShouldIncludeNode_IncludeAndExclude(t *testing.T) { + filter := NewNodeFilter(FilterConfig{ + IncludePatterns: []string{"*文档*"}, + ExcludePatterns: []string{"*草稿*"}, + }) + + tests := []struct { + parent string + name string + include bool + }{ + {"", "技术文档", true}, + {"", "草稿文档", false}, // 被 exclude 排除 + {"", "其他资料", false}, // 不匹配 include + } + + for _, tt := range tests { + include, _ := filter.ShouldIncludeNode(tt.parent, tt.name) + if include != tt.include { + t.Errorf("ShouldIncludeNode(%q, %q) = %v, want %v", + tt.parent, tt.name, include, tt.include) + } + } +} + +func TestNodeFilter_ShouldIncludeNode_ParentExcluded(t *testing.T) { + filter := NewNodeFilter(FilterConfig{ + ExcludePatterns: []string{"草稿"}, + }) + + // 排除"草稿"目录 + include, skipped := filter.ShouldIncludeNode("wiki", "草稿") + if include { + t.Error("Expected '草稿' to be excluded") + } + if skipped { + t.Error("Expected skippedByParent to be false for 
first exclusion") + } + + // 子目录应该因父目录被排除而跳过 + // 使用 filepath.Join 确保路径分隔符一致 + parentPath := filepath.Join("wiki", "草稿") + include, skipped = filter.ShouldIncludeNode(parentPath, "子文件夹") + if include { + t.Error("Expected '子文件夹' to be excluded because parent is excluded") + } + if !skipped { + t.Error("Expected skippedByParent to be true") + } + + // 深层嵌套也应该被跳过 + deepParentPath := filepath.Join("wiki", "草稿", "子文件夹") + include, skipped = filter.ShouldIncludeNode(deepParentPath, "深层文件夹") + if include { + t.Error("Expected '深层文件夹' to be excluded because ancestor is excluded") + } + if !skipped { + t.Error("Expected skippedByParent to be true for deeply nested") + } +} + +func TestNodeFilter_NoFilters(t *testing.T) { + filter := NewNodeFilter(FilterConfig{}) + + include, _ := filter.ShouldIncludeNode("", "任意目录") + if !include { + t.Error("Expected all nodes to be included when no filters") + } + + if filter.HasFilters() { + t.Error("Expected HasFilters() to return false") + } +} + +func TestNodeFilter_HasFilters(t *testing.T) { + tests := []struct { + config FilterConfig + want bool + }{ + {FilterConfig{}, false}, + {FilterConfig{IncludePatterns: []string{"test"}}, true}, + {FilterConfig{ExcludePatterns: []string{"test"}}, true}, + {FilterConfig{IncludePatterns: []string{"a"}, ExcludePatterns: []string{"b"}}, true}, + } + + for _, tt := range tests { + filter := NewNodeFilter(tt.config) + if got := filter.HasFilters(); got != tt.want { + t.Errorf("HasFilters() with %+v = %v, want %v", tt.config, got, tt.want) + } + } +} + +func TestNodeFilter_ShouldDownloadFolder(t *testing.T) { + filter := NewNodeFilter(FilterConfig{ + ExcludePatterns: []string{"*draft*"}, + }) + + if !filter.ShouldDownloadFolder("", "docs") { + t.Error("Expected 'docs' folder to be downloadable") + } + + if filter.ShouldDownloadFolder("", "draft-v1") { + t.Error("Expected 'draft-v1' folder to be excluded") + } +} + +func TestNodeFilter_ShouldDownloadDocument(t *testing.T) { + filter := 
NewNodeFilter(FilterConfig{
+		ExcludePatterns: []string{"草稿"},
+	})
+
+	// Exclude the "草稿" directory.
+	filter.ShouldIncludeNode("wiki", "草稿")
+
+	// Documents under a normal path should be downloadable.
+	normalPath := filepath.Join("wiki", "正式")
+	if !filter.ShouldDownloadDocument(normalPath) {
+		t.Error("Expected document in 'wiki/正式' to be downloadable")
+	}
+
+	// Documents under an excluded path must not be downloaded.
+	excludedPath := filepath.Join("wiki", "草稿")
+	if filter.ShouldDownloadDocument(excludedPath) {
+		t.Error("Expected document in 'wiki/草稿' to be excluded")
+	}
+}
+
+func TestNodeFilter_Reset(t *testing.T) {
+	filter := NewNodeFilter(FilterConfig{
+		ExcludePatterns: []string{"test"},
+	})
+
+	// Exclude one directory.
+	filter.ShouldIncludeNode("", "test")
+
+	// The child should be skipped because its parent is excluded.
+	include, skipped := filter.ShouldIncludeNode("test", "child")
+	if include {
+		t.Error("Expected child to be excluded before reset")
+	}
+	if !skipped {
+		t.Error("Expected child to be skipped by parent before reset")
+	}
+
+	// Reset the filter state.
+	filter.Reset()
+
+	// After reset, "test" is still excluded (it matches the rule), just not via the cache.
+	include, _ = filter.ShouldIncludeNode("", "test")
+	if include {
+		t.Error("Expected 'test' to still be excluded by pattern after reset")
+	}
+
+	// "test/child" is again skipped because its parent is excluded.
+	include, skipped = filter.ShouldIncludeNode("test", "child")
+	if include {
+		t.Error("Expected child to be excluded because parent matches exclude pattern")
+	}
+	if !skipped {
+		t.Error("Expected child to be skipped by parent after reset")
+	}
+}
+
+func TestNodeFilter_ShouldDownloadDocument_WithIncludePatterns(t *testing.T) {
+	filter := NewNodeFilter(FilterConfig{
+		IncludePatterns: []string{"*管理与规范*"},
+	})
+
+	// Simulated wiki structure:
+	// wiki/ <- root
+	// ├── 首页 <- no match, excluded
+	// ├── 测试管理 <- no match, excluded
+	// ├── 测试管理与规范 <- match, included
+	// └── doc.md <- document at the root
+
+	// Process the directories.
+	include, _ := filter.ShouldIncludeNode("wiki", "首页")
+	if include {
+		t.Error("Expected '首页' to be excluded")
+	}
+
+	include, _ = filter.ShouldIncludeNode("wiki", "测试管理")
+	if include {
+		t.Error("Expected '测试管理' to be excluded")
+	}
+
+	include, _ = filter.ShouldIncludeNode("wiki", "测试管理与规范")
+	if !include {
+		t.Error("Expected '测试管理与规范' to be included")
+	}
+
+	// Document download checks.
+	// A document at the root must not be downloaded: an include whitelist is
+	// configured and the root is not in includedPaths.
+	if filter.ShouldDownloadDocument("wiki") {
+		t.Error("Expected document in root 'wiki' to NOT be downloaded when include patterns are set")
+	}
+
+	// Documents under an excluded directory must not be downloaded.
+	excludedPath := filepath.Join("wiki", "首页")
+	if filter.ShouldDownloadDocument(excludedPath) {
+		t.Error("Expected document in excluded path to NOT be downloaded")
+	}
+
+	// Documents under an included directory should be downloaded.
+	includedPath := filepath.Join("wiki", "测试管理与规范")
+	if !filter.ShouldDownloadDocument(includedPath) {
+		t.Error("Expected document in included path '测试管理与规范' to be downloaded")
+	}
+
+	// Documents under a subdirectory of an included directory should be downloaded too.
+	nestedPath := filepath.Join("wiki", "测试管理与规范", "子目录")
+	if !filter.ShouldDownloadDocument(nestedPath) {
+		t.Error("Expected document in nested path under included directory to be downloaded")
+	}
+}
+
+func TestNodeFilter_ShouldDownloadDocument_NoFilters(t *testing.T) {
+	filter := NewNodeFilter(FilterConfig{})
+
+	// With no filters configured, every document should be downloadable.
+	if !filter.ShouldDownloadDocument("wiki") {
+		t.Error("Expected document to be downloadable when no filters")
+	}
+
+	if !filter.ShouldDownloadDocument(filepath.Join("wiki", "any", "path")) {
+		t.Error("Expected document to be downloadable when no filters")
+	}
+}
+
+func TestNodeFilter_ShouldDownloadDocument_ExcludeOnly(t *testing.T) {
+	filter := NewNodeFilter(FilterConfig{
+		ExcludePatterns: []string{"草稿"},
+	})
+
+	// Exclude the "草稿" directory.
+	filter.ShouldIncludeNode("wiki", "草稿")
+	filter.ShouldIncludeNode("wiki", "正式")
+
+	// With only exclude patterns, documents under a normal path are downloadable.
+	normalPath := filepath.Join("wiki", "正式")
+	if !filter.ShouldDownloadDocument(normalPath) {
+		t.Error("Expected document in non-excluded path to be downloadable")
+	}
+
+	// Documents under an excluded path must not be downloaded.
+	excludedPath := filepath.Join("wiki", "草稿")
+	if filter.ShouldDownloadDocument(excludedPath) {
+		t.Error("Expected document in excluded path to NOT be downloaded")
+	}
+
+	// A document at the root is downloadable because no include whitelist is configured.
+	if !filter.ShouldDownloadDocument("wiki") {
+		t.Error("Expected document in root to be downloadable when only exclude patterns are set")
+	}
+}
+
+func TestNodeFilter_ChildDirectoriesAutoIncluded(t *testing.T) {
+	filter := NewNodeFilter(FilterConfig{
+		IncludePatterns: []string{"*测试*"},
+	})
+
+	// Simulated wiki structure:
+	// wiki/
+	// ├── 测试管理/ <- matches *测试*, included
+	// │ ├── 能力建设规划/ <- no match, but parent is included, so auto-included
+	// │ │ └── doc.md
+	// │ └── 子目录A/
+	// │ └── 深层目录/
+	// │ └── doc.md
+	// └── 其他目录/ <- no match, excluded
+
+	// Level 1: 测试管理 matches the pattern.
+	include, _ := filter.ShouldIncludeNode("wiki", "测试管理")
+	if !include {
+		t.Error("Expected '测试管理' to be included (matches pattern)")
+	}
+
+	// Level 1: 其他目录 does not match.
+	include, _ = filter.ShouldIncludeNode("wiki", "其他目录")
+	if include {
+		t.Error("Expected '其他目录' to be excluded (doesn't match pattern)")
+	}
+
+	// Level 2: 能力建设规划 does not match, but its parent is already included.
+	parentPath := filepath.Join("wiki", "测试管理")
+	include, _ = filter.ShouldIncludeNode(parentPath, "能力建设规划")
+	if !include {
+		t.Error("Expected '能力建设规划' to be auto-included (parent is included)")
+	}
+
+	// Level 3: 深层目录 should also be auto-included.
+	deepParentPath := filepath.Join("wiki", "测试管理", "能力建设规划")
+	include, _ = filter.ShouldIncludeNode(deepParentPath, "深层目录")
+	if !include {
+		t.Error("Expected '深层目录' to be auto-included (ancestor is included)")
+	}
+
+	// Verify document download.
+	// Documents under subdirectories of an included directory are downloadable.
+	docPath := filepath.Join("wiki", "测试管理", "能力建设规划")
+	if !filter.ShouldDownloadDocument(docPath) {
+		t.Error("Expected document in auto-included directory to be downloadable")
+	}
+
+	deepDocPath := filepath.Join("wiki", "测试管理", "能力建设规划", "深层目录")
+	if !filter.ShouldDownloadDocument(deepDocPath) {
+		t.Error("Expected document in deeply nested auto-included directory to be downloadable")
+	}
+
+	// Documents under an excluded directory are not downloadable.
+	excludedDocPath := filepath.Join("wiki", "其他目录")
+	if filter.ShouldDownloadDocument(excludedDocPath) {
+		t.Error("Expected document in excluded directory to NOT be downloadable")
+	}
+}
diff --git a/core/sync_config.go b/core/sync_config.go
new file mode 100644
index 0000000..e80df98
--- /dev/null
+++ b/core/sync_config.go
@@ -0,0 +1,99 @@
+package core
+
+import (
+	"encoding/json"
+	"os"
+	"path/filepath"
+	"time"
+)
+
+const (
+	SyncConfigFileName = ".feishu2md.sync.json"
+	SyncConfigVersion  = "1.0"
+
+	// Source type constants.
+	SourceTypeFolder = "folder"
+	SourceTypeWiki   = "wiki"
+)
+
+// SyncConfig is the sync configuration persisted in the output directory.
+type SyncConfig struct {
+	Version     string    `json:"version"`
+	SourceURL   string    `json:"source_url"`
+	SourceType  string    `json:"source_type"` // "folder" | "wiki"
+	Include     []string  `json:"include,omitempty"`
+	Exclude     []string  `json:"exclude,omitempty"`
+	Concurrency int       `json:"concurrency"`
+	LastSync    time.Time `json:"last_sync"`
+}
+
+// NewSyncConfig creates a new sync configuration.
+func NewSyncConfig(sourceURL, sourceType string) *SyncConfig {
+	return &SyncConfig{
+		Version:     SyncConfigVersion,
+		SourceURL:   sourceURL,
+		SourceType:  sourceType,
+		Include:     nil,
+		Exclude:     nil,
+		Concurrency: 5,
+		LastSync:    time.Time{},
+	}
+}
+
+// GetSyncConfigPath returns the path of the sync configuration file.
+func GetSyncConfigPath(outputDir string) string {
+	return filepath.Join(outputDir, SyncConfigFileName)
+}
+
+// LoadSyncConfig loads the sync configuration from the output directory.
+func LoadSyncConfig(outputDir string) (*SyncConfig, error) {
+	configPath := GetSyncConfigPath(outputDir)
+
+	file, err := os.ReadFile(configPath)
+	if err != nil {
+		if os.IsNotExist(err) {
+			return nil, nil // no config yet: not an error, caller gets nil
+		}
+		return nil, err
+	}
+
+	var config SyncConfig
+	if err := json.Unmarshal(file, &config); err != nil {
+		return nil, err
+	}
+
+	return &config, nil
+}
+
+// Save writes the sync configuration to the output directory.
+func (c *SyncConfig) Save(outputDir string) error {
+	// Make sure the output directory exists.
+	if err := os.MkdirAll(outputDir, 0o755); err != nil {
+		return err
+	}
+
+	// Refresh the last-sync timestamp.
+	c.LastSync = time.Now()
+
+	configPath := GetSyncConfigPath(outputDir)
+
+	data, err := json.MarshalIndent(c, "", " ")
+	if err != nil {
+		return err
+	}
+
+	return os.WriteFile(configPath, data, 0o644)
+}
+
+// Update merges command-line arguments into the sync configuration.
+func (c *SyncConfig) Update(include, exclude []string, concurrency int) {
+	if len(include) > 0 {
+		c.Include = include
+	}
+	if len(exclude) > 0 {
+		c.Exclude = exclude
+	}
+	if concurrency > 0 {
+		c.Concurrency = concurrency
+	}
+}
diff --git a/core/sync_config_test.go b/core/sync_config_test.go
new file mode 100644
index 0000000..bb1aa65
--- /dev/null
+++ b/core/sync_config_test.go
@@ -0,0 +1,136 @@
+package core
+
+import (
+	"os"
+	"path/filepath"
+	"testing"
+	"time"
+
+	"github.com/stretchr/testify/assert"
+)
+
+func TestNewSyncConfig(t *testing.T) {
+	config := NewSyncConfig("https://example.feishu.cn/wiki/xxx", SourceTypeWiki)
+
+	assert.Equal(t, SyncConfigVersion, config.Version)
+	assert.Equal(t, "https://example.feishu.cn/wiki/xxx", config.SourceURL)
+	assert.Equal(t, SourceTypeWiki, config.SourceType)
+	assert.Equal(t, 5, config.Concurrency)
+	assert.Nil(t, config.Include)
+	assert.Nil(t, config.Exclude)
+	assert.True(t, config.LastSync.IsZero())
+}
+
+func TestSyncConfigSaveAndLoad(t *testing.T) {
+	tmpDir := t.TempDir()
+
+	// Create and save a configuration.
+	config := NewSyncConfig("https://example.feishu.cn/wiki/xxx", SourceTypeWiki)
+	config.Include = []string{"*文档*", "API*"}
+	config.Exclude = []string{"*草稿*"}
+	config.Concurrency = 10
+
+	err := config.Save(tmpDir)
+	assert.NoError(t, err)
+
+	// The file should exist on disk.
+	configPath := GetSyncConfigPath(tmpDir)
+	_, err = os.Stat(configPath)
+	assert.NoError(t, err)
+
+	// Load it back and compare.
+	loaded, err := LoadSyncConfig(tmpDir)
+	assert.NoError(t, err)
+	assert.NotNil(t, loaded)
+
+	assert.Equal(t, SyncConfigVersion, loaded.Version)
+	assert.Equal(t, "https://example.feishu.cn/wiki/xxx", loaded.SourceURL)
+	assert.Equal(t, SourceTypeWiki, loaded.SourceType)
+	assert.Equal(t, []string{"*文档*", "API*"}, loaded.Include)
+	assert.Equal(t, []string{"*草稿*"}, loaded.Exclude)
+	assert.Equal(t, 10, loaded.Concurrency)
+	assert.False(t, loaded.LastSync.IsZero())
+}
+
+func TestLoadSyncConfig_NotExist(t *testing.T) {
+	tmpDir := t.TempDir()
+
+	// Load a configuration that does not exist.
+	config, err := LoadSyncConfig(tmpDir)
+	assert.NoError(t, err)
+	assert.Nil(t, config)
+}
+
+func TestSyncConfigUpdate(t *testing.T) {
+	config := NewSyncConfig("https://example.feishu.cn/wiki/xxx", SourceTypeWiki)
+
+	// Update the configuration.
+	config.Update(
+		[]string{"include1", "include2"},
+		[]string{"exclude1"},
+		8,
+	)
+
+	assert.Equal(t, []string{"include1", "include2"}, config.Include)
+	assert.Equal(t, []string{"exclude1"}, config.Exclude)
+	assert.Equal(t, 8, config.Concurrency)
+}
+
+func TestSyncConfigUpdate_EmptyValues(t *testing.T) {
+	config := NewSyncConfig("https://example.feishu.cn/wiki/xxx", SourceTypeWiki)
+	config.Include = []string{"original"}
+	config.Exclude = []string{"original"}
+	config.Concurrency = 5
+
+	// Empty values must not overwrite existing ones.
+	config.Update(nil, nil, 0)
+
+	assert.Equal(t, []string{"original"}, config.Include)
+	assert.Equal(t, []string{"original"}, config.Exclude)
+	assert.Equal(t, 5, config.Concurrency)
+}
+
+func TestSyncConfigUpdate_PartialUpdate(t *testing.T) {
+	config := NewSyncConfig("https://example.feishu.cn/wiki/xxx", SourceTypeWiki)
+	config.Include = []string{"original_include"}
+	config.Exclude = []string{"original_exclude"}
+
+	// Update include only.
+	config.Update([]string{"new_include"}, nil, 0)
+
+	assert.Equal(t, []string{"new_include"}, config.Include)
+	assert.Equal(t, []string{"original_exclude"}, config.Exclude)
+}
+
+func TestGetSyncConfigPath(t *testing.T) {
+	path := GetSyncConfigPath("/tmp/output")
+	expected := filepath.Join("/tmp/output", SyncConfigFileName)
+	assert.Equal(t, expected, path)
+}
+
+func TestSyncConfigSave_UpdatesLastSync(t *testing.T) {
+	tmpDir := t.TempDir()
+
+	config := NewSyncConfig("https://example.feishu.cn/wiki/xxx", SourceTypeWiki)
+	assert.True(t, config.LastSync.IsZero())
+
+	before := time.Now()
+	err := config.Save(tmpDir)
+	assert.NoError(t, err)
+	after := time.Now()
+
+	// LastSync should be set to the current time.
+	assert.False(t, config.LastSync.IsZero())
+	assert.True(t, config.LastSync.After(before) || config.LastSync.Equal(before))
+	assert.True(t, config.LastSync.Before(after) || config.LastSync.Equal(after))
+}
+
+func TestSyncConfigSourceTypes(t *testing.T) {
+	// Wiki source type.
+	wikiConfig := NewSyncConfig("https://example.feishu.cn/wiki/xxx", SourceTypeWiki)
+	assert.Equal(t, SourceTypeWiki, wikiConfig.SourceType)
+
+	// Folder source type.
+	folderConfig := NewSyncConfig("https://example.feishu.cn/drive/folder/xxx", SourceTypeFolder)
+	assert.Equal(t, SourceTypeFolder, folderConfig.SourceType)
+}
diff --git a/web/download.go b/web/download.go
index deb9631..f45b2eb 100644
--- a/web/download.go
+++ b/web/download.go
@@ -35,9 +35,7 @@ func downloadHandler(c *gin.Context) {
 		os.Getenv("FEISHU_APP_ID"), os.Getenv("FEISHU_APP_SECRET"),
 	)
-	client := core.NewClient(
-		config.Feishu.AppId, config.Feishu.AppSecret,
-	)
+	client := core.NewClient(config.Feishu)
 
 	// Process the download
 	parser := core.NewParser(config.Output)