diff --git a/.gitignore b/.gitignore index 4d7e1c3..2fc85e3 100755 --- a/.gitignore +++ b/.gitignore @@ -71,4 +71,10 @@ htmlcov/ # Package artifacts dist/ build/ -*.egg-info/ \ No newline at end of file +*.egg-info/ + +# Node.js +scripts/node_modules/ + +# Local config (contains personal paths/URLs) +sync-config.json \ No newline at end of file diff --git a/SKILL.md b/SKILL.md index 2be7e16..157ec87 100755 --- a/SKILL.md +++ b/SKILL.md @@ -248,12 +248,56 @@ Synthesize and respond to user 5. **Include context** - Each question is independent 6. **Synthesize answers** - Combine multiple responses +## Auto-Sync Feature (NEW) + +Automatically sync local files to Google Drive and add them to NotebookLM notebooks. + +### Quick Commands + +```bash +# Full sync (Drive + NotebookLM) +notebooklm-sync + +# Drive sync only (faster, hourly via LaunchAgent) +notebooklm-sync --drive-only + +# Check sync status +notebooklm-sync --status +``` + +### Configuration + +Edit `~/.claude/skills/notebooklm/sync-config.json`: + +```json +{ + "sync_mappings": [ + { + "name": "Argus Investment Analysis", + "local_path": "~/Dropbox/PKM-Vault/1-Projects/Argus/Shared report", + "drive_folder": "NotebookLM-Sources/Argus-Investment-Analysis", + "notebook_url": "https://notebooklm.google.com/notebook/xxx", + "enabled": true + } + ] +} +``` + +### Manual Add Sources + +```bash +cd ~/.claude/skills/notebooklm && source venv/bin/activate +python scripts/add_sources.py \ + --notebook-url "https://notebooklm.google.com/notebook/xxx" \ + --drive-folder "NotebookLM-Sources/MyFolder" +``` + ## Limitations - No session persistence (each question = new browser) - Rate limits on free Google accounts (50 queries/day) -- Manual upload required (user must add docs to NotebookLM) - Browser overhead (few seconds per question) +- Auto-sync requires Google OAuth credentials setup ## Resources (Skill Structure) diff --git a/scripts/add_sources.py b/scripts/add_sources.py new file mode 100755 index 0000000..8b33f64 
#!/usr/bin/env python3
"""
Add Sources to NotebookLM from Google Drive
Automatically adds new files from a Google Drive folder to a NotebookLM notebook
"""

import sys
import json
import time
import argparse
from pathlib import Path
from typing import List, Optional

from patchright.sync_api import sync_playwright, Page, BrowserContext

# Make the sibling modules (browser_utils, config) importable when this file
# is executed directly as a script.
sys.path.insert(0, str(Path(__file__).parent))

from browser_utils import BrowserFactory, StealthUtils
from config import STATE_FILE


class SourceAdder:
    """Adds sources from Google Drive to NotebookLM notebooks.

    Intended to be used as a context manager: ``__enter__`` starts the
    browser and opens a page, ``__exit__`` tears everything down again.

    ``self.main_page`` always refers to the top-level page, while
    ``self.page`` may be swapped for the Drive picker iframe once the picker
    is open. Keyboard input and screenshots therefore always go through
    ``self.main_page``.
    """

    # Labels of navigation elements / folders that must never be selected.
    _SKIP_TEXTS = ('My Drive', '我的雲端硬碟', '資料夾', 'folder', '近期存取', 'Recent')

    # Substrings that mark a picker entry as a document. They are compared
    # against the lower-cased entry text, so every hint must be lower case.
    _FILE_HINTS = (
        '.pdf', '.doc', '.txt', '.md', '投顧', '簡報', '月報',
        'ms_', 'semi', 'nvda', 'alchip', 'asml',
    )
    _THUMBNAIL_HINTS = ('.pdf', 'ms_', 'sino', '投顧', '月報')

    # Modifier key for adding to a selection: Cmd on macOS, Ctrl elsewhere.
    _MULTI_SELECT_MODIFIER = "Meta" if sys.platform == "darwin" else "Control"

    def __init__(self, headless: bool = False):
        self.headless = headless
        self.stealth = StealthUtils()
        self.context: Optional[BrowserContext] = None
        self.page: Optional[Page] = None
        self.main_page: Optional[Page] = None  # Keep reference to main page
        self.playwright = None

    def __enter__(self):
        self.playwright = sync_playwright().start()
        self.context = BrowserFactory.launch_persistent_context(
            self.playwright,
            headless=self.headless
        )
        self.page = self.context.new_page()
        self.main_page = self.page  # Save reference
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        if self.main_page:
            try:
                self.main_page.close()
            except Exception:
                # Best effort: the page may already be closed.
                pass
        try:
            if self.context:
                self.context.close()
        finally:
            # Always stop the driver, even if closing the context failed.
            if self.playwright:
                self.playwright.stop()

    def add_sources_from_drive(
        self,
        notebook_url: str,
        drive_folder: str,
        file_names: Optional[List[str]] = None
    ) -> dict:
        """
        Add sources from a Google Drive folder to a NotebookLM notebook

        Args:
            notebook_url: The NotebookLM notebook URL
            drive_folder: Path in Google Drive (e.g., "NotebookLM-Sources/Argus")
            file_names: Optional list of specific files to add (adds all if None)

        Returns:
            Dict with status and details. ``status`` is one of ``"success"``,
            ``"no_files"`` or ``"error"``; errors are reported in the dict
            rather than raised.
        """
        try:
            # A previous call may have left self.page pointing at the picker
            # iframe; always start from the top-level page.
            self.page = self.main_page

            print(f"🚀 Opening notebook: {notebook_url}")
            self.page.goto(notebook_url, wait_until="domcontentloaded", timeout=30000)

            # Check if login needed
            if "accounts.google.com" in self.page.url:
                raise RuntimeError("Authentication required. Run auth_manager.py setup first.")

            # Wait for page to load
            self.stealth.random_delay(2000, 3000)

            # Click "Add source" button (新增來源)
            print("📎 Looking for Add source button...")
            add_source_btn = self._find_add_source_button()
            if not add_source_btn:
                raise RuntimeError("Could not find Add source button")

            add_source_btn.click()
            self.stealth.random_delay(1000, 1500)

            # Click Google Drive option
            print("📁 Selecting Google Drive...")
            drive_option = self._find_drive_option()
            if not drive_option:
                raise RuntimeError("Could not find Google Drive option")

            drive_option.click()
            self.stealth.random_delay(3000, 4000)

            # Check if Drive picker is in an iframe
            self._switch_to_picker_iframe()

            # Navigate to folder in Drive picker
            print(f"📂 Navigating to: {drive_folder}")
            self._navigate_to_drive_folder(drive_folder)

            # Select files
            print("📄 Selecting files...")
            selected_count = self._select_files(file_names)

            if selected_count == 0:
                print("⚠️ No files found to add")
                return {
                    "status": "no_files",
                    "message": "No new files found in folder",
                    "notebook_url": notebook_url,
                    "drive_folder": drive_folder
                }

            # Click Insert/Add button
            print(f"✅ Adding {selected_count} files...")
            self._click_insert_button()

            # Wait for processing
            self.stealth.random_delay(3000, 5000)

            print(f"✅ Successfully added {selected_count} sources!")
            return {
                "status": "success",
                "files_added": selected_count,
                "notebook_url": notebook_url,
                "drive_folder": drive_folder
            }

        except Exception as e:
            print(f"❌ Error: {e}")
            self._save_error_screenshot()
            return {
                "status": "error",
                "error": str(e),
                "notebook_url": notebook_url
            }

    def _save_error_screenshot(self):
        """Save a screenshot of the top-level page for debugging (best effort)."""
        try:
            screenshot_path = Path(__file__).parent.parent / "data" / "error_screenshot.png"
            screenshot_path.parent.mkdir(parents=True, exist_ok=True)
            # self.page may be the picker Frame at this point; only the
            # top-level page is used for screenshots.
            self.main_page.screenshot(path=str(screenshot_path))
            print(f"📸 Screenshot saved: {screenshot_path}")
        except Exception as shot_err:
            print(f"⚠️ Could not save error screenshot: {shot_err}")

    def _switch_to_picker_iframe(self):
        """Switch ``self.page`` to the Google Picker iframe if one is present.

        Returns:
            True when an iframe was found and selected, False otherwise.
        """
        try:
            # Look for picker iframe
            iframes = self.page.query_selector_all('iframe')
            print(f" Found {len(iframes)} iframes")

            for iframe in iframes:
                src = iframe.get_attribute('src') or ''
                name = iframe.get_attribute('name') or ''
                print(f" Iframe: src={src[:50]}..., name={name}")

                # Google Picker iframes usually have these patterns
                if 'picker' in src.lower() or 'drive' in src.lower() or 'docs.google' in src:
                    frame = iframe.content_frame()
                    if frame:
                        self.page = frame
                        print(" ✓ Switched to picker iframe")
                        self._take_debug_screenshot("inside_iframe")
                        return True

            print(" No picker iframe found, staying on main page")
            return False
        except Exception as e:
            print(f" ⚠️ Iframe handling error: {e}")
            return False

    def _find_add_source_button(self):
        """Find the Add source button; returns the element or None."""
        # Try various selectors for Add source button
        selectors = [
            'button:has-text("新增來源")',
            'button:has-text("Add source")',
            'button:has-text("新增")',
            '[aria-label="Add source"]',
            '[aria-label="新增來源"]',
            'button.add-source-button',
            # Fallback: look for plus icon button
            'button:has(mat-icon:has-text("add"))',
        ]

        for selector in selectors:
            try:
                element = self.page.query_selector(selector)
                if element and element.is_visible():
                    return element
            except Exception:
                continue

        # Try finding by text content
        for btn in self.page.query_selector_all('button'):
            try:
                text = btn.inner_text().lower()
                if ('新增' in text or 'add' in text or 'source' in text) and btn.is_visible():
                    return btn
            except Exception:
                continue

        return None

    def _find_drive_option(self):
        """Find the Google Drive option in the source picker; returns it or None."""
        self._take_debug_screenshot("before_drive_click")

        # First, try to find the exact "Google 雲端硬碟" / "Google Drive"
        # chip/button via a text locator.
        for label in ('Google 雲端硬碟', 'Google Drive'):
            try:
                drive_btn = self.page.locator(f'text="{label}"').first
                if drive_btn and drive_btn.is_visible():
                    print(f" Found: {label} via locator")
                    return drive_btn
            except Exception:
                pass

        # Try query selector with various patterns
        selectors = [
            'div:has-text("Google 雲端硬碟")',
            'span:has-text("Google 雲端硬碟")',
            'button:has-text("Google 雲端硬碟")',
            '[data-value="google_drive"]',
            '.source-option:has-text("雲端硬碟")',
        ]

        for selector in selectors:
            try:
                for element in self.page.query_selector_all(selector):
                    if element.is_visible():
                        text = element.inner_text()
                        if '雲端硬碟' in text or 'Drive' in text:
                            print(f" Found via selector: {selector}")
                            return element
            except Exception:
                continue

        # Fallback: find all clickable elements and look for Drive text
        for elem in self.page.query_selector_all('div, span, button, a'):
            try:
                text = elem.inner_text().strip()
                # Look for exact match to avoid matching parent containers
                if text in ('Google 雲端硬碟', 'Google Drive') and elem.is_visible():
                    box = elem.bounding_box()
                    if box and box['width'] > 50 and box['height'] > 20:
                        print(f" Found clickable: {text}")
                        return elem
            except Exception:
                continue

        print(" ⚠️ Could not find Google Drive option")
        return None

    def _navigate_to_drive_folder(self, folder_path: str):
        """Navigate to ``folder_path`` in the Drive picker.

        Searches for the last path component first; if that fails, falls back
        to opening the "My Drive" tab and double-clicking each path component
        in turn.
        """
        # Wait for Drive picker to load
        self.stealth.random_delay(2000, 3000)
        self._take_debug_screenshot("drive_picker_opened")

        # Drop empty components so a leading/trailing "/" is harmless.
        folders = [part for part in folder_path.split('/') if part]
        if not folders:
            print(" ⚠️ Empty Drive folder path, staying in picker root")
            return

        # Use search bar to find the folder directly
        last_folder = folders[-1]
        print(f" 🔍 Searching for folder: {last_folder}")

        # Find and click the search input
        try:
            search_input = self.page.query_selector('input[type="text"]')
            if search_input and search_input.is_visible():
                search_input.click()
                self.stealth.random_delay(300, 500)
                search_input.fill(last_folder)
                self.stealth.random_delay(500, 800)
                self.main_page.keyboard.press("Enter")  # Use main_page for keyboard
                self.stealth.random_delay(2000, 3000)
                print(f" ✓ Searched for: {last_folder}")
                self._take_debug_screenshot("after_search")

                # Now find and double-click the folder in search results
                try:
                    folder_result = self.page.locator(f'text="{last_folder}"').first
                    if folder_result and folder_result.is_visible():
                        folder_result.dblclick()
                        self.stealth.random_delay(2000, 3000)
                        print(f" ✓ Entered folder: {last_folder}")
                        self._take_debug_screenshot("inside_folder")
                        return
                except Exception:
                    pass
        except Exception as e:
            print(f" ⚠️ Search failed: {e}")

        # Fallback: Try clicking the tab approach
        print(" 📁 Trying tab navigation...")

        # Click the "My Drive" (我的雲端硬碟) tab by finding the tab bar
        try:
            tabs = self.page.query_selector_all('[role="tab"], .tab-item, div[data-tab]')
            for tab in tabs:
                text = tab.inner_text()
                if '我的雲端硬碟' in text or 'My Drive' in text:
                    tab.click()
                    self.stealth.random_delay(1500, 2000)
                    print(f" ✓ Clicked tab: {text}")
                    self._take_debug_screenshot("after_tab_click")
                    break
        except Exception:
            pass

        # Navigate through folders
        for folder_name in folders:
            self.stealth.random_delay(1000, 1500)
            print(f" 📂 Looking for folder: {folder_name}")

            clicked = False
            try:
                folder_elem = self.page.locator(f'text="{folder_name}"').first
                if folder_elem and folder_elem.is_visible():
                    folder_elem.dblclick()
                    clicked = True
                    print(f" ✓ Entered folder: {folder_name}")
                    self.stealth.random_delay(1500, 2000)
            except Exception:
                pass

            if not clicked:
                self._take_debug_screenshot(f"could_not_find_{folder_name}")
                print(f" ⚠️ Could not find folder: {folder_name}")

    def _take_debug_screenshot(self, name: str):
        """Take a debug screenshot into data/debug_screenshots/<name>.png."""
        try:
            screenshot_dir = Path(__file__).parent.parent / "data" / "debug_screenshots"
            screenshot_dir.mkdir(parents=True, exist_ok=True)
            path = screenshot_dir / f"{name}.png"
            # Use main_page for screenshots (Frame doesn't support screenshot)
            self.main_page.screenshot(path=str(path))
            print(f" 📸 Screenshot: {path.name}")
        except Exception as e:
            print(f" ⚠️ Screenshot failed: {e}")

    @staticmethod
    def _contains_hint(text: str, hints) -> bool:
        """Return True if the lower-cased ``text`` contains any of ``hints``."""
        lowered = text.lower()
        return any(hint in lowered for hint in hints)

    def _select_files(self, file_names: Optional[List[str]] = None) -> int:
        """Select files in the Drive picker.

        Args:
            file_names: When given, only entries whose text contains one of
                these names are selected. Otherwise every entry that looks
                like a document (see ``_FILE_HINTS``) is selected.

        Returns:
            Number of entries that were clicked.
        """
        self.stealth.random_delay(1500, 2000)
        self._take_debug_screenshot("before_file_selection")

        # Ignore empty names: "" is a substring of every entry text.
        wanted = [fn for fn in (file_names or []) if fn]
        selected = 0

        def matches(text: str, hints) -> bool:
            # An explicit request wins over the heuristic: the requested name
            # does not have to look like a document.
            if wanted:
                return any(fn in text for fn in wanted)
            return self._contains_hint(text, hints)

        try:
            # Find all clickable items in the picker
            # Look for file items (usually divs with specific classes or data attributes)
            file_items = self.page.query_selector_all(
                '[role="option"], [role="gridcell"], .picker-item, div[data-id]'
            )
            print(f" Found {len(file_items)} potential items")

            for item in file_items:
                try:
                    text = item.inner_text().strip()

                    # Skip navigation elements and folders
                    if any(skip in text for skip in self._SKIP_TEXTS):
                        continue
                    if not matches(text, self._FILE_HINTS):
                        continue

                    # Plain click for the first item, modifier-click afterwards
                    # so that earlier selections are kept.
                    modifier = [self._MULTI_SELECT_MODIFIER] if selected > 0 else []
                    item.click(modifiers=modifier)
                    selected += 1
                    print(f" ✓ Selected: {text[:60]}...")
                    self.stealth.random_delay(300, 500)
                except Exception:
                    continue

            if selected == 0:
                # Try alternative approach: click on visible thumbnails
                thumbnails = self.page.query_selector_all('img[src*="thumbnail"], .picker-thumbnail')
                print(f" Trying thumbnails: found {len(thumbnails)}")
                for thumb in thumbnails[:10]:  # Limit to first 10
                    try:
                        # Click parent of thumbnail
                        parent = thumb.evaluate_handle('el => el.parentElement').as_element()
                        if not parent:
                            continue
                        text = parent.inner_text()
                        if matches(text, self._THUMBNAIL_HINTS):
                            modifier = [self._MULTI_SELECT_MODIFIER] if selected > 0 else []
                            parent.click(modifiers=modifier)
                            selected += 1
                            print(f" ✓ Selected via thumbnail: {text[:40]}...")
                            self.stealth.random_delay(300, 500)
                    except Exception:
                        continue

        except Exception as e:
            print(f" ⚠️ Error selecting files: {e}")

        self._take_debug_screenshot("after_file_selection")
        return selected

    def _click_insert_button(self):
        """Click the Insert/Add button to confirm selection.

        Falls back to pressing Enter when no known button is visible.
        """
        selectors = [
            'button:has-text("插入")',
            'button:has-text("Insert")',
            'button:has-text("選取")',
            'button:has-text("Select")',
            'button:has-text("新增")',
            'button:has-text("Add")',
            'button[data-mdc-dialog-action="accept"]',
            'button.primary-button',
        ]

        for selector in selectors:
            try:
                btn = self.page.query_selector(selector)
                if btn and btn.is_visible():
                    btn.click()
                    print(f" ✓ Clicked button: {selector}")
                    return
            except Exception:
                continue

        # Fallback: press Enter
        self.main_page.keyboard.press("Enter")
        print(" ✓ Pressed Enter as fallback")


def main():
    """CLI entry point; returns 0 when sources were added, 1 otherwise."""
    parser = argparse.ArgumentParser(description='Add sources to NotebookLM from Google Drive')
    parser.add_argument('--notebook-url', required=True, help='NotebookLM notebook URL')
    parser.add_argument('--drive-folder', required=True, help='Google Drive folder path')
    parser.add_argument('--files', nargs='*', help='Specific file names to add (optional)')
    parser.add_argument('--headless', action='store_true', help='Run in headless mode')

    args = parser.parse_args()

    print("\n🔧 NotebookLM Source Adder")
    print("=" * 40)

    with SourceAdder(headless=args.headless) as adder:
        result = adder.add_sources_from_drive(
            notebook_url=args.notebook_url,
            drive_folder=args.drive_folder,
            file_names=args.files
        )

    print("\n📊 Result:")
    print(json.dumps(result, indent=2, ensure_ascii=False))

    return 0 if result.get('status') == 'success' else 1


if __name__ == "__main__":
    sys.exit(main())
#!/usr/bin/env python3
"""
NotebookLM Auto-Sync: End-to-End Automation
Syncs local files to Google Drive and automatically adds them to NotebookLM notebooks

Usage:
    python auto_sync.py --config sync-config.json
    python auto_sync.py --local ~/docs --drive "MyFolder" --notebook-url "https://..."
"""

import os
import sys
import json
import argparse
import subprocess
from pathlib import Path
from datetime import datetime
from typing import Dict, List, Optional, Any

# Make the sibling modules (e.g. add_sources) importable when this file is
# executed directly as a script.
sys.path.insert(0, str(Path(__file__).parent))

# Extensions that sync-to-drive.js uploads as converted Google Docs. Converted
# files are stored in Drive under their name without the extension.
_CONVERTED_TO_GDOC = frozenset({".md", ".txt", ".docx"})

# Per-mapping statuses that mean the mapping did not sync cleanly.
_FAILURE_STATUSES = frozenset({"drive_sync_failed", "error"})


class NotebookLMAutoSync:
    """End-to-end automation for NotebookLM document synchronization.

    Args:
        config_path: Optional path to a JSON sync configuration.
        dry_run: When True, ``sync_mapping`` only reports which files would
            be synced; nothing is uploaded and no state is written.
    """

    def __init__(self, config_path: Optional[str] = None, dry_run: bool = False):
        self.config_path = config_path
        self.dry_run = dry_run
        self.config = self._load_config() if config_path else {}
        self.scripts_dir = Path(__file__).parent
        self.data_dir = self.scripts_dir.parent / "data"
        self.sync_state_file = self.data_dir / "sync_state.json"
        self.sync_state = self._load_sync_state()

    def _load_config(self) -> Dict:
        """Load sync configuration.

        Raises:
            FileNotFoundError: If the configuration file does not exist.
        """
        config_file = Path(self.config_path).expanduser()
        if not config_file.exists():
            raise FileNotFoundError(f"Config file not found: {self.config_path}")

        with open(config_file, 'r', encoding='utf-8') as f:
            return json.load(f)

    def _load_sync_state(self) -> Dict:
        """Load sync state to track what's been synced."""
        if self.sync_state_file.exists():
            with open(self.sync_state_file, 'r', encoding='utf-8') as f:
                return json.load(f)
        return {"synced_files": {}, "last_sync": None}

    def _save_sync_state(self):
        """Save sync state, stamping it with the current time."""
        self.sync_state["last_sync"] = datetime.now().isoformat()
        self.data_dir.mkdir(parents=True, exist_ok=True)
        # The state is written with ensure_ascii=False, so the file must be
        # UTF-8 regardless of the platform's default encoding.
        with open(self.sync_state_file, 'w', encoding='utf-8') as f:
            json.dump(self.sync_state, f, indent=2, ensure_ascii=False)

    def _get_local_files(self, local_path: str) -> List[Dict]:
        """Get list of local files to sync.

        Only regular files directly inside ``local_path`` are considered
        (no recursion). Returns an empty list when the folder is missing.
        """
        path = Path(local_path).expanduser()
        if not path.is_dir():
            print(f" ⚠️ Path not found: {local_path}")
            return []

        settings = self.config.get("settings", {})
        extensions = {
            ext.lower()
            for ext in settings.get("supported_extensions", [".md", ".pdf", ".txt", ".docx"])
        }
        exclude = settings.get(
            "exclude_patterns", ["node_modules", ".git", ".obsidian", ".DS_Store"]
        )

        files = []
        # Sorted so that reports are stable between runs.
        for entry in sorted(path.iterdir()):
            if not entry.is_file():
                continue
            if entry.suffix.lower() not in extensions:
                continue
            if any(pattern in entry.name for pattern in exclude):
                continue

            info = entry.stat()
            files.append({
                "name": entry.name,
                "path": str(entry),
                "size": info.st_size,
                "modified": info.st_mtime
            })

        return files

    @staticmethod
    def _drive_display_name(file_name: str) -> str:
        """Return the name a synced file has in Drive.

        sync-to-drive.js drops the extension of files it converts to Google
        Docs, so those must be looked up by stem in the Drive picker.
        """
        candidate = Path(file_name)
        if candidate.suffix.lower() in _CONVERTED_TO_GDOC:
            return candidate.stem
        return file_name

    def _sync_to_drive(self, local_path: str, drive_folder: str) -> Dict:
        """Sync local files to Google Drive using Node.js script.

        Returns:
            ``{"status": "success", "output": ...}`` or
            ``{"status": "error", "error": ...}``.

        Raises:
            FileNotFoundError: If sync-to-drive.js is missing.
        """
        print(f"\n📤 Syncing to Google Drive: {local_path} → {drive_folder}")

        node_script = self.scripts_dir / "sync-to-drive.js"
        if not node_script.exists():
            raise FileNotFoundError(f"sync-to-drive.js not found at {node_script}")

        # Run the Node.js sync script
        try:
            result = subprocess.run(
                ["node", str(node_script), "--local", local_path, "--drive", drive_folder],
                capture_output=True,
                text=True,
                encoding="utf-8",
                errors="replace",
                cwd=str(self.scripts_dir)
            )
        except OSError as exc:
            # Typically: the `node` executable is not installed / not on PATH.
            print(f" ❌ Drive sync failed: {exc}")
            return {"status": "error", "error": str(exc)}

        if result.returncode != 0:
            print(f" ❌ Drive sync failed: {result.stderr}")
            return {"status": "error", "error": result.stderr}

        print(result.stdout)
        return {"status": "success", "output": result.stdout}

    def _add_to_notebook(
        self,
        notebook_url: str,
        drive_folder: str,
        new_files: Optional[List[str]] = None
    ) -> Dict:
        """Add files from Drive to NotebookLM notebook.

        Returns a ``"skipped"`` result when no notebook URL is configured and
        an ``"error"`` result when the browser automation raises.
        """
        if not notebook_url:
            print(" ⏭️ No notebook URL configured, skipping NotebookLM integration")
            return {"status": "skipped", "reason": "no_notebook_url"}

        print("\n📚 Adding sources to NotebookLM...")

        # Imported lazily so that Drive-only runs do not need the browser stack.
        from add_sources import SourceAdder

        try:
            with SourceAdder(headless=True) as adder:
                return adder.add_sources_from_drive(
                    notebook_url=notebook_url,
                    drive_folder=drive_folder,
                    file_names=new_files
                )
        except Exception as e:
            print(f" ❌ NotebookLM integration failed: {e}")
            return {"status": "error", "error": str(e)}

    def sync_mapping(self, mapping: Dict) -> Dict:
        """Sync a single mapping configuration.

        Returns:
            Dict with at least ``status`` and ``name``. ``status`` is one of
            ``disabled``, ``error``, ``no_files``, ``up_to_date``,
            ``dry_run``, ``drive_sync_failed`` or ``success``.
        """
        name = mapping.get("name", "Unknown")
        # expanduser only touches a leading "~" and does not require $HOME.
        local_path = os.path.expanduser(mapping.get("local_path", ""))
        drive_folder = mapping.get("drive_folder", "")
        notebook_url = mapping.get("notebook_url", "")

        if not mapping.get("enabled", True):
            return {"status": "disabled", "name": name}

        print(f"\n{'='*60}")
        print(f"📂 Processing: {name}")
        print(f"{'='*60}")

        if not local_path or not drive_folder:
            # An empty local_path would otherwise resolve to the current
            # working directory and sync whatever happens to be there.
            message = "local_path and drive_folder are required"
            print(f" ❌ {message}")
            return {"status": "error", "name": name, "error": message}

        # Get local files
        local_files = self._get_local_files(local_path)
        if not local_files:
            print(f" 📭 No files to sync in {local_path}")
            return {"status": "no_files", "name": name}

        print(f" 📄 Found {len(local_files)} local files")

        # Check for new/modified files
        folder_state = self.sync_state["synced_files"].get(drive_folder, {})
        new_files = [
            f["name"]
            for f in local_files
            if f["modified"] > folder_state.get(f["name"], {}).get("modified", 0)
        ]

        if not new_files:
            print(" ✅ All files already synced")
            return {"status": "up_to_date", "name": name}

        preview = ', '.join(new_files[:5]) + ('...' if len(new_files) > 5 else '')
        print(f" 🆕 {len(new_files)} new/modified files: {preview}")

        if self.dry_run:
            print(" 🔎 Dry run: nothing uploaded")
            return {"status": "dry_run", "name": name, "files_pending": new_files}

        # Step 1: Sync to Google Drive
        drive_result = self._sync_to_drive(local_path, drive_folder)
        if drive_result.get("status") != "success":
            return {"status": "drive_sync_failed", "name": name, "error": drive_result.get("error")}

        # Step 2: Add to NotebookLM (if configured), using the names the
        # files carry in Drive.
        notebook_names = [self._drive_display_name(n) for n in new_files]
        notebook_result = self._add_to_notebook(notebook_url, drive_folder, notebook_names)

        # Update sync state
        # NOTE(review): the state is recorded even when the NotebookLM step
        # failed, so that step is not retried until the file changes again.
        folder_entries = self.sync_state["synced_files"].setdefault(drive_folder, {})
        for f in local_files:
            folder_entries[f["name"]] = {
                "modified": f["modified"],
                "size": f["size"],
                "synced_at": datetime.now().isoformat()
            }

        self._save_sync_state()

        return {
            "status": "success",
            "name": name,
            "files_synced": len(new_files),
            "drive_result": drive_result,
            "notebook_result": notebook_result
        }

    def run_all(self) -> Dict:
        """Run sync for all enabled mappings.

        Raises:
            ValueError: If no configuration was loaded.
        """
        if not self.config:
            raise ValueError("No configuration loaded")

        mappings = self.config.get("sync_mappings", [])

        print("\n🚀 NotebookLM Auto-Sync Starting...")
        print(f"📋 Processing {len(mappings)} mappings")

        results = [self.sync_mapping(mapping) for mapping in mappings]

        # Summary
        success_count = sum(1 for r in results if r.get("status") == "success")
        failed_count = sum(1 for r in results if r.get("status") in _FAILURE_STATUSES)
        print(f"\n{'='*60}")
        print(f"📊 Sync Complete: {success_count}/{len(results)} successful")
        print(f"{'='*60}")

        return {
            "status": "complete",
            "total": len(results),
            "successful": success_count,
            "failed": failed_count,
            "results": results
        }

    def run_single(self, local_path: str, drive_folder: str, notebook_url: str = None) -> Dict:
        """Run sync for a single path (command-line mode)."""
        mapping = {
            "name": Path(local_path).name,
            "local_path": local_path,
            "drive_folder": drive_folder,
            "notebook_url": notebook_url or "",
            "enabled": True
        }
        return self.sync_mapping(mapping)


def main():
    """CLI entry point; returns the process exit code (0 on success)."""
    parser = argparse.ArgumentParser(
        description='NotebookLM Auto-Sync: Sync local files to Google Drive and NotebookLM'
    )
    parser.add_argument('--config', help='Path to sync configuration file')
    parser.add_argument('--local', help='Local folder path')
    parser.add_argument('--drive', help='Google Drive folder path')
    parser.add_argument('--notebook-url', help='NotebookLM notebook URL')
    parser.add_argument('--dry-run', action='store_true', help='Show what would be synced')

    args = parser.parse_args()

    if args.config:
        # Config-based sync
        syncer = NotebookLMAutoSync(config_path=args.config, dry_run=args.dry_run)
        result = syncer.run_all()
    elif args.local and args.drive:
        # Single folder sync
        syncer = NotebookLMAutoSync(dry_run=args.dry_run)
        result = syncer.run_single(
            local_path=args.local,
            drive_folder=args.drive,
            notebook_url=args.notebook_url
        )
    else:
        parser.print_help()
        print("\n📖 Examples:")
        print(" python auto_sync.py --config ../sync-config.json")
        print(" python auto_sync.py --local ~/docs --drive 'MyFolder' --notebook-url 'https://...'")
        return 1

    print(f"\n📋 Final Result: {json.dumps(result, indent=2, ensure_ascii=False)}")

    # A config run reports "complete" even if individual mappings failed;
    # surface those failures through the exit code.
    if result.get("failed"):
        return 1
    return 0 if result.get("status") in ["success", "complete", "up_to_date", "dry_run"] else 1


if __name__ == "__main__":
    sys.exit(main())
'Dropbox/PKM-Vault/.ai-butler-system/credentials/google-oauth-credentials.json' +); + +const TOKEN_PATH = path.join( + process.env.HOME, + 'Dropbox/PKM-Vault/.ai-butler-system/credentials/google-oauth-token.json' +); + +// 支援的檔案類型 +const SUPPORTED_EXTENSIONS = ['.md', '.pdf', '.txt', '.docx']; + +async function getAuthClient() { + const credentials = JSON.parse(fs.readFileSync(CREDENTIALS_PATH, 'utf8')); + const { client_secret, client_id, redirect_uris } = credentials.installed; + + const oAuth2Client = new google.auth.OAuth2( + client_id, + client_secret, + redirect_uris[0] + ); + + const token = JSON.parse(fs.readFileSync(TOKEN_PATH, 'utf8')); + oAuth2Client.setCredentials(token); + + return oAuth2Client; +} + +async function findOrCreateFolder(drive, folderPath, parentId = 'root') { + const folders = folderPath.split('/').filter(f => f); + let currentParentId = parentId; + + for (const folderName of folders) { + // 搜尋現有資料夾 + const response = await drive.files.list({ + q: `name='${folderName}' and '${currentParentId}' in parents and mimeType='application/vnd.google-apps.folder' and trashed=false`, + fields: 'files(id, name)', + spaces: 'drive' + }); + + if (response.data.files.length > 0) { + currentParentId = response.data.files[0].id; + } else { + // 建立資料夾 + const folder = await drive.files.create({ + resource: { + name: folderName, + mimeType: 'application/vnd.google-apps.folder', + parents: [currentParentId] + }, + fields: 'id' + }); + currentParentId = folder.data.id; + console.log(`📁 Created folder: ${folderName}`); + } + } + + return currentParentId; +} + +async function uploadFile(drive, localPath, folderId, existingFiles) { + const fileName = path.basename(localPath); + const ext = path.extname(localPath).toLowerCase(); + + // 檢查是否已存在 + const existing = existingFiles.find(f => f.name === fileName || f.name === fileName.replace('.md', '')); + + let mimeType; + let convertToGoogleDocs = false; + + switch (ext) { + case '.md': + case '.txt': + mimeType = 
'text/plain'; + convertToGoogleDocs = true; + break; + case '.pdf': + mimeType = 'application/pdf'; + break; + case '.docx': + mimeType = 'application/vnd.openxmlformats-officedocument.wordprocessingml.document'; + convertToGoogleDocs = true; + break; + default: + console.log(` ⏭️ Skipping unsupported file: ${fileName}`); + return null; + } + + const fileMetadata = { + name: convertToGoogleDocs ? fileName.replace(ext, '') : fileName, + parents: existing ? undefined : [folderId] + }; + + const media = { + mimeType: mimeType, + body: fs.createReadStream(localPath) + }; + + try { + if (existing) { + // 更新現有檔案 + const response = await drive.files.update({ + fileId: existing.id, + media: media, + fields: 'id, name, webViewLink' + }); + console.log(` 🔄 Updated: ${fileName}`); + return response.data; + } else { + // 建立新檔案 + const resource = { + ...fileMetadata, + ...(convertToGoogleDocs && { mimeType: 'application/vnd.google-apps.document' }) + }; + + const response = await drive.files.create({ + resource: resource, + media: media, + fields: 'id, name, webViewLink' + }); + console.log(` ✅ Uploaded: ${fileName}`); + return response.data; + } + } catch (error) { + console.error(` ❌ Error uploading ${fileName}:`, error.message); + return null; + } +} + +async function syncFolder(drive, localPath, driveFolderPath) { + console.log(`\n📂 Syncing: ${localPath} → ${driveFolderPath}`); + + // 確保資料夾存在 + const folderId = await findOrCreateFolder(drive, driveFolderPath); + console.log(` 📁 Drive folder ID: ${folderId}`); + + // 取得 Drive 上的現有檔案 + const existingResponse = await drive.files.list({ + q: `'${folderId}' in parents and trashed=false`, + fields: 'files(id, name, modifiedTime)', + spaces: 'drive' + }); + const existingFiles = existingResponse.data.files; + + // 讀取本地檔案 + const localFiles = fs.readdirSync(localPath).filter(f => { + const ext = path.extname(f).toLowerCase(); + return SUPPORTED_EXTENSIONS.includes(ext); + }); + + console.log(` 📄 Found ${localFiles.length} files to 
sync`); + + let uploaded = 0; + let updated = 0; + let skipped = 0; + + for (const file of localFiles) { + const filePath = path.join(localPath, file); + const stat = fs.statSync(filePath); + + if (!stat.isFile()) continue; + + const result = await uploadFile(drive, filePath, folderId, existingFiles); + if (result) { + if (existingFiles.find(f => f.name === file || f.name === file.replace(path.extname(file), ''))) { + updated++; + } else { + uploaded++; + } + } else { + skipped++; + } + } + + console.log(` 📊 Summary: ${uploaded} uploaded, ${updated} updated, ${skipped} skipped`); + return { folderId, uploaded, updated, skipped }; +} + +async function main() { + const args = process.argv.slice(2); + + // 解析參數 + let localPath, drivePath, configPath; + + for (let i = 0; i < args.length; i++) { + if (args[i] === '--local' && args[i + 1]) { + localPath = args[++i].replace('~', process.env.HOME); + } else if (args[i] === '--drive' && args[i + 1]) { + drivePath = args[++i]; + } else if (args[i] === '--config' && args[i + 1]) { + configPath = args[++i].replace('~', process.env.HOME); + } + } + + // 初始化 Drive API + const auth = await getAuthClient(); + const drive = google.drive({ version: 'v3', auth }); + + if (configPath) { + // 使用設定檔同步多個資料夾 + const config = JSON.parse(fs.readFileSync(configPath, 'utf8')); + console.log(`🔧 Using config: ${configPath}`); + + for (const mapping of config.sync_mappings) { + if (!mapping.enabled) continue; + + const local = mapping.local_path.replace('~', process.env.HOME); + if (!fs.existsSync(local)) { + console.log(`⚠️ Local path not found: ${local}`); + continue; + } + + await syncFolder(drive, local, mapping.drive_folder); + } + } else if (localPath && drivePath) { + // 單一資料夾同步 + if (!fs.existsSync(localPath)) { + console.error(`❌ Local path not found: ${localPath}`); + process.exit(1); + } + + await syncFolder(drive, localPath, drivePath); + } else { + console.log(` +NotebookLM Drive Sync Tool +========================== + +Usage: + 
node sync-to-drive.js --local --drive + node sync-to-drive.js --config + +Examples: + node sync-to-drive.js --local ~/PKM/Argus --drive "NotebookLM-Sources/Argus" + node sync-to-drive.js --config ~/.claude/skills/notebooklm/sync-config.json + +Supported file types: ${SUPPORTED_EXTENSIONS.join(', ')} + `); + } + + console.log('\n✅ Sync complete!'); +} + +main().catch(console.error); diff --git a/sync-config.json.example b/sync-config.json.example new file mode 100644 index 0000000..ce4c805 --- /dev/null +++ b/sync-config.json.example @@ -0,0 +1,29 @@ +{ + "description": "NotebookLM Google Drive Sync Configuration", + "version": "2.0.0", + "sync_mappings": [ + { + "name": "Example Notebook", + "local_path": "~/Documents/my-reports", + "drive_folder": "NotebookLM-Sources/My-Reports", + "notebook_url": "https://notebooklm.google.com/notebook/YOUR-NOTEBOOK-ID", + "enabled": true, + "description": "My research reports and documents" + }, + { + "name": "Another Notebook", + "local_path": "~/Documents/other-docs", + "drive_folder": "NotebookLM-Sources/Other-Docs", + "notebook_url": "", + "enabled": false, + "description": "Other documents (disabled)" + } + ], + "settings": { + "convert_md_to_gdoc": true, + "supported_extensions": [".md", ".pdf", ".txt", ".docx"], + "exclude_patterns": ["node_modules", ".git", ".obsidian", ".DS_Store"], + "sync_interval_hours": 1 + }, + "synced_files": {} +}