diff --git a/mobile2/.gitignore b/mobile2/.gitignore new file mode 100644 index 00000000..d914c328 --- /dev/null +++ b/mobile2/.gitignore @@ -0,0 +1,41 @@ +# Learn more https://docs.github.com/en/get-started/getting-started-with-git/ignoring-files + +# dependencies +node_modules/ + +# Expo +.expo/ +dist/ +web-build/ +expo-env.d.ts + +# Native +.kotlin/ +*.orig.* +*.jks +*.p8 +*.p12 +*.key +*.mobileprovision + +# Metro +.metro-health-check* + +# debug +npm-debug.* +yarn-debug.* +yarn-error.* + +# macOS +.DS_Store +*.pem + +# local env files +.env*.local + +# typescript +*.tsbuildinfo + +# generated native folders +/ios +/android diff --git a/mobile2/.vscode/extensions.json b/mobile2/.vscode/extensions.json new file mode 100644 index 00000000..b7ed8377 --- /dev/null +++ b/mobile2/.vscode/extensions.json @@ -0,0 +1 @@ +{ "recommendations": ["expo.vscode-expo-tools"] } diff --git a/mobile2/.vscode/settings.json b/mobile2/.vscode/settings.json new file mode 100644 index 00000000..e2798e42 --- /dev/null +++ b/mobile2/.vscode/settings.json @@ -0,0 +1,7 @@ +{ + "editor.codeActionsOnSave": { + "source.fixAll": "explicit", + "source.organizeImports": "explicit", + "source.sortMembers": "explicit" + } +} diff --git a/mobile2/agent/agent.ts b/mobile2/agent/agent.ts new file mode 100644 index 00000000..14bc53a9 --- /dev/null +++ b/mobile2/agent/agent.ts @@ -0,0 +1,167 @@ +/** + * Minimal agent loop — the smallest possible thing that ports the spirit of + * src/chrome/src/agent/agent.js to mobile. + * + * Out of scope for v0 (intentionally — port these as needs arise): + * - streaming responses (text_delta updates in chat) + * - vision / auto-screenshot + * - loop detection (general / coordinate-bucket / navigation) + * - context auto-trim & emergency trim + * - blockedDone probe + * - duplicate-submit guard + * - trace recorder + * + * What this DOES do: + * 1. Take a user message + current page meta (url/title). + * 2. Build messages = [system, ...history, user]. + * 3. 
Loop up to MAX_STEPS: + * a. provider.chat(messages, {tools}) + * b. If tool_calls: execute each, append assistant + tool messages, continue. + * c. If done() was called: emit summary, stop. + * d. If text only: emit text, stop. + * 4. Stream UI updates via onUpdate — the chat tab shows tool labels in + * real time so the user has feedback while the agent works. + */ +import { dispatchTool, AGENT_TOOLS, type ToolDispatchDeps } from './tools'; +import type { ChatMessage, OpenAIProvider } from './openai'; + +const MAX_STEPS = 30; + +const SYSTEM_PROMPT = `You are WebBrain, an AI agent that controls a mobile WebView browser. + +You have these tools: + - get_accessibility_tree({filter?, maxDepth?, maxChars?, ref_id?}) — read the current page as a flat indented text tree. PREFERRED first action. + - click_ax({ref_id}) — click an element by ref_id from get_accessibility_tree. + - type_ax({ref_id, text, clear?}) — type into a focusable input. After click_ax on a text field, your NEXT call must be type_ax on the same ref_id. + - navigate({url}) — load a URL. Use this if the page isn't already where the task needs to start. + - done({summary}) — call ONLY when the task is fully complete. Provide a short summary. + +Operating rules: + - Always start by calling get_accessibility_tree({filter: "visible"}) to see the current page. + - ref_ids look like "ref_42" — use them VERBATIM from the tree output. + - If a ref_id is missing or stale, re-read the tree. + - Don't guess URLs or invent ref_ids. + - Be decisive. After each action, re-read the tree to verify state, then take the next step. + - Prefer 5–10 tool calls per task. 
/** Events streamed to the UI through `onEvent` while the agent is working. */
export type AgentEvent =
  | { type: 'tool_call'; name: string; args: Record<string, unknown> }
  | { type: 'tool_result'; name: string; ok: boolean; preview: string }
  | { type: 'text'; content: string }
  | { type: 'done'; summary: string }
  | { type: 'error'; message: string };

/** Everything `runAgent` needs to process one user message. */
export type AgentInput = {
  provider: OpenAIProvider;
  history: ChatMessage[];
  userText: string;
  pageMeta: { url: string; title: string } | null;
  deps: ToolDispatchDeps;
  onEvent: (e: AgentEvent) => void;
};

/**
 * Truncate a value for log preview to keep onEvent payloads small.
 *
 * Strings are used as-is; everything else is JSON-encoded. `JSON.stringify`
 * returns `undefined` (not a string) for `undefined`, functions and symbols,
 * and throws on cycles/BigInt — both cases fall back to `String(v)`.
 *
 * @param v   Value to render.
 * @param max Maximum number of UTF-16 code units kept before appending "…".
 */
function preview(v: unknown, max = 200): string {
  let s: string;
  try {
    s = typeof v === 'string' ? v : JSON.stringify(v) ?? String(v);
  } catch {
    s = String(v);
  }
  return s.length > max ? s.slice(0, max) + '…' : s;
}

/** Outcome of decoding the raw `arguments` string of one tool call. */
type ParsedToolArgs =
  | { ok: true; args: Record<string, unknown> }
  | { ok: false; error: string };

/** Render any thrown value as a human-readable message. */
function errorMessage(e: unknown): string {
  return e instanceof Error ? e.message : String(e);
}

/**
 * Decode a tool call's JSON arguments. A missing/blank string means "no
 * arguments" ({}). Invalid JSON or a non-object payload is reported as an
 * error so the model can correct itself instead of the tool silently running
 * with empty arguments.
 */
function parseToolArgs(raw: string | undefined): ParsedToolArgs {
  if (!raw || !raw.trim()) return { ok: true, args: {} };
  let parsed: unknown;
  try {
    parsed = JSON.parse(raw);
  } catch (e: unknown) {
    return { ok: false, error: `Tool arguments were not valid JSON: ${errorMessage(e)}` };
  }
  if (parsed === null || typeof parsed !== 'object' || Array.isArray(parsed)) {
    return { ok: false, error: 'Tool arguments must be a JSON object.' };
  }
  return { ok: true, args: parsed as Record<string, unknown> };
}

/** Build the `tool` message that answers one tool call. */
function toolReply(tc: { id: string; function: { name: string } }, value: unknown): ChatMessage {
  return {
    role: 'tool',
    tool_call_id: tc.id,
    name: tc.function.name,
    // JSON.stringify(undefined) is undefined; a tool message needs string content.
    content: JSON.stringify(value) ?? 'null',
  };
}

/**
 * Run the agent loop for one user message.
 *
 * Builds `[system, ...history, user]`, then repeatedly asks the provider for
 * the next step (at most MAX_STEPS times), executing any requested tools and
 * feeding their results back. Progress is reported through `onEvent`.
 *
 * The loop ends when the model answers with plain text, when `done` is
 * dispatched, when the provider call fails, or when the step cap is hit.
 *
 * @returns The full message list (including the system prompt) so the caller
 *          can persist it as history. Every assistant `tool_calls` entry in
 *          the returned list has a matching `tool` reply, so the list can be
 *          sent back to the API on a follow-up turn.
 */
export async function runAgent({
  provider,
  history,
  userText,
  pageMeta,
  deps,
  onEvent,
}: AgentInput): Promise<ChatMessage[]> {
  const messages: ChatMessage[] = [{ role: 'system', content: SYSTEM_PROMPT }, ...history];

  // Enrich the user message with current page context (the Chrome agent
  // does this in _enrichFirstUserMessage). Lets the model skip a wasted
  // navigate call when the user's task starts on the page already shown.
  const ctx = pageMeta
    ? `\n\n[Current page: ${pageMeta.url}${pageMeta.title ? ` — "${pageMeta.title}"` : ''}]`
    : '';
  messages.push({ role: 'user', content: userText + ctx });

  for (let step = 0; step < MAX_STEPS; step++) {
    let result: Awaited<ReturnType<OpenAIProvider['chat']>>;
    try {
      result = await provider.chat(messages, { tools: AGENT_TOOLS });
    } catch (e: unknown) {
      const msg = errorMessage(e);
      onEvent({ type: 'error', message: msg });
      messages.push({ role: 'assistant', content: `[error] ${msg}` });
      return messages;
    }

    // Append the assistant turn (with any tool_calls) so the tool replies
    // pushed below have something to attach to.
    const toolCalls = result.toolCalls ?? [];
    messages.push({
      role: 'assistant',
      content: result.content || null,
      tool_calls: toolCalls.length > 0 ? toolCalls : undefined,
    });

    if (toolCalls.length === 0) {
      // Plain text response — final answer.
      const text = (result.content || '').trim();
      if (text) onEvent({ type: 'text', content: text });
      return messages;
    }

    // Execute each tool call and append exactly one `tool` message per call.
    for (const [index, tc] of toolCalls.entries()) {
      const name = tc.function.name;
      const parsed = parseToolArgs(tc.function.arguments);
      onEvent({ type: 'tool_call', name, args: parsed.ok ? parsed.args : {} });

      // `r` stays null when the tool was not (successfully) dispatched;
      // `failure` then carries the reason reported back to the model.
      let failure = parsed.ok ? '' : parsed.error;
      let r: Awaited<ReturnType<typeof dispatchTool>> | null = null;
      if (parsed.ok) {
        try {
          r = await dispatchTool(name, parsed.args, deps);
        } catch (e: unknown) {
          failure = `Tool ${name} threw: ${errorMessage(e)}`;
        }
      }

      if (r !== null && r.kind === 'done') {
        onEvent({ type: 'done', summary: r.summary });
        // Still record the tool reply so the conversation is coherent if
        // the user follows up.
        messages.push(toolReply(tc, { success: true, summary: r.summary }));
        // Calls requested after done() are never executed, but the API
        // rejects a history where a tool_call has no reply — answer them.
        for (const skipped of toolCalls.slice(index + 1)) {
          messages.push(
            toolReply(skipped, {
              success: false,
              error: 'Skipped: done() was already called in this turn.',
            }),
          );
        }
        return messages;
      }

      const ok = r !== null && r.kind === 'value';
      const replyValue =
        r === null
          ? { success: false, error: failure }
          : r.kind === 'value'
            ? r.value
            : { success: false, error: r.error };
      onEvent({ type: 'tool_result', name, ok, preview: preview(replyValue) });
      messages.push(toolReply(tc, replyValue));
    }
  }

  onEvent({
    type: 'error',
    message: `Stopped: hit step cap (${MAX_STEPS}) without calling done().`,
  });
  return messages;
}
+ * See src/chrome/src/content/accessibility-tree.js for the source of + * truth — this file mirrors it. KEEP IN SYNC when that file changes. + * + * 2. `window.__wb_ax_lookup(ref_id)` and `__wb_ax_suggest(...)` — used by + * the click/type handlers below. + * + * 3. `window.__wbHandle({id, method, params})` — the RPC dispatcher called + * from the React Native side via `injectJavaScript`. Each handler posts + * a `{id, ok, result|error}` JSON message back through + * `window.ReactNativeWebView.postMessage`. + * + * Methods exposed via __wbHandle: + * - get_accessibility_tree({filter, maxDepth, maxChars, ref_id}) + * - click_ax({ref_id}) + * - type_ax({ref_id, text, clear}) + * - get_page_meta() → {url, title} + * + * The script is wrapped in a guard so re-injection on subsequent loads is + * a no-op. Navigations replay the script before content loads, but a SPA + * route change keeps the same window — the guard prevents double-install. + */ +export const PAGE_SCRIPT = String.raw` +(function(){ + if (window.__wb_page_script_installed) return; + window.__wb_page_script_installed = true; + + // ─── AX TREE (port of src/chrome/src/content/accessibility-tree.js) ─── + (() => { + if (window.__wb_ax_installed) return; + window.__wb_ax_installed = true; + + if (!window.__wbElementMap) window.__wbElementMap = Object.create(null); + if (typeof window.__wbRefCounter !== 'number') window.__wbRefCounter = 0; + + const MAX_NAME_LEN = 100; + + const TAG_ROLES = { + a: 'link', button: 'button', select: 'combobox', textarea: 'textbox', + h1: 'heading', h2: 'heading', h3: 'heading', + h4: 'heading', h5: 'heading', h6: 'heading', + img: 'image', nav: 'navigation', main: 'main', + header: 'banner', footer: 'contentinfo', section: 'region', + article: 'article', aside: 'complementary', form: 'form', + table: 'table', ul: 'list', ol: 'list', li: 'listitem', label: 'label', + }; + + function getRole(el) { + const explicit = el.getAttribute('role'); + if (explicit) return explicit; + 
const tag = el.tagName.toLowerCase(); + if (tag === 'input') { + const t = el.getAttribute('type'); + if (t === 'submit' || t === 'button' || t === 'file') return 'button'; + if (t === 'checkbox') return 'checkbox'; + if (t === 'radio') return 'radio'; + return 'textbox'; + } + return TAG_ROLES[tag] || 'generic'; + } + + function getAccessibleName(el) { + const tag = el.tagName.toLowerCase(); + if (tag === 'select') { + const opt = el.querySelector('option[selected]') || (el.options && el.options[el.selectedIndex]); + if (opt && opt.textContent && opt.textContent.trim()) return opt.textContent.trim(); + } + const ariaLabel = el.getAttribute('aria-label'); + if (ariaLabel && ariaLabel.trim()) return ariaLabel.trim(); + const labelledby = el.getAttribute('aria-labelledby'); + if (labelledby && labelledby.trim()) { + try { + const ids = labelledby.trim().split(/\s+/); + const parts = []; + for (const id of ids) { + const ref = document.getElementById(id); + if (ref) { + const t = (ref.innerText || ref.textContent || '').trim(); + if (t) parts.push(t); + } + } + const joined = parts.join(' ').trim(); + if (joined) return joined.length > MAX_NAME_LEN ? joined.substring(0, MAX_NAME_LEN) + '...' 
: joined; + } catch (e) {} + } + const placeholder = el.getAttribute('placeholder'); + if (placeholder && placeholder.trim()) return placeholder.trim(); + const title = el.getAttribute('title'); + if (title && title.trim()) return title.trim(); + const alt = el.getAttribute('alt'); + if (alt && alt.trim()) return alt.trim(); + if (el.id) { + try { + const byFor = document.querySelector('label[for="' + CSS.escape(el.id) + '"]'); + if (byFor && byFor.textContent && byFor.textContent.trim()) return byFor.textContent.trim(); + } catch {} + } + if (tag === 'input') { + const t = (el.getAttribute('type') || '').toLowerCase(); + const valAttr = el.getAttribute('value'); + if ((t === 'submit' || t === 'button' || t === 'reset') && valAttr && valAttr.trim()) return valAttr.trim(); + } + if (tag === 'button' || tag === 'a' || tag === 'summary') { + let text = ''; + for (const child of el.childNodes) { + if (child.nodeType === Node.TEXT_NODE) text += child.textContent; + } + if (text.trim()) return text.trim(); + const deep = (el.innerText || el.textContent || '').trim(); + if (deep) return deep.length > MAX_NAME_LEN ? deep.substring(0, MAX_NAME_LEN) + '...' : deep; + } + if (/^h[1-6]$/.test(tag)) { + const s = el.textContent; + if (s && s.trim()) return s.trim().substring(0, MAX_NAME_LEN); + } + if (tag === 'img') return ''; + const roleAttr = (el.getAttribute('role') || '').toLowerCase(); + const LABEL_FROM_DESCENDANTS_ROLES = new Set([ + 'option', 'menuitem', 'menuitemcheckbox', 'menuitemradio', + 'tab', 'treeitem', 'row', 'gridcell', 'cell', 'listitem', + ]); + if (LABEL_FROM_DESCENDANTS_ROLES.has(roleAttr) || tag === 'li') { + const s = (el.innerText || el.textContent || '').trim(); + if (s) return s.length > MAX_NAME_LEN ? s.substring(0, MAX_NAME_LEN) + '...' 
: s; + } + if (tag === 'input' || tag === 'textarea' || tag === 'select' + || roleAttr === 'textbox' || roleAttr === 'searchbox' + || roleAttr === 'spinbutton' || roleAttr === 'combobox') { + try { + const tryText = (node) => { + if (!node) return ''; + if (node.nodeType === Node.TEXT_NODE) return (node.textContent || '').trim(); + if (node.nodeType === Node.ELEMENT_NODE) { + const t = (node.innerText || node.textContent || '').trim(); + if (t && t.length <= 60) return t; + } + return ''; + }; + let sib = el.previousSibling; + for (let i = 0; i < 4 && sib; i++, sib = sib.previousSibling) { + const t = tryText(sib); + if (t) return t.length > MAX_NAME_LEN ? t.substring(0, MAX_NAME_LEN) + '...' : t; + } + const parent = el.parentElement; + if (parent) { + let psib = parent.previousSibling; + for (let i = 0; i < 3 && psib; i++, psib = psib.previousSibling) { + const t = tryText(psib); + if (t) return t.length > MAX_NAME_LEN ? t.substring(0, MAX_NAME_LEN) + '...' : t; + } + } + } catch {} + } + let text = ''; + for (const child of el.childNodes) { + if (child.nodeType === Node.TEXT_NODE) text += child.textContent; + } + if (text.trim() && text.trim().length >= 3) { + const v = text.trim(); + return v.length > MAX_NAME_LEN ? v.substring(0, MAX_NAME_LEN) + '...' 
: v; + } + return ''; + } + + function isVisible(el) { + const cs = window.getComputedStyle(el); + if (cs.display === 'none') return false; + if (cs.visibility === 'hidden') return false; + if (cs.opacity === '0') return false; + if (el.offsetWidth <= 0 || el.offsetHeight <= 0) return false; + return true; + } + function isInViewport(el) { + const r = el.getBoundingClientRect(); + return r.top < window.innerHeight && r.bottom > 0 && r.left < window.innerWidth && r.right > 0; + } + + const INTERACTIVE_TAGS = new Set(['a', 'button', 'input', 'select', 'textarea', 'details', 'summary']); + const LANDMARK_TAGS = new Set(['h1','h2','h3','h4','h5','h6','nav','main','header','footer','section','article','aside']); + + function isInteractive(el) { + const tag = el.tagName.toLowerCase(); + if (INTERACTIVE_TAGS.has(tag)) return true; + if (el.getAttribute('onclick') !== null) return true; + if (el.getAttribute('tabindex') !== null) return true; + const role = el.getAttribute('role'); + if (role === 'button' || role === 'link') return true; + if (el.getAttribute('contenteditable') === 'true') return true; + return false; + } + function isLandmark(el) { + if (LANDMARK_TAGS.has(el.tagName.toLowerCase())) return true; + return el.getAttribute('role') !== null; + } + + const SKIP_TAGS = new Set(['script', 'style', 'meta', 'link', 'title', 'noscript']); + const USEFUL_NON_INTERACTIVE_ROLES = new Set([ + 'dialog','alertdialog','alert','status','listbox','menu','menubar', + 'tablist','radiogroup','option','menuitem','menuitemcheckbox', + 'menuitemradio','tab','combobox','textbox','searchbox','heading', + 'form','main','navigation','banner','contentinfo','region', + 'complementary','progressbar','slider','spinbutton', + ]); + + function shouldInclude(el, opts) { + const tag = el.tagName.toLowerCase(); + if (SKIP_TAGS.has(tag)) return false; + if (opts.filter !== 'all' && el.getAttribute('aria-hidden') === 'true') return false; + if (opts.filter !== 'all' && !isVisible(el)) return 
false; + if (opts.filter !== 'all' && !opts.refId) { + if (!isInViewport(el)) return false; + } + if (opts.filter === 'interactive') return isInteractive(el); + const role = getRole(el); + if (opts.filter === 'visible') { + if (isInteractive(el)) return true; + if (/^h[1-6]$/.test(tag)) return true; + if (USEFUL_NON_INTERACTIVE_ROLES.has(role)) return true; + return false; + } + if (isInteractive(el)) return true; + if (isLandmark(el)) return true; + if (getAccessibleName(el).length > 0) return true; + return role !== null && role !== 'generic' && role !== 'image'; + } + + function getOrMintRef(el) { + for (const key in window.__wbElementMap) { + if (window.__wbElementMap[key].deref() === el) return key; + } + const key = 'ref_' + (++window.__wbRefCounter); + window.__wbElementMap[key] = new WeakRef(el); + return key; + } + function sweepDeadRefs() { + for (const key in window.__wbElementMap) { + if (!window.__wbElementMap[key].deref()) delete window.__wbElementMap[key]; + } + } + + function formatLine(el, depth) { + const role = getRole(el); + let name = getAccessibleName(el); + const ref = getOrMintRef(el); + let line = ' '.repeat(depth) + role; + if (name) { + name = name.replace(/\s+/g, ' ').substring(0, MAX_NAME_LEN).replace(/"/g, '\\"'); + line += ' "' + name + '"'; + } + line += ' [' + ref + ']'; + const href = el.getAttribute('href'); + if (href) line += ' href="' + href + '"'; + const type = el.getAttribute('type'); + if (type) line += ' type="' + type + '"'; + const ph = el.getAttribute('placeholder'); + if (ph) line += ' placeholder="' + ph + '"'; + const tag = el.tagName.toLowerCase(); + if (tag === 'input' || tag === 'textarea') { + const inputType = (el.getAttribute('type') || 'text').toLowerCase(); + const skipValueTypes = new Set(['submit','button','reset','file','checkbox','radio','image','hidden','color','range','password']); + if (!skipValueTypes.has(inputType)) { + const v = (el.value == null ? 
'' : String(el.value)); + if (v && v !== name) { + const trimmed = v.length > 60 ? v.substring(0, 60) + '...' : v; + line += ' value="' + trimmed.replace(/"/g, '\\"') + '"'; + } + } + } + return line; + } + function formatOption(opt, depth) { + const ref = getOrMintRef(opt); + const rawName = opt.textContent ? opt.textContent.trim() : ''; + const name = rawName.replace(/\s+/g, ' ').substring(0, MAX_NAME_LEN).replace(/"/g, '\\"'); + let line = ' '.repeat(depth) + 'option'; + if (name) line += ' "' + name + '"'; + line += ' [' + ref + ']'; + if (opt.selected) line += ' (selected)'; + if (opt.value && opt.value !== rawName) line += ' value="' + opt.value.replace(/"/g, '\\"') + '"'; + return line; + } + + function walk(el, depth, opts, lines) { + if (depth > opts.maxDepth) return; + if (!el || !el.tagName) return; + if (depth > 0 && opts._skipOverlaySet && opts._skipOverlaySet.has(el)) return; + const included = shouldInclude(el, opts) || (opts.refId != null && depth === 0); + if (included) { + lines.push(formatLine(el, depth)); + if (el.tagName.toLowerCase() === 'select' && el.options) { + for (const opt of el.options) lines.push(formatOption(opt, depth + 1)); + } + } + if (el.children && depth < opts.maxDepth) { + const nextDepth = included ? depth + 1 : depth; + for (const child of el.children) walk(child, nextDepth, opts, lines); + } + } + + function generateAccessibilityTree(filter, maxDepth, maxChars, refId) { + try { + const effFilter = filter || 'all'; + const defaultDepth = effFilter === 'all' ? 15 : 10; + const defaultChars = effFilter === 'visible' ? 3000 + : effFilter === 'interactive' ? 3500 + : null; + const opts = { + filter: effFilter, + maxDepth: maxDepth != null ? maxDepth : defaultDepth, + refId: refId || null, + }; + const effMaxChars = maxChars != null ? 
maxChars : defaultChars; + const viewport = { width: window.innerWidth, height: window.innerHeight }; + const lines = []; + if (refId) { + const weak = window.__wbElementMap[refId]; + if (!weak) return { error: 'Element with ref_id ' + refId + ' not found.', pageContent: '', viewport }; + const el = weak.deref(); + if (!el) { + delete window.__wbElementMap[refId]; + return { error: 'Element with ref_id ' + refId + ' no longer exists.', pageContent: '', viewport }; + } + walk(el, 0, opts, lines); + } else if (document.body) { + const overlaySelectors = [ + '[role=listbox]','[role=menu]','[role=dialog]','[role=alertdialog]', + '[aria-modal="true"]','[role=combobox][aria-expanded="true"]','dialog[open]', + ]; + const overlayEls = []; + const seen = new WeakSet(); + try { + for (const sel of overlaySelectors) { + const nodes = document.querySelectorAll(sel); + for (const n of nodes) { + if (seen.has(n)) continue; + if (!n.isConnected) continue; + let ancIsOverlay = false; + for (let p = n.parentElement; p; p = p.parentElement) { + if (seen.has(p)) { ancIsOverlay = true; break; } + } + if (ancIsOverlay) continue; + try { + const r = n.getBoundingClientRect(); + if (r.width < 1 || r.height < 1) continue; + const s = window.getComputedStyle(n); + if (s.visibility === 'hidden' || s.display === 'none' || parseFloat(s.opacity) === 0) continue; + } catch (e) { continue; } + seen.add(n); + overlayEls.push(n); + } + } + } catch (e) {} + if (overlayEls.length) { + lines.push('[open overlays — rendered first so they survive truncation]'); + for (const n of overlayEls) walk(n, 0, opts, lines); + lines.push('[/open overlays]'); + opts._skipOverlaySet = seen; + } + walk(document.body, 0, opts, lines); + } + sweepDeadRefs(); + const output = lines.join('\n'); + if (effMaxChars != null && output.length > effMaxChars) { + if (filter && filter !== 'all' && maxChars == null) { + let truncated = output.slice(0, effMaxChars); + const lastNl = truncated.lastIndexOf('\n'); + if (lastNl > 0) 
truncated = truncated.slice(0, lastNl); + const omitted = lines.length - truncated.split('\n').length; + truncated += '\n[tree truncated: ' + omitted + ' more nodes omitted]'; + return { pageContent: truncated, viewport, truncated: true }; + } + return { error: 'Output exceeds ' + effMaxChars + ' chars.', pageContent: '', viewport }; + } + return { pageContent: output, viewport }; + } catch (e) { + return { + error: 'Error generating accessibility tree: ' + (e && e.message || 'Unknown'), + pageContent: '', + viewport: { width: window.innerWidth, height: window.innerHeight }, + }; + } + } + + function lookup(refId) { + const weak = window.__wbElementMap[refId]; + if (!weak) return null; + const el = weak.deref(); + if (!el) { delete window.__wbElementMap[refId]; return null; } + return el; + } + + function suggestNearRefs(requestedRefId, limit) { + const cap = typeof limit === 'number' ? limit : 6; + const m = /^ref_(\d+)$/.exec(String(requestedRefId || '')); + const targetNum = m ? parseInt(m[1], 10) : null; + const live = []; + for (const key in window.__wbElementMap) { + const weak = window.__wbElementMap[key]; + const el = weak && weak.deref(); + if (!el) continue; + try { if (!el.isConnected || !isVisible(el)) continue; } catch { continue; } + const km = /^ref_(\d+)$/.exec(key); + const n = km ? parseInt(km[1], 10) : 0; + live.push({ ref: key, n, role: getRole(el), name: getAccessibleName(el) || '', interactive: isInteractive(el) }); + } + if (targetNum != null) { + live.sort((a, b) => { + if (a.interactive !== b.interactive) return a.interactive ? -1 : 1; + return Math.abs(a.n - targetNum) - Math.abs(b.n - targetNum); + }); + } else { + live.sort((a, b) => { + const aN = a.name ? 1 : 0, bN = b.name ? 1 : 0; + if (aN !== bN) return bN - aN; + if (a.interactive !== b.interactive) return a.interactive ? -1 : 1; + return b.n - a.n; + }); + } + return live.slice(0, cap).map(x => ({ + ref: x.ref, role: x.role, + name: x.name.length > 40 ? 
x.name.slice(0, 40) + '…' : x.name, + interactive: x.interactive, + })); + } + + window.__generateAccessibilityTree = generateAccessibilityTree; + window.__wb_ax_lookup = lookup; + window.__wb_ax_suggest = suggestNearRefs; + })(); + + // ─── RPC dispatcher ────────────────────────────────────────────────── + function reply(id, ok, payload) { + try { + const msg = ok ? { id: id, ok: true, result: payload } : { id: id, ok: false, error: String(payload) }; + window.ReactNativeWebView && window.ReactNativeWebView.postMessage(JSON.stringify(msg)); + } catch (e) { + try { + window.ReactNativeWebView && window.ReactNativeWebView.postMessage(JSON.stringify({ id: id, ok: false, error: String(e && e.message || e) })); + } catch {} + } + } + + function clickHandler(p) { + const ref_id = p && p.ref_id; + if (typeof ref_id !== 'string') return { success: false, error: 'ref_id (string) required' }; + if (typeof window.__wb_ax_lookup !== 'function') return { success: false, error: 'AX tree not ready' }; + const el = window.__wb_ax_lookup(ref_id); + if (!el) { + let suggestions = []; + try { if (typeof window.__wb_ax_suggest === 'function') suggestions = window.__wb_ax_suggest(ref_id, 6); } catch {} + return { success: false, error: 'ref_id ' + ref_id + ' not found. Re-read the accessibility tree.', suggestions: suggestions }; + } + try { el.scrollIntoView({ block: 'center', inline: 'center' }); } catch {} + try { el.focus({ preventScroll: true }); } catch {} + const r = el.getBoundingClientRect(); + el.click(); + const tag = el.tagName ? 
el.tagName.toLowerCase() : ''; + let isTextEntry = false; + if (tag === 'textarea') isTextEntry = true; + else if (tag === 'input') { + const inputType = (el.type || 'text').toLowerCase(); + const nonText = new Set(['checkbox','radio','file','submit','button','reset','image','color','range','hidden']); + isTextEntry = !nonText.has(inputType); + } else if (el.isContentEditable) isTextEntry = true; + const resp = { + success: true, method: 'click_ax', ref_id: ref_id, tag: tag, + rect: { x: Math.round(r.x), y: Math.round(r.y), w: Math.round(r.width), h: Math.round(r.height) }, + }; + if (tag === 'a') { + const href = el.getAttribute('href') || ''; + if (href) resp.href = href; + } + if (isTextEntry) { + resp.focused = true; + resp.next_required = 'type_ax'; + resp.hint = 'Element is now focused. Next call MUST be type_ax({ref_id: "' + ref_id + '", text: "..."}).'; + } + return resp; + } + + function typeHandler(p) { + const ref_id = p && p.ref_id; + const text = p && p.text; + const clear = !!(p && p.clear); + if (typeof ref_id !== 'string') return { success: false, error: 'ref_id (string) required' }; + if (typeof text !== 'string') return { success: false, error: 'text (string) required' }; + if (typeof window.__wb_ax_lookup !== 'function') return { success: false, error: 'AX tree not ready' }; + const el = window.__wb_ax_lookup(ref_id); + if (!el) return { success: false, error: 'ref_id ' + ref_id + ' not found.' }; + try { el.scrollIntoView({ block: 'center', inline: 'center' }); } catch {} + try { el.focus({ preventScroll: true }); } catch {} + if (el.isContentEditable) { + if (clear) { + try { + const sel = window.getSelection(); + const r = document.createRange(); + r.selectNodeContents(el); + sel.removeAllRanges(); + sel.addRange(r); + document.execCommand('delete'); + } catch {} + } + try { document.execCommand('insertText', false, text); } catch { + el.textContent = (clear ? 
'' : (el.textContent || '')) + text; + el.dispatchEvent(new Event('input', { bubbles: true })); + } + return { success: true, method: 'type_ax_contenteditable', ref_id: ref_id }; + } + if (el.tagName === 'INPUT' || el.tagName === 'TEXTAREA') { + if (el.tagName === 'INPUT') { + const inputType = (el.type || 'text').toLowerCase(); + const nonTypeable = new Set(['checkbox','radio','file','submit','button','reset','image','color','range','hidden']); + if (nonTypeable.has(inputType)) { + return { success: false, error: 'Input type ' + inputType + ' is not text-typeable. Use click_ax instead.' }; + } + } + if (clear) el.value = ''; + const proto = el.tagName === 'TEXTAREA' ? window.HTMLTextAreaElement.prototype : window.HTMLInputElement.prototype; + const desc = Object.getOwnPropertyDescriptor(proto, 'value'); + const setter = desc && desc.set; + const newVal = (clear ? '' : (el.value || '')) + text; + if (setter) setter.call(el, newVal); else el.value = newVal; + el.dispatchEvent(new Event('input', { bubbles: true })); + el.dispatchEvent(new Event('change', { bubbles: true })); + return { success: true, method: 'type_ax_input', ref_id: ref_id }; + } + return { success: false, error: 'Element is not typeable (tag=' + el.tagName + ').' 
}; + } + + function pageMetaHandler() { + return { + url: window.location.href, + title: document.title || '', + viewport: { width: window.innerWidth, height: window.innerHeight }, + }; + } + + window.__wbHandle = function(msg) { + const id = msg && msg.id; + const method = msg && msg.method; + const params = (msg && msg.params) || {}; + try { + if (method === 'get_accessibility_tree') { + const r = window.__generateAccessibilityTree(params.filter, params.maxDepth, params.maxChars, params.ref_id); + return reply(id, true, r); + } + if (method === 'click_ax') return reply(id, true, clickHandler(params)); + if (method === 'type_ax') return reply(id, true, typeHandler(params)); + if (method === 'get_page_meta') return reply(id, true, pageMetaHandler()); + return reply(id, false, 'Unknown method: ' + method); + } catch (e) { + reply(id, false, e && e.message || String(e)); + } + }; +})(); +true; +`; diff --git a/mobile2/agent/openai.ts b/mobile2/agent/openai.ts new file mode 100644 index 00000000..7dda9a53 --- /dev/null +++ b/mobile2/agent/openai.ts @@ -0,0 +1,137 @@ +/** + * Minimal OpenAI-compatible chat completions client. + * + * Ported from src/chrome/src/providers/openai.js with a few simplifications + * for v0: + * - No streaming (we'll add chatStream later for live deltas in chat). + * - No vision branch yet (the WebView doesn't capture screenshots in v0). + * - No fetch-with-fallback (no localhost/PNA workaround needed on mobile). + * + * Works against any OpenAI-shape endpoint: api.openai.com, OpenRouter, + * LM Studio, Ollama (`ollama serve` exposes /v1/chat/completions), etc. 
/** Roles accepted by the chat-completions `messages` array. */
export type ChatRole = 'system' | 'user' | 'assistant' | 'tool';

/** One entry of the chat-completions `messages` array. */
export type ChatMessage = {
  role: ChatRole;
  content: string | null;
  // assistant-only:
  tool_calls?: ToolCall[];
  // tool-only:
  tool_call_id?: string;
  name?: string;
};

/** A function call requested by the model; `arguments` is a JSON string. */
export type ToolCall = {
  id: string;
  type: 'function';
  function: { name: string; arguments: string };
};

/** Tool declaration sent to the model; `parameters` is a JSON Schema object. */
export type ToolSchema = {
  type: 'function';
  function: {
    name: string;
    description: string;
    parameters: Record<string, unknown>;
  };
};

/** Normalized result of one non-streaming completion. */
export type ChatResult = {
  content: string;
  toolCalls: ToolCall[] | null;
  usage: { prompt_tokens?: number; completion_tokens?: number; total_tokens?: number } | null;
};

export type OpenAIConfig = {
  apiKey: string;
  baseUrl?: string; // defaults to https://api.openai.com/v1
  model?: string; // defaults to gpt-5.4-mini
};

const DEFAULT_BASE = 'https://api.openai.com/v1';
const DEFAULT_MODEL = 'gpt-5.4-mini';

// gpt-5 / gpt-4.1 / o-series use a different field for max tokens and reject
// non-default temperature. Detect by model-name regex (mirrors the Chrome
// extension's logic).
function isNewContract(model: string): boolean {
  return /^(gpt-5|gpt-4\.1|o1|o3|o4)/i.test(model);
}

/** Subset of the chat-completions response body that this client reads. */
type ChatCompletionResponse = {
  choices?: Array<{
    message?: {
      content?: string | null;
      tool_calls?: ToolCall[];
    };
  }>;
  usage?: ChatResult['usage'];
  // Some gateways report failures inside a 200 response.
  error?: { message?: string } | string;
};

/** Minimal non-streaming client for any OpenAI-shaped `/chat/completions` endpoint. */
export class OpenAIProvider {
  constructor(private readonly config: OpenAIConfig) {}

  /** Configured model id, or the default when unset/empty. */
  get model(): string {
    return this.config.model || DEFAULT_MODEL;
  }

  /** Configured base URL (or the default) with trailing slashes removed. */
  get baseUrl(): string {
    return (this.config.baseUrl || DEFAULT_BASE).replace(/\/+$/, '');
  }

  /**
   * Send one chat-completions request and return the first choice.
   *
   * @param messages Full conversation, oldest first.
   * @param options  Optional tools, sampling temperature (ignored for models
   *                 matched by `isNewContract`), token cap (default 4096) and
   *                 tool choice (default 'auto' when tools are given).
   * @returns Text content ('' when the model only requested tools), the tool
   *          calls (null when there are none) and token usage if reported.
   * @throws Error on network failure, non-2xx status, a non-JSON body, an
   *         `error` payload, or a response that contains no message.
   */
  async chat(
    messages: ChatMessage[],
    options: {
      tools?: ToolSchema[];
      temperature?: number;
      maxTokens?: number;
      toolChoice?: 'auto' | 'none' | 'required';
    } = {},
  ): Promise<ChatResult> {
    const body: Record<string, unknown> = {
      model: this.model,
      messages,
      stream: false,
    };
    const maxTokens = options.maxTokens ?? 4096;
    if (isNewContract(this.model)) {
      body.max_completion_tokens = maxTokens;
    } else {
      body.temperature = options.temperature ?? 0.3;
      body.max_tokens = maxTokens;
    }
    if (options.tools && options.tools.length > 0) {
      body.tools = options.tools;
      body.tool_choice = options.toolChoice ?? 'auto';
    }

    // Local servers (LM Studio, Ollama) need no key; a bare "Bearer " header
    // or a key with pasted whitespace/newlines is at best noise and at worst
    // an invalid header, so trim and only send the header when a key exists.
    const headers: Record<string, string> = { 'Content-Type': 'application/json' };
    const apiKey = (this.config.apiKey || '').trim();
    if (apiKey) headers.Authorization = `Bearer ${apiKey}`;

    const url = `${this.baseUrl}/chat/completions`;
    let res: Response;
    try {
      res = await fetch(url, {
        method: 'POST',
        headers,
        body: JSON.stringify(body),
      });
    } catch (e: unknown) {
      const msg = e instanceof Error ? e.message : String(e);
      throw new Error(`Network error contacting ${url}: ${msg}`);
    }

    if (!res.ok) {
      const text = await res.text().catch(() => '');
      throw new Error(`OpenAI error ${res.status}: ${text || res.statusText}`);
    }

    let data: ChatCompletionResponse | null;
    try {
      data = (await res.json()) as ChatCompletionResponse | null;
    } catch (e: unknown) {
      const msg = e instanceof Error ? e.message : String(e);
      throw new Error(`OpenAI error: response from ${url} was not valid JSON (${msg})`);
    }

    const message = data?.choices?.[0]?.message;
    if (!message) {
      // Without this the caller would see an empty "final answer" and stop
      // silently; surface the gateway's error (or the missing choice) instead.
      const err = data?.error;
      const detail =
        typeof err === 'string' ? err : err?.message || 'response contained no choices';
      throw new Error(`OpenAI error: ${detail}`);
    }

    return {
      content: message.content || '',
      toolCalls: message.tool_calls && message.tool_calls.length > 0 ? message.tool_calls : null,
      usage: data?.usage || null,
    };
  }
}
+ */
+import * as SecureStore from 'expo-secure-store';
+import { Platform } from 'react-native';
+
+/** Connection settings for the agent's LLM provider. */
+export type AgentSettings = {
+  apiKey: string;
+  baseUrl: string;
+  model: string;
+};
+
+export const DEFAULT_SETTINGS: AgentSettings = {
+  apiKey: '',
+  baseUrl: 'https://api.openai.com/v1',
+  model: 'gpt-5.4-mini',
+};
+
+const KEYS = {
+  apiKey: 'wb_api_key',
+  baseUrl: 'wb_base_url',
+  model: 'wb_model',
+} as const;
+
+/** Report a storage failure. Logs the key name only, never the stored value. */
+function warnStorage(op: 'read' | 'write', key: string, e: unknown): void {
+  const msg = e instanceof Error ? e.message : String(e);
+  console.warn(`[settings-store] ${op} failed for ${key}: ${msg}`);
+}
+
+/**
+ * Read one stored value: localStorage on web, SecureStore elsewhere.
+ * Best-effort: any failure is logged and reported as `null`.
+ */
+async function read(key: string): Promise<string | null> {
+  if (Platform.OS === 'web') {
+    try {
+      return typeof localStorage !== 'undefined' ? localStorage.getItem(key) : null;
+    } catch (e: unknown) {
+      warnStorage('read', key, e);
+      return null;
+    }
+  }
+  try {
+    return await SecureStore.getItemAsync(key);
+  } catch (e: unknown) {
+    warnStorage('read', key, e);
+    return null;
+  }
+}
+
+/**
+ * Write one value: localStorage on web, SecureStore elsewhere.
+ * Best-effort: a failure is logged rather than thrown, so callers never
+ * crash on a storage error, but the failure is no longer invisible.
+ */
+async function write(key: string, value: string): Promise<void> {
+  if (Platform.OS === 'web') {
+    try {
+      if (typeof localStorage !== 'undefined') localStorage.setItem(key, value);
+    } catch (e: unknown) {
+      warnStorage('write', key, e);
+    }
+    return;
+  }
+  try {
+    await SecureStore.setItemAsync(key, value);
+  } catch (e: unknown) {
+    warnStorage('write', key, e);
+  }
+}
+
+/**
+ * Load all settings, substituting defaults for anything missing.
+ * Values are trimmed; an empty base URL or model falls back to its default,
+ * while an empty API key is returned as-is.
+ */
+export async function loadSettings(): Promise<AgentSettings> {
+  const [apiKey, baseUrl, model] = await Promise.all([
+    read(KEYS.apiKey),
+    read(KEYS.baseUrl),
+    read(KEYS.model),
+  ]);
+  return {
+    apiKey: (apiKey ?? DEFAULT_SETTINGS.apiKey).trim(),
+    baseUrl: baseUrl?.trim() || DEFAULT_SETTINGS.baseUrl,
+    model: model?.trim() || DEFAULT_SETTINGS.model,
+  };
+}
+
+/**
+ * Persist all settings. Values are trimmed first: stray whitespace from a
+ * pasted key or URL would otherwise end up in the Authorization header or
+ * the request URL.
+ */
+export async function saveSettings(s: AgentSettings): Promise<void> {
+  await Promise.all([
+    write(KEYS.apiKey, s.apiKey.trim()),
+    write(KEYS.baseUrl, s.baseUrl.trim()),
+    write(KEYS.model, s.model.trim()),
+  ]);
+}
diff --git a/mobile2/agent/tools.ts b/mobile2/agent/tools.ts
new file mode 100644
index 00000000..8cd11674
--- /dev/null
+++ b/mobile2/agent/tools.ts
@@ -0,0 +1,160 @@
+/**
+ * Tool definitions for the mobile WebBrain agent.
+ *
+ * v0 ships the minimum viable set:
+ *   - get_accessibility_tree → read the page (preferred)
+ *   - click_ax → click by ref_id
+ *   - type_ax → type into a focused/typeable element by ref_id
+ *   - navigate → change the WebView URL
+ *   - done → terminal: signal task complete with summary
+ *
+ * Schemas mirror src/chrome/src/agent/tools.js exactly so the same prompts
+ * and conversation shapes work across desktop and mobile.
+ */
+import * as rpc from './webview-rpc';
+import type { ToolSchema } from './openai';
+
+export const AGENT_TOOLS: ToolSchema[] = [
+  {
+    type: 'function',
+    function: {
+      name: 'get_accessibility_tree',
+      description:
+        'PREFERRED page-reading tool. Returns the page as a flat, indented text representation of its accessibility tree. Each kept node is one line of the form `role "accessible name" [ref_id] href="..." type="..." placeholder="..."`. Indentation shows hierarchy. ref_ids are STABLE across calls — re-use them in click_ax / type_ax.',
+      parameters: {
+        type: 'object',
+        properties: {
+          filter: {
+            type: 'string',
+            enum: ['all', 'visible', 'interactive'],
+            description:
+              'Which nodes to include. "visible" (in-viewport, visible) is a good default for navigation. "interactive" shows only clickable/typeable things. "all" traverses the whole DOM.',
+          },
+          maxDepth: { type: 'number', description: 'Max tree depth (default 15 for "all", 10 otherwise).' },
+          maxChars: { type: 'number', description: 'Hard char cap on the rendered tree.' },
+          ref_id: { type: 'string', description: 'Optional anchor — return just this element and its subtree.' },
+        },
+        required: [],
+      },
+    },
+  },
+  {
+    type: 'function',
+    function: {
+      name: 'click_ax',
+      description:
+        'Click an element by its ref_id from get_accessibility_tree. Scrolls into view, focuses, then clicks. ref_ids are stable across calls.',
+      parameters: {
+        type: 'object',
+        properties: {
+          ref_id: { type: 'string', description: 'A ref_id from get_accessibility_tree, e.g. "ref_42".' },
+        },
+        required: ['ref_id'],
+      },
+    },
+  },
+  {
+    type: 'function',
+    function: {
+      name: 'type_ax',
+      description:
+        'Type text into a focusable input/textarea/contenteditable by its ref_id. Uses native value setters so React picks up the change.',
+      parameters: {
+        type: 'object',
+        properties: {
+          ref_id: { type: 'string', description: 'A ref_id from get_accessibility_tree.' },
+          text: { type: 'string', description: 'Text to type.' },
+          clear: { type: 'boolean', description: 'Clear existing content before typing (default false).' },
+        },
+        required: ['ref_id', 'text'],
+      },
+    },
+  },
+  {
+    type: 'function',
+    function: {
+      name: 'navigate',
+      description: 'Navigate the browser tab to a URL. Use this to start the task on a specific site.',
+      parameters: {
+        type: 'object',
+        properties: {
+          url: { type: 'string', description: 'Absolute URL to navigate to.' },
+        },
+        required: ['url'],
+      },
+    },
+  },
+  {
+    type: 'function',
+    function: {
+      name: 'done',
+      description:
+        'Signal the task is FULLY complete and return a short summary. Only call when you have actually accomplished the user request OR you have exhausted alternatives.',
+      parameters: {
+        type: 'object',
+        properties: {
+          summary: { type: 'string', description: 'One- or two-sentence summary of what was accomplished.' },
+        },
+        required: ['summary'],
+      },
+    },
+  },
+];
+
+/**
+ * Out-of-band signal returned from `dispatchTool` when the agent calls
+ * `done`. The agent loop watches for this and stops iterating.
+ */
+export type ToolResult =
+  | { kind: 'value'; value: unknown }
+  | { kind: 'done'; summary: string }
+  | { kind: 'error'; error: string };
+
+export type ToolDispatchDeps = {
+  /** Replace the URL the WebView is loading. Returns when navigation begins. */
+  navigate: (url: string) => Promise<void>;
+};
+
+// Schemes the `navigate` tool refuses. The URL is chosen by the model, which
+// reads untrusted page content, so script-executing and local-file schemes
+// must not be forwarded to the WebView.
+const BLOCKED_URL_SCHEME = /^(javascript|data|vbscript|file):/i;
+
+/**
+ * Execute one tool call. Returns the result the LLM should see (as a JSON
+ * value), or a `done` sentinel that ends the loop, or an error string.
+ *
+ * Never throws: exceptions from the RPC layer or from `deps.navigate` are
+ * converted into `{ kind: 'error' }`.
+ *
+ * @param name Tool name as emitted by the model.
+ * @param args Parsed tool arguments; treated as untrusted.
+ * @param deps Host callbacks (currently only `navigate`).
+ */
+export async function dispatchTool(
+  name: string,
+  args: Record<string, unknown>,
+  deps: ToolDispatchDeps,
+): Promise<ToolResult> {
+  try {
+    switch (name) {
+      // These three are forwarded unchanged to the page-side handler of the
+      // same name.
+      case 'get_accessibility_tree':
+      case 'click_ax':
+      case 'type_ax': {
+        const r = await rpc.call(name, args);
+        return { kind: 'value', value: r };
+      }
+      case 'navigate': {
+        // Only accept a real string: String(obj) would yield "[object Object]".
+        const url = typeof args.url === 'string' ? args.url.trim() : '';
+        if (!url) return { kind: 'error', error: 'navigate requires a url argument' };
+        // Strip whitespace/control characters before the scheme check so a
+        // scheme split by a tab or newline is still recognised.
+        if (BLOCKED_URL_SCHEME.test(url.replace(/[\u0000-\u0020]/g, ''))) {
+          return {
+            kind: 'error',
+            error: 'navigate refuses javascript:, data:, vbscript: and file: URLs',
+          };
+        }
+        await deps.navigate(url);
+        // Give the WebView a beat to start loading. The next get_accessibility_tree
+        // call will block on the page-script being ready anyway.
+        await new Promise((r) => setTimeout(r, 800));
+        return { kind: 'value', value: { success: true, url } };
+      }
+      case 'done': {
+        const raw = args.summary;
+        // Keep a non-string summary readable instead of "[object Object]".
+        const summary = typeof raw === 'string' ? raw : raw == null ? '' : JSON.stringify(raw);
+        return { kind: 'done', summary };
+      }
+      default:
+        return { kind: 'error', error: `Unknown tool: ${name}` };
+    }
+  } catch (e: unknown) {
+    return { kind: 'error', error: e instanceof Error ? e.message : String(e) };
+  }
+}
diff --git a/mobile2/agent/webview-rpc.ts b/mobile2/agent/webview-rpc.ts
new file mode 100644
index 00000000..cd12a6f3
--- /dev/null
+++ b/mobile2/agent/webview-rpc.ts
@@ -0,0 +1,115 @@
+/**
+ * WebView <-> React Native RPC.
+ *
+ * The agent runs in the React Native JS context but its tools (read AX tree,
+ * click an element, type text) need to execute inside the page running in the
+ * WebView. We bridge the two with a request/response RPC over
+ * `WebView.injectJavaScript` (RN→page) and `window.ReactNativeWebView.postMessage`
+ * (page→RN).
+ *
+ * Lifecycle:
+ *   1. Browser screen mounts a <WebView> and calls registerWebView(ref).
+ *   2. Page-side script (mobile/agent/inject.ts → PAGE_SCRIPT) installs a
+ *      handler dispatcher on window.__wbHandle(msg).
Every page navigation
+ *      re-runs that script via injectedJavaScriptBeforeContentLoaded.
+ *   3. Agent calls webRpc.call('click_ax', { ref_id }).
+ *   4. We assign a unique id, store a pending promise, then invoke
+ *      webRef.injectJavaScript('window.__wbHandle({...}, "")').
+ *   5. Page side runs the handler, posts back JSON {id, ok, result|error}.
+ *   6. WebView's onMessage delivers it; we resolve/reject the matching
+ *      pending promise.
+ *
+ * Calls made before a WebView is registered are queued and flushed on
+ * register, so the agent can issue tool calls during the very first frame.
+ */
+import type WebView from 'react-native-webview';
+
+/** Bookkeeping for one in-flight call, keyed by request id in `pending`. */
+type Pending = {
+  resolve: (value: unknown) => void;
+  reject: (reason?: unknown) => void;
+  timer: ReturnType<typeof setTimeout> | null;
+};
+
+const TIMEOUT_MS = 15000;
+
+let webRef: WebView | null = null;
+let nextId = 1;
+const pending = new Map<string, Pending>();
+const queue: Array<{ id: string; payload: string }> = [];
+
+/**
+ * Inject every queued payload into the registered WebView, in order.
+ * Entries whose call is no longer pending (already timed out) are skipped so
+ * an abandoned action is never executed late.
+ */
+function flushQueue() {
+  const target = webRef;
+  if (!target) return;
+  for (const { id, payload } of queue.splice(0)) {
+    if (pending.has(id)) target.injectJavaScript(payload);
+  }
+}
+
+/** Set (or clear, with `null`) the WebView that receives RPC payloads. */
+export function registerWebView(ref: WebView | null) {
+  webRef = ref;
+  if (ref) flushQueue();
+}
+
+/**
+ * Invoke `method` in the page and resolve with the handler's result.
+ *
+ * If no WebView is registered the payload is queued until one is.
+ *
+ * @param method Page-side handler name.
+ * @param params Arguments, serialized with JSON.stringify.
+ * @returns The `result` field of the page's reply.
+ * @throws Rejects with Error when the page replies with `ok: false`, or when
+ *   no reply arrives within TIMEOUT_MS.
+ */
+export function call<T = unknown>(
+  method: string,
+  params: Record<string, unknown> = {},
+): Promise<T> {
+  const id = String(nextId++);
+  const body = JSON.stringify({ id, method, params });
+  // The trailing `true;` is required: injectJavaScript on iOS warns when the
+  // injected code's last expression isn't serializable.
+  const payload = `(function(){try{window.__wbHandle && window.__wbHandle(${body});}catch(e){window.ReactNativeWebView&&window.ReactNativeWebView.postMessage(JSON.stringify({id:${JSON.stringify(id)},ok:false,error:String(e&&e.message||e)}));}})();true;`;
+
+  return new Promise<T>((resolve, reject) => {
+    const timer = setTimeout(() => {
+      if (!pending.has(id)) return;
+      pending.delete(id);
+      // If the payload was never injected, drop it: otherwise a later
+      // registerWebView() would run an action whose caller already gave up.
+      const queuedAt = queue.findIndex((q) => q.id === id);
+      if (queuedAt !== -1) queue.splice(queuedAt, 1);
+      reject(new Error(`RPC ${method} timed out after ${TIMEOUT_MS}ms`));
+    }, TIMEOUT_MS);
+    pending.set(id, {
+      resolve: resolve as (value: unknown) => void,
+      reject,
+      timer,
+    });
+    if (webRef) {
+      webRef.injectJavaScript(payload);
+    } else {
+      queue.push({ id, payload });
+    }
+  });
+}
+
+/**
+ * Hand a message from the WebView's onMessage callback back to the RPC layer.
+ * Returns true if the message was an RPC reply (and should be consumed
+ * silently), false if it should be handled by other listeners.
+ *
+ * Any JSON object with a string `id` counts as a reply, including one whose
+ * call has already timed out.
+ */
+export function handleWebViewMessage(raw: string): boolean {
+  let parsed: unknown;
+  try {
+    parsed = JSON.parse(raw);
+  } catch {
+    return false;
+  }
+  if (typeof parsed !== 'object' || parsed === null) return false;
+  const reply = parsed as { id?: unknown; ok?: unknown; result?: unknown; error?: unknown };
+  if (typeof reply.id !== 'string') return false;
+  const p = pending.get(reply.id);
+  if (!p) return true; // late reply, swallow
+  pending.delete(reply.id);
+  if (p.timer) clearTimeout(p.timer);
+  if (reply.ok) {
+    p.resolve(reply.result);
+  } else {
+    const message = typeof reply.error === 'string' && reply.error ? reply.error : 'RPC error';
+    p.reject(new Error(message));
+  }
+  return true;
+}
+
+/** Reset on hard reset (logout, settings change, etc.). Tests use this.
*/
+export function _resetForTesting() {
+  webRef = null;
+  // Settle every in-flight call. Clearing the timers and the map without
+  // rejecting would leave awaiting callers hung forever, since neither a
+  // reply nor the timeout could reach them afterwards.
+  pending.forEach((p) => {
+    if (p.timer) clearTimeout(p.timer);
+    p.reject(new Error('RPC reset before a reply was received'));
+  });
+  pending.clear();
+  queue.length = 0;
+  nextId = 1;
+}
diff --git a/mobile2/app.json b/mobile2/app.json
new file mode 100644
index 00000000..8cddff13
--- /dev/null
+++ b/mobile2/app.json
@@ -0,0 +1,40 @@
+{
+  "expo": {
+    "name": "mobile2",
+    "slug": "mobile2",
+    "version": "1.0.0",
+    "orientation": "portrait",
+    "icon": "./assets/images/icon.png",
+    "scheme": "mobile2",
+    "userInterfaceStyle": "automatic",
+    "newArchEnabled": true,
+    "splash": {
+      "image": "./assets/images/splash-icon.png",
+      "resizeMode": "contain",
+      "backgroundColor": "#ffffff"
+    },
+    "ios": {
+      "supportsTablet": true
+    },
+    "android": {
+      "adaptiveIcon": {
+        "foregroundImage": "./assets/images/adaptive-icon.png",
+        "backgroundColor": "#ffffff"
+      },
+      "edgeToEdgeEnabled": true,
+      "predictiveBackGestureEnabled": false
+    },
+    "web": {
+      "bundler": "metro",
+      "output": "static",
+      "favicon": "./assets/images/favicon.png"
+    },
+    "plugins": [
+      "expo-router",
+      "expo-secure-store"
+    ],
+    "experiments": {
+      "typedRoutes": true
+    }
+  }
+}
diff --git a/mobile2/app/+html.tsx b/mobile2/app/+html.tsx
new file mode 100644
index 00000000..cb31090e
--- /dev/null
+++ b/mobile2/app/+html.tsx
@@ -0,0 +1,38 @@
+import { ScrollViewStyleReset } from 'expo-router/html';
+
+// This file is web-only and used to configure the root HTML for every
+// web page during static rendering.
+// The contents of this function only run in Node.js environments and
+// do not have access to the DOM or browser APIs.
+export default function Root({ children }: { children: React.ReactNode }) {
+  return (
+
+
+
+
+
+        {/*
+          Disable body scrolling on web. This makes ScrollView components work closer to how they do on native.
+          However, body scrolling is often nice to have for mobile web. If you want to enable it, remove this line.
+ */} + + + {/* Using raw CSS styles as an escape-hatch to ensure the background color never flickers in dark-mode. */} +