diff --git a/samples/CameraAccess/CameraAccess.xcodeproj/project.pbxproj b/samples/CameraAccess/CameraAccess.xcodeproj/project.pbxproj index 1e7dbda4..11e77362 100644 --- a/samples/CameraAccess/CameraAccess.xcodeproj/project.pbxproj +++ b/samples/CameraAccess/CameraAccess.xcodeproj/project.pbxproj @@ -128,6 +128,7 @@ 9D3C69602F367CF700E641A5 /* iPhone */ = {isa = PBXFileSystemSynchronizedRootGroup; explicitFileTypes = {}; explicitFolders = (); path = iPhone; sourceTree = ""; }; 9D85EB992F35EC46006C44D1 /* OpenClaw */ = {isa = PBXFileSystemSynchronizedRootGroup; explicitFileTypes = {}; explicitFolders = (); name = OpenClaw; path = CameraAccess/OpenClaw; sourceTree = SOURCE_ROOT; }; E699CC962E8150670052C240 /* CameraAccessTests */ = {isa = PBXFileSystemSynchronizedRootGroup; explicitFileTypes = {}; explicitFolders = (); path = CameraAccessTests; sourceTree = ""; }; + A1C4D5E62F0B000100000001 /* Utilities */ = {isa = PBXFileSystemSynchronizedRootGroup; explicitFileTypes = {}; explicitFolders = (); path = Utilities; sourceTree = ""; }; /* End PBXFileSystemSynchronizedRootGroup section */ /* Begin PBXFrameworksBuildPhase section */ @@ -216,6 +217,7 @@ 8FD96B792E6F0A9800F56AB1 /* CameraAccessApp.swift */, 8FD96B7B2E6F0A9800F56AB1 /* Info.plist */, 9D85EB992F35EC46006C44D1 /* OpenClaw */, + A1C4D5E62F0B000100000001 /* Utilities */, ); path = CameraAccess; sourceTree = ""; @@ -306,6 +308,7 @@ fileSystemSynchronizedGroups = ( 9D3C69602F367CF700E641A5 /* iPhone */, 9D85EB992F35EC46006C44D1 /* OpenClaw */, + A1C4D5E62F0B000100000001 /* Utilities */, ); name = CameraAccess; productName = CameraAccess; @@ -726,7 +729,7 @@ repositoryURL = "https://github.com/facebook/meta-wearables-dat-ios"; requirement = { kind = exactVersion; - version = 0.4.0; + version = 0.5.0; }; }; 9DD6CAFC2F3C62DA00ED7098 /* XCRemoteSwiftPackageReference "WebRTC" */ = { diff --git a/samples/CameraAccess/CameraAccess/Assets.xcassets/AppIcon.appiconset/AppIcon.png 
b/samples/CameraAccess/CameraAccess/Assets.xcassets/AppIcon.appiconset/AppIcon.png new file mode 100644 index 00000000..7f168d46 Binary files /dev/null and b/samples/CameraAccess/CameraAccess/Assets.xcassets/AppIcon.appiconset/AppIcon.png differ diff --git a/samples/CameraAccess/CameraAccess/Assets.xcassets/AppIcon.appiconset/Contents.json b/samples/CameraAccess/CameraAccess/Assets.xcassets/AppIcon.appiconset/Contents.json index f7757d41..cefcc878 100644 --- a/samples/CameraAccess/CameraAccess/Assets.xcassets/AppIcon.appiconset/Contents.json +++ b/samples/CameraAccess/CameraAccess/Assets.xcassets/AppIcon.appiconset/Contents.json @@ -1,7 +1,7 @@ { "images" : [ { - "filename" : "imagine_a_film_camera_in_the_style.jpeg", + "filename" : "AppIcon.png", "idiom" : "universal", "platform" : "ios", "size" : "1024x1024" diff --git a/samples/CameraAccess/CameraAccess/Assets.xcassets/AppIcon.appiconset/imagine_a_film_camera_in_the_style.jpeg b/samples/CameraAccess/CameraAccess/Assets.xcassets/AppIcon.appiconset/imagine_a_film_camera_in_the_style.jpeg deleted file mode 100644 index b91090c8..00000000 Binary files a/samples/CameraAccess/CameraAccess/Assets.xcassets/AppIcon.appiconset/imagine_a_film_camera_in_the_style.jpeg and /dev/null differ diff --git a/samples/CameraAccess/CameraAccess/Gemini/GeminiConfig.swift b/samples/CameraAccess/CameraAccess/Gemini/GeminiConfig.swift index 5c124f66..4fec8c9d 100644 --- a/samples/CameraAccess/CameraAccess/Gemini/GeminiConfig.swift +++ b/samples/CameraAccess/CameraAccess/Gemini/GeminiConfig.swift @@ -40,6 +40,24 @@ enum GeminiConfig { Never call execute silently -- the user needs verbal confirmation that you heard them and are working on it. The tool may take several seconds to complete, so the acknowledgment lets them know something is happening. For messages, confirm recipient and content before delegating unless clearly urgent. + + You also have a save_photo tool. 
Use it when the user asks you to capture, save, snap, photograph, or take a picture of what they're looking at. In the description parameter, briefly describe what you see in the frame. This saves the current camera view directly to their iPhone photo library -- it's instant, no network needed. + + You have a save_note tool. Use it to record observations, measurements, hazards, or action items as field notes. Always save important findings during inspections or when the worker mentions something worth recording. Categorize notes when appropriate: observation, hazard, measurement, or action_item. The worker may need these notes for their field report later. + + You have access to the current job context injected at the start of this session, including the worker's name, job details, site address, and GPS location. Use this context to give relevant, job-aware responses. Address the worker by name. Reference the job and site when relevant. + + You have a knowledge_lookup tool. Use it when the user says "look this up", "what is this", "find the specs", or asks about something they're looking at. First READ any visible text from the camera (part numbers, model names, labels, serial numbers), then call knowledge_lookup with a specific search query. Include the manufacturer and model number if visible. Results are automatically saved as reference notes. + + You have a generate_report tool. Use it when the user says "generate my field report", "create a report", "compile my findings", "write up my notes", etc. This compiles all session data (job details, notes, photos, GPS, timestamps) into a professional PDF and opens the share sheet so they can immediately AirDrop, email, or save the report. Confirm that the report is being generated before calling the tool. + + You have start_inspection and stop_inspection tools for proactive inspection mode. When the user says "start inspection", "begin inspection", "inspect this area", or similar, call start_inspection. 
If they mention a focus area (e.g. "focus on electrical" or "check for water damage"), include it in the focus parameter. When they say "stop inspection" or "end inspection", call stop_inspection. + + During inspection mode, you will receive periodic [INSPECTION] prompts. IMPORTANT: Only respond if you genuinely see something the inspector should know about -- damage, wear, safety hazards, code violations, unusual conditions, or noteworthy changes. If nothing stands out in the current view, stay completely silent. Do NOT acknowledge the inspection prompt or say "everything looks fine". Keep observations brief, specific, and actionable. + + You have start_safety_monitor and stop_safety_monitor tools. When the user says "enable safety", "watch for hazards", "start safety monitoring", or similar, call start_safety_monitor. When they say "stop safety" or "disable safety monitoring", call stop_safety_monitor. Safety monitoring runs independently from inspection mode — both can be active simultaneously. + + During safety monitoring, you will receive periodic [SAFETY CHECK] prompts. ONLY speak if you see a GENUINE safety hazard — missing PPE, electrical dangers, fall risks, fire hazards, or OSHA violations. If nothing unsafe is visible, stay completely silent. When you DO spot a hazard, be urgent, clear, and specific. Always save hazards as notes with category "hazard". """ // User-configurable values (Settings screen overrides, falling back to Secrets.swift) diff --git a/samples/CameraAccess/CameraAccess/Gemini/GeminiLiveService.swift b/samples/CameraAccess/CameraAccess/Gemini/GeminiLiveService.swift index 248f2f02..4721204c 100644 --- a/samples/CameraAccess/CameraAccess/Gemini/GeminiLiveService.swift +++ b/samples/CameraAccess/CameraAccess/Gemini/GeminiLiveService.swift @@ -33,6 +33,7 @@ class GeminiLiveService: ObservableObject { private let delegate = WebSocketDelegate() private var urlSession: URLSession! 
private let sendQueue = DispatchQueue(label: "gemini.send", qos: .userInitiated) + var sessionContextString: String? init() { let config = URLSessionConfiguration.default @@ -189,7 +190,13 @@ class GeminiLiveService: ObservableObject { ], "systemInstruction": [ "parts": [ - ["text": GeminiConfig.systemInstruction] + ["text": { + var instruction = GeminiConfig.systemInstruction + if let ctx = sessionContextString, !ctx.isEmpty { + instruction += "\n\n" + ctx + } + return instruction + }()] ] ], "tools": [ diff --git a/samples/CameraAccess/CameraAccess/Gemini/GeminiSessionViewModel.swift b/samples/CameraAccess/CameraAccess/Gemini/GeminiSessionViewModel.swift index e7d9d902..a429b319 100644 --- a/samples/CameraAccess/CameraAccess/Gemini/GeminiSessionViewModel.swift +++ b/samples/CameraAccess/CameraAccess/Gemini/GeminiSessionViewModel.swift @@ -11,6 +11,14 @@ class GeminiSessionViewModel: ObservableObject { @Published var aiTranscript: String = "" @Published var toolCallStatus: ToolCallStatus = .idle @Published var openClawConnectionState: OpenClawConnectionState = .notConfigured + @Published var isInspectionActive: Bool = false + @Published var isSafetyMonitorActive: Bool = false + @Published var sessionContext: SessionContext? + @Published var reportURLToShare: URL? + + weak var webrtcVM: WebRTCSessionViewModel? + var frameProvider: (() -> UIImage?)? + private let geminiService = GeminiLiveService() private let openClawBridge = OpenClawBridge() private var toolCallRouter: ToolCallRouter? @@ -18,6 +26,11 @@ class GeminiSessionViewModel: ObservableObject { private let eventClient = OpenClawEventClient() private var lastVideoFrameTime: Date = .distantPast private var stateObservation: Task? + private var inspectionTimer: Task? + private var inspectionFocus: String? + private var safetyTimer: Task? + private let locationService = LocationService() + @Published var spatialService: SpatialLocalizationService? 
var streamingMode: StreamingMode = .glasses @@ -31,6 +44,23 @@ class GeminiSessionViewModel: ObservableObject { isGeminiActive = true + // Initialize session context + let context = SessionContext() + sessionContext = context + locationService.requestPermissionAndStart() + if let coord = locationService.currentCoordinate { + context.coordinates = (lat: coord.latitude, lon: coord.longitude) + context.reverseGeocodedAddress = locationService.currentAddress + } + + // Start spatial localization (Multiset VPS if configured, else GPS fallback) + let spatial = SpatialLocalizationService(locationService: locationService) + spatialService = spatial + spatial.start() + context.spatialService = spatial + + geminiService.sessionContextString = context.contextString() + // Wire audio callbacks audioManager.onAudioCaptured = { [weak self] data in guard let self else { return } @@ -64,6 +94,8 @@ class GeminiSessionViewModel: ObservableObject { Task { @MainActor in self.userTranscript += text self.aiTranscript = "" + // Broadcast to WebRTC viewers + self.webrtcVM?.broadcastTranscript(speaker: "User", text: text) } } @@ -71,6 +103,8 @@ class GeminiSessionViewModel: ObservableObject { guard let self else { return } Task { @MainActor in self.aiTranscript += text + // Broadcast to WebRTC viewers + self.webrtcVM?.broadcastTranscript(speaker: "AI", text: text) } } @@ -91,6 +125,35 @@ class GeminiSessionViewModel: ObservableObject { // Wire tool call handling toolCallRouter = ToolCallRouter(bridge: openClawBridge) + // Wire router handlers + toolCallRouter?.frameProvider = frameProvider + toolCallRouter?.inspectionHandler = { [weak self] action, focus in + guard let self else { return } + if action == "start" { + self.startInspection(focus: focus) + } else { + self.stopInspection() + } + } + toolCallRouter?.safetyHandler = { [weak self] action in + guard let self else { return } + if action == "start" { + self.startSafetyMonitor() + } else { + self.stopSafetyMonitor() + } + } + 
toolCallRouter?.noteHandler = { [weak self] note, category in + guard let self else { return } + self.sessionContext?.addNote(note, category: category ?? "general") + } + toolCallRouter?.sessionContextProvider = { [weak self] in + return self?.sessionContext + } + toolCallRouter?.reportShareHandler = { [weak self] url in + self?.reportURLToShare = url + } + geminiService.onToolCall = { [weak self] toolCall in guard let self else { return } Task { @MainActor in @@ -119,6 +182,11 @@ class GeminiSessionViewModel: ObservableObject { self.isModelSpeaking = self.geminiService.isModelSpeaking self.toolCallStatus = self.openClawBridge.lastToolCallStatus self.openClawConnectionState = self.openClawBridge.connectionState + // Update location in context + if let coord = self.locationService.currentCoordinate { + self.sessionContext?.coordinates = (lat: coord.latitude, lon: coord.longitude) + self.sessionContext?.reverseGeocodedAddress = self.locationService.currentAddress + } } } @@ -174,9 +242,28 @@ class GeminiSessionViewModel: ObservableObject { } eventClient.connect() } + + // Auto-start inspection if configured + if SettingsManager.shared.inspectionAutoStart { + startInspection(focus: nil) + } + + // Auto-start safety monitor if configured + if SettingsManager.shared.safetyMonitorAutoStart { + startSafetyMonitor() + } + + // Enter collaborative mode on WebRTC if active + if let webrtc = webrtcVM, webrtc.isActive { + webrtc.enterCollaborativeMode() + } } func stopSession() { + stopInspection() + stopSafetyMonitor() + spatialService?.stop() + spatialService = nil eventClient.disconnect() toolCallRouter?.cancelAll() toolCallRouter = nil @@ -190,6 +277,7 @@ class GeminiSessionViewModel: ObservableObject { userTranscript = "" aiTranscript = "" toolCallStatus = .idle + sessionContext = nil } func sendVideoFrameIfThrottled(image: UIImage) { @@ -201,4 +289,63 @@ class GeminiSessionViewModel: ObservableObject { geminiService.sendVideoFrame(image: image) } + // MARK: - Inspection 
Mode + + func startInspection(focus: String?) { + guard !isInspectionActive else { return } + isInspectionActive = true + inspectionFocus = focus + let interval = TimeInterval(SettingsManager.shared.inspectionInterval) + NSLog("[Inspection] Started (interval: %.0fs, focus: %@)", interval, focus ?? "general") + + inspectionTimer = Task { [weak self] in + while !Task.isCancelled { + try? await Task.sleep(nanoseconds: UInt64(interval * 1_000_000_000)) + guard !Task.isCancelled else { break } + guard let self, self.isGeminiActive, self.connectionState == .ready else { continue } + var prompt = "[INSPECTION] Analyze the current camera view." + if let focus = self.inspectionFocus { + prompt += " Focus area: \(focus)." + } + prompt += " Only speak if you see something noteworthy. If nothing stands out, stay completely silent." + self.geminiService.sendTextMessage(prompt) + } + } + } + + func stopInspection() { + guard isInspectionActive else { return } + inspectionTimer?.cancel() + inspectionTimer = nil + isInspectionActive = false + inspectionFocus = nil + NSLog("[Inspection] Stopped") + } + + // MARK: - Safety Monitor + + func startSafetyMonitor() { + guard !isSafetyMonitorActive else { return } + isSafetyMonitorActive = true + let interval = TimeInterval(SettingsManager.shared.safetyMonitorInterval) + NSLog("[Safety] Monitor started (interval: %.0fs)", interval) + + safetyTimer = Task { [weak self] in + while !Task.isCancelled { + try? await Task.sleep(nanoseconds: UInt64(interval * 1_000_000_000)) + guard !Task.isCancelled else { break } + guard let self, self.isGeminiActive, self.connectionState == .ready else { continue } + let prompt = "[SAFETY CHECK] Scan the current view for safety hazards. ONLY speak if you see a genuine danger — missing PPE, electrical hazards, fall risks, fire risks, or OSHA violations. If everything looks safe, stay completely silent." 
+ self.geminiService.sendTextMessage(prompt) + } + } + } + + func stopSafetyMonitor() { + guard isSafetyMonitorActive else { return } + safetyTimer?.cancel() + safetyTimer = nil + isSafetyMonitorActive = false + NSLog("[Safety] Monitor stopped") + } } diff --git a/samples/CameraAccess/CameraAccess/Info.plist b/samples/CameraAccess/CameraAccess/Info.plist index 12cc4016..6eae6a87 100644 --- a/samples/CameraAccess/CameraAccess/Info.plist +++ b/samples/CameraAccess/CameraAccess/Info.plist @@ -71,6 +71,12 @@ This app uses the microphone to have voice conversations with the AI assistant while streaming from your glasses. NSPhotoLibraryAddUsageDescription This app needs access to save photos captured from your glasses. + NSLocationWhenInUseUsageDescription + VisionClaw uses your location to tag field reports with GPS coordinates and auto-fill site addresses for job context. + UIFileSharingEnabled + + LSSupportsOpeningDocumentsInPlace + NSAppTransportSecurity NSAllowsLocalNetworking diff --git a/samples/CameraAccess/CameraAccess/OpenClaw/ToolCallModels.swift b/samples/CameraAccess/CameraAccess/OpenClaw/ToolCallModels.swift index c7222a28..5433ebae 100644 --- a/samples/CameraAccess/CameraAccess/OpenClaw/ToolCallModels.swift +++ b/samples/CameraAccess/CameraAccess/OpenClaw/ToolCallModels.swift @@ -70,7 +70,7 @@ enum ToolCallStatus: Equatable { case .executing(let name): return "Running: \(name)..." 
case .completed(let name): return "Done: \(name)" case .failed(let name, let err): return "Failed: \(name) - \(err)" - case .cancelled(let name): return "Cancelled: \(name)" + case .cancelled: return "" } } @@ -85,9 +85,130 @@ enum ToolCallStatus: Equatable { enum ToolDeclarations { static func allDeclarations() -> [[String: Any]] { - return [execute] + return [execute, savePhoto, saveNote, generateReport, knowledgeLookup, startInspection, stopInspection, startSafetyMonitor, stopSafetyMonitor] } + static let savePhoto: [String: Any] = [ + "name": "save_photo", + "description": "Save what you currently see through the glasses camera to the user's photo library. Use when the user asks to capture, save, snap, or photograph what they're looking at.", + "parameters": [ + "type": "object", + "properties": [ + "description": [ + "type": "string", + "description": "Brief description of what's being captured for the user's confirmation" + ] + ], + "required": ["description"] + ] as [String: Any], + "behavior": "BLOCKING" + ] + + static let saveNote: [String: Any] = [ + "name": "save_note", + "description": "Save an observation, measurement, hazard, or action item as a field note for the current job session. Use whenever the worker mentions something worth recording, or when inspection mode detects something noteworthy. Always save important findings.", + "parameters": [ + "type": "object", + "properties": [ + "note": [ + "type": "string", + "description": "The observation or note to save" + ], + "category": [ + "type": "string", + "description": "Category: observation, hazard, measurement, or action_item" + ] + ], + "required": ["note"] + ] as [String: Any], + "behavior": "BLOCKING" + ] + + static let generateReport: [String: Any] = [ + "name": "generate_report", + "description": "Generate a structured PDF field report from the current session. Compiles all job context, notes, GPS location, and timestamps into a professional report document. 
Use when the user says 'generate my field report', 'create a report', 'compile my findings', 'write up my report', etc.", + "parameters": [ + "type": "object", + "properties": [ + "title": [ + "type": "string", + "description": "Optional custom title for the report. Defaults to 'Field Report' if not provided." + ] + ], + "required": [] + ] as [String: Any], + "behavior": "BLOCKING" + ] + + static let knowledgeLookup: [String: Any] = [ + "name": "knowledge_lookup", + "description": "Look up technical information, specs, part numbers, model details, or any reference material. Use when the user says 'look this up', 'what is this', 'find the specs', 'search for this part number', etc. Read any visible text from the camera first, then search for detailed information about it.", + "parameters": [ + "type": "object", + "properties": [ + "query": [ + "type": "string", + "description": "The search query — include part numbers, model names, manufacturer, or any text read from the camera" + ], + "context": [ + "type": "string", + "description": "Brief description of what the worker is looking at for context" + ] + ], + "required": ["query"] + ] as [String: Any], + "behavior": "BLOCKING" + ] + + static let startInspection: [String: Any] = [ + "name": "start_inspection", + "description": "Start proactive inspection mode. The AI will continuously analyze the camera feed and speak up when it spots damage, safety hazards, code violations, wear, or anything noteworthy. Use when the user says 'start inspection', 'begin inspection', 'inspect this', or similar.", + "parameters": [ + "type": "object", + "properties": [ + "focus": [ + "type": "string", + "description": "Optional focus area for the inspection (e.g. 'electrical', 'plumbing', 'structural', 'safety'). Leave empty for general inspection." 
+ ] + ], + "required": [] + ] as [String: Any], + "behavior": "BLOCKING" + ] + + static let stopInspection: [String: Any] = [ + "name": "stop_inspection", + "description": "Stop proactive inspection mode. Use when the user says 'stop inspection', 'end inspection', 'done inspecting', or similar.", + "parameters": [ + "type": "object", + "properties": [:], + "required": [] + ] as [String: Any], + "behavior": "BLOCKING" + ] + + static let startSafetyMonitor: [String: Any] = [ + "name": "start_safety_monitor", + "description": "Start continuous safety monitoring. The AI will actively watch for safety hazards, OSHA violations, missing PPE, electrical dangers, fall risks, chemical exposure, fire risks, and unsafe conditions. Use when the user says 'enable safety', 'start safety monitoring', 'watch for hazards', or similar.", + "parameters": [ + "type": "object", + "properties": [:], + "required": [] + ] as [String: Any], + "behavior": "BLOCKING" + ] + + static let stopSafetyMonitor: [String: Any] = [ + "name": "stop_safety_monitor", + "description": "Stop safety monitoring. Use when the user says 'stop safety monitoring', 'disable safety', or similar.", + "parameters": [ + "type": "object", + "properties": [:], + "required": [] + ] as [String: Any], + "behavior": "BLOCKING" + ] + static let execute: [String: Any] = [ "name": "execute", "description": "Your only way to take action. You have no memory, storage, or ability to do anything on your own -- use this tool for everything: sending messages, searching the web, adding to lists, setting reminders, creating notes, research, drafts, scheduling, smart home control, app interactions, or any request that goes beyond answering a question. 
When in doubt, use this tool.", diff --git a/samples/CameraAccess/CameraAccess/OpenClaw/ToolCallRouter.swift b/samples/CameraAccess/CameraAccess/OpenClaw/ToolCallRouter.swift index a20babf4..1dc200d9 100644 --- a/samples/CameraAccess/CameraAccess/OpenClaw/ToolCallRouter.swift +++ b/samples/CameraAccess/CameraAccess/OpenClaw/ToolCallRouter.swift @@ -1,4 +1,5 @@ import Foundation +import UIKit @MainActor class ToolCallRouter { @@ -6,13 +7,17 @@ class ToolCallRouter { private var inFlightTasks: [String: Task] = [:] private var consecutiveFailures = 0 private let maxConsecutiveFailures = 3 + var frameProvider: (() -> UIImage?)? + var inspectionHandler: ((_ action: String, _ focus: String?) -> Void)? + var safetyHandler: ((_ action: String) -> Void)? + var noteHandler: ((_ note: String, _ category: String?) -> Void)? + var sessionContextProvider: (() -> SessionContext?)? + var reportShareHandler: ((URL) -> Void)? init(bridge: OpenClawBridge) { self.bridge = bridge } - /// Route a tool call from Gemini to OpenClaw. Calls sendResponse with the - /// JSON dictionary to send back as a toolResponse message. 
func handleToolCall( _ call: GeminiFunctionCall, sendResponse: @escaping ([String: Any]) -> Void @@ -23,7 +28,85 @@ class ToolCallRouter { NSLog("[ToolCall] Received: %@ (id: %@) args: %@", callName, callId, String(describing: call.args)) - // Circuit breaker: stop sending tool calls after repeated failures + // Local tools — no OpenClaw round-trip needed + + if callName == "save_photo" { + let task = Task { @MainActor in + let result = await handleSavePhoto(call) + let response = self.buildToolResponse(callId: callId, name: callName, result: result) + sendResponse(response) + self.inFlightTasks.removeValue(forKey: callId) + } + inFlightTasks[callId] = task + return + } + + if callName == "generate_report" { + let task = Task { @MainActor in + let result = await self.handleGenerateReport(call) + let response = self.buildToolResponse(callId: callId, name: callName, result: result) + sendResponse(response) + self.inFlightTasks.removeValue(forKey: callId) + } + inFlightTasks[callId] = task + return + } + + if callName == "knowledge_lookup" { + let task = Task { @MainActor in + let result = await self.handleKnowledgeLookup(call) + let response = self.buildToolResponse(callId: callId, name: callName, result: result) + sendResponse(response) + self.inFlightTasks.removeValue(forKey: callId) + } + inFlightTasks[callId] = task + return + } + + if callName == "save_note" { + let note = call.args["note"] as? String ?? "" + let category = call.args["category"] as? String + noteHandler?(note, category) + let response = buildToolResponse(callId: callId, name: callName, + result: .success("Note saved: \(note)")) + sendResponse(response) + return + } + + if callName == "start_inspection" { + let focus = call.args["focus"] as? String + inspectionHandler?("start", focus) + let response = buildToolResponse(callId: callId, name: callName, + result: .success("Inspection mode started\(focus.map { ". Focus: \($0)" } ?? 
"")")) + sendResponse(response) + return + } + + if callName == "stop_inspection" { + inspectionHandler?("stop", nil) + let response = buildToolResponse(callId: callId, name: callName, + result: .success("Inspection mode stopped")) + sendResponse(response) + return + } + + if callName == "start_safety_monitor" { + safetyHandler?("start") + let response = buildToolResponse(callId: callId, name: callName, + result: .success("Safety monitoring activated. Watching for hazards, PPE violations, and unsafe conditions.")) + sendResponse(response) + return + } + + if callName == "stop_safety_monitor" { + safetyHandler?("stop") + let response = buildToolResponse(callId: callId, name: callName, + result: .success("Safety monitoring deactivated")) + sendResponse(response) + return + } + + // Circuit breaker: stop sending remote tool calls after repeated failures if consecutiveFailures >= maxConsecutiveFailures { NSLog("[ToolCall] Circuit breaker open (%d consecutive failures), rejecting %@", consecutiveFailures, callId) @@ -64,7 +147,6 @@ class ToolCallRouter { inFlightTasks[callId] = task } - /// Cancel specific in-flight tool calls (from toolCallCancellation) func cancelToolCalls(ids: [String]) { for id in ids { if let task = inFlightTasks[id] { @@ -76,7 +158,6 @@ class ToolCallRouter { bridge.lastToolCallStatus = .cancelled(ids.first ?? "unknown") } - /// Cancel all in-flight tool calls (on session stop) func cancelAll() { for (id, task) in inFlightTasks { NSLog("[ToolCall] Cancelling in-flight call: %@", id) @@ -86,6 +167,64 @@ class ToolCallRouter { consecutiveFailures = 0 } + // MARK: - Local Tool Handlers + + private func handleKnowledgeLookup(_ call: GeminiFunctionCall) async -> ToolResult { + let query = call.args["query"] as? String ?? "" + let context = call.args["context"] as? String + guard !query.isEmpty else { return .failure("No search query provided.") } + NSLog("[ToolCall] knowledge_lookup: query='%@' context='%@'", query, context ?? 
"none") + let result: ToolResult + if GeminiConfig.isOpenClawConfigured { + let searchTask = "Search for technical information, specs, or documentation about: \(query)" + (context.map { ". Context: \($0)" } ?? "") + result = await bridge.delegateTask(task: searchTask, toolName: "knowledge_lookup") + } else { + let searchResult = await WebSearchService.search(query) + switch searchResult { + case .success(let text): result = .success(text) + case .failure(let error): result = .failure("Search failed: \(error.localizedDescription)") + } + } + if case .success(let text) = result { + noteHandler?("Lookup: \(query) — \(String(text.prefix(300)))", "reference") + } + return result + } + + private func handleGenerateReport(_ call: GeminiFunctionCall) async -> ToolResult { + guard let context = sessionContextProvider?() else { + return .failure("No active session context. Start a session first.") + } + let title = call.args["title"] as? String ?? "Field Report" + NSLog("[ToolCall] generate_report: creating PDF (%@)", title) + let generator = ReportGenerator(context: context) + let result = await generator.generatePDF(title: title) + switch result { + case .success(let fileURL): + reportShareHandler?(fileURL) + return .success("Report generated with \(context.notes.count) notes and \(context.photosSaved) photos. The share sheet is now open so the user can send it via AirDrop, email, or save to Files.") + case .failure(let error): + return .failure("Report generation failed: \(error.localizedDescription)") + } + } + + private func handleSavePhoto(_ call: GeminiFunctionCall) async -> ToolResult { + guard let image = frameProvider?() else { + NSLog("[ToolCall] save_photo: no frame available") + return .failure("No video frame available to save. Make sure the camera is streaming.") + } + let description = call.args["description"] as? String ?? 
"photo" + NSLog("[ToolCall] save_photo: saving frame (%@)", description) + let result = await PhotoSaver.save(image) + switch result { + case .success: + sessionContextProvider?()?.addPhoto(image: image, description: description) + return .success("Photo saved to camera roll: \(description)") + case .failure(let error): + return .failure("Failed to save photo: \(error.localizedDescription)") + } + } + // MARK: - Private private func buildToolResponse( diff --git a/samples/CameraAccess/CameraAccess/Settings/SettingsManager.swift b/samples/CameraAccess/CameraAccess/Settings/SettingsManager.swift index 8d63a557..db8cda2e 100644 --- a/samples/CameraAccess/CameraAccess/Settings/SettingsManager.swift +++ b/samples/CameraAccess/CameraAccess/Settings/SettingsManager.swift @@ -16,6 +16,18 @@ final class SettingsManager { case speakerOutputEnabled case videoStreamingEnabled case proactiveNotificationsEnabled + case inspectionInterval + case inspectionAutoStart + case safetyMonitorInterval + case safetyMonitorAutoStart + case workerName + case defaultJobId + case defaultJobDescription + case defaultSiteAddress + case multisetClientId + case multisetClientSecret + case multisetMapCode + case multisetEnabled } private init() {} @@ -85,13 +97,99 @@ final class SettingsManager { set { defaults.set(newValue, forKey: Key.proactiveNotificationsEnabled.rawValue) } } + // MARK: - Inspection + + var inspectionInterval: Int { + get { + let stored = defaults.integer(forKey: Key.inspectionInterval.rawValue) + return stored != 0 ? stored : 10 + } + set { defaults.set(newValue, forKey: Key.inspectionInterval.rawValue) } + } + + var inspectionAutoStart: Bool { + get { defaults.bool(forKey: Key.inspectionAutoStart.rawValue) } + set { defaults.set(newValue, forKey: Key.inspectionAutoStart.rawValue) } + } + + // MARK: - Safety Monitor + + var safetyMonitorInterval: Int { + get { + let stored = defaults.integer(forKey: Key.safetyMonitorInterval.rawValue) + return stored != 0 ? 
stored : 15 + } + set { defaults.set(newValue, forKey: Key.safetyMonitorInterval.rawValue) } + } + + var safetyMonitorAutoStart: Bool { + get { defaults.bool(forKey: Key.safetyMonitorAutoStart.rawValue) } + set { defaults.set(newValue, forKey: Key.safetyMonitorAutoStart.rawValue) } + } + + // MARK: - Field Worker + + var workerName: String { + get { defaults.string(forKey: Key.workerName.rawValue) ?? "" } + set { defaults.set(newValue, forKey: Key.workerName.rawValue) } + } + + var defaultJobId: String { + get { defaults.string(forKey: Key.defaultJobId.rawValue) ?? "" } + set { defaults.set(newValue, forKey: Key.defaultJobId.rawValue) } + } + + var defaultJobDescription: String { + get { defaults.string(forKey: Key.defaultJobDescription.rawValue) ?? "" } + set { defaults.set(newValue, forKey: Key.defaultJobDescription.rawValue) } + } + + var defaultSiteAddress: String { + get { defaults.string(forKey: Key.defaultSiteAddress.rawValue) ?? "" } + set { defaults.set(newValue, forKey: Key.defaultSiteAddress.rawValue) } + } + + // MARK: - Multiset VPS + + var multisetClientId: String { + get { defaults.string(forKey: Key.multisetClientId.rawValue) ?? Secrets.multisetClientId } + set { defaults.set(newValue, forKey: Key.multisetClientId.rawValue) } + } + + var multisetClientSecret: String { + get { defaults.string(forKey: Key.multisetClientSecret.rawValue) ?? Secrets.multisetClientSecret } + set { defaults.set(newValue, forKey: Key.multisetClientSecret.rawValue) } + } + + var multisetMapCode: String { + get { defaults.string(forKey: Key.multisetMapCode.rawValue) ?? Secrets.multisetMapCode } + set { defaults.set(newValue, forKey: Key.multisetMapCode.rawValue) } + } + + var multisetEnabled: Bool { + get { defaults.object(forKey: Key.multisetEnabled.rawValue) as? Bool ?? true } + set { defaults.set(newValue, forKey: Key.multisetEnabled.rawValue) } + } + + /// True when all Multiset credentials + a map code are present and VPS is enabled. 
+ var isMultisetConfigured: Bool { + return multisetEnabled + && !multisetClientId.isEmpty + && !multisetClientSecret.isEmpty + && !multisetMapCode.isEmpty + } + // MARK: - Reset func resetAll() { for key in [Key.geminiAPIKey, .geminiSystemPrompt, .openClawHost, .openClawPort, .openClawHookToken, .openClawGatewayToken, .webrtcSignalingURL, .speakerOutputEnabled, .videoStreamingEnabled, - .proactiveNotificationsEnabled] { + .proactiveNotificationsEnabled, + .inspectionInterval, .inspectionAutoStart, + .safetyMonitorInterval, .safetyMonitorAutoStart, + .workerName, .defaultJobId, .defaultJobDescription, .defaultSiteAddress, + .multisetClientId, .multisetClientSecret, .multisetMapCode, .multisetEnabled] { defaults.removeObject(forKey: key.rawValue) } } diff --git a/samples/CameraAccess/CameraAccess/Settings/SettingsView.swift b/samples/CameraAccess/CameraAccess/Settings/SettingsView.swift index 8e22fe33..87fd65b8 100644 --- a/samples/CameraAccess/CameraAccess/Settings/SettingsView.swift +++ b/samples/CameraAccess/CameraAccess/Settings/SettingsView.swift @@ -14,6 +14,18 @@ struct SettingsView: View { @State private var speakerOutputEnabled: Bool = false @State private var videoStreamingEnabled: Bool = true @State private var proactiveNotificationsEnabled: Bool = true + @State private var inspectionInterval: String = "10" + @State private var inspectionAutoStart: Bool = false + @State private var safetyMonitorInterval: String = "15" + @State private var safetyMonitorAutoStart: Bool = false + @State private var multisetEnabled: Bool = true + @State private var multisetClientId: String = "" + @State private var multisetClientSecret: String = "" + @State private var multisetMapCode: String = "" + @State private var workerName: String = "" + @State private var defaultJobId: String = "" + @State private var defaultJobDescription: String = "" + @State private var defaultSiteAddress: String = "" @State private var showResetConfirmation = false var body: some View { @@ -104,6 
+116,89 @@ struct SettingsView: View { Toggle("Proactive Notifications", isOn: $proactiveNotificationsEnabled) } + Section(header: Text("Spatial Positioning (Multiset VPS)"), footer: Text("Sub-5cm indoor localization. Scan your facility with the Multiset Mapper app first, then paste the Map Code below. Leave Map Code blank to stay on GPS-only.")) { + Toggle("Enable Multiset VPS", isOn: $multisetEnabled) + VStack(alignment: .leading, spacing: 4) { + Text("Client ID") + .font(.caption) + .foregroundColor(.secondary) + TextField("UUID from developer.multiset.ai", text: $multisetClientId) + .autocapitalization(.none) + .disableAutocorrection(true) + .font(.system(.body, design: .monospaced)) + } + VStack(alignment: .leading, spacing: 4) { + Text("Client Secret") + .font(.caption) + .foregroundColor(.secondary) + SecureField("Secret", text: $multisetClientSecret) + .font(.system(.body, design: .monospaced)) + } + VStack(alignment: .leading, spacing: 4) { + Text("Map Code") + .font(.caption) + .foregroundColor(.secondary) + TextField("Code from scanned map", text: $multisetMapCode) + .autocapitalization(.none) + .disableAutocorrection(true) + .font(.system(.body, design: .monospaced)) + } + } + + Section(header: Text("Safety Monitor"), footer: Text("Continuously watches for safety hazards, OSHA violations, and dangerous conditions through the camera.")) { + VStack(alignment: .leading, spacing: 4) { + Text("Check Interval (seconds)") + .font(.caption) + .foregroundColor(.secondary) + TextField("15", text: $safetyMonitorInterval) + .keyboardType(.numberPad) + .font(.system(.body, design: .monospaced)) + } + Toggle("Auto-Start Safety Monitor", isOn: $safetyMonitorAutoStart) + } + + Section(header: Text("Inspection"), footer: Text("Proactive inspection mode analyzes the camera feed at regular intervals and speaks up when it spots issues.")) { + VStack(alignment: .leading, spacing: 4) { + Text("Inspection Interval (seconds)") + .font(.caption) + .foregroundColor(.secondary) + 
TextField("10", text: $inspectionInterval) + .keyboardType(.numberPad) + .font(.system(.body, design: .monospaced)) + } + Toggle("Auto-Start Inspection", isOn: $inspectionAutoStart) + } + + Section(header: Text("Field Worker"), footer: Text("Pre-fill job context for field sessions. This information is injected into the AI system prompt and included in reports.")) { + VStack(alignment: .leading, spacing: 4) { + Text("Worker Name") + .font(.caption) + .foregroundColor(.secondary) + TextField("Your name", text: $workerName) + .autocapitalization(.words) + } + VStack(alignment: .leading, spacing: 4) { + Text("Default Job ID") + .font(.caption) + .foregroundColor(.secondary) + TextField("e.g. WO-2024-001", text: $defaultJobId) + .autocapitalization(.allCharacters) + .disableAutocorrection(true) + } + VStack(alignment: .leading, spacing: 4) { + Text("Job Description") + .font(.caption) + .foregroundColor(.secondary) + TextField("e.g. Quarterly HVAC inspection", text: $defaultJobDescription) + } + VStack(alignment: .leading, spacing: 4) { + Text("Site Address") + .font(.caption) + .foregroundColor(.secondary) + TextField("e.g. 
123 Main St, Building A", text: $defaultSiteAddress) + } + } + Section { Button("Reset to Defaults") { showResetConfirmation = true @@ -153,6 +248,18 @@ struct SettingsView: View { speakerOutputEnabled = settings.speakerOutputEnabled videoStreamingEnabled = settings.videoStreamingEnabled proactiveNotificationsEnabled = settings.proactiveNotificationsEnabled + inspectionInterval = String(settings.inspectionInterval) + inspectionAutoStart = settings.inspectionAutoStart + safetyMonitorInterval = String(settings.safetyMonitorInterval) + safetyMonitorAutoStart = settings.safetyMonitorAutoStart + workerName = settings.workerName + defaultJobId = settings.defaultJobId + defaultJobDescription = settings.defaultJobDescription + defaultSiteAddress = settings.defaultSiteAddress + multisetEnabled = settings.multisetEnabled + multisetClientId = settings.multisetClientId + multisetClientSecret = settings.multisetClientSecret + multisetMapCode = settings.multisetMapCode } private func save() { @@ -168,5 +275,21 @@ struct SettingsView: View { settings.speakerOutputEnabled = speakerOutputEnabled settings.videoStreamingEnabled = videoStreamingEnabled settings.proactiveNotificationsEnabled = proactiveNotificationsEnabled + if let interval = Int(inspectionInterval.trimmingCharacters(in: .whitespacesAndNewlines)), interval > 0 { + settings.inspectionInterval = interval + } + settings.inspectionAutoStart = inspectionAutoStart + if let interval = Int(safetyMonitorInterval.trimmingCharacters(in: .whitespacesAndNewlines)), interval > 0 { + settings.safetyMonitorInterval = interval + } + settings.safetyMonitorAutoStart = safetyMonitorAutoStart + settings.workerName = workerName.trimmingCharacters(in: .whitespacesAndNewlines) + settings.defaultJobId = defaultJobId.trimmingCharacters(in: .whitespacesAndNewlines) + settings.defaultJobDescription = defaultJobDescription.trimmingCharacters(in: .whitespacesAndNewlines) + settings.defaultSiteAddress = defaultSiteAddress.trimmingCharacters(in: 
.whitespacesAndNewlines) + settings.multisetEnabled = multisetEnabled + settings.multisetClientId = multisetClientId.trimmingCharacters(in: .whitespacesAndNewlines) + settings.multisetClientSecret = multisetClientSecret.trimmingCharacters(in: .whitespacesAndNewlines) + settings.multisetMapCode = multisetMapCode.trimmingCharacters(in: .whitespacesAndNewlines) } } diff --git a/samples/CameraAccess/CameraAccess/Utilities/LocationService.swift b/samples/CameraAccess/CameraAccess/Utilities/LocationService.swift new file mode 100644 index 00000000..acad33c3 --- /dev/null +++ b/samples/CameraAccess/CameraAccess/Utilities/LocationService.swift @@ -0,0 +1,55 @@ +import CoreLocation +import Foundation + +class LocationService: NSObject, ObservableObject, CLLocationManagerDelegate { + @Published var currentCoordinate: CLLocationCoordinate2D? + @Published var currentAddress: String? + @Published var authorizationStatus: CLAuthorizationStatus = .notDetermined + + private let manager = CLLocationManager() + private let geocoder = CLGeocoder() + + override init() { + super.init() + manager.delegate = self + manager.desiredAccuracy = kCLLocationAccuracyBest + authorizationStatus = manager.authorizationStatus + } + + func requestPermissionAndStart() { + switch manager.authorizationStatus { + case .notDetermined: + manager.requestWhenInUseAuthorization() + case .authorizedWhenInUse, .authorizedAlways: + manager.requestLocation() + default: + NSLog("[Location] Authorization denied: %d", manager.authorizationStatus.rawValue) + } + } + + func locationManager(_ manager: CLLocationManager, didUpdateLocations locations: [CLLocation]) { + guard let location = locations.last else { return } + currentCoordinate = location.coordinate + NSLog("[Location] Updated: %.5f, %.5f", location.coordinate.latitude, location.coordinate.longitude) + geocoder.reverseGeocodeLocation(location) { [weak self] placemarks, error in + guard let placemark = placemarks?.first else { return } + let parts = 
[placemark.subThoroughfare, placemark.thoroughfare, placemark.locality, placemark.administrativeArea].compactMap { $0 } + let address = parts.joined(separator: " ") + DispatchQueue.main.async { + self?.currentAddress = address + NSLog("[Location] Address: %@", address) + } + } + } + + func locationManager(_ manager: CLLocationManager, didFailWithError error: Error) { + NSLog("[Location] Error: %@", error.localizedDescription) + } + + func locationManagerDidChangeAuthorization(_ manager: CLLocationManager) { + authorizationStatus = manager.authorizationStatus + if manager.authorizationStatus == .authorizedWhenInUse || manager.authorizationStatus == .authorizedAlways { + manager.requestLocation() + } + } +} diff --git a/samples/CameraAccess/CameraAccess/Utilities/PhotoSaver.swift b/samples/CameraAccess/CameraAccess/Utilities/PhotoSaver.swift new file mode 100644 index 00000000..695677e7 --- /dev/null +++ b/samples/CameraAccess/CameraAccess/Utilities/PhotoSaver.swift @@ -0,0 +1,36 @@ +import Photos +import UIKit + +enum PhotoSaver { + static func save(_ image: UIImage) async -> Result<Void, Error> { + let status = await PHPhotoLibrary.requestAuthorization(for: .addOnly) + guard status == .authorized || status == .limited else { + return .failure(PhotoSaverError.notAuthorized) + } + + do { + try await PHPhotoLibrary.shared().performChanges { + PHAssetChangeRequest.creationRequestForAsset(from: image) + } + NSLog("[PhotoSaver] Photo saved to camera roll") + return .success(()) + } catch { + NSLog("[PhotoSaver] Failed to save photo: %@", error.localizedDescription) + return .failure(error) + } + } +} + +enum PhotoSaverError: LocalizedError { + case notAuthorized + case noFrame + + var errorDescription: String? { + switch self { + case .notAuthorized: + return "Photo library access not granted. Please allow access in Settings." + case .noFrame: + return "No video frame available to save." 
+ } +} diff --git a/samples/CameraAccess/CameraAccess/Utilities/ReportGenerator.swift b/samples/CameraAccess/CameraAccess/Utilities/ReportGenerator.swift new file mode 100644 index 00000000..81a34a70 --- /dev/null +++ b/samples/CameraAccess/CameraAccess/Utilities/ReportGenerator.swift @@ -0,0 +1,185 @@ +import UIKit + +@MainActor +class ReportGenerator { + private let context: SessionContext + private let pageWidth: CGFloat = 612 + private let pageHeight: CGFloat = 792 + private let margin: CGFloat = 50 + + init(context: SessionContext) { + self.context = context + } + + func generatePDF(title: String = "Field Report") async -> Result<URL, Error> { + let contentWidth = pageWidth - margin * 2 + let renderer = UIGraphicsPDFRenderer(bounds: CGRect(x: 0, y: 0, width: pageWidth, height: pageHeight)) + + let formatter = DateFormatter() + formatter.dateStyle = .medium + formatter.timeStyle = .short + + let timeFormatter = DateFormatter() + timeFormatter.timeStyle = .short + + let data = renderer.pdfData { pdfContext in + var yOffset: CGFloat = 0 + + func newPage() { + pdfContext.beginPage() + yOffset = margin + } + + func checkPageBreak(_ needed: CGFloat) { + if yOffset + needed > pageHeight - margin { newPage() } + } + + newPage() + + // Title bar + let titleBarRect = CGRect(x: margin, y: yOffset, width: contentWidth, height: 50) + UIColor(red: 0.05, green: 0.1, blue: 0.25, alpha: 1.0).setFill() + UIBezierPath(roundedRect: titleBarRect, cornerRadius: 8).fill() + + let titleAttrs: [NSAttributedString.Key: Any] = [.font: UIFont.systemFont(ofSize: 24, weight: .bold), .foregroundColor: UIColor.white] + (title as NSString).draw(at: CGPoint(x: margin + 16, y: yOffset + 12), withAttributes: titleAttrs) + + let badgeAttrs: [NSAttributedString.Key: Any] = [.font: UIFont.systemFont(ofSize: 10, weight: .medium), .foregroundColor: UIColor.white.withAlphaComponent(0.7)] + let badge = "VisionClaw AI Field Assistant" as NSString + let badgeSize = badge.size(withAttributes: badgeAttrs) + 
badge.draw(at: CGPoint(x: margin + contentWidth - badgeSize.width - 16, y: yOffset + 20), withAttributes: badgeAttrs) + + yOffset += 70 + + let labelAttrs: [NSAttributedString.Key: Any] = [.font: UIFont.systemFont(ofSize: 12, weight: .semibold), .foregroundColor: UIColor.black] + let bodyAttrs: [NSAttributedString.Key: Any] = [.font: UIFont.systemFont(ofSize: 12, weight: .regular), .foregroundColor: UIColor.darkGray] + let sectionHeader: [NSAttributedString.Key: Any] = [.font: UIFont.systemFont(ofSize: 14, weight: .bold), .foregroundColor: UIColor(red: 0.2, green: 0.4, blue: 0.8, alpha: 1.0)] + + ("JOB DETAILS" as NSString).draw(at: CGPoint(x: margin, y: yOffset), withAttributes: sectionHeader) + yOffset += 24 + + let detailX = margin + 12 + + func drawField(label: String, value: String) { + checkPageBreak(20) + (label as NSString).draw(at: CGPoint(x: detailX, y: yOffset), withAttributes: labelAttrs) + let labelWidth = (label as NSString).size(withAttributes: labelAttrs).width + (value as NSString).draw(at: CGPoint(x: detailX + labelWidth + 4, y: yOffset), withAttributes: bodyAttrs) + yOffset += 18 + } + + if !context.workerName.isEmpty { drawField(label: "Worker:", value: context.workerName) } + if !context.jobId.isEmpty { drawField(label: "Job ID:", value: context.jobId) } + if !context.jobDescription.isEmpty { drawField(label: "Description:", value: context.jobDescription) } + if !context.siteAddress.isEmpty { drawField(label: "Site:", value: context.siteAddress) } + if let coords = context.coordinates { + let locStr = String(format: "%.5f, %.5f", coords.lat, coords.lon) + (context.reverseGeocodedAddress.map { " (\($0))" } ?? 
"") + drawField(label: "GPS:", value: locStr) + } + let duration = Date().timeIntervalSince(context.sessionStartTime) + drawField(label: "Session:", value: "\(formatter.string(from: context.sessionStartTime)) (\(Int(duration) / 60) min)") + drawField(label: "Photos saved:", value: "\(context.photosSaved)") + drawField(label: "Notes recorded:", value: "\(context.notes.count)") + yOffset += 20 + + // Photos section + if !context.savedPhotos.isEmpty { + checkPageBreak(40) + ("CAPTURED PHOTOS" as NSString).draw(at: CGPoint(x: margin, y: yOffset), withAttributes: sectionHeader) + yOffset += 24 + + let photoWidth: CGFloat = (contentWidth - 12) / 2 + let photoHeight: CGFloat = photoWidth * 0.75 + + for (index, photo) in context.savedPhotos.enumerated() { + let isLeftColumn = index % 2 == 0 + if isLeftColumn { checkPageBreak(photoHeight + 30) } + + let xPos = isLeftColumn ? margin : margin + photoWidth + 12 + let imageRect = CGRect(x: xPos, y: yOffset, width: photoWidth, height: photoHeight) + photo.image.draw(in: imageRect) + + // Border + UIColor.lightGray.setStroke() + UIBezierPath(rect: imageRect).stroke() + + // Caption + let captionAttrs: [NSAttributedString.Key: Any] = [.font: UIFont.systemFont(ofSize: 9, weight: .regular), .foregroundColor: UIColor.darkGray] + let timeStr = timeFormatter.string(from: photo.timestamp) + let caption = "\(timeStr) — \(photo.description)" as NSString + caption.draw(in: CGRect(x: xPos, y: yOffset + photoHeight + 2, width: photoWidth, height: 14), withAttributes: captionAttrs) + + if !isLeftColumn || index == context.savedPhotos.count - 1 { + yOffset += photoHeight + 22 + } + } + yOffset += 10 + } + + let categoryOrder = ["hazard", "action_item", "observation", "measurement", "reference", "general"] + let categoryLabels: [String: String] = ["hazard": "HAZARDS & SAFETY ISSUES", "action_item": "ACTION ITEMS", "observation": "OBSERVATIONS", "measurement": "MEASUREMENTS", "reference": "REFERENCES & LOOKUPS", "general": "GENERAL NOTES"] + let 
categoryColors: [String: UIColor] = ["hazard": UIColor(red: 0.8, green: 0.1, blue: 0.1, alpha: 1.0), "action_item": UIColor(red: 0.8, green: 0.5, blue: 0.0, alpha: 1.0), "observation": UIColor(red: 0.2, green: 0.4, blue: 0.8, alpha: 1.0), "measurement": UIColor(red: 0.2, green: 0.6, blue: 0.3, alpha: 1.0), "reference": UIColor(red: 0.5, green: 0.2, blue: 0.7, alpha: 1.0), "general": UIColor.gray] + + let noteFont = UIFont.systemFont(ofSize: 11, weight: .regular) + let noteTimeFont = UIFont.systemFont(ofSize: 10, weight: .medium) + + for category in categoryOrder { + let categoryNotes = context.notes.filter { $0.category == category } + guard !categoryNotes.isEmpty else { continue } + checkPageBreak(60) + let catColor = categoryColors[category] ?? .gray + let catLabel = categoryLabels[category] ?? category.uppercased() + catColor.setFill() + UIBezierPath(rect: CGRect(x: margin, y: yOffset, width: 4, height: 18)).fill() + let catAttrs: [NSAttributedString.Key: Any] = [.font: UIFont.systemFont(ofSize: 13, weight: .bold), .foregroundColor: catColor] + (catLabel as NSString).draw(at: CGPoint(x: margin + 10, y: yOffset), withAttributes: catAttrs) + yOffset += 26 + + for note in categoryNotes { + let timeStr = timeFormatter.string(from: note.timestamp) + let noteSize = (note.text as NSString).boundingRect(with: CGSize(width: contentWidth - 20, height: .greatestFiniteMagnitude), options: .usesLineFragmentOrigin, attributes: [.font: noteFont], context: nil) + checkPageBreak(noteSize.height + 12) + ("\u{2022}" as NSString).draw(at: CGPoint(x: margin + 4, y: yOffset), withAttributes: [.font: noteFont, .foregroundColor: catColor]) + let timeAttrs: [NSAttributedString.Key: Any] = [.font: noteTimeFont, .foregroundColor: UIColor.gray] + (timeStr as NSString).draw(at: CGPoint(x: margin + 16, y: yOffset), withAttributes: timeAttrs) + let textX = margin + 16 + (timeStr as NSString).size(withAttributes: timeAttrs).width + 8 + (note.text as NSString).draw(in: CGRect(x: textX, y: 
yOffset, width: contentWidth - (textX - margin), height: noteSize.height + 4), withAttributes: [.font: noteFont, .foregroundColor: UIColor.darkGray]) + yOffset += max(noteSize.height + 4, 16) + + // Append pose line (e.g. "Multiset VPS ±5 cm | map:office-1 (2.31, 1.05, 0.82)") + if let pose = note.pose { + let poseAttrs: [NSAttributedString.Key: Any] = [ + .font: UIFont.systemFont(ofSize: 8, weight: .regular), + .foregroundColor: UIColor.lightGray + ] + ("📍 " + pose.summary as NSString).draw(at: CGPoint(x: margin + 24, y: yOffset), withAttributes: poseAttrs) + yOffset += 12 + } + } + yOffset += 12 + } + + checkPageBreak(40) + yOffset += 10 + UIColor.lightGray.setFill() + UIBezierPath(rect: CGRect(x: margin, y: yOffset, width: contentWidth, height: 0.5)).fill() + yOffset += 8 + ("Generated by VisionClaw AI Field Assistant on \(formatter.string(from: Date()))" as NSString).draw(at: CGPoint(x: margin, y: yOffset), withAttributes: [.font: UIFont.systemFont(ofSize: 9, weight: .regular), .foregroundColor: UIColor.lightGray]) + } + + let documentsURL = FileManager.default.urls(for: .documentDirectory, in: .userDomainMask)[0] + let dateStr = DateFormatter.localizedString(from: Date(), dateStyle: .short, timeStyle: .none).replacingOccurrences(of: "/", with: "-") + let jobPart = context.jobId.isEmpty ? 
"" : "_\(context.jobId)" + let fileName = "VisionClaw_Report\(jobPart)_\(dateStr).pdf" + let fileURL = documentsURL.appendingPathComponent(fileName) + + do { + try data.write(to: fileURL) + NSLog("[Report] PDF saved: %@", fileURL.lastPathComponent) + return .success(fileURL) + } catch { + NSLog("[Report] Failed to save PDF: %@", error.localizedDescription) + return .failure(error) + } + } +} diff --git a/samples/CameraAccess/CameraAccess/Utilities/SessionContext.swift b/samples/CameraAccess/CameraAccess/Utilities/SessionContext.swift new file mode 100644 index 00000000..456906ed --- /dev/null +++ b/samples/CameraAccess/CameraAccess/Utilities/SessionContext.swift @@ -0,0 +1,111 @@ +import Foundation +import UIKit + +struct SavedPhoto { + let image: UIImage + let description: String + let timestamp: Date + let pose: SpatialPose? + + init(image: UIImage, description: String, pose: SpatialPose? = nil) { + self.image = image + self.description = description + self.timestamp = Date() + self.pose = pose + } +} + +struct SessionNote: Codable, Identifiable { + let id: UUID + let timestamp: Date + let text: String + let category: String + let pose: SpatialPose? + + init(text: String, category: String = "general", pose: SpatialPose? = nil) { + self.id = UUID() + self.timestamp = Date() + self.text = text + self.category = category + self.pose = pose + } +} + +@MainActor +class SessionContext: ObservableObject { + @Published var workerName: String + @Published var jobId: String + @Published var jobDescription: String + @Published var siteAddress: String + @Published var notes: [SessionNote] = [] + @Published var photosSaved: Int = 0 + var savedPhotos: [SavedPhoto] = [] + var coordinates: (lat: Double, lon: Double)? + var reverseGeocodedAddress: String? + let sessionStartTime: Date + weak var spatialService: SpatialLocalizationService? 
+ + init() { + let settings = SettingsManager.shared + self.workerName = settings.workerName + self.jobId = settings.defaultJobId + self.jobDescription = settings.defaultJobDescription + self.siteAddress = settings.defaultSiteAddress + self.sessionStartTime = Date() + } + + func addNote(_ text: String, category: String = "general") { + let pose = spatialService?.currentPose + let note = SessionNote(text: text, category: category, pose: pose) + notes.append(note) + NSLog("[SessionContext] Note #%d saved (%@) [%@]: %@", + notes.count, category, pose?.source.displayName ?? "no-pose", String(text.prefix(80))) + } + + func addPhoto(image: UIImage, description: String) { + let pose = spatialService?.currentPose + savedPhotos.append(SavedPhoto(image: image, description: description, pose: pose)) + photosSaved += 1 + NSLog("[SessionContext] Photo #%d saved [%@]: %@", + photosSaved, pose?.source.displayName ?? "no-pose", description) + } + + func contextString() -> String { + let formatter = DateFormatter() + formatter.dateStyle = .medium + formatter.timeStyle = .short + + var parts: [String] = ["[CURRENT JOB CONTEXT]"] + parts.append("Session started: \(formatter.string(from: sessionStartTime))") + + if !workerName.isEmpty { parts.append("Worker: \(workerName)") } + if !jobId.isEmpty { parts.append("Job ID: \(jobId)") } + if !jobDescription.isEmpty { parts.append("Job: \(jobDescription)") } + if !siteAddress.isEmpty { parts.append("Site: \(siteAddress)") } + if let coords = coordinates { + var locationStr = "GPS: \(String(format: "%.5f", coords.lat)), \(String(format: "%.5f", coords.lon))" + if let address = reverseGeocodedAddress { locationStr += " (\(address))" } + parts.append(locationStr) + } + if photosSaved > 0 { parts.append("Photos saved this session: \(photosSaved)") } + if !notes.isEmpty { + parts.append("\nSession notes so far (\(notes.count) total):") + for note in notes.suffix(20) { + let time = formatter.string(from: note.timestamp) + parts.append("- 
[\(note.category)] \(time): \(note.text)") + } + } + return parts.joined(separator: "\n") + } + + func notesSummary() -> String { + if notes.isEmpty { return "No notes recorded yet this session." } + let formatter = DateFormatter() + formatter.timeStyle = .short + var lines = ["\(notes.count) note(s) recorded:"] + for note in notes { + lines.append("- [\(note.category)] \(formatter.string(from: note.timestamp)): \(note.text)") + } + return lines.joined(separator: "\n") + } +} diff --git a/samples/CameraAccess/CameraAccess/Utilities/SpatialLocalizationService.swift b/samples/CameraAccess/CameraAccess/Utilities/SpatialLocalizationService.swift new file mode 100644 index 00000000..46715615 --- /dev/null +++ b/samples/CameraAccess/CameraAccess/Utilities/SpatialLocalizationService.swift @@ -0,0 +1,187 @@ +import CoreLocation +import Foundation +import Combine + +// Attempt to import the Multiset SDK. Drop MultiSetSDK.xcframework into the +// Xcode project (Embed & Sign) and this branch will activate automatically. +#if canImport(MultiSetSDK) +import MultiSetSDK +#endif + +/// Orchestrates multiple spatial localization providers (Multiset VPS → GPS → …). +/// Publishes the best-available pose and provides it for attachment to notes, +/// photos, and generated reports. +@MainActor +final class SpatialLocalizationService: NSObject, ObservableObject { + @Published var currentPose: SpatialPose? + @Published var activeSource: SpatialSource = .unknown + @Published var statusMessage: String = "Idle" + + private let locationService: LocationService + + #if canImport(MultiSetSDK) + private var multisetInitialized = false + private var multisetAvailable: Bool { multisetInitialized } + #else + private var multisetAvailable: Bool { false } + #endif + + init(locationService: LocationService) { + self.locationService = locationService + super.init() + } + + /// Start localization. Called at session start. 
+ func start() { + // Always start GPS as the baseline + locationService.requestPermissionAndStart() + bootstrapInitialPose(source: .gps) + + // Try Multiset if credentials + map code are all configured + if SettingsManager.shared.isMultisetConfigured { + startMultiset() + } else { + statusMessage = "GPS only (Multiset not configured)" + activeSource = .gps + } + } + + /// Stop all providers. Called at session stop. + func stop() { + #if canImport(MultiSetSDK) + if multisetInitialized { + // Real SDK call once xcframework is linked: + // MultiSet.shared.stopLocalization() + multisetInitialized = false + } + #endif + statusMessage = "Stopped" + activeSource = .unknown + } + + // MARK: - GPS Provider + + private func bootstrapInitialPose(source: SpatialSource) { + guard let coord = locationService.currentCoordinate else { return } + self.currentPose = SpatialPose( + source: source, + confidence: 0.5, + timestamp: Date(), + latitude: coord.latitude, + longitude: coord.longitude, + altitude: nil, + heading: nil, + localX: nil, + localY: nil, + localZ: nil, + mapCode: nil + ) + self.activeSource = source + } + + /// Update GPS pose from the LocationService. Called whenever GPS fixes arrive. 
+ func updateFromGPS() { + guard activeSource != .multiset else { return } // Don't downgrade precision + guard let coord = locationService.currentCoordinate else { return } + currentPose = SpatialPose( + source: .gps, + confidence: 0.5, + timestamp: Date(), + latitude: coord.latitude, + longitude: coord.longitude, + altitude: nil, + heading: nil, + localX: nil, + localY: nil, + localZ: nil, + mapCode: nil + ) + if activeSource != .gps { activeSource = .gps } + } + + // MARK: - Multiset VPS Provider + + private func startMultiset() { + #if canImport(MultiSetSDK) + statusMessage = "Starting Multiset VPS…" + NSLog("[Spatial] Initializing Multiset VPS (map: %@)", SettingsManager.shared.multisetMapCode) + + // Real SDK calls (activate once xcframework is linked): + // let config = MultiSetConfig.default( + // clientId: SettingsManager.shared.multisetClientId, + // clientSecret: SettingsManager.shared.multisetClientSecret, + // mapCode: SettingsManager.shared.multisetMapCode + // ) + // MultiSet.shared.initialize(config: config, callback: self) + // MultiSet.shared.localize() + + multisetInitialized = true + activeSource = .multiset + statusMessage = "Multiset VPS active" + #else + statusMessage = "Multiset SDK not linked — add MultiSetSDK.xcframework" + activeSource = .gps + NSLog("[Spatial] Multiset xcframework not found; falling back to GPS") + #endif + } + + /// Handle a Multiset localization result. Wired via delegate when SDK is linked. 
+ fileprivate func handleMultisetSuccess( + localX: Double, localY: Double, localZ: Double, + lat: Double?, lon: Double?, heading: Double?, + confidence: Double + ) { + let pose = SpatialPose( + source: .multiset, + confidence: confidence, + timestamp: Date(), + latitude: lat, + longitude: lon, + altitude: nil, + heading: heading, + localX: localX, + localY: localY, + localZ: localZ, + mapCode: SettingsManager.shared.multisetMapCode + ) + Task { @MainActor in + self.currentPose = pose + self.activeSource = .multiset + self.statusMessage = String(format: "Multiset locked (%.0f%%)", confidence * 100) + NSLog("[Spatial] Multiset pose: %@", pose.summary) + } + } + + fileprivate func handleMultisetFailure(_ error: String) { + Task { @MainActor in + self.statusMessage = "Multiset lost — using GPS" + self.activeSource = .gps + NSLog("[Spatial] Multiset error: %@", error) + } + } +} + +// MARK: - MultiSet SDK Delegate (only compiled when xcframework is linked) + +#if canImport(MultiSetSDK) +// Uncomment and wire once you've verified the Multiset SDK's actual delegate +// protocol names. The method signatures below match their documented patterns. +// +// extension SpatialLocalizationService: MultiSetLocalizationCallback { +// func onLocalizationSuccess(result: LocalizationResult) { +// handleMultisetSuccess( +// localX: Double(result.position.x), +// localY: Double(result.position.y), +// localZ: Double(result.position.z), +// lat: result.latitude, +// lon: result.longitude, +// heading: result.heading, +// confidence: Double(result.confidence ?? 
0) +// ) +// } +// +// func onLocalizationFailure(error: Error) { +// handleMultisetFailure(error.localizedDescription) +// } +// } +#endif diff --git a/samples/CameraAccess/CameraAccess/Utilities/SpatialPose.swift b/samples/CameraAccess/CameraAccess/Utilities/SpatialPose.swift new file mode 100644 index 00000000..fb10eed1 --- /dev/null +++ b/samples/CameraAccess/CameraAccess/Utilities/SpatialPose.swift @@ -0,0 +1,61 @@ +import CoreLocation +import Foundation + +/// Source of a spatial pose fix. Ranked from most precise to least. +enum SpatialSource: String, Codable { + case multiset // Sub-5cm VPS via pre-scanned facility map + case googleGeo // ARCore Geospatial API (future — outdoor mapped areas) + case gps // Phone GPS (fallback, ~5-10m accuracy) + case unknown + + var displayName: String { + switch self { + case .multiset: return "Multiset VPS" + case .googleGeo: return "Google Geospatial" + case .gps: return "GPS" + case .unknown: return "Unknown" + } + } + + /// Approximate accuracy in meters. Used for report defensibility labels. + var accuracyLabel: String { + switch self { + case .multiset: return "±5 cm" + case .googleGeo: return "±1 m" + case .gps: return "±5-10 m" + case .unknown: return "unknown" + } + } +} + +/// A unified pose combining global (WGS-84) and map-local (6-DoF) coordinates. +/// When Multiset is active we populate everything; GPS-only populates lat/lon. +struct SpatialPose: Codable { + let source: SpatialSource + let confidence: Double // 0.0 - 1.0 + let timestamp: Date + + // Global coordinates (WGS-84) + let latitude: Double? + let longitude: Double? + let altitude: Double? + let heading: Double? + + // Map-local coordinates (only populated for Multiset) + let localX: Double? + let localY: Double? + let localZ: Double? + let mapCode: String? + + /// Short human-readable summary used in notes and reports. 
+ var summary: String { + var parts: [String] = [source.displayName, source.accuracyLabel] + if let lat = latitude, let lon = longitude { + parts.append(String(format: "%.5f, %.5f", lat, lon)) + } + if let x = localX, let y = localY, let z = localZ, let map = mapCode { + parts.append(String(format: "map:%@ (%.2f, %.2f, %.2f)", map, x, y, z)) + } + return parts.joined(separator: " | ") + } +} diff --git a/samples/CameraAccess/CameraAccess/Utilities/WebSearchService.swift b/samples/CameraAccess/CameraAccess/Utilities/WebSearchService.swift new file mode 100644 index 00000000..c2913be6 --- /dev/null +++ b/samples/CameraAccess/CameraAccess/Utilities/WebSearchService.swift @@ -0,0 +1,64 @@ +import Foundation + +enum WebSearchService { + static func search(_ query: String) async -> Result<String, Error> { + guard let encoded = query.addingPercentEncoding(withAllowedCharacters: .urlQueryAllowed), + let url = URL(string: "https://api.duckduckgo.com/?q=\(encoded)&format=json&no_html=1&skip_disambig=1") else { + return .failure(SearchError.invalidQuery) + } + NSLog("[WebSearch] Searching: %@", query) + do { + let config = URLSessionConfiguration.default + config.timeoutIntervalForRequest = 10 + let session = URLSession(configuration: config) + let (data, response) = try await session.data(from: url) + guard let http = response as? HTTPURLResponse, (200...299).contains(http.statusCode) else { + return .failure(SearchError.httpError) + } + guard let json = try? JSONSerialization.jsonObject(with: data) as? [String: Any] else { + return .failure(SearchError.parseError) + } + var results: [String] = [] + if let answer = json["Answer"] as? String, !answer.isEmpty { results.append("Answer: \(answer)") } + if let abstract = json["AbstractText"] as? String, !abstract.isEmpty { + let source = json["AbstractSource"] as? String ?? "" + results.append("\(abstract)\(source.isEmpty ? "" : " (Source: \(source))")") + } + if let topics = json["RelatedTopics"] as? 
[[String: Any]] { + for topic in topics.prefix(5) { + if let text = topic["Text"] as? String, !text.isEmpty { results.append("- \(text)") } + } + } + if let infobox = json["Infobox"] as? [String: Any], let content = infobox["content"] as? [[String: Any]] { + var specs: [String] = [] + for item in content.prefix(8) { + if let label = item["label"] as? String, let value = item["value"] as? String { specs.append("\(label): \(value)") } + } + if !specs.isEmpty { results.append("Specifications:\n" + specs.joined(separator: "\n")) } + } + if results.isEmpty { + if let definition = json["Definition"] as? String, !definition.isEmpty { results.append(definition) } + } + if results.isEmpty { + return .success("No detailed results found for '\(query)'. Try a more specific query with model number or manufacturer name.") + } + let resultText = results.joined(separator: "\n\n") + NSLog("[WebSearch] Found %d result sections for: %@", results.count, query) + return .success(resultText) + } catch { + NSLog("[WebSearch] Error: %@", error.localizedDescription) + return .failure(error) + } + } +} + +enum SearchError: LocalizedError { + case invalidQuery, httpError, parseError + var errorDescription: String? { + switch self { + case .invalidQuery: return "Invalid search query" + case .httpError: return "Search service returned an error" + case .parseError: return "Could not parse search results" + } + } +} diff --git a/samples/CameraAccess/CameraAccess/ViewModels/StreamSessionViewModel.swift b/samples/CameraAccess/CameraAccess/ViewModels/StreamSessionViewModel.swift index 29203cd8..253504c2 100644 --- a/samples/CameraAccess/CameraAccess/ViewModels/StreamSessionViewModel.swift +++ b/samples/CameraAccess/CameraAccess/ViewModels/StreamSessionViewModel.swift @@ -352,12 +352,8 @@ class StreamSessionViewModel: ObservableObject { return "The operation timed out. Please try again." case .videoStreamingError: return "Video streaming failed. Please try again." 
- case .audioStreamingError: - return "Audio streaming failed. Please try again." case .permissionDenied: return "Camera permission denied. Please grant permission in Settings." - case .hingesClosed: - return "The hinges on the glasses were closed. Please open the hinges and try again." @unknown default: return "An unknown streaming error occurred." } diff --git a/samples/CameraAccess/CameraAccess/Views/Components/GeminiOverlayView.swift b/samples/CameraAccess/CameraAccess/Views/Components/GeminiOverlayView.swift index 67ec11fb..0728ba1d 100644 --- a/samples/CameraAccess/CameraAccess/Views/Components/GeminiOverlayView.swift +++ b/samples/CameraAccess/CameraAccess/Views/Components/GeminiOverlayView.swift @@ -10,6 +10,17 @@ struct GeminiStatusBar: View { // OpenClaw connection pill StatusPill(color: openClawStatusColor, text: openClawStatusText) + + if let spatial = geminiVM.spatialService { + SpatialBadge(spatialService: spatial) + } + + if geminiVM.isInspectionActive { + InspectionBadge() + } + if geminiVM.isSafetyMonitorActive { + SafetyBadge() + } } } @@ -149,6 +160,50 @@ struct ToolCallStatusView: View { } } +struct InspectionBadge: View { + @State private var pulsing = false + + var body: some View { + HStack(spacing: 4) { + Circle() + .fill(Color.blue) + .frame(width: 8, height: 8) + .scaleEffect(pulsing ? 1.3 : 1.0) + .animation(.easeInOut(duration: 0.8).repeatForever(autoreverses: true), value: pulsing) + Text("Inspecting") + .font(.system(size: 11, weight: .semibold)) + .foregroundColor(.white) + } + .padding(.horizontal, 10) + .padding(.vertical, 5) + .background(Color.blue.opacity(0.4)) + .cornerRadius(12) + .onAppear { pulsing = true } + } +} + +struct SafetyBadge: View { + @State private var pulsing = false + + var body: some View { + HStack(spacing: 4) { + Image(systemName: "shield.checkered") + .font(.system(size: 10, weight: .bold)) + .foregroundColor(.yellow) + .scaleEffect(pulsing ? 
1.2 : 1.0) + .animation(.easeInOut(duration: 1.0).repeatForever(autoreverses: true), value: pulsing) + Text("Safety") + .font(.system(size: 11, weight: .semibold)) + .foregroundColor(.white) + } + .padding(.horizontal, 10) + .padding(.vertical, 5) + .background(Color.orange.opacity(0.4)) + .cornerRadius(12) + .onAppear { pulsing = true } + } +} + struct SpeakingIndicator: View { @State private var animating = false @@ -170,3 +225,49 @@ struct SpeakingIndicator: View { .onDisappear { animating = false } } } + + +struct SpatialBadge: View { + @ObservedObject var spatialService: SpatialLocalizationService + + var body: some View { + HStack(spacing: 4) { + Image(systemName: icon) + .font(.system(size: 10, weight: .bold)) + Text(label) + .font(.system(size: 11, weight: .semibold)) + } + .foregroundColor(.white) + .padding(.horizontal, 8) + .padding(.vertical, 4) + .background(color.opacity(0.9)) + .cornerRadius(8) + } + + private var icon: String { + switch spatialService.activeSource { + case .multiset: return "scope" + case .googleGeo: return "globe" + case .gps: return "location.fill" + case .unknown: return "location.slash" + } + } + + private var label: String { + switch spatialService.activeSource { + case .multiset: return "VPS ±5cm" + case .googleGeo: return "GEO ±1m" + case .gps: return "GPS" + case .unknown: return "Spatial" + } + } + + private var color: Color { + switch spatialService.activeSource { + case .multiset: return .purple + case .googleGeo: return .indigo + case .gps: return .blue + case .unknown: return .gray + } + } +} diff --git a/samples/CameraAccess/CameraAccess/Views/NonStreamView.swift b/samples/CameraAccess/CameraAccess/Views/NonStreamView.swift index df8b090f..2a6bbbf2 100644 --- a/samples/CameraAccess/CameraAccess/Views/NonStreamView.swift +++ b/samples/CameraAccess/CameraAccess/Views/NonStreamView.swift @@ -28,7 +28,13 @@ struct NonStreamView: View { Color.black.edgesIgnoringSafeArea(.all) VStack { - HStack { + HStack(spacing: 8) { + 
Text("Demo By:") + .font(.system(size: 11, weight: .medium)) + .foregroundColor(.white.opacity(0.6)) + Text("FieldMatrix.Ai") + .font(.system(size: 14, weight: .bold)) + .foregroundColor(.cyan) Spacer() Menu { Button("Settings") { diff --git a/samples/CameraAccess/CameraAccess/Views/StreamSessionView.swift b/samples/CameraAccess/CameraAccess/Views/StreamSessionView.swift index 8fa01b55..c7dbf3f1 100644 --- a/samples/CameraAccess/CameraAccess/Views/StreamSessionView.swift +++ b/samples/CameraAccess/CameraAccess/Views/StreamSessionView.swift @@ -42,6 +42,10 @@ struct StreamSessionView: View { viewModel.geminiSessionVM = geminiVM viewModel.webrtcSessionVM = webrtcVM geminiVM.streamingMode = viewModel.streamingMode + geminiVM.webrtcVM = webrtcVM + geminiVM.frameProvider = { [weak viewModel] in + viewModel?.currentVideoFrame + } } .onChange(of: viewModel.streamingMode) { newMode in geminiVM.streamingMode = newMode diff --git a/samples/CameraAccess/CameraAccess/Views/StreamView.swift b/samples/CameraAccess/CameraAccess/Views/StreamView.swift index 3fc83f72..9d48db21 100644 --- a/samples/CameraAccess/CameraAccess/Views/StreamView.swift +++ b/samples/CameraAccess/CameraAccess/Views/StreamView.swift @@ -51,6 +51,13 @@ struct StreamView: View { .foregroundColor(.white) } + // Branding banner (top) + VStack { + BrandingBanner(noteCount: geminiVM.sessionContext?.notes.count ?? 
0) + Spacer() + } + .padding(.top, 4) + // Gemini status overlay (top) + speaking indicator if geminiVM.isGeminiActive { VStack { @@ -125,6 +132,15 @@ struct StreamView: View { ) } } + // Report share sheet + .sheet(isPresented: Binding( + get: { geminiVM.reportURLToShare != nil }, + set: { if !$0 { geminiVM.reportURLToShare = nil } } + )) { + if let url = geminiVM.reportURLToShare { + ActivityViewController(activityItems: [url]) + } + } // Gemini error alert .alert("AI Assistant", isPresented: Binding( get: { geminiVM.errorMessage != nil }, @@ -146,6 +162,49 @@ struct StreamView: View { } } +// MARK: - Branding Banner + +struct BrandingBanner: View { + let noteCount: Int + + var body: some View { + HStack(spacing: 8) { + Text("Demo By:") + .font(.system(size: 11, weight: .medium)) + .foregroundColor(.white.opacity(0.6)) + Text("FieldMatrix.Ai") + .font(.system(size: 14, weight: .bold)) + .foregroundColor(.cyan) + + if noteCount > 0 { + Text("\(noteCount) notes") + .font(.system(size: 11, weight: .medium)) + .foregroundColor(.white.opacity(0.7)) + .padding(.horizontal, 8) + .padding(.vertical, 2) + .background(Color.white.opacity(0.15)) + .cornerRadius(8) + } + } + .padding(.horizontal, 14) + .padding(.vertical, 6) + .background(Color.black.opacity(0.5)) + .cornerRadius(16) + } +} + +// MARK: - Activity View Controller (Share Sheet) + +struct ActivityViewController: UIViewControllerRepresentable { + let activityItems: [Any] + + func makeUIViewController(context: Context) -> UIActivityViewController { + UIActivityViewController(activityItems: activityItems, applicationActivities: nil) + } + + func updateUIViewController(_ uiViewController: UIActivityViewController, context: Context) {} +} + // Extracted controls for clarity struct ControlsView: View { @ObservedObject var viewModel: StreamSessionViewModel @@ -172,7 +231,7 @@ struct ControlsView: View { } } - // Gemini AI button (disabled when WebRTC is active — audio conflict) + // Gemini AI button CircleButton( 
icon: geminiVM.isGeminiActive ? "waveform.circle.fill" : "waveform.circle", text: "AI" @@ -185,10 +244,8 @@ struct ControlsView: View { } } } - .opacity(webrtcVM.isActive ? 0.4 : 1.0) - .disabled(webrtcVM.isActive) - // WebRTC Live Stream button (disabled when Gemini is active — audio conflict) + // WebRTC Live Stream button (collaborative mode when Gemini also active) CircleButton( icon: webrtcVM.isActive ? "antenna.radiowaves.left.and.right.circle.fill" @@ -200,11 +257,13 @@ struct ControlsView: View { webrtcVM.stopSession() } else { await webrtcVM.startSession() + // Enter collaborative mode if Gemini is already active + if geminiVM.isGeminiActive { + webrtcVM.enterCollaborativeMode() + } } } } - .opacity(geminiVM.isGeminiActive ? 0.4 : 1.0) - .disabled(geminiVM.isGeminiActive) } } } diff --git a/samples/CameraAccess/CameraAccess/WebRTC/SignalingClient.swift b/samples/CameraAccess/CameraAccess/WebRTC/SignalingClient.swift index aec172c6..d33a8d46 100644 --- a/samples/CameraAccess/CameraAccess/WebRTC/SignalingClient.swift +++ b/samples/CameraAccess/CameraAccess/WebRTC/SignalingClient.swift @@ -76,6 +76,14 @@ class SignalingClient { ] as [String: Any]) } + func sendTranscript(speaker: String, text: String) { + sendJSON([ + "type": "transcript", + "speaker": speaker, + "text": text, + ] as [String: Any]) + } + func disconnect() { receiveTask?.cancel() receiveTask = nil diff --git a/samples/CameraAccess/CameraAccess/WebRTC/WebRTCClient.swift b/samples/CameraAccess/CameraAccess/WebRTC/WebRTCClient.swift index ad7319c1..1526a2f7 100644 --- a/samples/CameraAccess/CameraAccess/WebRTC/WebRTCClient.swift +++ b/samples/CameraAccess/CameraAccess/WebRTC/WebRTCClient.swift @@ -15,8 +15,8 @@ class WebRTCClient: NSObject { private let factory: RTCPeerConnectionFactory private var peerConnection: RTCPeerConnection? - private var videoSource: RTCVideoSource! - private var videoCapturer: CustomVideoCapturer! + private var videoSource: RTCVideoSource? 
+ private var videoCapturer: CustomVideoCapturer? private var localVideoTrack: RTCVideoTrack? private var localAudioTrack: RTCAudioTrack? private(set) var remoteVideoTrack: RTCVideoTrack? @@ -52,11 +52,14 @@ class WebRTCClient: NSObject { private func createMediaTracks() { // Video track — custom source fed by DAT SDK frames - videoSource = factory.videoSource() - videoCapturer = CustomVideoCapturer(delegate: videoSource) - localVideoTrack = factory.videoTrack(with: videoSource, trackId: "video0") + let source = factory.videoSource() + videoSource = source + videoCapturer = CustomVideoCapturer(delegate: source) + localVideoTrack = factory.videoTrack(with: source, trackId: "video0") localVideoTrack?.isEnabled = true - peerConnection?.add(localVideoTrack!, streamIds: ["stream0"]) + if let track = localVideoTrack { + peerConnection?.add(track, streamIds: ["stream0"]) + } // Audio track — WebRTC native audio (handles mic capture, AEC, playback) let audioConstraints = RTCMediaConstraints( @@ -65,7 +68,9 @@ class WebRTCClient: NSObject { let audioSource = factory.audioSource(with: audioConstraints) localAudioTrack = factory.audioTrack(with: audioSource, trackId: "audio0") localAudioTrack?.isEnabled = true - peerConnection?.add(localAudioTrack!, streamIds: ["stream0"]) + if let track = localAudioTrack { + peerConnection?.add(track, streamIds: ["stream0"]) + } } /// Called by ViewModel to push video frames from DAT SDK / iPhone camera. diff --git a/samples/CameraAccess/CameraAccess/WebRTC/WebRTCOverlayView.swift b/samples/CameraAccess/CameraAccess/WebRTC/WebRTCOverlayView.swift index 91129004..a18bed9b 100644 --- a/samples/CameraAccess/CameraAccess/WebRTC/WebRTCOverlayView.swift +++ b/samples/CameraAccess/CameraAccess/WebRTC/WebRTCOverlayView.swift @@ -17,6 +17,10 @@ struct WebRTCStatusBar: View { text: webrtcVM.isMuted ? 
"Muted" : "Mic On" ) } + + if webrtcVM.isCollaborativeMode { + CollaborativeBadge() + } } } @@ -42,6 +46,23 @@ struct WebRTCStatusBar: View { } } +struct CollaborativeBadge: View { + var body: some View { + HStack(spacing: 4) { + Image(systemName: "person.2.fill") + .font(.system(size: 10, weight: .bold)) + .foregroundColor(.cyan) + Text("Collab") + .font(.system(size: 11, weight: .semibold)) + .foregroundColor(.white) + } + .padding(.horizontal, 10) + .padding(.vertical, 5) + .background(Color.cyan.opacity(0.3)) + .cornerRadius(12) + } +} + struct RoomCodePill: View { let code: String @State private var showCopied: Bool = false diff --git a/samples/CameraAccess/CameraAccess/WebRTC/WebRTCSessionViewModel.swift b/samples/CameraAccess/CameraAccess/WebRTC/WebRTCSessionViewModel.swift index a2463ce8..c183c555 100644 --- a/samples/CameraAccess/CameraAccess/WebRTC/WebRTCSessionViewModel.swift +++ b/samples/CameraAccess/CameraAccess/WebRTC/WebRTCSessionViewModel.swift @@ -22,6 +22,7 @@ class WebRTCSessionViewModel: ObservableObject { @Published var errorMessage: String? @Published var remoteVideoTrack: RTCVideoTrack? @Published var hasRemoteVideo: Bool = false + @Published var isCollaborativeMode: Bool = false private var webRTCClient: WebRTCClient? private var signalingClient: SignalingClient? 
@@ -64,6 +65,7 @@ class WebRTCSessionViewModel: ObservableObject { isMuted = false remoteVideoTrack = nil hasRemoteVideo = false + isCollaborativeMode = false } func toggleMute() { @@ -77,6 +79,17 @@ class WebRTCSessionViewModel: ObservableObject { webRTCClient?.pushVideoFrame(image) } + func enterCollaborativeMode() { + guard isActive else { return } + isCollaborativeMode = true + NSLog("[WebRTC] Collaborative mode enabled (AI + Live)") + } + + func broadcastTranscript(speaker: String, text: String) { + guard isActive, isCollaborativeMode else { return } + signalingClient?.sendTranscript(speaker: speaker, text: text) + } + // MARK: - WebRTC + Signaling Setup private func setupWebRTCClient(iceServers: [RTCIceServer]?) {