From e9b478d6c4c1ddfb34ca432651391398012f8390 Mon Sep 17 00:00:00 2001 From: Elliot Chen Date: Wed, 17 Dec 2025 17:00:53 +0800 Subject: [PATCH 1/7] feat: voice to image with GPT Image 1.5 --- .../examples/voice-image-kids/.env.example | 12 + .../examples/voice-image-kids/Dockerfile | 83 +++++ .../examples/voice-image-kids/README.md | 312 ++++++++++++++++++ .../examples/voice-image-kids/Taskfile.yml | 80 +++++ .../examples/voice-image-kids/tenapp/go.mod | 7 + .../examples/voice-image-kids/tenapp/main.go | 71 ++++ .../tenapp/manifest-lock.json | 202 ++++++++++++ .../voice-image-kids/tenapp/manifest.json | 40 +++ .../voice-image-kids/tenapp/property.json | 189 +++++++++++ .../tenapp/scripts/install_python_deps.sh | 93 ++++++ .../voice-image-kids/tenapp/scripts/start.sh | 13 + .../extension/main_python/__init__.py | 8 + .../extension/main_python/addon.py | 20 ++ .../extension/main_python/agent/__init__.py | 0 .../extension/main_python/agent/agent.py | 225 +++++++++++++ .../extension/main_python/agent/decorators.py | 16 + .../extension/main_python/agent/events.py | 73 ++++ .../extension/main_python/agent/llm_exec.py | 278 ++++++++++++++++ .../extension/main_python/config.py | 14 + .../extension/main_python/extension.py | 168 ++++++++++ .../extension/main_python/helper.py | 88 +++++ .../extension/main_python/manifest.json | 34 ++ .../extension/main_python/property.json | 3 + .../extension/main_python/requirements.txt | 1 + .../openai_gpt_image_python/README.md | 221 +++++++++++++ .../openai_gpt_image_python/__init__.py | 6 + .../openai_gpt_image_python/addon.py | 20 ++ .../openai_gpt_image_python/config.py | 80 +++++ .../openai_gpt_image_python/extension.py | 285 ++++++++++++++++ .../openai_gpt_image_python/manifest.json | 148 +++++++++ .../openai_image_client.py | 191 +++++++++++ .../openai_gpt_image_python/property.json | 17 + .../openai_gpt_image_python/requirements.txt | 2 + ai_agents/playground/bun.lock | 1 + 34 files changed, 3001 insertions(+) create mode 100644 ai_agents/agents/examples/voice-image-kids/.env.example create mode 100644 ai_agents/agents/examples/voice-image-kids/Dockerfile create mode 100644 ai_agents/agents/examples/voice-image-kids/README.md create mode 100644 ai_agents/agents/examples/voice-image-kids/Taskfile.yml create mode 100644 ai_agents/agents/examples/voice-image-kids/tenapp/go.mod create mode 100644 ai_agents/agents/examples/voice-image-kids/tenapp/main.go create mode 100644 ai_agents/agents/examples/voice-image-kids/tenapp/manifest-lock.json create mode 100644 ai_agents/agents/examples/voice-image-kids/tenapp/manifest.json create mode 100644 ai_agents/agents/examples/voice-image-kids/tenapp/property.json create mode 100755 ai_agents/agents/examples/voice-image-kids/tenapp/scripts/install_python_deps.sh create mode 100755 ai_agents/agents/examples/voice-image-kids/tenapp/scripts/start.sh create mode 100644 ai_agents/agents/examples/voice-image-kids/tenapp/ten_packages/extension/main_python/__init__.py create mode 100644 ai_agents/agents/examples/voice-image-kids/tenapp/ten_packages/extension/main_python/addon.py create mode 100644 ai_agents/agents/examples/voice-image-kids/tenapp/ten_packages/extension/main_python/agent/__init__.py create mode 100644 ai_agents/agents/examples/voice-image-kids/tenapp/ten_packages/extension/main_python/agent/agent.py create mode 100644 ai_agents/agents/examples/voice-image-kids/tenapp/ten_packages/extension/main_python/agent/decorators.py create mode 100644
ai_agents/agents/examples/voice-image-kids/tenapp/ten_packages/extension/main_python/agent/events.py create mode 100644 ai_agents/agents/examples/voice-image-kids/tenapp/ten_packages/extension/main_python/agent/llm_exec.py create mode 100644 ai_agents/agents/examples/voice-image-kids/tenapp/ten_packages/extension/main_python/config.py create mode 100644 ai_agents/agents/examples/voice-image-kids/tenapp/ten_packages/extension/main_python/extension.py create mode 100644 ai_agents/agents/examples/voice-image-kids/tenapp/ten_packages/extension/main_python/helper.py create mode 100644 ai_agents/agents/examples/voice-image-kids/tenapp/ten_packages/extension/main_python/manifest.json create mode 100644 ai_agents/agents/examples/voice-image-kids/tenapp/ten_packages/extension/main_python/property.json create mode 100644 ai_agents/agents/examples/voice-image-kids/tenapp/ten_packages/extension/main_python/requirements.txt create mode 100644 ai_agents/agents/ten_packages/extension/openai_gpt_image_python/README.md create mode 100644 ai_agents/agents/ten_packages/extension/openai_gpt_image_python/__init__.py create mode 100644 ai_agents/agents/ten_packages/extension/openai_gpt_image_python/addon.py create mode 100644 ai_agents/agents/ten_packages/extension/openai_gpt_image_python/config.py create mode 100644 ai_agents/agents/ten_packages/extension/openai_gpt_image_python/extension.py create mode 100644 ai_agents/agents/ten_packages/extension/openai_gpt_image_python/manifest.json create mode 100644 ai_agents/agents/ten_packages/extension/openai_gpt_image_python/openai_image_client.py create mode 100644 ai_agents/agents/ten_packages/extension/openai_gpt_image_python/property.json create mode 100644 ai_agents/agents/ten_packages/extension/openai_gpt_image_python/requirements.txt diff --git a/ai_agents/agents/examples/voice-image-kids/.env.example b/ai_agents/agents/examples/voice-image-kids/.env.example new file mode 100644 index 0000000000..cb156a9325 --- /dev/null +++ b/ai_agents/agents/examples/voice-image-kids/.env.example @@ -0,0 +1,12 @@ +# Required Environment Variables + +# Agora RTC (for voice input) +AGORA_APP_ID=your_agora_app_id_here +AGORA_APP_CERTIFICATE=your_agora_certificate_here # Optional + +# OpenAI (for ASR, LLM, and Image Generation) +OPENAI_API_KEY=sk-your_openai_api_key_here + +# Optional Customization +OPENAI_MODEL=gpt-4o-mini # LLM model +OPENAI_BASE_URL=https://api.openai.com/v1 # Custom endpoint diff --git a/ai_agents/agents/examples/voice-image-kids/Dockerfile b/ai_agents/agents/examples/voice-image-kids/Dockerfile new file mode 100644 index 0000000000..c6a3f40fbe --- /dev/null +++ b/ai_agents/agents/examples/voice-image-kids/Dockerfile @@ -0,0 +1,83 @@ +FROM ghcr.io/ten-framework/ten_agent_build:0.7.12 AS builder + +ARG USE_AGENT=agents/examples/voice-image-kids + +WORKDIR /app + +COPY .env.example .env +COPY server/ server/ +COPY agents/scripts/ agents/scripts/ +COPY agents/ten_packages/ agents/ten_packages/ +COPY playground/ playground/ + +# Copy tenapp files explicitly to avoid symlink issues +COPY ${USE_AGENT}/tenapp/go.* ${USE_AGENT}/tenapp/ +COPY ${USE_AGENT}/tenapp/main.go ${USE_AGENT}/tenapp/ +COPY ${USE_AGENT}/tenapp/manifest* ${USE_AGENT}/tenapp/ +COPY ${USE_AGENT}/tenapp/property.json ${USE_AGENT}/tenapp/ +COPY ${USE_AGENT}/tenapp/scripts/ ${USE_AGENT}/tenapp/scripts/ + +# Copy extension directories that are actual directories (not symlinks) +COPY ${USE_AGENT}/tenapp/ten_packages/extension/main_python/ ${USE_AGENT}/tenapp/ten_packages/extension/main_python/ + +# Copy
other example files +COPY ${USE_AGENT}/README.md ${USE_AGENT}/ +COPY ${USE_AGENT}/Taskfile*.yml ${USE_AGENT}/ + +RUN cd /app/${USE_AGENT} && \ + task install && task release + +# Build frontend in builder stage (bun install already run by task install) +WORKDIR /app/playground +RUN NEXT_PUBLIC_EDIT_GRAPH_MODE=false bun run build +WORKDIR /app + +FROM ubuntu:22.04 + +ARG USE_AGENT=agents/examples/voice-image-kids + +RUN apt-get clean && apt-get update && apt-get install -y --no-install-recommends \ + libasound2 \ + libgstreamer1.0-dev \ + libunwind-dev \ + libc++1 \ + libssl-dev \ + python3 \ + python3-venv \ + python3-pip \ + python3-dev \ + jq vim \ + ca-certificates \ + curl \ + unzip \ + && apt-get clean && rm -rf /var/lib/apt/lists/* && rm -rf /tmp/* + +# Install Node.js 20 +RUN curl -fsSL https://deb.nodesource.com/setup_20.x | bash - \ + && apt-get install -y nodejs + +# Install Bun using the official install script +RUN curl -fsSL https://bun.com/install | bash +# Add Bun to the PATH (if not already added by the install script) +ENV PATH="/root/.bun/bin:$PATH" + +# Install Task +RUN sh -c "$(curl --location https://taskfile.dev/install.sh)" -- -d -b /usr/local/bin + +WORKDIR /app + +COPY --from=builder /app/${USE_AGENT}/tenapp/.release/ /app/agents/ +COPY --from=builder /app/server/bin/api /app/server/bin/api +COPY --from=builder /usr/local/lib /usr/local/lib +COPY --from=builder /usr/lib/python3 /usr/lib/python3 +COPY --from=builder /usr/local/bin/tman /usr/local/bin/tman + +# Copy built frontend from builder stage +COPY --from=builder /app/playground/ /app/playground/ + +# Copy Docker Taskfile +COPY --from=builder /app/${USE_AGENT}/Taskfile.docker.yml /app/Taskfile.docker.yml + +EXPOSE 8080 3000 + +ENTRYPOINT ["task", "-t", "Taskfile.docker.yml", "run-prod"] diff --git a/ai_agents/agents/examples/voice-image-kids/README.md b/ai_agents/agents/examples/voice-image-kids/README.md new file mode 100644 index 0000000000..37b5fa52d7 --- /dev/null +++ b/ai_agents/agents/examples/voice-image-kids/README.md @@ -0,0 +1,312 @@ +# Voice Image Kids - AI Art Generator for Children + +A delightful voice-to-image generation application built with TEN Framework. Kids speak what they want to draw, and AI creates it instantly! + +## Features + +- **Voice Activity Detection**: Automatic speech detection - no buttons needed! +- **Natural Speech Input**: Kids just talk naturally about what they want to create +- **GPT Image 1.5**: Latest, fastest OpenAI image generation (4x faster than DALL-E 3) +- **Kid-Friendly UI**: Colorful, engaging interface via shared playground +- **Instant Results**: Images appear in seconds +- **Safe & Encouraging**: Gentle error messages and positive feedback + +## How It Works + +1. **Kid speaks**: "I want a purple dragon flying over a rainbow castle!" +2. **AI listens**: OpenAI Whisper transcribes speech +3. **AI understands**: GPT-4o-mini processes the request +4. **AI creates**: GPT Image 1.5 generates the image +5. **Kid sees**: Image appears in the chat! + +## Prerequisites + +### Required API Keys + +1. **Agora Account** (for voice input) + - Sign up at [console.agora.io](https://console.agora.io/) + - Get your `AGORA_APP_ID` + +2.
**OpenAI Account** (for everything else) + - Sign up at [platform.openai.com](https://platform.openai.com/) + - Get your `OPENAI_API_KEY` + - You'll need access to: + - Whisper (speech-to-text) + - GPT-4o-mini (language model) + - GPT Image 1.5 (image generation) + +### System Requirements + +- **Node.js**: >= 20 +- **Bun**: Latest version +- **Go**: For API server +- **Python**: 3.10+ (for TEN extensions) +- **TEN Framework**: Installed via `tman` + +## Quick Start + +### 1. Clone and Navigate + +```bash +cd ai_agents/agents/examples/voice-image-kids +``` + +### 2. Set Environment Variables + +Create a `.env` file in the root `ai_agents` directory: + +```bash +# Required +AGORA_APP_ID=your_agora_app_id +OPENAI_API_KEY=sk-your_openai_key + +# Optional +AGORA_APP_CERTIFICATE=your_certificate +OPENAI_MODEL=gpt-4o-mini +``` + +### 3. Install Dependencies + +```bash +task install +``` + +This will: +- Install TEN framework packages +- Install Python dependencies +- Install frontend (shared playground) +- Build the API server + +### 4. Run the App + +```bash +task run +``` + +This starts: +- TEN Runtime (agent backend) +- API Server (port 8080) +- TMAN Designer (port 49483) +- Frontend will be available via the shared playground + +### 5. Access the Application + +- **Frontend**: http://localhost:3000 +- **API Server**: http://localhost:8080 +- **TMAN Designer**: http://localhost:49483 + +## Usage + +1. Open http://localhost:3000 in your browser +2. Allow microphone access when prompted +3. Start speaking! For example: + - "I want a spaceship in outer space!" + - "Draw a cute puppy playing in a park" + - "Create a magical fairy castle with rainbows" +4. Watch as the AI creates your image! + +## Configuration + +### Agent Graph + +The app uses these components: + +- **agora_rtc**: Audio I/O with voice activity detection +- **openai_asr_python**: Speech-to-text (Whisper) +- **openai_llm2_python**: Language model (GPT-4o-mini) +- **openai_gpt_image_python**: Image generation (GPT Image 1.5) +- **main_python**: Orchestration +- **message_collector**: Chat history + +### Customization + +Edit `tenapp/property.json` to customize: + +**LLM Prompt** (make it more/less kid-friendly): +```json +{ + "nodes": [{ + "name": "llm", + "property": { + "prompt": "Your custom system prompt here..." 
+ } + }] +} +``` + +**Image Settings**: +```json +{ + "nodes": [{ + "name": "image_gen_tool", + "property": { + "params": { + "model": "gpt-image-1.5", // or "dall-e-3" + "size": "1024x1024", // or "1792x1024", "1024x1792" + "quality": "standard" // or "hd" for higher quality + } + } + }] +} +``` + +## Project Structure + +``` +voice-image-kids/ +├── tenapp/ +│ ├── property.json # Agent graph configuration +│ ├── manifest.json # App metadata +│ └── ten_packages/ +│ └── extension/ +│ └── main_python/ # Main control logic +│ ├── extension.py # Event handlers +│ ├── config.py # Configuration +│ └── agent/ # Agent framework +├── Taskfile.yml # Build & run automation +├── Dockerfile # Container deployment +├── .env.example # Environment template +└── README.md # This file +``` + +## Troubleshooting + +### No Voice Input Detected + +- Check microphone permissions in browser +- Verify `AGORA_APP_ID` is set correctly +- Check browser console for errors + +### Images Not Generating + +- Verify `OPENAI_API_KEY` has access to GPT Image 1.5 +- Check TEN runtime logs: `tail -f tenapp/logs/latest.log` +- Try fallback model (DALL-E 3) in configuration + +### "API key is invalid" + +- Double-check `OPENAI_API_KEY` in `.env` +- Ensure no extra spaces or quotes +- Verify key is active on OpenAI platform + +### Installation Fails + +```bash +# Clean install +cd tenapp +rm -rf ten_packages +tman install +./scripts/install_python_deps.sh +``` + +## Docker Deployment + +### Build Image + +```bash +cd ai_agents +docker build -f agents/examples/voice-image-kids/Dockerfile -t voice-image-kids . +``` + +### Run Container + +```bash +docker run --rm -it --env-file .env \ + -p 8080:8080 \ + -p 3000:3000 \ + -p 49483:49483 \ + voice-image-kids +``` + +### Access + +- Frontend: http://localhost:3000 +- API: http://localhost:8080 +- TMAN Designer: http://localhost:49483 + +## Development + +### Visual Graph Designer + +Access TMAN Designer at http://localhost:49483 to: +- Visualize the agent graph +- Modify connections visually +- Test different configurations +- Add new extensions + +### Adding New Features + +1. **Add a new extension** to `tenapp/property.json` +2. **Configure connections** in the graph +3. **Update main_python** to handle new events +4. **Test** with `task run` + +### Debugging + +Enable debug mode in `tenapp/property.json`: + +```json +{ + "nodes": [{ + "name": "image_gen_tool", + "property": { + "dump": true, + "dump_path": "./debug_images.json" + } + }] +} +``` + +View logs: +```bash +# TEN runtime logs +tail -f tenapp/logs/latest.log + +# API server logs +# (shown in terminal where you ran `task run`) +``` + +## Safety & Content Policy + +The app uses OpenAI's content policy filtering. If an image request violates policies, kids will see: +> "I can't create that image. Let's try something different!" + +The LLM is configured to be encouraging and kid-friendly. + +## Performance + +- **Voice-to-Text**: ~500ms (Whisper) +- **LLM Processing**: ~1-2s (GPT-4o-mini) +- **Image Generation**: ~3-5s (GPT Image 1.5) - 4x faster than DALL-E 3! 
+- **Total**: ~5-8 seconds from speech to image + +## Cost Estimation + +Per image generation: +- Whisper transcription: ~$0.006/minute +- GPT-4o-mini: ~$0.001 (for prompt processing) +- GPT Image 1.5 (1024x1024, standard): ~$0.04 +- **Total per image**: ~$0.05 + +(Prices as of December 2024, may vary) + +## Learn More + +- [TEN Framework Documentation](https://doc.theten.ai) +- [OpenAI Image Generation Guide](https://platform.openai.com/docs/guides/image-generation) +- [GPT Image 1.5 Announcement](https://openai.com/index/new-chatgpt-images-is-here/) +- [Agora RTC Documentation](https://docs.agora.io/en/) + +## License + +This example is part of the TEN Framework, licensed under the Apache License, Version 2.0. + +## Support + +- **TEN Framework Issues**: [github.com/TEN-framework/TEN-Agent](https://github.com/TEN-framework/TEN-Agent) +- **OpenAI Support**: [help.openai.com](https://help.openai.com/) +- **Agora Support**: [agora.io/support](https://www.agora.io/en/customer-support/) + +--- + +**Have fun creating amazing art with AI!** 🎨✨ diff --git a/ai_agents/agents/examples/voice-image-kids/Taskfile.yml b/ai_agents/agents/examples/voice-image-kids/Taskfile.yml new file mode 100644 index 0000000000..2eb69ded46 --- /dev/null +++ b/ai_agents/agents/examples/voice-image-kids/Taskfile.yml @@ -0,0 +1,80 @@ +version: "3" + +dotenv: ["../../../.env"] + +tasks: + # install scripts + install-frontend: + desc: install frontend dependencies (shared playground) + internal: true + dir: ../../../playground + cmds: + - bun install --verbose + + install-tenapp: + desc: install tenapp dependencies + internal: true + dir: ./tenapp + cmds: + - tman install + + install-tenapp-python-deps: + desc: install tenapp python dependencies + deps: [install-tenapp] + internal: true + dir: ./tenapp + cmds: + - ./scripts/install_python_deps.sh + + build-api-server: + desc: build api server + dir: ../../../server + cmds: + - go mod tidy && go mod download && go build -o bin/api main.go + + install: + desc: install all dependencies + dir: ./tenapp + cmds: + - task: install-tenapp + - task: install-tenapp-python-deps + - task: install-frontend + - task: build-api-server + + # run scripts + run-tenapp: + desc: run tenapp + dir: ./tenapp + cmds: + - tman run start + + run-gd-server: + desc: run tman dev http server for TMAN Designer + dir: ./tenapp + cmds: + - tman designer + + run-frontend: + desc: run frontend (shared playground) + dir: ../../../playground + cmds: + - bun run dev + + run-api-server: + desc: run api server + dir: ../../../server + cmds: + - ./bin/api -tenapp_dir={{.PWD}}/tenapp + + run: + desc: run everything + deps: + - task: run-gd-server + - task: run-frontend + - task: run-api-server + + # release scripts + release: + desc: release + cmds: + - ../../scripts/release.sh {{.PWD}}/tenapp diff --git a/ai_agents/agents/examples/voice-image-kids/tenapp/go.mod b/ai_agents/agents/examples/voice-image-kids/tenapp/go.mod new file mode 100644 index 0000000000..b5fa6db087 --- /dev/null +++ b/ai_agents/agents/examples/voice-image-kids/tenapp/go.mod @@ -0,0 +1,7 @@ +module app + +go 1.20 + +replace ten_framework => ./ten_packages/system/ten_runtime_go/interface + +require ten_framework v0.0.0-00010101000000-000000000000 diff --git a/ai_agents/agents/examples/voice-image-kids/tenapp/main.go b/ai_agents/agents/examples/voice-image-kids/tenapp/main.go new file mode 100644 index 0000000000..f91569109f --- /dev/null +++ b/ai_agents/agents/examples/voice-image-kids/tenapp/main.go @@ -0,0 +1,71 @@ +/** + * + * Agora Real 
Time Engagement + * Created by Wei Hu in 2022-10. + * Copyright (c) 2024 Agora IO. All rights reserved. + * + */ +package main + +import ( + "flag" + "log" + "os" + + ten "ten_framework/ten_runtime" +) + +type appConfig struct { + PropertyFilePath string +} + +type defaultApp struct { + ten.DefaultApp + + cfg *appConfig +} + +func (p *defaultApp) OnConfigure( + tenEnv ten.TenEnv, +) { + // Using the default property.json if not specified. + if len(p.cfg.PropertyFilePath) > 0 { + if b, err := os.ReadFile(p.cfg.PropertyFilePath); err != nil { + log.Fatalf("Failed to read property file %s, err %v\n", p.cfg.PropertyFilePath, err) + } else { + tenEnv.InitPropertyFromJSONBytes(b) + } + } + + tenEnv.OnConfigureDone() +} + +func startAppBlocking(cfg *appConfig) { + appInstance, err := ten.NewApp(&defaultApp{ + cfg: cfg, + }) + if err != nil { + log.Fatalf("Failed to create the app, %v\n", err) + } + + appInstance.Run(true) + appInstance.Wait() + + ten.EnsureCleanupWhenProcessExit() +} + +func setDefaultLog() { + log.SetFlags(log.LstdFlags | log.Lmicroseconds) +} + +func main() { + // Set the default log format globally, users can use `log.Println()` directly. + setDefaultLog() + + cfg := &appConfig{} + + flag.StringVar(&cfg.PropertyFilePath, "property", "", "The absolute path of property.json") + flag.Parse() + + startAppBlocking(cfg) +} diff --git a/ai_agents/agents/examples/voice-image-kids/tenapp/manifest-lock.json b/ai_agents/agents/examples/voice-image-kids/tenapp/manifest-lock.json new file mode 100644 index 0000000000..0595606ec9 --- /dev/null +++ b/ai_agents/agents/examples/voice-image-kids/tenapp/manifest-lock.json @@ -0,0 +1,202 @@ +{ + "version": 1, + "packages": [ + { + "type": "system", + "name": "ten_runtime_go", + "version": "0.11.45", + "hash": "7c943ec076526ae84307c25dd4a5471a932d1d2818559e8c50da165d35c12050", + "dependencies": [ + { + "type": "system", + "name": "ten_runtime" + } + ], + "supports": [ + { + "os": "linux", + "arch": "x64" + } + ] + }, + { + "type": "extension", + "name": "agora_rtc", + "version": "0.23.9-t1", + "hash": "6f08c87c362d30bbfd831899435fec06982a2924bd9cc0f5cb7e936974bf5731", + "dependencies": [ + { + "type": "system", + "name": "ten_runtime" + }, + { + "type": "system", + "name": "agora_rtc_sdk" + } + ], + "supports": [ + { + "os": "linux", + "arch": "x64" + } + ] + }, + { + "type": "system", + "name": "ten_ai_base", + "version": "0.7.30", + "hash": "1608448d02897a411f6b863974fcaf12a695e0f588ceab1250e5581344dd0280", + "dependencies": [ + { + "type": "system", + "name": "ten_runtime_python" + } + ] + }, + { + "type": "extension", + "name": "streamid_adapter", + "version": "0.2.0", + "hash": "6df9a1b48a4007339ccbda84ff96a4ed4eb5df498b8fef021ca2302be7c55051", + "dependencies": [ + { + "type": "system", + "name": "ten_runtime_python" + } + ], + "path": "../../../ten_packages/extension/streamid_adapter" + }, + { + "type": "extension", + "name": "openai_asr_python", + "version": "0.2.2", + "hash": "dc0d2ecf5c104c396b0539e7f915041453d007aedebd121740c5bb965fd513dc", + "dependencies": [ + { + "type": "system", + "name": "ten_runtime_python" + }, + { + "type": "system", + "name": "ten_ai_base" + } + ], + "path": "../../../ten_packages/extension/openai_asr_python" + }, + { + "type": "extension", + "name": "openai_llm2_python", + "version": "0.2.0", + "hash": "ad24801cf718276830749bc50d72c5eba969e5d0ae7538ebd6e6d1ad0b414244", + "dependencies": [ + { + "type": "system", + "name": "ten_runtime_python" + }, + { + "type": "system", + "name": "ten_ai_base" + } + ], + 
"path": "../../../ten_packages/extension/openai_llm2_python" + }, + { + "type": "extension", + "name": "openai_gpt_image_python", + "version": "0.1.0", + "hash": "9ea64a9f4e13b8dce8d047f18ffc2416a72d082936e825cb9326e0ff8878a163", + "dependencies": [ + { + "type": "system", + "name": "ten_runtime_python" + }, + { + "type": "system", + "name": "ten_ai_base" + } + ], + "path": "../../../ten_packages/extension/openai_gpt_image_python" + }, + { + "type": "extension", + "name": "message_collector2", + "version": "0.2.0", + "hash": "b4cdc75c61ed572952e2f14bdb5c440f993d3a5bf2da6fe5f5cbb1b4d772a837", + "dependencies": [ + { + "type": "system", + "name": "ten_runtime_python" + } + ], + "path": "../../../ten_packages/extension/message_collector2" + }, + { + "type": "system", + "name": "ten_runtime_python", + "version": "0.11.45", + "hash": "2acf23c9e954ccad6aa2979f759535e46f98c654d65d9c9e4e6e626e043a0e47", + "dependencies": [ + { + "type": "system", + "name": "ten_runtime" + }, + { + "type": "addon_loader", + "name": "python_addon_loader" + } + ], + "supports": [ + { + "os": "linux", + "arch": "x64" + } + ] + }, + { + "type": "system", + "name": "ten_runtime", + "version": "0.11.45", + "hash": "3e1093675a4c3eecc340662886a948384c9cfa7088af51e0f9344ba11c9bd84c", + "supports": [ + { + "os": "linux", + "arch": "x64" + } + ] + }, + { + "type": "system", + "name": "agora_rtc_sdk", + "version": "4.4.32-141", + "hash": "d0a96b4672127492eee249f123750638071f9bcb673b5371320c8e16cdaff1b5", + "supports": [ + { + "os": "linux", + "arch": "x64" + } + ] + }, + { + "type": "addon_loader", + "name": "python_addon_loader", + "version": "0.11.45", + "hash": "13e9c33f444ced60f7af6f00ef6741dcb113790eb13d41ef6ef52c58ff606123", + "dependencies": [ + { + "type": "system", + "name": "ten_runtime" + }, + { + "type": "system", + "name": "ten_runtime_python" + } + ], + "supports": [ + { + "os": "linux", + "arch": "x64" + } + ] + } + ] +} \ No newline at end of file diff --git a/ai_agents/agents/examples/voice-image-kids/tenapp/manifest.json b/ai_agents/agents/examples/voice-image-kids/tenapp/manifest.json new file mode 100644 index 0000000000..0331b67380 --- /dev/null +++ b/ai_agents/agents/examples/voice-image-kids/tenapp/manifest.json @@ -0,0 +1,40 @@ +{ + "type": "app", + "name": "voice_image_kids", + "version": "0.1.0", + "dependencies": [ + { + "type": "system", + "name": "ten_runtime_go", + "version": "0.11" + }, + { + "type": "extension", + "name": "agora_rtc", + "version": "=0.23.9-t1" + }, + { + "type": "system", + "name": "ten_ai_base", + "version": "0.7" + }, + { + "path": "../../../ten_packages/extension/streamid_adapter" + }, + { + "path": "../../../ten_packages/extension/openai_asr_python" + }, + { + "path": "../../../ten_packages/extension/openai_llm2_python" + }, + { + "path": "../../../ten_packages/extension/openai_gpt_image_python" + }, + { + "path": "../../../ten_packages/extension/message_collector2" + } + ], + "scripts": { + "start": "scripts/start.sh" + } +} diff --git a/ai_agents/agents/examples/voice-image-kids/tenapp/property.json b/ai_agents/agents/examples/voice-image-kids/tenapp/property.json new file mode 100644 index 0000000000..cb20d4d857 --- /dev/null +++ b/ai_agents/agents/examples/voice-image-kids/tenapp/property.json @@ -0,0 +1,189 @@ +{ + "ten": { + "predefined_graphs": [ + { + "name": "voice_image_kids", + "auto_start": true, + "graph": { + "nodes": [ + { + "type": "extension", + "name": "agora_rtc", + "addon": "agora_rtc", + "extension_group": "default", + "property": { + "app_id": 
"${env:AGORA_APP_ID}", + "app_certificate": "${env:AGORA_APP_CERTIFICATE|}", + "channel": "voice_image_kids", + "stream_id": 1234, + "remote_stream_id": 123, + "subscribe_audio": true, + "publish_audio": false, + "publish_data": true, + "enable_agora_asr": false + } + }, + { + "type": "extension", + "name": "stt", + "addon": "openai_asr_python", + "extension_group": "stt", + "property": { + "api_key": "${env:OPENAI_API_KEY}", + "model": "whisper-1", + "language": "en" + } + }, + { + "type": "extension", + "name": "llm", + "addon": "openai_llm2_python", + "extension_group": "llm", + "property": { + "base_url": "https://api.openai.com/v1", + "api_key": "${env:OPENAI_API_KEY}", + "model": "${env:OPENAI_MODEL|gpt-4o-mini}", + "max_tokens": 512, + "prompt": "You are a friendly AI art assistant for kids! When children describe what they want to draw, help them create amazing images.\n\nGuidelines:\n- Keep responses short, fun, and encouraging\n- When they describe an image idea, use the generate_image tool immediately\n- Make prompts detailed and vivid (add colors, mood, style details)\n- After generating, celebrate their creativity!\n- If the image can't be created, gently suggest something similar\n\nExample:\nKid: \"I want a purple dragon!\"\nYou: \"Ooh, a purple dragon! Let me create that for you!\" [calls generate_image with detailed prompt]", + "greeting": "Hi! I'm your AI art friend! Tell me what you'd like to draw!", + "max_memory_length": 10 + } + }, + { + "type": "extension", + "name": "image_gen_tool", + "addon": "openai_gpt_image_python", + "extension_group": "tools", + "property": { + "params": { + "api_key": "${env:OPENAI_API_KEY}", + "model": "gpt-image-1.5", + "size": "1024x1024", + "quality": "standard", + "fallback_model": "dall-e-3" + } + } + }, + { + "type": "extension", + "name": "main_control", + "addon": "main_python", + "extension_group": "control", + "property": { + "greeting": "Hi! I'm your AI art friend! Tell me what you'd like to draw!" 
+ } + }, + { + "type": "extension", + "name": "message_collector", + "addon": "message_collector2", + "extension_group": "transcriber", + "property": {} + }, + { + "type": "extension", + "name": "streamid_adapter", + "addon": "streamid_adapter", + "property": {} + } + ], + "connections": [ + { + "extension": "main_control", + "cmd": [ + { + "names": [ + "on_user_joined", + "on_user_left" + ], + "source": [ + { + "extension": "agora_rtc" + } + ] + }, + { + "names": [ + "tool_register" + ], + "source": [ + { + "extension": "image_gen_tool" + } + ] + } + ], + "data": [ + { + "name": "asr_result", + "source": [ + { + "extension": "stt" + } + ] + } + ] + }, + { + "extension": "agora_rtc", + "audio_frame": [ + { + "name": "pcm_frame", + "dest": [ + { + "extension": "streamid_adapter" + } + ] + } + ], + "data": [ + { + "name": "data", + "source": [ + { + "extension": "message_collector" + } + ] + } + ] + }, + { + "extension": "streamid_adapter", + "audio_frame": [ + { + "name": "pcm_frame", + "dest": [ + { + "extension": "stt" + } + ] + } + ] + } + ] + } + } + ], + "log": { + "handlers": [ + { + "matchers": [ + { + "level": "info" + } + ], + "formatter": { + "type": "plain", + "colored": true + }, + "emitter": { + "type": "console", + "config": { + "stream": "stdout" + } + } + } + ] + } + } +} diff --git a/ai_agents/agents/examples/voice-image-kids/tenapp/scripts/install_python_deps.sh b/ai_agents/agents/examples/voice-image-kids/tenapp/scripts/install_python_deps.sh new file mode 100755 index 0000000000..67b05e92e2 --- /dev/null +++ b/ai_agents/agents/examples/voice-image-kids/tenapp/scripts/install_python_deps.sh @@ -0,0 +1,93 @@ +#!/usr/bin/env bash + +# Set default pip install command +PIP_INSTALL_CMD=${PIP_INSTALL_CMD:-"uv pip install --system"} + +install_python_requirements() { + local app_dir=$1 + + echo "Starting Python dependencies installation..." + + + # Install companion server Python dependencies (if present) + if [[ -f "$app_dir/../server/requirements.txt" ]]; then + echo "Installing server Python dependencies..." + ${PIP_INSTALL_CMD} -r "$app_dir/../server/requirements.txt" + else + echo "No requirements.txt found in server directory: $app_dir/../server" + fi + + # Traverse ten_packages/extension directory to find requirements.txt + if [[ -d "$app_dir/ten_packages/extension" ]]; then + echo "Traversing ten_packages/extension directory..." + for extension in "$app_dir/ten_packages/extension"/*; do + if [[ -d "$extension" && -f "$extension/requirements.txt" ]]; then + echo "Found requirements.txt in $extension, installing dependencies..." + ${PIP_INSTALL_CMD} -r "$extension/requirements.txt" + fi + done + else + echo "ten_packages/extension directory not found" + fi + + # Traverse ten_packages/system directory to find requirements.txt + if [[ -d "$app_dir/ten_packages/system" ]]; then + echo "Traversing ten_packages/system directory..." + for extension in "$app_dir/ten_packages/system"/*; do + if [[ -d "$extension" && -f "$extension/requirements.txt" ]]; then + echo "Found requirements.txt in $extension, installing dependencies..." + ${PIP_INSTALL_CMD} -r "$extension/requirements.txt" + fi + done + else + echo "ten_packages/system directory not found" + fi + + echo "Python dependencies installation completed!" +} + +build_go_app() { + local app_dir=$1 + cd $app_dir + + # Check if the Go build tool exists before trying to build + if [[ -f "$app_dir/ten_packages/system/ten_runtime_go/tools/build/main.go" ]]; then + echo "Building Go app..."
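+ # The ten_runtime_go build helper compiles the app entrypoint (main.go); the resulting binary is presumably bin/main, which scripts/start.sh execs at startup.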
+ go run "$app_dir/ten_packages/system/ten_runtime_go/tools/build/main.go" --verbose + local build_result=$? + # Exit code 1 with "no buildable Go source files" is OK for Python-only apps + if [[ $build_result -ne 0 ]]; then + # Check if this is just a "no Go source files" error (which is fine for Python apps) + echo "Go build returned non-zero exit code, but this may be OK for Python-only apps" + fi + else + echo "Go build tool not found, skipping Go app build (this is normal if ten_runtime_go is not installed yet)" + fi +} + +main() { + # Get the parent directory of script location as app root directory + APP_HOME=$( + cd $(dirname $0)/.. + pwd + ) + + echo "App root directory: $APP_HOME" + echo "Using pip command: $PIP_INSTALL_CMD" + + # Check if manifest.json exists + if [[ ! -f "$APP_HOME/manifest.json" ]]; then + echo "Error: manifest.json file not found" + exit 1 + fi + + build_go_app "$APP_HOME" + + # Install Python dependencies + install_python_requirements "$APP_HOME" +} + +# If script is executed directly, run main function +if [[ "${BASH_SOURCE[0]}" == "${0}" ]]; then + main "$@" +fi diff --git a/ai_agents/agents/examples/voice-image-kids/tenapp/scripts/start.sh b/ai_agents/agents/examples/voice-image-kids/tenapp/scripts/start.sh new file mode 100755 index 0000000000..0d77cd1e55 --- /dev/null +++ b/ai_agents/agents/examples/voice-image-kids/tenapp/scripts/start.sh @@ -0,0 +1,13 @@ +#!/bin/bash + +set -e + +cd "$(dirname "${BASH_SOURCE[0]}")/.." + +#export TEN_ENABLE_PYTHON_DEBUG=true +#export TEN_PYTHON_DEBUG_PORT=5678 +export PYTHONPATH=$(pwd)/ten_packages/system/ten_ai_base/interface:$PYTHONPATH +export LD_LIBRARY_PATH=$(pwd)/ten_packages/system/agora_rtc_sdk/lib:$(pwd)/ten_packages/extension/agora_rtm/lib:$(pwd)/ten_packages/system/azure_speech_sdk/lib +export NODE_PATH=$(pwd)/ten_packages/system/ten_runtime_nodejs/lib:$NODE_PATH + +exec bin/main "$@" diff --git a/ai_agents/agents/examples/voice-image-kids/tenapp/ten_packages/extension/main_python/__init__.py b/ai_agents/agents/examples/voice-image-kids/tenapp/ten_packages/extension/main_python/__init__.py new file mode 100644 index 0000000000..0413aa9b81 --- /dev/null +++ b/ai_agents/agents/examples/voice-image-kids/tenapp/ten_packages/extension/main_python/__init__.py @@ -0,0 +1,8 @@ +# +# This file is part of TEN Framework, an open source project. +# Licensed under the Apache License, Version 2.0. +# See the LICENSE file for more information. +# +from . import addon + +__all__ = ["addon"] diff --git a/ai_agents/agents/examples/voice-image-kids/tenapp/ten_packages/extension/main_python/addon.py b/ai_agents/agents/examples/voice-image-kids/tenapp/ten_packages/extension/main_python/addon.py new file mode 100644 index 0000000000..b67d451051 --- /dev/null +++ b/ai_agents/agents/examples/voice-image-kids/tenapp/ten_packages/extension/main_python/addon.py @@ -0,0 +1,20 @@ +# +# This file is part of TEN Framework, an open source project. +# Licensed under the Apache License, Version 2.0. +# See the LICENSE file for more information. 
+# +from ten_runtime import ( + Addon, + register_addon_as_extension, + TenEnv, +) +from .extension import MainControlExtension + + +@register_addon_as_extension("main_python") +class MainControlExtensionAddon(Addon): + def on_create_instance(self, ten_env: TenEnv, name: str, context) -> None: + ten_env.log_info("MainControlExtension: on_create_instance") + ten_env.on_create_instance_done( + MainControlExtension(name), context + ) diff --git a/ai_agents/agents/examples/voice-image-kids/tenapp/ten_packages/extension/main_python/agent/__init__.py b/ai_agents/agents/examples/voice-image-kids/tenapp/ten_packages/extension/main_python/agent/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/ai_agents/agents/examples/voice-image-kids/tenapp/ten_packages/extension/main_python/agent/agent.py b/ai_agents/agents/examples/voice-image-kids/tenapp/ten_packages/extension/main_python/agent/agent.py new file mode 100644 index 0000000000..ef61df2621 --- /dev/null +++ b/ai_agents/agents/examples/voice-image-kids/tenapp/ten_packages/extension/main_python/agent/agent.py @@ -0,0 +1,225 @@ +import asyncio +import json +from typing import Awaitable, Callable, Optional +from .llm_exec import LLMExec +from ten_runtime import AsyncTenEnv, Cmd, CmdResult, Data, StatusCode +from ten_ai_base.types import LLMToolMetadata +from .events import * + + +class Agent: + def __init__(self, ten_env: AsyncTenEnv): + self.ten_env: AsyncTenEnv = ten_env + self.stopped = False + + # Callback registry + self._callbacks: dict[ + AgentEvent, list[Callable[[AgentEvent], Awaitable]] + ] = {} + + # Queues for ordered processing + self._asr_queue: asyncio.Queue[ASRResultEvent] = asyncio.Queue() + self._llm_queue: asyncio.Queue[LLMResponseEvent] = asyncio.Queue() + + # Current consumer tasks + self._asr_consumer: Optional[asyncio.Task] = None + self._llm_consumer: Optional[asyncio.Task] = None + self._llm_active_task: Optional[asyncio.Task] = ( + None # currently running handler + ) + + self.llm_exec = LLMExec(ten_env) + self.llm_exec.on_response = ( + self._on_llm_response + ) # callback handled internally + self.llm_exec.on_reasoning_response = ( + self._on_llm_reasoning_response + ) # callback handled internally + + # Start consumers + self._asr_consumer = asyncio.create_task(self._consume_asr()) + self._llm_consumer = asyncio.create_task(self._consume_llm()) + + # === Register handlers === + def on( + self, + event_type: AgentEvent, + handler: Callable[[AgentEvent], Awaitable] = None, + ): + """ + Register a callback for a given event type. + + Can be used in two ways: + 1) agent.on(EventType, handler) + 2) @agent.on(EventType) + async def handler(event: EventType): ... 
+ """ + + def decorator(func: Callable[[AgentEvent], Awaitable]): + self._callbacks.setdefault(event_type, []).append(func) + return func + + if handler is None: + return decorator + else: + return decorator(handler) + + async def _dispatch(self, event: AgentEvent): + """Dispatch event to registered handlers sequentially.""" + for etype, handlers in self._callbacks.items(): + if isinstance(event, etype): + for h in handlers: + try: + await h(event) + except asyncio.CancelledError: + raise + except Exception as e: + self.ten_env.log_error( + f"Handler error for {etype}: {e}" + ) + + # === Consumers === + async def _consume_asr(self): + while not self.stopped: + event = await self._asr_queue.get() + await self._dispatch(event) + + async def _consume_llm(self): + while not self.stopped: + event = await self._llm_queue.get() + # Run handler as a task so we can cancel mid-flight + self._llm_active_task = asyncio.create_task(self._dispatch(event)) + try: + await self._llm_active_task + except asyncio.CancelledError: + self.ten_env.log_info("[Agent] Active LLM task cancelled") + finally: + self._llm_active_task = None + + # === Emit events === + async def _emit_asr(self, event: ASRResultEvent): + await self._asr_queue.put(event) + + async def _emit_llm(self, event: LLMResponseEvent): + await self._llm_queue.put(event) + + async def _emit_direct(self, event: AgentEvent): + await self._dispatch(event) + + # === Incoming from runtime === + async def on_cmd(self, cmd: Cmd): + try: + name = cmd.get_name() + if name == "on_user_joined": + await self._emit_direct(UserJoinedEvent()) + elif name == "on_user_left": + await self._emit_direct(UserLeftEvent()) + elif name == "tool_register": + tool_json, err = cmd.get_property_to_json("tool") + if err: + raise RuntimeError(f"Invalid tool metadata: {err}") + tool = LLMToolMetadata.model_validate_json(tool_json) + await self._emit_direct( + ToolRegisterEvent( + tool=tool, source=cmd.get_source().extension_name + ) + ) + else: + self.ten_env.log_warn(f"Unhandled cmd: {name}") + + await self.ten_env.return_result( + CmdResult.create(StatusCode.OK, cmd) + ) + except Exception as e: + self.ten_env.log_error(f"on_cmd error: {e}") + await self.ten_env.return_result( + CmdResult.create(StatusCode.ERROR, cmd) + ) + + async def on_data(self, data: Data): + try: + if data.get_name() == "asr_result": + asr_json, _ = data.get_property_to_json(None) + asr = json.loads(asr_json) + await self._emit_asr( + ASRResultEvent( + text=asr.get("text", ""), + final=asr.get("final", False), + metadata=asr.get("metadata", {}), + ) + ) + else: + self.ten_env.log_warn(f"Unhandled data: {data.get_name()}") + except Exception as e: + self.ten_env.log_error(f"on_data error: {e}") + + async def _on_llm_response( + self, ten_env: AsyncTenEnv, delta: str, text: str, is_final: bool + ): + await self._emit_llm( + LLMResponseEvent(delta=delta, text=text, is_final=is_final) + ) + + async def _on_llm_reasoning_response( + self, ten_env: AsyncTenEnv, delta: str, text: str, is_final: bool + ): + """ + Internal callback for streaming LLM output, wrapped as an AgentEvent. + """ + await self._emit_llm( + LLMResponseEvent( + delta=delta, text=text, is_final=is_final, type="reasoning" + ) + ) + + # === LLM control === + async def register_llm_tool(self, tool: LLMToolMetadata, source: str): + """ + Register tools with the LLM. + This method sends a command to register the provided tools. 
+ """ + await self.llm_exec.register_tool(tool, source) + + async def queue_llm_input(self, text: str): + """ + Queue a new message to the LLM context. + This method sends the text input to the LLM for processing. + """ + await self.llm_exec.queue_input(text) + + async def flush_llm(self): + """ + Flush the LLM input queue. + This will ensure that all queued inputs are processed. + """ + await self.llm_exec.flush() + + # Clear queue + while not self._llm_queue.empty(): + try: + self._llm_queue.get_nowait() + self._llm_queue.task_done() + except asyncio.QueueEmpty: + break + + # Cancel active LLM task + if self._llm_active_task and not self._llm_active_task.done(): + self._llm_active_task.cancel() + try: + await self._llm_active_task + except asyncio.CancelledError: + pass + self._llm_active_task = None + + async def stop(self): + """ + Stop the agent processing. + This will stop the event queue and any ongoing tasks. + """ + self.stopped = True + await self.llm_exec.stop() + await self.flush_llm() + if self._asr_consumer: + self._asr_consumer.cancel() + if self._llm_consumer: + self._llm_consumer.cancel() diff --git a/ai_agents/agents/examples/voice-image-kids/tenapp/ten_packages/extension/main_python/agent/decorators.py b/ai_agents/agents/examples/voice-image-kids/tenapp/ten_packages/extension/main_python/agent/decorators.py new file mode 100644 index 0000000000..091178135d --- /dev/null +++ b/ai_agents/agents/examples/voice-image-kids/tenapp/ten_packages/extension/main_python/agent/decorators.py @@ -0,0 +1,16 @@ +from .events import AgentEvent + + +def agent_event_handler(event_type: AgentEvent): + """ + Decorator to mark a method as an Agent event handler. + Usage: + @agent_event_handler(ASRResultEvent) + async def on_asr(self, event: ASRResultEvent): ... 
+ """ + + def wrapper(func): + setattr(func, "_agent_event_type", event_type) + return func + + return wrapper diff --git a/ai_agents/agents/examples/voice-image-kids/tenapp/ten_packages/extension/main_python/agent/events.py b/ai_agents/agents/examples/voice-image-kids/tenapp/ten_packages/extension/main_python/agent/events.py new file mode 100644 index 0000000000..df61ec5c2f --- /dev/null +++ b/ai_agents/agents/examples/voice-image-kids/tenapp/ten_packages/extension/main_python/agent/events.py @@ -0,0 +1,73 @@ +from pydantic import BaseModel +from typing import Literal, Union, Dict, Any +from ten_ai_base.types import LLMToolMetadata + + +# ==== Base Event ==== + + +class AgentEventBase(BaseModel): + """Base class for all agent-level events.""" + + type: Literal["cmd", "data"] + name: str + + +# ==== CMD Events ==== + + +class UserJoinedEvent(AgentEventBase): + """Event triggered when a user joins the session.""" + + type: Literal["cmd"] = "cmd" + name: Literal["on_user_joined"] = "on_user_joined" + + +class UserLeftEvent(AgentEventBase): + """Event triggered when a user leaves the session.""" + + type: Literal["cmd"] = "cmd" + name: Literal["on_user_left"] = "on_user_left" + + +class ToolRegisterEvent(AgentEventBase): + """Event triggered when a tool is registered by the user.""" + + type: Literal["cmd"] = "cmd" + name: Literal["tool_register"] = "tool_register" + tool: LLMToolMetadata + source: str + + +# ==== DATA Events ==== + + +class ASRResultEvent(AgentEventBase): + """Event triggered when ASR result is received (partial or final).""" + + type: Literal["data"] = "data" + name: Literal["asr_result"] = "asr_result" + text: str + final: bool + metadata: Dict[str, Any] + + +class LLMResponseEvent(AgentEventBase): + """Event triggered when LLM returns a streaming response.""" + + type: Literal["message", "reasoning"] = "message" + name: Literal["llm_response"] = "llm_response" + delta: str + text: str + is_final: bool + + +# ==== Unified Event Union ==== + +AgentEvent = Union[ + UserJoinedEvent, + UserLeftEvent, + ToolRegisterEvent, + ASRResultEvent, + LLMResponseEvent, +] diff --git a/ai_agents/agents/examples/voice-image-kids/tenapp/ten_packages/extension/main_python/agent/llm_exec.py b/ai_agents/agents/examples/voice-image-kids/tenapp/ten_packages/extension/main_python/agent/llm_exec.py new file mode 100644 index 0000000000..5e57598d1c --- /dev/null +++ b/ai_agents/agents/examples/voice-image-kids/tenapp/ten_packages/extension/main_python/agent/llm_exec.py @@ -0,0 +1,278 @@ +# +# This file is part of TEN Framework, an open source project. +# Licensed under the Apache License, Version 2.0. +# See the LICENSE file for more information. +# +import asyncio +import json +import traceback +from typing import Awaitable, Callable, Literal, Optional +from ten_ai_base.const import CMD_PROPERTY_RESULT +from ten_ai_base.helper import AsyncQueue +from ten_ai_base.struct import ( + LLMMessage, + LLMMessageContent, + LLMMessageFunctionCall, + LLMMessageFunctionCallOutput, + LLMRequest, + LLMResponse, + LLMResponseMessageDelta, + LLMResponseMessageDone, + LLMResponseReasoningDelta, + LLMResponseReasoningDone, + LLMResponseToolCall, + parse_llm_response, +) +from ten_ai_base.types import LLMToolMetadata, LLMToolResult +from ..helper import _send_cmd, _send_cmd_ex +from ten_runtime import AsyncTenEnv, Loc, StatusCode +import uuid + + +class LLMExec: + """ + Context for LLM operations, including ASR and TTS. + This class handles the interaction with the LLM, including processing commands and data. 
+ """ + + def __init__(self, ten_env: AsyncTenEnv): + self.ten_env = ten_env + self.input_queue = AsyncQueue() + self.stopped = False + self.on_response: Optional[ + Callable[[AsyncTenEnv, str, str, bool], Awaitable[None]] + ] = None + self.on_reasoning_response: Optional[ + Callable[[AsyncTenEnv, str, str, bool], Awaitable[None]] + ] = None + self.on_tool_call: Optional[ + Callable[[AsyncTenEnv, LLMToolMetadata], Awaitable[None]] + ] = None + self.current_task: Optional[asyncio.Task] = None + self.loop = asyncio.get_event_loop() + self.loop.create_task(self._process_input_queue()) + self.available_tools: list[LLMToolMetadata] = [] + self.tool_registry: dict[str, str] = {} + self.available_tools_lock = ( + asyncio.Lock() + ) # Lock to ensure thread-safe access + self.contexts: list[LLMMessage] = [] + self.current_request_id: Optional[str] = None + self.current_text = None + + async def queue_input(self, item: str) -> None: + await self.input_queue.put(item) + + async def flush(self) -> None: + """ + Flush the input queue to ensure all items are processed. + This is useful for ensuring that all pending inputs are handled before stopping. + """ + await self.input_queue.flush() + if self.current_request_id: + request_id = self.current_request_id + self.current_request_id = None + await _send_cmd( + self.ten_env, "abort", "llm", {"request_id": request_id} + ) + if self.current_task: + self.current_task.cancel() + + async def stop(self) -> None: + """ + Stop the LLMExec processing. + This will stop the input queue processing and any ongoing tasks. + """ + self.stopped = True + await self.flush() + if self.current_task: + self.current_task.cancel() + + async def register_tool(self, tool: LLMToolMetadata, source: str) -> None: + """ + Register tools with the LLM. + This method sends a command to register the provided tools. + """ + async with self.available_tools_lock: + self.available_tools.append(tool) + self.tool_registry[tool.name] = source + + async def _process_input_queue(self): + """ + Process the input queue for commands and data. + This method runs in a loop, processing items from the queue. + """ + while not self.stopped: + try: + text = await self.input_queue.get() + new_message = LLMMessageContent(role="user", content=text) + self.current_task = self.loop.create_task( + self._send_to_llm(self.ten_env, new_message) + ) + await self.current_task + except asyncio.CancelledError: + self.ten_env.log_info("LLMExec processing cancelled.") + text = self.current_text + self.current_text = None + if self.on_response and text: + await self.on_response(self.ten_env, "", text, True) + except Exception as e: + self.ten_env.log_error( + f"Error processing input queue: {traceback.format_exc()}" + ) + finally: + self.current_task = None + + async def _queue_context( + self, ten_env: AsyncTenEnv, new_message: LLMMessage + ) -> None: + """ + Queue a new message to the LLM context. + This method appends the new message to the existing context and sends it to the LLM. 
+ """ + ten_env.log_info(f"_queue_context: {new_message}") + self.contexts.append(new_message) + + async def _write_context( + self, + ten_env: AsyncTenEnv, + role: Literal["user", "assistant"], + content: str, + ) -> None: + last_context = self.contexts[-1] if self.contexts else None + if last_context and last_context.role == role: + # If the last context has the same role, append to its content + last_context.content = content + else: + # Otherwise, create a new context message + new_message = LLMMessageContent(role=role, content=content) + await self._queue_context(ten_env, new_message) + + async def _send_to_llm( + self, ten_env: AsyncTenEnv, new_message: LLMMessage + ) -> None: + messages = self.contexts.copy() + messages.append(new_message) + request_id = str(uuid.uuid4()) + self.current_request_id = request_id + llm_input = LLMRequest( + request_id=request_id, + messages=messages, + streaming=True, + parameters={"temperature": 0.7}, + tools=self.available_tools, + ) + input_json = llm_input.model_dump() + response = _send_cmd_ex(ten_env, "chat_completion", "llm", input_json) + + # Queue the new message to the context + await self._queue_context(ten_env, new_message) + + async for cmd_result, _ in response: + if cmd_result and cmd_result.is_final() is False: + if cmd_result.get_status_code() == StatusCode.OK: + response_json, _ = cmd_result.get_property_to_json(None) + ten_env.log_info( + f"_send_to_llm: response_json {response_json}" + ) + completion = parse_llm_response(response_json) + await self._handle_llm_response(completion) + + async def _handle_llm_response(self, llm_output: LLMResponse | None): + self.ten_env.log_info(f"_handle_llm_response: {llm_output}") + + match llm_output: + case LLMResponseMessageDelta(): + delta = llm_output.delta + text = llm_output.content + self.current_text = text + if delta and self.on_response: + await self.on_response(self.ten_env, delta, text, False) + if text: + await self._write_context(self.ten_env, "assistant", text) + case LLMResponseMessageDone(): + text = llm_output.content + self.current_text = None + if self.on_response and text: + await self.on_response(self.ten_env, "", text, True) + case LLMResponseReasoningDelta(): + delta = llm_output.delta + text = llm_output.content + if delta and self.on_reasoning_response: + await self.on_reasoning_response( + self.ten_env, delta, text, False + ) + case LLMResponseReasoningDone(): + text = llm_output.content + if self.on_reasoning_response and text: + await self.on_reasoning_response( + self.ten_env, "", text, True + ) + case LLMResponseToolCall(): + self.ten_env.log_info( + f"_handle_llm_response: invoking tool call {llm_output.name}" + ) + src_extension_name = self.tool_registry.get(llm_output.name) + result, _ = await _send_cmd( + self.ten_env, + "tool_call", + src_extension_name, + { + "name": llm_output.name, + "arguments": llm_output.arguments, + }, + ) + + if result.get_status_code() == StatusCode.OK: + r, _ = result.get_property_to_json(CMD_PROPERTY_RESULT) + tool_result: LLMToolResult = json.loads(r) + + self.ten_env.log_info(f"tool_result: {tool_result}") + + context_function_call = LLMMessageFunctionCall( + name=llm_output.name, + arguments=json.dumps(llm_output.arguments), + call_id=llm_output.tool_call_id, + id=llm_output.response_id, + type="function_call", + ) + if tool_result["type"] == "llmresult": + result_content = tool_result["content"] + if isinstance(result_content, str): + await self._queue_context( + self.ten_env, context_function_call + ) + await self._send_to_llm( + 
self.ten_env, + LLMMessageFunctionCallOutput( + output=result_content, + call_id=llm_output.tool_call_id, + type="function_call_output", + ), + ) + else: + self.ten_env.log_error( + f"Unknown tool result content: {result_content}" + ) + elif tool_result["type"] == "requery": + pass + # self.memory_cache = [] + # self.memory_cache.pop() + # result_content = tool_result["content"] + # nonlocal message + # new_message = { + # "role": "user", + # "content": self._convert_to_content_parts( + # message["content"] + # ), + # } + # new_message["content"] = new_message[ + # "content" + # ] + self._convert_to_content_parts( + # result_content + # ) + # await self.queue_input_item( + # True, messages=[new_message], no_tool=True + # ) + else: + self.ten_env.log_error("Tool call failed") diff --git a/ai_agents/agents/examples/voice-image-kids/tenapp/ten_packages/extension/main_python/config.py b/ai_agents/agents/examples/voice-image-kids/tenapp/ten_packages/extension/main_python/config.py new file mode 100644 index 0000000000..89e3486331 --- /dev/null +++ b/ai_agents/agents/examples/voice-image-kids/tenapp/ten_packages/extension/main_python/config.py @@ -0,0 +1,14 @@ +# +# This file is part of TEN Framework, an open source project. +# Licensed under the Apache License, Version 2.0. +# See the LICENSE file for more information. +# +from pydantic import BaseModel, Field + + +class MainControlConfig(BaseModel): + """Main control configuration for voice-image-kids app""" + greeting: str = Field( + default="Hi! I'm your AI art friend! Tell me what you'd like to draw!", + description="Greeting message when user joins" + ) diff --git a/ai_agents/agents/examples/voice-image-kids/tenapp/ten_packages/extension/main_python/extension.py b/ai_agents/agents/examples/voice-image-kids/tenapp/ten_packages/extension/main_python/extension.py new file mode 100644 index 0000000000..a94e54134f --- /dev/null +++ b/ai_agents/agents/examples/voice-image-kids/tenapp/ten_packages/extension/main_python/extension.py @@ -0,0 +1,168 @@ +# +# This file is part of TEN Framework, an open source project. +# Licensed under the Apache License, Version 2.0. +# See the LICENSE file for more information. +# +from typing import Literal + +from .agent.decorators import agent_event_handler +from ten_runtime import ( + AsyncExtension, + AsyncTenEnv, + Cmd, + Data, +) + +from .agent.agent import Agent +from .agent.events import ( + ASRResultEvent, + LLMResponseEvent, + ToolRegisterEvent, + UserJoinedEvent, + UserLeftEvent, +) +from .helper import _send_data +from .config import MainControlConfig + + +class MainControlExtension(AsyncExtension): + """ + Main control extension for voice-image-kids app. + Simplified version without TTS - images are the primary output. 
+ """ + + def __init__(self, name: str): + super().__init__(name) + self.ten_env: AsyncTenEnv = None + self.agent: Agent = None + self.config: MainControlConfig = None + + self.stopped: bool = False + self._rtc_user_count: int = 0 + self.turn_id: int = 0 + self.session_id: str = "0" + + def _current_metadata(self) -> dict: + return {"session_id": self.session_id, "turn_id": self.turn_id} + + async def on_init(self, ten_env: AsyncTenEnv): + self.ten_env = ten_env + + # Load config from runtime properties + config_json, _ = await ten_env.get_property_to_json(None) + self.config = MainControlConfig.model_validate_json(config_json) + + self.agent = Agent(ten_env) + + # Auto-register decorated methods + for attr_name in dir(self): + fn = getattr(self, attr_name) + event_type = getattr(fn, "_agent_event_type", None) + if event_type: + self.agent.on(event_type, fn) + + # === Event Handlers === + @agent_event_handler(UserJoinedEvent) + async def _on_user_joined(self, event: UserJoinedEvent): + """Handle user joining the session""" + self._rtc_user_count += 1 + if self._rtc_user_count == 1 and self.config and self.config.greeting: + # Send greeting message to frontend + await self._send_transcript( + "assistant", self.config.greeting, True, 100 + ) + + @agent_event_handler(UserLeftEvent) + async def _on_user_left(self, event: UserLeftEvent): + """Handle user leaving the session""" + self._rtc_user_count -= 1 + + @agent_event_handler(ToolRegisterEvent) + async def _on_tool_register(self, event: ToolRegisterEvent): + """Register LLM tools (e.g., image generation)""" + await self.agent.register_llm_tool(event.tool, event.source) + + @agent_event_handler(ASRResultEvent) + async def _on_asr_result(self, event: ASRResultEvent): + """Handle speech recognition results""" + self.session_id = event.metadata.get("session_id", "100") + stream_id = int(self.session_id) + if not event.text: + return + if event.final: + self.turn_id += 1 + # Send user's speech to LLM for processing + await self.agent.queue_llm_input(event.text) + # Show transcript to user + await self._send_transcript("user", event.text, event.final, stream_id) + + @agent_event_handler(LLMResponseEvent) + async def _on_llm_response(self, event: LLMResponseEvent): + """Handle LLM responses (including tool calls for image generation)""" + # Show LLM response to user + await self._send_transcript( + "assistant", + event.text, + event.is_final, + 100, + data_type=("reasoning" if event.type == "reasoning" else "text"), + ) + + # === Lifecycle Hooks === + async def on_start(self, ten_env: AsyncTenEnv): + ten_env.log_info("[MainControlExtension] on_start") + + async def on_stop(self, ten_env: AsyncTenEnv): + ten_env.log_info("[MainControlExtension] on_stop") + self.stopped = True + await self.agent.stop() + + async def on_cmd(self, ten_env: AsyncTenEnv, cmd: Cmd): + await self.agent.on_cmd(cmd) + + async def on_data(self, ten_env: AsyncTenEnv, data: Data): + await self.agent.on_data(data) + + # === Helper Methods === + async def _send_transcript( + self, + role: str, + text: str, + final: bool, + stream_id: int, + data_type: Literal["text", "reasoning"] = "text", + ): + """Send transcript to message collector for chat UI""" + if data_type == "text": + await _send_data( + self.ten_env, + "message", + "message_collector", + { + "data_type": "transcribe", + "text_data": { + "text": text, + "is_final": final, + "stream_id": stream_id, + "end_of_segment": final, + "role": role, + }, + **self._current_metadata(), + }, + ) + elif data_type == "reasoning": + 
await _send_data(
+                self.ten_env,
+                "message",
+                "message_collector",
+                {
+                    "data_type": "reasoning",
+                    "text_data": {
+                        "text": text,
+                        "is_final": final,
+                        "stream_id": stream_id,
+                        "end_of_segment": final,
+                    },
+                    **self._current_metadata(),
+                },
+            )
diff --git a/ai_agents/agents/examples/voice-image-kids/tenapp/ten_packages/extension/main_python/helper.py b/ai_agents/agents/examples/voice-image-kids/tenapp/ten_packages/extension/main_python/helper.py
new file mode 100644
index 0000000000..29ec69eac4
--- /dev/null
+++ b/ai_agents/agents/examples/voice-image-kids/tenapp/ten_packages/extension/main_python/helper.py
@@ -0,0 +1,88 @@
+#
+# This file is part of TEN Framework, an open source project.
+# Licensed under the Apache License, Version 2.0.
+# See the LICENSE file for more information.
+#
+
+import json
+from typing import Any, AsyncGenerator, Optional
+from ten_runtime import AsyncTenEnv, Cmd, CmdResult, Data, Loc, TenError
+
+
+def is_punctuation(char):
+    if char in [",", "，", ".", "。", "?", "？", "!", "！"]:
+        return True
+    return False
+
+
+def parse_sentences(sentence_fragment, content):
+    sentences = []
+    current_sentence = sentence_fragment
+    for char in content:
+        current_sentence += char
+        if is_punctuation(char):
+            # Check if the current sentence contains non-punctuation characters
+            stripped_sentence = current_sentence
+            if any(c.isalnum() for c in stripped_sentence):
+                sentences.append(stripped_sentence)
+            current_sentence = ""  # Reset for the next sentence
+
+    remain = current_sentence  # Any remaining characters form the incomplete sentence
+    return sentences, remain
+
+
+async def _send_cmd(
+    ten_env: AsyncTenEnv, cmd_name: str, dest: str, payload: Any = None
+) -> tuple[Optional[CmdResult], Optional[TenError]]:
+    """
+    Convenience method for sending a command with a payload within the app/graph without creating an explicit connection.
+    Note: an extension using this approach embeds logic that is meaningful only within this graph,
+    since it assumes the target extension already exists in the graph.
+    General-purpose extensions should avoid this method.
+    """
+    cmd = Cmd.create(cmd_name)
+    loc = Loc("", "", dest)
+    cmd.set_dests([loc])
+    if payload is not None:
+        cmd.set_property_from_json(None, json.dumps(payload))
+    ten_env.log_debug(f"send_cmd: cmd_name {cmd_name}, dest {dest}")
+
+    return await ten_env.send_cmd(cmd)
+
+
+async def _send_cmd_ex(
+    ten_env: AsyncTenEnv, cmd_name: str, dest: str, payload: Any = None
+) -> AsyncGenerator[tuple[Optional[CmdResult], Optional[TenError]], None]:
+    """Convenience method for sending a command with a payload within the app/graph without creating an explicit connection.
+    Note: an extension using this approach embeds logic that is meaningful only within this graph,
+    since it assumes the target extension already exists in the graph.
+    General-purpose extensions should avoid this method.
+    """
+    cmd = Cmd.create(cmd_name)
+    loc = Loc("", "", dest)
+    cmd.set_dests([loc])
+    if payload is not None:
+        cmd.set_property_from_json(None, json.dumps(payload))
+    ten_env.log_debug(f"send_cmd_ex: cmd_name {cmd_name}, dest {dest}")
+
+    async for cmd_result, ten_error in ten_env.send_cmd_ex(cmd):
+        if cmd_result:
+            ten_env.log_debug(f"send_cmd_ex: cmd_result {cmd_result}")
+        yield cmd_result, ten_error
+
+
+async def _send_data(
+    ten_env: AsyncTenEnv, data_name: str, dest: str, payload: Any = None
+) -> Optional[TenError]:
+    """Convenience method for sending data with a payload within the app/graph without creating an explicit connection.
+    Note: an extension using this approach embeds logic that is meaningful only within this graph,
+    since it assumes the target extension already exists in the graph.
+    General-purpose extensions should avoid this method.
+    """
+    data = Data.create(data_name)
+    loc = Loc("", "", dest)
+    data.set_dests([loc])
+    if payload is not None:
+        data.set_property_from_json(None, json.dumps(payload))
+    ten_env.log_debug(f"send_data: data_name {data_name}, dest {dest}")
+    return await ten_env.send_data(data)
diff --git a/ai_agents/agents/examples/voice-image-kids/tenapp/ten_packages/extension/main_python/manifest.json b/ai_agents/agents/examples/voice-image-kids/tenapp/ten_packages/extension/main_python/manifest.json
new file mode 100644
index 0000000000..4cf35fbc6a
--- /dev/null
+++ b/ai_agents/agents/examples/voice-image-kids/tenapp/ten_packages/extension/main_python/manifest.json
@@ -0,0 +1,34 @@
+{
+  "type": "extension",
+  "name": "main_python",
+  "version": "0.1.0",
+  "dependencies": [
+    {
+      "type": "system",
+      "name": "ten_runtime_python",
+      "version": "0.11"
+    },
+    {
+      "type": "system",
+      "name": "ten_ai_base",
+      "version": "0.7"
+    }
+  ],
+  "package": {
+    "include": [
+      "manifest.json",
+      "property.json",
+      "**.py",
+      "README.md"
+    ]
+  },
+  "api": {
+    "property": {
+      "properties": {
+        "greeting": {
+          "type": "string"
+        }
+      }
+    }
+  }
+}
diff --git a/ai_agents/agents/examples/voice-image-kids/tenapp/ten_packages/extension/main_python/property.json b/ai_agents/agents/examples/voice-image-kids/tenapp/ten_packages/extension/main_python/property.json
new file mode 100644
index 0000000000..adeda64500
--- /dev/null
+++ b/ai_agents/agents/examples/voice-image-kids/tenapp/ten_packages/extension/main_python/property.json
@@ -0,0 +1,3 @@
+{
+  "greeting": "Hi! I'm your AI art friend! Tell me what you'd like to draw!"
+}
diff --git a/ai_agents/agents/examples/voice-image-kids/tenapp/ten_packages/extension/main_python/requirements.txt b/ai_agents/agents/examples/voice-image-kids/tenapp/ten_packages/extension/main_python/requirements.txt
new file mode 100644
index 0000000000..65b1b0c122
--- /dev/null
+++ b/ai_agents/agents/examples/voice-image-kids/tenapp/ten_packages/extension/main_python/requirements.txt
@@ -0,0 +1 @@
+pydantic>=2.0.0
diff --git a/ai_agents/agents/ten_packages/extension/openai_gpt_image_python/README.md b/ai_agents/agents/ten_packages/extension/openai_gpt_image_python/README.md
new file mode 100644
index 0000000000..32228aa8e3
--- /dev/null
+++ b/ai_agents/agents/ten_packages/extension/openai_gpt_image_python/README.md
@@ -0,0 +1,221 @@
+# OpenAI GPT Image 1.5 Extension
+
+A TEN Framework extension for generating images using OpenAI's GPT Image 1.5 model with automatic fallback to DALL-E 3.
+
+## Features
+
+- **GPT Image 1.5 Support**: Uses OpenAI's latest and fastest image generation model
+- **Automatic Fallback**: Falls back to DALL-E 3 if GPT Image 1.5 is unavailable
+- **LLM Tool Integration**: Works as a callable tool within conversational flows
+- **Kid-Friendly Error Messages**: Provides gentle, appropriate error responses
+- **Azure OpenAI Support**: Compatible with Azure OpenAI endpoints
+- **Quality Control**: Supports both standard and HD quality generation
+- **Flexible Configuration**: Environment variable-based configuration
+
+## Installation
+
+This extension is part of the TEN Framework.
It will be automatically installed when you run: + +```bash +cd your_agent/tenapp +tman install +``` + +## Configuration + +### Environment Variables + +```bash +# Required +OPENAI_API_KEY=sk-your_openai_key_here + +# Optional +OPENAI_IMAGE_BASE_URL=https://api.openai.com/v1 # Custom endpoint +AZURE_OPENAI_IMAGE_ENDPOINT=https://your.openai.azure.com # For Azure +AZURE_OPENAI_IMAGE_API_VERSION=2024-02-01 # For Azure +``` + +### Property Configuration + +Add to your agent's `property.json`: + +```json +{ + "type": "extension", + "name": "image_gen_tool", + "addon": "openai_gpt_image_python", + "property": { + "params": { + "api_key": "${env:OPENAI_API_KEY}", + "model": "gpt-image-1.5", + "size": "1024x1024", + "quality": "standard", + "fallback_model": "dall-e-3" + } + } +} +``` + +### Configuration Parameters + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `params.api_key` | string | - | OpenAI API key (required) | +| `params.model` | string | `gpt-image-1.5` | Image model to use | +| `params.size` | string | `1024x1024` | Image dimensions (1024x1024, 1792x1024, 1024x1792) | +| `params.quality` | string | `standard` | Image quality (standard, hd) | +| `params.fallback_model` | string | `dall-e-3` | Fallback model if primary unavailable | +| `params.vendor` | string | `openai` | API vendor (openai, azure) | +| `params.base_url` | string | - | Custom API base URL (optional) | +| `dump` | boolean | `false` | Enable response logging for debugging | +| `dump_path` | string | `./openai_image_responses.json` | Path for debug logs | + +## Usage + +### As an LLM Tool + +The extension registers as a tool that LLMs can call during conversations: + +```python +# The LLM will automatically call this when users request images +# Example user input: "Create a purple dragon!" +# The LLM calls: generate_image(prompt="A majestic purple dragon...") +``` + +### Tool Metadata + +- **Tool Name**: `generate_image` +- **Parameters**: + - `prompt` (required): Detailed image description + - `quality` (optional): Override quality setting (`standard` or `hd`) + +### Graph Integration + +Connect the extension in your agent graph: + +```json +{ + "connections": [ + { + "extension": "llm", + "cmd": [ + { + "name": "tool_register", + "dest": [{"extension": "image_gen_tool"}] + } + ] + }, + { + "extension": "image_gen_tool", + "data": [ + { + "name": "content_data", + "dest": [{"extension": "main_control"}] + } + ] + } + ] +} +``` + +## Output Format + +Generated images are sent as `content_data` messages with the following JSON structure: + +```json +{ + "data": { + "image_url": "https://oaidalleapiprodscus.blob.core.windows.net/..." + }, + "type": "image_url" +} +``` + +## Error Handling + +The extension provides user-friendly error messages: + +- **Content Policy Violation**: "I can't create that image. Let's try something different!" +- **Invalid API Key**: "API key is invalid. Please check your configuration." +- **Model Not Found**: Automatically falls back to DALL-E 3 +- **Rate Limit**: "Rate limit exceeded. Please try again later." +- **Generic Error**: "Something went wrong. Please try again." 
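+
+For illustration, here is a minimal consumer-side sketch (the helper name is hypothetical) of how a frontend might extract the URL from the payload documented in Output Format above:
+
+```python
+import json
+
+
+def extract_image_url(text_payload: str) -> str | None:
+    """Return the image URL from a content_data payload, or None if it is not an image message."""
+    msg = json.loads(text_payload)
+    if msg.get("type") == "image_url":
+        return msg.get("data", {}).get("image_url")
+    return None
+```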
+ +## Model Support + +### Supported Models + +- `gpt-image-1.5` (Primary) - Latest, fastest, 4x speed improvement +- `dall-e-3` (Fallback) - Previous generation, proven reliability +- `dall-e-2` (Legacy) - Older model, basic functionality + +### Image Sizes + +- `1024x1024` - Square (default) +- `1792x1024` - Landscape +- `1024x1792` - Portrait + +### Quality Modes + +- `standard` - Fast generation, good quality (default) +- `hd` - High detail, slower generation, higher cost + +## Development + +### Running Tests + +```bash +cd ten_packages/extension/openai_gpt_image_python +python -m pytest tests/ +``` + +### Debug Mode + +Enable debug logging by setting `dump: true` in your configuration: + +```json +{ + "dump": true, + "dump_path": "./debug_images.json" +} +``` + +This will save all requests and responses to the specified file. + +## Troubleshooting + +### Image Generation Fails + +1. **Check API Key**: Ensure `OPENAI_API_KEY` is set correctly +2. **Verify Model Access**: GPT Image 1.5 requires API access +3. **Check Prompt**: Ensure prompt doesn't violate content policies +4. **Review Logs**: Check TEN runtime logs for detailed error messages + +### Azure OpenAI Setup + +For Azure OpenAI, configure: + +```json +{ + "params": { + "api_key": "${env:AZURE_OPENAI_KEY}", + "vendor": "azure", + "azure_endpoint": "${env:AZURE_OPENAI_IMAGE_ENDPOINT}", + "azure_api_version": "2024-02-01" + } +} +``` + +## Examples + +See the [voice-image-kids](../../../examples/voice-image-kids/) example for a complete implementation. + +## License + +This extension is part of the TEN Framework, licensed under the Apache License, Version 2.0. + +## Learn More + +- [OpenAI Images API Documentation](https://platform.openai.com/docs/guides/image-generation) +- [TEN Framework Documentation](https://doc.theten.ai) +- [GPT Image 1.5 Announcement](https://openai.com/index/new-chatgpt-images-is-here/) diff --git a/ai_agents/agents/ten_packages/extension/openai_gpt_image_python/__init__.py b/ai_agents/agents/ten_packages/extension/openai_gpt_image_python/__init__.py new file mode 100644 index 0000000000..72593ab225 --- /dev/null +++ b/ai_agents/agents/ten_packages/extension/openai_gpt_image_python/__init__.py @@ -0,0 +1,6 @@ +# +# This file is part of TEN Framework, an open source project. +# Licensed under the Apache License, Version 2.0. +# See the LICENSE file for more information. +# +from . import addon diff --git a/ai_agents/agents/ten_packages/extension/openai_gpt_image_python/addon.py b/ai_agents/agents/ten_packages/extension/openai_gpt_image_python/addon.py new file mode 100644 index 0000000000..84675025cf --- /dev/null +++ b/ai_agents/agents/ten_packages/extension/openai_gpt_image_python/addon.py @@ -0,0 +1,20 @@ +# +# This file is part of TEN Framework, an open source project. +# Licensed under the Apache License, Version 2.0. +# See the LICENSE file for more information. 
+# +from ten_runtime import ( + Addon, + register_addon_as_extension, + TenEnv, +) +from .extension import OpenAIGPTImageExtension + + +@register_addon_as_extension("openai_gpt_image_python") +class OpenAIGPTImageExtensionAddon(Addon): + def on_create_instance(self, ten_env: TenEnv, name: str, context) -> None: + ten_env.log_info("OpenAIGPTImageExtension: on_create_instance") + ten_env.on_create_instance_done( + OpenAIGPTImageExtension(name), context + ) diff --git a/ai_agents/agents/ten_packages/extension/openai_gpt_image_python/config.py b/ai_agents/agents/ten_packages/extension/openai_gpt_image_python/config.py new file mode 100644 index 0000000000..d3a4c69eba --- /dev/null +++ b/ai_agents/agents/ten_packages/extension/openai_gpt_image_python/config.py @@ -0,0 +1,80 @@ +# +# This file is part of TEN Framework, an open source project. +# Licensed under the Apache License, Version 2.0. +# See the LICENSE file for more information. +# +from typing import Any +import copy +from pydantic import BaseModel, Field +from ten_ai_base import utils + + +class OpenAIGPTImageConfig(BaseModel): + """OpenAI GPT Image 1.5 Configuration""" + + params: dict[str, Any] = Field( + default_factory=dict, + description="OpenAI Images API parameters" + ) + + dump: bool = Field( + default=False, + description="Enable response dumping for debugging" + ) + dump_path: str = Field( + default="./openai_image_responses.json", + description="Path to dump responses" + ) + + def validate(self) -> None: + """Validate required configuration""" + if "api_key" not in self.params or not self.params["api_key"]: + raise ValueError("API key is required (params.api_key)") + + if "model" not in self.params: + raise ValueError("Model is required (params.model)") + + # Validate model + valid_models = ["gpt-image-1.5", "dall-e-3", "dall-e-2"] + if self.params["model"] not in valid_models: + raise ValueError(f"Invalid model. Must be one of: {valid_models}") + + # Validate size if present + if "size" in self.params: + valid_sizes = ["1024x1024", "1792x1024", "1024x1792"] + if self.params["size"] not in valid_sizes: + raise ValueError(f"Invalid size. Must be one of: {valid_sizes}") + + # Validate quality if present + if "quality" in self.params: + valid_quality = ["standard", "hd"] + if self.params["quality"] not in valid_quality: + raise ValueError(f"Invalid quality. 
Must be one of: {valid_quality}") + + def update_params(self) -> None: + """Update/normalize parameters""" + # Set defaults for optional params + self.params.setdefault("size", "1024x1024") + self.params.setdefault("quality", "standard") + self.params.setdefault("n", 1) + self.params.setdefault("response_format", "url") + + # Remove vendor param if exists (internal only) + self.params.pop("vendor", None) + + # Ensure n=1 for simplicity + if self.params["n"] != 1: + self.params["n"] = 1 + + def to_str(self, sensitive_handling: bool = True) -> str: + """Convert config to string with optional sensitive data handling""" + if not sensitive_handling: + return f"{self}" + + config = copy.deepcopy(self) + + # Encrypt sensitive fields + if config.params and "api_key" in config.params: + config.params["api_key"] = utils.encrypt(config.params["api_key"]) + + return f"{config}" diff --git a/ai_agents/agents/ten_packages/extension/openai_gpt_image_python/extension.py b/ai_agents/agents/ten_packages/extension/openai_gpt_image_python/extension.py new file mode 100644 index 0000000000..0f0b4ec66f --- /dev/null +++ b/ai_agents/agents/ten_packages/extension/openai_gpt_image_python/extension.py @@ -0,0 +1,285 @@ +# +# This file is part of TEN Framework, an open source project. +# Licensed under the Apache License, Version 2.0. +# See the LICENSE file for more information. +# +import json +from ten_runtime import ( + Data, + TenEnv, + AsyncTenEnv, +) +from ten_ai_base.const import ( + DATA_OUT_PROPERTY_END_OF_SEGMENT, + DATA_OUT_PROPERTY_TEXT, + CONTENT_DATA_OUT_NAME, + LOG_CATEGORY_KEY_POINT, + LOG_CATEGORY_VENDOR, +) +from ten_ai_base.types import LLMToolMetadataParameter, LLMToolResultLLMResult +from ten_ai_base.llm_tool import ( + AsyncLLMToolBaseExtension, + LLMToolMetadata, + LLMToolResult, +) +from .config import OpenAIGPTImageConfig +from .openai_image_client import ( + OpenAIImageClient, + ContentPolicyError, + InvalidAPIKeyError, + ModelNotFoundError, +) + + +class OpenAIGPTImageExtension(AsyncLLMToolBaseExtension): + """ + OpenAI GPT Image 1.5 Extension + + Provides AI image generation using OpenAI's GPT Image 1.5 model + with fallback to DALL-E 3. Integrates as an LLM tool for + conversational image creation. 
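+    Exposed to the LLM as the "generate_image" tool; generated image URLs
+    are pushed to the frontend as content_data messages.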
+ """ + + def __init__(self, name: str): + super().__init__(name) + self.config: OpenAIGPTImageConfig = None + self.client: OpenAIImageClient = None + + async def on_start(self, ten_env: AsyncTenEnv) -> None: + """Initialize extension with configuration and client""" + await super().on_start(ten_env) + + # Load configuration from property.json + ten_env.log_info("Loading OpenAI GPT Image configuration...") + config_json_str, _ = await ten_env.get_property_to_json("") + self.config = OpenAIGPTImageConfig.model_validate_json(config_json_str) + + # Log config (with sensitive data encrypted) + ten_env.log_info( + f"Configuration loaded: {self.config.to_str()}", + category=LOG_CATEGORY_KEY_POINT + ) + + # Validate configuration + try: + self.config.validate() + self.config.update_params() + except ValueError as e: + ten_env.log_error(f"Configuration validation failed: {e}") + raise + + # Initialize OpenAI client + self.client = OpenAIImageClient(self.config, ten_env) + ten_env.log_info( + "OpenAI GPT Image client initialized successfully", + category=LOG_CATEGORY_KEY_POINT + ) + + async def on_stop(self, ten_env: AsyncTenEnv) -> None: + """Cleanup resources""" + await super().on_stop(ten_env) + + if self.client: + await self.client.cleanup() + ten_env.log_info("OpenAI client cleaned up") + + def get_tool_metadata(self, ten_env: TenEnv) -> list[LLMToolMetadata]: + """Register image generation tool with LLM""" + return [ + LLMToolMetadata( + name="generate_image", + description=( + "Generate an image from a text description using AI. " + "Creates high-quality, creative images based on detailed prompts. " + "Use this when the user asks to create, draw, make, or generate an image." + ), + parameters=[ + LLMToolMetadataParameter( + name="prompt", + type="string", + description=( + "Detailed description of the image to generate. " + "Include style, subject, mood, colors, and composition. " + "Be specific and descriptive for best results. " + "Use the same language as the user's request." 
+ ), + required=True, + ), + LLMToolMetadataParameter( + name="quality", + type="string", + description=( + "Image quality: 'standard' for faster generation, " + "'hd' for higher detail (optional, defaults to configured value)" + ), + required=False, + ), + ], + ) + ] + + async def send_image( + self, async_ten_env: AsyncTenEnv, image_url: str + ) -> None: + """Send generated image URL to frontend via content_data""" + async_ten_env.log_info(f"Sending image URL: {image_url}") + + try: + # Format as JSON matching TEN content_data schema + payload = json.dumps({ + "data": { + "image_url": image_url + }, + "type": "image_url" + }) + + # Create content_data message + output_data = Data.create(CONTENT_DATA_OUT_NAME) + output_data.set_property_string(DATA_OUT_PROPERTY_TEXT, payload) + output_data.set_property_bool(DATA_OUT_PROPERTY_END_OF_SEGMENT, True) + + # Send asynchronously + await async_ten_env.send_data(output_data) + + async_ten_env.log_info( + "Image URL sent successfully", + category=LOG_CATEGORY_KEY_POINT + ) + + except Exception as err: + async_ten_env.log_error( + f"Failed to send image URL: {err}", + category=LOG_CATEGORY_VENDOR + ) + + async def run_tool( + self, ten_env: AsyncTenEnv, name: str, args: dict + ) -> LLMToolResult | None: + """Execute image generation tool""" + ten_env.log_info(f"run_tool {name} with args: {args}") + + if name != "generate_image": + return None + + prompt = args.get("prompt") + if not prompt or not prompt.strip(): + return LLMToolResultLLMResult( + type="llmresult", + content=json.dumps({ + "success": False, + "error": "No prompt provided. Please describe what image you want to create." + }), + ) + + try: + # Override quality if specified + quality = args.get("quality", self.config.params.get("quality")) + + # Generate image + ten_env.log_info( + f"Generating image with prompt: {prompt[:100]}...", + category=LOG_CATEGORY_KEY_POINT + ) + image_url = await self.client.generate_image( + prompt=prompt, + quality=quality, + ) + + # Send image to frontend + await self.send_image(ten_env, image_url) + + # Return success to LLM + return LLMToolResultLLMResult( + type="llmresult", + content=json.dumps({ + "success": True, + "image_url": image_url, + "message": "Image generated successfully!" + }), + ) + + except ContentPolicyError as e: + error_msg = "I can't create that image. Let's try something different!" + ten_env.log_warn( + f"Content policy violation: {e}", + category=LOG_CATEGORY_VENDOR + ) + + return LLMToolResultLLMResult( + type="llmresult", + content=json.dumps({ + "success": False, + "error": error_msg + }), + ) + + except InvalidAPIKeyError as e: + error_msg = "API key is invalid. Please check your configuration." 
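+            # Log the details for operators; only the short message goes back to the LLM.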
+ ten_env.log_error( + f"Invalid API key: {e}", + category=LOG_CATEGORY_VENDOR + ) + + return LLMToolResultLLMResult( + type="llmresult", + content=json.dumps({ + "success": False, + "error": error_msg + }), + ) + + except ModelNotFoundError as e: + # Try fallback model + fallback_model = self.config.params.get("fallback_model") + if fallback_model and fallback_model != self.client.current_model: + ten_env.log_warn( + f"Model {self.client.current_model} not available, " + f"falling back to {fallback_model}", + category=LOG_CATEGORY_KEY_POINT + ) + try: + image_url = await self.client.generate_image( + prompt=prompt, + quality=quality, + model_override=fallback_model + ) + await self.send_image(ten_env, image_url) + return LLMToolResultLLMResult( + type="llmresult", + content=json.dumps({ + "success": True, + "image_url": image_url, + "message": f"Image generated with {fallback_model}" + }), + ) + except Exception as fallback_error: + error_msg = "Image generation is temporarily unavailable." + ten_env.log_error( + f"Fallback also failed: {fallback_error}", + category=LOG_CATEGORY_VENDOR + ) + else: + error_msg = "Image generation model is not available." + + return LLMToolResultLLMResult( + type="llmresult", + content=json.dumps({ + "success": False, + "error": error_msg + }), + ) + + except Exception as e: + error_msg = "Something went wrong. Please try again." + ten_env.log_error( + f"Image generation failed: {e}", + category=LOG_CATEGORY_VENDOR + ) + + return LLMToolResultLLMResult( + type="llmresult", + content=json.dumps({ + "success": False, + "error": error_msg + }), + ) diff --git a/ai_agents/agents/ten_packages/extension/openai_gpt_image_python/manifest.json b/ai_agents/agents/ten_packages/extension/openai_gpt_image_python/manifest.json new file mode 100644 index 0000000000..778f22441f --- /dev/null +++ b/ai_agents/agents/ten_packages/extension/openai_gpt_image_python/manifest.json @@ -0,0 +1,148 @@ +{ + "type": "extension", + "name": "openai_gpt_image_python", + "version": "0.1.0", + "dependencies": [ + { + "type": "system", + "name": "ten_runtime_python", + "version": "0.11" + }, + { + "type": "system", + "name": "ten_ai_base", + "version": "0.7" + } + ], + "package": { + "include": [ + "manifest.json", + "property.json", + "requirements.txt", + "**.py", + "README.md" + ] + }, + "api": { + "property": { + "properties": { + "params": { + "type": "object", + "properties": { + "api_key": { + "type": "string" + }, + "base_url": { + "type": "string" + }, + "model": { + "type": "string" + }, + "size": { + "type": "string" + }, + "quality": { + "type": "string" + }, + "n": { + "type": "int64" + }, + "response_format": { + "type": "string" + }, + "vendor": { + "type": "string" + }, + "azure_endpoint": { + "type": "string" + }, + "azure_api_version": { + "type": "string" + }, + "fallback_model": { + "type": "string" + } + } + }, + "dump": { + "type": "bool" + }, + "dump_path": { + "type": "string" + } + } + }, + "cmd_in": [ + { + "name": "tool_call", + "property": { + "properties": { + "name": { + "type": "string" + }, + "arguments": { + "type": "object", + "properties": {} + } + }, + "required": [ + "name" + ] + } + } + ], + "cmd_out": [ + { + "name": "tool_register", + "property": { + "properties": { + "tool": { + "type": "object", + "properties": { + "name": { + "type": "string" + }, + "description": { + "type": "string" + }, + "parameters": { + "type": "array", + "items": { + "type": "object", + "properties": {} + } + } + }, + "required": [ + "name", + "description", + "parameters" + 
] + } + } + }, + "result": { + "property": { + "properties": { + "response": { + "type": "string" + } + } + } + } + } + ], + "data_out": [ + { + "name": "content_data", + "property": { + "properties": { + "text": { + "type": "string" + } + } + } + } + ] + } +} diff --git a/ai_agents/agents/ten_packages/extension/openai_gpt_image_python/openai_image_client.py b/ai_agents/agents/ten_packages/extension/openai_gpt_image_python/openai_image_client.py new file mode 100644 index 0000000000..26d04eda0c --- /dev/null +++ b/ai_agents/agents/ten_packages/extension/openai_gpt_image_python/openai_image_client.py @@ -0,0 +1,191 @@ +# +# This file is part of TEN Framework, an open source project. +# Licensed under the Apache License, Version 2.0. +# See the LICENSE file for more information. +# +from typing import Optional +import json +from openai import AsyncOpenAI, AsyncAzureOpenAI +from ten_runtime import AsyncTenEnv +from ten_ai_base.const import LOG_CATEGORY_VENDOR +from .config import OpenAIGPTImageConfig + + +# Custom exceptions for better error handling +class ContentPolicyError(Exception): + """Raised when content violates OpenAI's usage policies""" + pass + + +class InvalidAPIKeyError(Exception): + """Raised when API key is invalid or unauthorized""" + pass + + +class ModelNotFoundError(Exception): + """Raised when requested model is not available""" + pass + + +class OpenAIImageClient: + """ + Client for OpenAI Images API (GPT Image 1.5 / DALL-E) + + Handles image generation requests with proper error handling + and fallback support. + """ + + def __init__(self, config: OpenAIGPTImageConfig, ten_env: AsyncTenEnv): + self.config = config + self.ten_env = ten_env + self.client: AsyncOpenAI | AsyncAzureOpenAI = None + self.current_model = config.params["model"] + + # Initialize appropriate client + vendor = config.params.get("vendor", "openai") + + if vendor == "azure": + # Azure OpenAI client + azure_endpoint = config.params.get("azure_endpoint") + azure_api_version = config.params.get("azure_api_version") + + if not azure_endpoint or not azure_api_version: + raise ValueError( + "Azure vendor requires azure_endpoint and azure_api_version" + ) + + self.client = AsyncAzureOpenAI( + api_key=config.params["api_key"], + api_version=azure_api_version, + azure_endpoint=azure_endpoint, + ) + ten_env.log_info( + f"Using Azure OpenAI: {azure_endpoint} (v{azure_api_version})" + ) + else: + # Standard OpenAI client + client_kwargs = { + "api_key": config.params["api_key"] + } + + # Optional custom base_url + base_url = config.params.get("base_url") + if base_url: + client_kwargs["base_url"] = base_url + ten_env.log_info(f"Using custom base_url: {base_url}") + + self.client = AsyncOpenAI(**client_kwargs) + ten_env.log_info("Using standard OpenAI API") + + async def generate_image( + self, + prompt: str, + quality: Optional[str] = None, + model_override: Optional[str] = None, + ) -> str: + """ + Generate image from text prompt + + Args: + prompt: Text description of desired image + quality: Optional quality override ('standard' or 'hd') + model_override: Optional model override (for fallback) + + Returns: + Image URL + + Raises: + ContentPolicyError: Content violates policies + InvalidAPIKeyError: API key is invalid + ModelNotFoundError: Model not available + Exception: Other API errors + """ + # Build request parameters + model = model_override or self.current_model + request_params = { + "model": model, + "prompt": prompt, + "size": self.config.params.get("size", "1024x1024"), + "quality": quality or 
self.config.params.get("quality", "standard"), + "n": 1, # Always generate 1 image + } + + # Add response_format if configured + response_format = self.config.params.get("response_format", "url") + if response_format: + request_params["response_format"] = response_format + + self.ten_env.log_info( + f"Requesting image generation: model={model}, " + f"size={request_params['size']}, quality={request_params['quality']}", + category=LOG_CATEGORY_VENDOR + ) + + try: + # Call OpenAI Images API + response = await self.client.images.generate(**request_params) + + # Extract image URL or base64 data + if response_format == "b64_json": + # Handle base64 response (future feature) + image_data = response.data[0].b64_json + # Convert to data URL + image_url = f"data:image/png;base64,{image_data}" + else: + # Standard URL response + image_url = response.data[0].url + + # Optional: Save response for debugging + if self.config.dump: + self._dump_response(prompt, image_url) + + self.ten_env.log_info( + f"Image generated successfully: {image_url[:100]}...", + category=LOG_CATEGORY_VENDOR + ) + + return image_url + + except Exception as e: + error_message = str(e) + self.ten_env.log_error( + f"Image generation error: {error_message}", + category=LOG_CATEGORY_VENDOR + ) + + # Classify error for appropriate handling + if "content_policy_violation" in error_message.lower(): + raise ContentPolicyError(error_message) + + elif "401" in error_message or "invalid_api_key" in error_message: + raise InvalidAPIKeyError(error_message) + + elif "404" in error_message or "model_not_found" in error_message: + raise ModelNotFoundError(error_message) + + elif "429" in error_message: + # Rate limit - re-raise as generic exception + raise Exception("Rate limit exceeded. Please try again later.") + + else: + # Generic error + raise + + def _dump_response(self, prompt: str, image_url: str) -> None: + """Dump response to file for debugging""" + try: + with open(self.config.dump_path, "a") as f: + json.dump({ + "prompt": prompt, + "image_url": image_url, + "model": self.current_model, + }, f) + f.write("\n") + except Exception as e: + self.ten_env.log_warn(f"Failed to dump response: {e}") + + async def cleanup(self) -> None: + """Cleanup resources""" + if self.client: + await self.client.close() + self.ten_env.log_info("OpenAI client closed") diff --git a/ai_agents/agents/ten_packages/extension/openai_gpt_image_python/property.json b/ai_agents/agents/ten_packages/extension/openai_gpt_image_python/property.json new file mode 100644 index 0000000000..e6e5eaf874 --- /dev/null +++ b/ai_agents/agents/ten_packages/extension/openai_gpt_image_python/property.json @@ -0,0 +1,17 @@ +{ + "params": { + "api_key": "${env:OPENAI_API_KEY|}", + "base_url": "${env:OPENAI_IMAGE_BASE_URL|}", + "model": "gpt-image-1.5", + "size": "1024x1024", + "quality": "standard", + "n": 1, + "response_format": "url", + "vendor": "openai", + "azure_endpoint": "${env:AZURE_OPENAI_IMAGE_ENDPOINT|}", + "azure_api_version": "${env:AZURE_OPENAI_IMAGE_API_VERSION|}", + "fallback_model": "dall-e-3" + }, + "dump": false, + "dump_path": "./openai_image_responses.json" +} diff --git a/ai_agents/agents/ten_packages/extension/openai_gpt_image_python/requirements.txt b/ai_agents/agents/ten_packages/extension/openai_gpt_image_python/requirements.txt new file mode 100644 index 0000000000..8403a26032 --- /dev/null +++ b/ai_agents/agents/ten_packages/extension/openai_gpt_image_python/requirements.txt @@ -0,0 +1,2 @@ +openai>=1.12.0 +pydantic>=2.0.0 diff --git 
a/ai_agents/playground/bun.lock b/ai_agents/playground/bun.lock index 3233db0a2f..2b61627fca 100644 --- a/ai_agents/playground/bun.lock +++ b/ai_agents/playground/bun.lock @@ -1,5 +1,6 @@ { "lockfileVersion": 1, + "configVersion": 0, "workspaces": { "": { "name": "ten_agent_playground", From 17097aca2004b3856d90d4777293591c05ca75cf Mon Sep 17 00:00:00 2001 From: Elliot Chen Date: Mon, 22 Dec 2025 11:31:56 +0800 Subject: [PATCH 2/7] feat(voice-image-kids): add TTS support and update agent configuration --- .../examples/voice-image-kids/README.md | 19 +++ .../tenapp/manifest-lock.json | 17 +++ .../voice-image-kids/tenapp/manifest.json | 3 + .../voice-image-kids/tenapp/property.json | 28 +++- .../extension/main_python/README.md | 142 ++++++++++++++++++ .../extension/main_python/__init__.py | 2 - .../extension/main_python/addon.py | 9 +- .../extension/main_python/agent/llm_exec.py | 1 + .../extension/main_python/config.py | 13 +- .../extension/main_python/extension.py | 123 ++++++++++----- .../extension/main_python/manifest.json | 6 +- .../extension/main_python/property.json | 4 +- .../extension/main_python/requirements.txt | 1 - .../openai_gpt_image_python/config.py | 2 +- .../tests/test_params.py | 19 +++ 15 files changed, 322 insertions(+), 67 deletions(-) create mode 100644 ai_agents/agents/examples/voice-image-kids/tenapp/ten_packages/extension/main_python/README.md delete mode 100644 ai_agents/agents/examples/voice-image-kids/tenapp/ten_packages/extension/main_python/requirements.txt create mode 100644 ai_agents/agents/ten_packages/extension/openai_gpt_image_python/tests/test_params.py diff --git a/ai_agents/agents/examples/voice-image-kids/README.md b/ai_agents/agents/examples/voice-image-kids/README.md index 37b5fa52d7..29900a9ac3 100644 --- a/ai_agents/agents/examples/voice-image-kids/README.md +++ b/ai_agents/agents/examples/voice-image-kids/README.md @@ -95,6 +95,25 @@ This starts: - **API Server**: http://localhost:8080 - **TMAN Designer**: http://localhost:49483 +## API Endpoints + +- `GET /graphs` lists available graphs in `tenapp/property.json` for the API server: + ``` + curl http://localhost:8080/graphs + ``` +- `POST /start` launches a worker with the selected graph and optional property overrides: + ``` + curl -X POST http://localhost:8080/start \ + -H 'Content-Type: application/json' \ + -d '{ + "request_id":"any-id", + "channel_name":"kids_demo", + "user_uid":10001, + "graph_name":"voice_image_kids", + "properties":{} + }' + ``` + ## Usage 1. 
Open http://localhost:3000 in your browser diff --git a/ai_agents/agents/examples/voice-image-kids/tenapp/manifest-lock.json b/ai_agents/agents/examples/voice-image-kids/tenapp/manifest-lock.json index 0595606ec9..271fcdaa51 100644 --- a/ai_agents/agents/examples/voice-image-kids/tenapp/manifest-lock.json +++ b/ai_agents/agents/examples/voice-image-kids/tenapp/manifest-lock.json @@ -100,6 +100,23 @@ ], "path": "../../../ten_packages/extension/openai_llm2_python" }, + { + "type": "extension", + "name": "openai_tts2_python", + "version": "0.5.1", + "hash": "fe3e4473196cbab079a7a6ff99282fec7259e80a57ced948bbc4b27908a3df81", + "dependencies": [ + { + "type": "system", + "name": "ten_runtime_python" + }, + { + "type": "system", + "name": "ten_ai_base" + } + ], + "path": "../../../ten_packages/extension/openai_tts2_python" + }, { "type": "extension", "name": "openai_gpt_image_python", diff --git a/ai_agents/agents/examples/voice-image-kids/tenapp/manifest.json b/ai_agents/agents/examples/voice-image-kids/tenapp/manifest.json index 0331b67380..105cc9cfd3 100644 --- a/ai_agents/agents/examples/voice-image-kids/tenapp/manifest.json +++ b/ai_agents/agents/examples/voice-image-kids/tenapp/manifest.json @@ -27,6 +27,9 @@ { "path": "../../../ten_packages/extension/openai_llm2_python" }, + { + "path": "../../../ten_packages/extension/openai_tts2_python" + }, { "path": "../../../ten_packages/extension/openai_gpt_image_python" }, diff --git a/ai_agents/agents/examples/voice-image-kids/tenapp/property.json b/ai_agents/agents/examples/voice-image-kids/tenapp/property.json index cb20d4d857..286fa9b143 100644 --- a/ai_agents/agents/examples/voice-image-kids/tenapp/property.json +++ b/ai_agents/agents/examples/voice-image-kids/tenapp/property.json @@ -18,7 +18,7 @@ "stream_id": 1234, "remote_stream_id": 123, "subscribe_audio": true, - "publish_audio": false, + "publish_audio": true, "publish_data": true, "enable_agora_asr": false } @@ -45,10 +45,24 @@ "model": "${env:OPENAI_MODEL|gpt-4o-mini}", "max_tokens": 512, "prompt": "You are a friendly AI art assistant for kids! When children describe what they want to draw, help them create amazing images.\n\nGuidelines:\n- Keep responses short, fun, and encouraging\n- When they describe an image idea, use the generate_image tool immediately\n- Make prompts detailed and vivid (add colors, mood, style details)\n- After generating, celebrate their creativity!\n- If the image can't be created, gently suggest something similar\n\nExample:\nKid: \"I want a purple dragon!\"\nYou: \"Ooh, a purple dragon! Let me create that for you!\" [calls generate_image with detailed prompt]", - "greeting": "Hi! I'm your AI art friend! Tell me what you'd like to draw!", + "greeting": "Hi there! I'm your AI art buddy! Tell me what you'd like me to draw for you!", "max_memory_length": 10 } }, + { + "type": "extension", + "name": "tts", + "addon": "openai_tts2_python", + "extension_group": "tts", + "property": { + "params": { + "api_key": "${env:OPENAI_API_KEY}", + "model": "tts-1", + "voice": "nova", + "speed": 1.0 + } + } + }, { "type": "extension", "name": "image_gen_tool", @@ -70,7 +84,7 @@ "addon": "main_python", "extension_group": "control", "property": { - "greeting": "Hi! I'm your AI art friend! Tell me what you'd like to draw!" + "greeting": "Hi there! I'm your AI art buddy! Tell me what you'd like me to draw for you!" 
} }, { @@ -134,6 +148,14 @@ "extension": "streamid_adapter" } ] + }, + { + "name": "pcm_frame", + "source": [ + { + "extension": "tts" + } + ] } ], "data": [ diff --git a/ai_agents/agents/examples/voice-image-kids/tenapp/ten_packages/extension/main_python/README.md b/ai_agents/agents/examples/voice-image-kids/tenapp/ten_packages/extension/main_python/README.md new file mode 100644 index 0000000000..69ad38d248 --- /dev/null +++ b/ai_agents/agents/examples/voice-image-kids/tenapp/ten_packages/extension/main_python/README.md @@ -0,0 +1,142 @@ +# Main Control Python Extension + +A TEN Framework extension that serves as the central control logic for AI agent interactions, managing speech recognition, language model processing, and text-to-speech coordination. + +## Overview + +The `main_python` extension acts as the orchestrator for AI agent conversations, handling real-time speech processing, LLM interactions, and TTS output. It manages user session state and coordinates data flow between different components in the TEN Framework. + +## Features + +- **Real-time Speech Processing**: Handles ASR (Automatic Speech Recognition) results and manages streaming text +- **LLM Integration**: Coordinates with language models for natural language understanding and response generation +- **TTS Coordination**: Manages text-to-speech requests for audio output +- **Session Management**: Tracks user presence and manages conversation state +- **Streaming Support**: Handles both final and intermediate results for smooth user experience +- **Caption Generation**: Provides real-time captions for accessibility and logging + +## API Interface + +### Input Data + +#### ASR Result +```json +{ + "text": "string", + "final": "bool", + "metadata": { + "session_id": "string" + } +} +``` + +#### LLM Result +```json +{ + "text": "string", + "end_of_segment": "bool" +} +``` + +### Output Data + +#### Text Data +```json +{ + "text": "string", + "is_final": "bool", + "end_of_segment": "bool", + "stream_id": "uint32" +} +``` + +### Commands + +#### Input Commands +- `on_user_joined`: Triggered when a user joins the session +- `on_user_left`: Triggered when a user leaves the session + +#### Output Commands +- `flush`: Sends flush commands to LLM, TTS, and RTC components + +## Configuration + +The extension supports the following configuration options: + +```json +{ + "greeting": "Hello there, I'm TEN Agent" +} +``` + +### Configuration Parameters + +- `greeting` (string, default: "Hello there, I'm TEN Agent"): The greeting message to display when the first user joins + +## Dependencies + +- `ten_runtime_python` (version 0.10): Core TEN Framework runtime +- `ten_ai_base` (version 0.6.9): AI base functionality + +## Usage + +### Installation + +The extension is part of the TEN Framework and can be installed through the TEN package manager: + +```bash +ten install main_python +``` + +### Integration + +This extension is designed to work with other TEN Framework components: + +- **ASR Extension**: Provides speech recognition results +- **LLM Extension**: Processes natural language and generates responses +- **TTS Extension**: Converts text to speech +- **RTC Extension**: Handles real-time communication +- **Message Collector**: Captures and displays conversation data + +### Workflow + +1. **User Joins**: When a user joins, the extension sends a greeting if configured +2. **Speech Processing**: ASR results are processed and captions are generated +3. **LLM Processing**: Final speech segments are sent to the LLM for processing +4. 
**Response Generation**: LLM responses are converted to speech and displayed as captions +5. **Streaming**: Both intermediate and final results are handled for smooth interaction + +## Development + +### Building + +The extension uses the standard TEN Framework build system: + +```bash +ten build main_python +``` + +### Testing + +Run the extension tests: + +```bash +ten test main_python +``` + +## Architecture + +The extension implements the `AsyncExtension` interface and provides: + +- **Lifecycle Management**: Proper initialization, start, stop, and cleanup +- **Event Handling**: Processes commands and data events asynchronously +- **State Management**: Tracks user count and conversation state +- **Data Routing**: Routes data between different framework components + +## License + +This extension is part of the TEN Framework and is licensed under the Apache License, Version 2.0. + +## Contributing + +Contributions are welcome! Please refer to the main TEN Framework documentation for contribution guidelines. diff --git a/ai_agents/agents/examples/voice-image-kids/tenapp/ten_packages/extension/main_python/__init__.py b/ai_agents/agents/examples/voice-image-kids/tenapp/ten_packages/extension/main_python/__init__.py index 0413aa9b81..72593ab225 100644 --- a/ai_agents/agents/examples/voice-image-kids/tenapp/ten_packages/extension/main_python/__init__.py +++ b/ai_agents/agents/examples/voice-image-kids/tenapp/ten_packages/extension/main_python/__init__.py @@ -4,5 +4,3 @@ # See the LICENSE file for more information. # from . import addon - -__all__ = ["addon"] diff --git a/ai_agents/agents/examples/voice-image-kids/tenapp/ten_packages/extension/main_python/addon.py b/ai_agents/agents/examples/voice-image-kids/tenapp/ten_packages/extension/main_python/addon.py index b67d451051..d7441c50c0 100644 --- a/ai_agents/agents/examples/voice-image-kids/tenapp/ten_packages/extension/main_python/addon.py +++ b/ai_agents/agents/examples/voice-image-kids/tenapp/ten_packages/extension/main_python/addon.py @@ -8,13 +8,12 @@ register_addon_as_extension, TenEnv, ) -from .extension import MainControlExtension @register_addon_as_extension("main_python") class MainControlExtensionAddon(Addon): def on_create_instance(self, ten_env: TenEnv, name: str, context) -> None: - ten_env.log_info("MainControlExtension: on_create_instance") - ten_env.on_create_instance_done( - MainControlExtension(name), context - ) + from .extension import MainControlExtension + + ten_env.log_info("on_create_instance") + ten_env.on_create_instance_done(MainControlExtension(name), context) diff --git a/ai_agents/agents/examples/voice-image-kids/tenapp/ten_packages/extension/main_python/agent/llm_exec.py b/ai_agents/agents/examples/voice-image-kids/tenapp/ten_packages/extension/main_python/agent/llm_exec.py index 5e57598d1c..18e75b0980 100644 --- a/ai_agents/agents/examples/voice-image-kids/tenapp/ten_packages/extension/main_python/agent/llm_exec.py +++ b/ai_agents/agents/examples/voice-image-kids/tenapp/ten_packages/extension/main_python/agent/llm_exec.py @@ -157,6 +157,7 @@ async def _send_to_llm( self.current_request_id = request_id llm_input = LLMRequest( request_id=request_id, + model="", # Model is configured in LLM extension, pass empty to use default messages=messages, streaming=True, parameters={"temperature": 0.7}, diff --git a/ai_agents/agents/examples/voice-image-kids/tenapp/ten_packages/extension/main_python/config.py b/ai_agents/agents/examples/voice-image-kids/tenapp/ten_packages/extension/main_python/config.py index 
89e3486331..17686708e8 100644
--- a/ai_agents/agents/examples/voice-image-kids/tenapp/ten_packages/extension/main_python/config.py
+++ b/ai_agents/agents/examples/voice-image-kids/tenapp/ten_packages/extension/main_python/config.py
@@ -1,14 +1,5 @@
-#
-# This file is part of TEN Framework, an open source project.
-# Licensed under the Apache License, Version 2.0.
-# See the LICENSE file for more information.
-#
-from pydantic import BaseModel, Field
+from pydantic import BaseModel
 
 
 class MainControlConfig(BaseModel):
-    """Main control configuration for voice-image-kids app"""
-    greeting: str = Field(
-        default="Hi! I'm your AI art friend! Tell me what you'd like to draw!",
-        description="Greeting message when user joins"
-    )
+    greeting: str = "Hello, I am your AI assistant."
diff --git a/ai_agents/agents/examples/voice-image-kids/tenapp/ten_packages/extension/main_python/extension.py b/ai_agents/agents/examples/voice-image-kids/tenapp/ten_packages/extension/main_python/extension.py
index a94e54134f..923104dfb9 100644
--- a/ai_agents/agents/examples/voice-image-kids/tenapp/ten_packages/extension/main_python/extension.py
+++ b/ai_agents/agents/examples/voice-image-kids/tenapp/ten_packages/extension/main_python/extension.py
@@ -1,8 +1,6 @@
-#
-# This file is part of TEN Framework, an open source project.
-# Licensed under the Apache License, Version 2.0.
-# See the LICENSE file for more information.
-#
+import asyncio
+import json
+import time
 from typing import Literal
@@ -21,14 +19,16 @@
     UserJoinedEvent,
     UserLeftEvent,
 )
-from .helper import _send_data
-from .config import MainControlConfig
+from .helper import _send_cmd, _send_data, parse_sentences
+from .config import MainControlConfig
+
+import uuid
 
 
 class MainControlExtension(AsyncExtension):
     """
-    Main control extension for voice-image-kids app.
-    Simplified version without TTS - images are the primary output.
+    The entry point of the agent module.
+    Consumes semantic AgentEvents from the Agent class and drives the runtime behavior.
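+    Streams LLM sentences to TTS, mirrors transcripts to the UI, and interrupts
+    both when new user speech is detected.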
""" def __init__(self, name: str): @@ -39,6 +39,7 @@ def __init__(self, name: str): self.stopped: bool = False self._rtc_user_count: int = 0 + self.sentence_fragment: str = "" self.turn_id: int = 0 self.session_id: str = "0" @@ -54,52 +55,58 @@ async def on_init(self, ten_env: AsyncTenEnv): self.agent = Agent(ten_env) - # Auto-register decorated methods + # Now auto-register decorated methods for attr_name in dir(self): fn = getattr(self, attr_name) event_type = getattr(fn, "_agent_event_type", None) if event_type: self.agent.on(event_type, fn) - # === Event Handlers === + # === Register handlers with decorators === @agent_event_handler(UserJoinedEvent) async def _on_user_joined(self, event: UserJoinedEvent): - """Handle user joining the session""" self._rtc_user_count += 1 if self._rtc_user_count == 1 and self.config and self.config.greeting: - # Send greeting message to frontend + await self._send_to_tts(self.config.greeting, True) await self._send_transcript( "assistant", self.config.greeting, True, 100 ) @agent_event_handler(UserLeftEvent) async def _on_user_left(self, event: UserLeftEvent): - """Handle user leaving the session""" self._rtc_user_count -= 1 @agent_event_handler(ToolRegisterEvent) async def _on_tool_register(self, event: ToolRegisterEvent): - """Register LLM tools (e.g., image generation)""" await self.agent.register_llm_tool(event.tool, event.source) @agent_event_handler(ASRResultEvent) async def _on_asr_result(self, event: ASRResultEvent): - """Handle speech recognition results""" self.session_id = event.metadata.get("session_id", "100") stream_id = int(self.session_id) if not event.text: return + if event.final or len(event.text) > 2: + await self._interrupt() if event.final: self.turn_id += 1 - # Send user's speech to LLM for processing await self.agent.queue_llm_input(event.text) - # Show transcript to user await self._send_transcript("user", event.text, event.final, stream_id) @agent_event_handler(LLMResponseEvent) async def _on_llm_response(self, event: LLMResponseEvent): - """Handle LLM responses (including tool calls for image generation)""" - # Show LLM response to user + if not event.is_final and event.type == "message": + sentences, self.sentence_fragment = parse_sentences( + self.sentence_fragment, event.delta + ) + for s in sentences: + await self._send_to_tts(s, False) + + if event.is_final and event.type == "message": + remaining_text = self.sentence_fragment or "" + self.sentence_fragment = "" + await self._send_to_tts(remaining_text, True) + await self._send_transcript( "assistant", event.text, @@ -108,7 +115,6 @@ async def _on_llm_response(self, event: LLMResponseEvent): data_type=("reasoning" if event.type == "reasoning" else "text"), ) - # === Lifecycle Hooks === async def on_start(self, ten_env: AsyncTenEnv): ten_env.log_info("[MainControlExtension] on_start") @@ -123,7 +129,7 @@ async def on_cmd(self, ten_env: AsyncTenEnv, cmd: Cmd): async def on_data(self, ten_env: AsyncTenEnv, data: Data): await self.agent.on_data(data) - # === Helper Methods === + # === helpers === async def _send_transcript( self, role: str, @@ -132,7 +138,9 @@ async def _send_transcript( stream_id: int, data_type: Literal["text", "reasoning"] = "text", ): - """Send transcript to message collector for chat UI""" + """ + Sends the transcript (ASR or LLM output) to the message collector. 
+ """ if data_type == "text": await _send_data( self.ten_env, @@ -140,14 +148,11 @@ async def _send_transcript( "message_collector", { "data_type": "transcribe", - "text_data": { - "text": text, - "is_final": final, - "stream_id": stream_id, - "end_of_segment": final, - "role": role, - }, - **self._current_metadata(), + "role": role, + "text": text, + "text_ts": int(time.time() * 1000), + "is_final": final, + "stream_id": stream_id, }, ) elif data_type == "reasoning": @@ -156,13 +161,53 @@ async def _send_transcript( "message", "message_collector", { - "data_type": "reasoning", - "text_data": { - "text": text, - "is_final": final, - "stream_id": stream_id, - "end_of_segment": final, - }, - **self._current_metadata(), + "data_type": "raw", + "role": role, + "text": json.dumps( + { + "type": "reasoning", + "data": { + "text": text, + }, + } + ), + "text_ts": int(time.time() * 1000), + "is_final": final, + "stream_id": stream_id, }, ) + self.ten_env.log_info( + f"[MainControlExtension] Sent transcript: {role}, final={final}, text={text}" + ) + + async def _send_to_tts(self, text: str, is_final: bool): + """ + Sends a sentence to the TTS system. + """ + request_id = f"tts-request-{self.turn_id}" + await _send_data( + self.ten_env, + "tts_text_input", + "tts", + { + "request_id": request_id, + "text": text, + "text_input_end": is_final, + "metadata": self._current_metadata(), + }, + ) + self.ten_env.log_info( + f"[MainControlExtension] Sent to TTS: is_final={is_final}, text={text}" + ) + + async def _interrupt(self): + """ + Interrupts ongoing LLM and TTS generation. Typically called when user speech is detected. + """ + self.sentence_fragment = "" + await self.agent.flush_llm() + await _send_data( + self.ten_env, "tts_flush", "tts", {"flush_id": str(uuid.uuid4())} + ) + await _send_cmd(self.ten_env, "flush", "agora_rtc") + self.ten_env.log_info("[MainControlExtension] Interrupt signal sent") diff --git a/ai_agents/agents/examples/voice-image-kids/tenapp/ten_packages/extension/main_python/manifest.json b/ai_agents/agents/examples/voice-image-kids/tenapp/ten_packages/extension/main_python/manifest.json index 4cf35fbc6a..bf695c1b3d 100644 --- a/ai_agents/agents/examples/voice-image-kids/tenapp/ten_packages/extension/main_python/manifest.json +++ b/ai_agents/agents/examples/voice-image-kids/tenapp/ten_packages/extension/main_python/manifest.json @@ -18,8 +18,10 @@ "include": [ "manifest.json", "property.json", + "**.tent", "**.py", - "README.md" + "README.md", + "tests/**" ] }, "api": { @@ -31,4 +33,4 @@ } } } -} +} \ No newline at end of file diff --git a/ai_agents/agents/examples/voice-image-kids/tenapp/ten_packages/extension/main_python/property.json b/ai_agents/agents/examples/voice-image-kids/tenapp/ten_packages/extension/main_python/property.json index adeda64500..9e26dfeeb6 100644 --- a/ai_agents/agents/examples/voice-image-kids/tenapp/ten_packages/extension/main_python/property.json +++ b/ai_agents/agents/examples/voice-image-kids/tenapp/ten_packages/extension/main_python/property.json @@ -1,3 +1 @@ -{ - "greeting": "Hi! I'm your AI art friend! Tell me what you'd like to draw!" 
-} +{} \ No newline at end of file diff --git a/ai_agents/agents/examples/voice-image-kids/tenapp/ten_packages/extension/main_python/requirements.txt b/ai_agents/agents/examples/voice-image-kids/tenapp/ten_packages/extension/main_python/requirements.txt deleted file mode 100644 index 65b1b0c122..0000000000 --- a/ai_agents/agents/examples/voice-image-kids/tenapp/ten_packages/extension/main_python/requirements.txt +++ /dev/null @@ -1 +0,0 @@ -pydantic>=2.0.0 diff --git a/ai_agents/agents/ten_packages/extension/openai_gpt_image_python/config.py b/ai_agents/agents/ten_packages/extension/openai_gpt_image_python/config.py index d3a4c69eba..df6b9fcb70 100644 --- a/ai_agents/agents/ten_packages/extension/openai_gpt_image_python/config.py +++ b/ai_agents/agents/ten_packages/extension/openai_gpt_image_python/config.py @@ -35,7 +35,7 @@ def validate(self) -> None: raise ValueError("Model is required (params.model)") # Validate model - valid_models = ["gpt-image-1.5", "dall-e-3", "dall-e-2"] + valid_models = ["gpt-image-1", "gpt-image-1.5", "dall-e-3", "dall-e-2"] if self.params["model"] not in valid_models: raise ValueError(f"Invalid model. Must be one of: {valid_models}") diff --git a/ai_agents/agents/ten_packages/extension/openai_gpt_image_python/tests/test_params.py b/ai_agents/agents/ten_packages/extension/openai_gpt_image_python/tests/test_params.py new file mode 100644 index 0000000000..b06cd9f97e --- /dev/null +++ b/ai_agents/agents/ten_packages/extension/openai_gpt_image_python/tests/test_params.py @@ -0,0 +1,19 @@ +import json +from openai_gpt_image_python.config import OpenAIGPTImageConfig + +def test_config_defaults_and_validation(): + cfg_json = json.dumps({ + "params": { + "api_key": "sk-test", + "model": "gpt-image-1.5", + "size": "1024x1024", + "quality": "standard", + "fallback_model": "dall-e-3" + } + }) + cfg = OpenAIGPTImageConfig.model_validate_json(cfg_json) + # update_params may normalize values if needed + cfg.update_params() + cfg.validate() + assert cfg.params["model"] in ["gpt-image-1.5", "dall-e-3"] + assert cfg.params["quality"] in ["standard", "hd"] From 83843ba9303962c2d8f85c9b1f389b81e6aaaacb Mon Sep 17 00:00:00 2001 From: Elliot Chen Date: Thu, 25 Dec 2025 18:13:07 +0800 Subject: [PATCH 3/7] feat: add immersive doodle experience with canvas animations --- .../voice-assistant/tenapp/property.json | 39 + .../voice-image-kids/tenapp/property.json | 31 +- .../openai_gpt_image_python/extension.py | 110 ++- .../openai_gpt_image_python/manifest.json | 12 +- .../openai_image_client.py | 28 +- ai_agents/playground/bun.lock | 7 + ai_agents/playground/next-env.d.ts | 2 +- ai_agents/playground/package-lock.json | 98 ++- ai_agents/playground/package.json | 1 + ai_agents/playground/src/app/global.css | 354 ++++++++- ai_agents/playground/src/app/layout.tsx | 8 +- ai_agents/playground/src/app/page.tsx | 90 +-- .../src/components/Doodle/DoodlePanel.tsx | 144 ++++ .../src/components/Doodler/AppShell.tsx | 161 ++++ .../src/components/Doodler/BoardStage.tsx | 228 ++++++ .../src/components/Doodler/Canvas.tsx | 38 + .../src/components/Doodler/ControlsBar.tsx | 94 +++ .../components/Doodler/FloatingDoodles.tsx | 697 ++++++++++++++++++ .../src/components/Doodler/ImmersiveShell.tsx | 540 ++++++++++++++ .../components/Doodler/LoadingAnimator.tsx | 40 + .../Doodler/MagicCanvasBackground.tsx | 72 ++ .../components/Doodler/MagicPenAnimator.tsx | 232 ++++++ .../src/components/Doodler/MouseTrail.tsx | 152 ++++ .../components/Doodler/TranscriptPanel.tsx | 123 ++++ 24 files changed, 3123 
insertions(+), 178 deletions(-) create mode 100644 ai_agents/playground/src/components/Doodle/DoodlePanel.tsx create mode 100644 ai_agents/playground/src/components/Doodler/AppShell.tsx create mode 100644 ai_agents/playground/src/components/Doodler/BoardStage.tsx create mode 100644 ai_agents/playground/src/components/Doodler/Canvas.tsx create mode 100644 ai_agents/playground/src/components/Doodler/ControlsBar.tsx create mode 100644 ai_agents/playground/src/components/Doodler/FloatingDoodles.tsx create mode 100644 ai_agents/playground/src/components/Doodler/ImmersiveShell.tsx create mode 100644 ai_agents/playground/src/components/Doodler/LoadingAnimator.tsx create mode 100644 ai_agents/playground/src/components/Doodler/MagicCanvasBackground.tsx create mode 100644 ai_agents/playground/src/components/Doodler/MagicPenAnimator.tsx create mode 100644 ai_agents/playground/src/components/Doodler/MouseTrail.tsx create mode 100644 ai_agents/playground/src/components/Doodler/TranscriptPanel.tsx diff --git a/ai_agents/agents/examples/voice-assistant/tenapp/property.json b/ai_agents/agents/examples/voice-assistant/tenapp/property.json index a21de0277f..75749fa362 100644 --- a/ai_agents/agents/examples/voice-assistant/tenapp/property.json +++ b/ai_agents/agents/examples/voice-assistant/tenapp/property.json @@ -94,6 +94,29 @@ "api_key": "${env:WEATHERAPI_API_KEY|}" } }, + { + "type": "extension", + "name": "openai_gpt_image_python", + "addon": "openai_gpt_image_python", + "extension_group": "default", + "property": { + "params": { + "api_key": "${env:OPENAI_API_KEY|}", + "base_url": "${env:OPENAI_IMAGE_BASE_URL|}", + "model": "gpt-image-1.5", + "size": "1024x1024", + "quality": "standard", + "n": 1, + "response_format": "url", + "vendor": "openai", + "azure_endpoint": "${env:AZURE_OPENAI_IMAGE_ENDPOINT|}", + "azure_api_version": "${env:AZURE_OPENAI_IMAGE_API_VERSION|}", + "fallback_model": "dall-e-3" + }, + "dump": false, + "dump_path": "./openai_image_responses.json" + } + }, { "type": "extension", "name": "streamid_adapter", @@ -123,6 +146,9 @@ "source": [ { "extension": "weatherapi_tool_python" + }, + { + "extension": "openai_gpt_image_python" } ] } @@ -169,6 +195,19 @@ } ] }, + { + "extension": "message_collector", + "data": [ + { + "name": "content_data", + "source": [ + { + "extension": "openai_gpt_image_python" + } + ] + } + ] + }, { "extension": "streamid_adapter", "audio_frame": [ diff --git a/ai_agents/agents/examples/voice-image-kids/tenapp/property.json b/ai_agents/agents/examples/voice-image-kids/tenapp/property.json index 286fa9b143..7d9029bc49 100644 --- a/ai_agents/agents/examples/voice-image-kids/tenapp/property.json +++ b/ai_agents/agents/examples/voice-image-kids/tenapp/property.json @@ -71,9 +71,10 @@ "property": { "params": { "api_key": "${env:OPENAI_API_KEY}", - "model": "gpt-image-1.5", + "model": "dall-e-3", "size": "1024x1024", "quality": "standard", + "response_format": "url", "fallback_model": "dall-e-3" } } @@ -138,6 +139,32 @@ } ] }, + { + "extension": "message_collector", + "data": [ + { + "name": "content_data", + "source": [ + { + "extension": "image_gen_tool" + } + ] + } + ] + }, + { + "extension": "message_collector", + "data": [ + { + "name": "message", + "source": [ + { + "extension": "image_gen_tool" + } + ] + } + ] + }, { "extension": "agora_rtc", "audio_frame": [ @@ -208,4 +235,4 @@ ] } } -} +} \ No newline at end of file diff --git a/ai_agents/agents/ten_packages/extension/openai_gpt_image_python/extension.py 
b/ai_agents/agents/ten_packages/extension/openai_gpt_image_python/extension.py index 0f0b4ec66f..ab80de389f 100644 --- a/ai_agents/agents/ten_packages/extension/openai_gpt_image_python/extension.py +++ b/ai_agents/agents/ten_packages/extension/openai_gpt_image_python/extension.py @@ -4,6 +4,7 @@ # See the LICENSE file for more information. # import json +import time from ten_runtime import ( Data, TenEnv, @@ -121,25 +122,27 @@ def get_tool_metadata(self, ten_env: TenEnv) -> list[LLMToolMetadata]: async def send_image( self, async_ten_env: AsyncTenEnv, image_url: str ) -> None: - """Send generated image URL to frontend via content_data""" + """Send generated image URL to frontend""" async_ten_env.log_info(f"Sending image URL: {image_url}") try: - # Format as JSON matching TEN content_data schema - payload = json.dumps({ - "data": { - "image_url": image_url - }, - "type": "image_url" - }) - - # Create content_data message - output_data = Data.create(CONTENT_DATA_OUT_NAME) - output_data.set_property_string(DATA_OUT_PROPERTY_TEXT, payload) - output_data.set_property_bool(DATA_OUT_PROPERTY_END_OF_SEGMENT, True) - - # Send asynchronously - await async_ten_env.send_data(output_data) + payload_obj = { + "data_type": "raw", + "role": "assistant", + "text": json.dumps({ + "type": "image_url", + "data": { + "image_url": image_url + } + }), + "text_ts": int(time.time() * 1000), + "is_final": True, + "stream_id": 100 + } + + msg = Data.create("message") + msg.set_property_from_json(None, json.dumps(payload_obj)) + await async_ten_env.send_data(msg) async_ten_env.log_info( "Image URL sent successfully", @@ -175,11 +178,61 @@ async def run_tool( # Override quality if specified quality = args.get("quality", self.config.params.get("quality")) + # Enforce kid-friendly doodle style + unsafe_keywords = [ + "weapon", "blood", "violence", "gore", "nsfw", "adult", + "gun", "knife", "kill", "attack", "war" + ] + lowered = prompt.lower() + if any(k in lowered for k in unsafe_keywords): + return LLMToolResultLLMResult( + type="llmresult", + content=json.dumps({ + "success": False, + "error": "Let's try a kid-friendly idea. Describe a playful scene or character to doodle!" 
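+                        # NOTE: `k in lowered` above is plain substring matching,
+                        # so "gun" also flags "dragon" and "war" flags "warm"; a
+                        # word-boundary check (e.g. re.search(rf"\b{k}\b", lowered))
+                        # would avoid these false positives.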
+ }), + ) + doodle_modifier = ( + " in playful doodle style, hand-drawn, crayon-like, bold outlines, simple shapes, " + "kid-friendly and cheerful" + ) + prompt = f"{prompt.strip()}{doodle_modifier}" + + # Emit progress: queued → generating + try: + queued_msg = { + "data_type": "raw", + "role": "assistant", + "text": json.dumps({"type": "progress", "data": {"phase": "queued", "pct": 10}}), + "text_ts": int(time.time() * 1000), + "is_final": False, + "stream_id": 100 + } + msg = Data.create("message") + msg.set_property_from_json(None, json.dumps(queued_msg)) + await ten_env.send_data(msg) + except Exception: + pass + # Generate image ten_env.log_info( f"Generating image with prompt: {prompt[:100]}...", category=LOG_CATEGORY_KEY_POINT ) + try: + generating_msg = { + "data_type": "raw", + "role": "assistant", + "text": json.dumps({"type": "progress", "data": {"phase": "generating", "pct": 50}}), + "text_ts": int(time.time() * 1000), + "is_final": False, + "stream_id": 100 + } + msg2 = Data.create("message") + msg2.set_property_from_json(None, json.dumps(generating_msg)) + await ten_env.send_data(msg2) + except Exception: + pass image_url = await self.client.generate_image( prompt=prompt, quality=quality, @@ -188,13 +241,29 @@ async def run_tool( # Send image to frontend await self.send_image(ten_env, image_url) - # Return success to LLM + # Emit progress: final + try: + final_msg = { + "data_type": "raw", + "role": "assistant", + "text": json.dumps({"type": "progress", "data": {"phase": "final", "pct": 100}}), + "text_ts": int(time.time() * 1000), + "is_final": True, + "stream_id": 100 + } + msg3 = Data.create("message") + msg3.set_property_from_json(None, json.dumps(final_msg)) + await ten_env.send_data(msg3) + except Exception: + pass + + # Return success to LLM (without image data to avoid context overflow) + # The image is already sent to the frontend via send_image() return LLMToolResultLLMResult( type="llmresult", content=json.dumps({ "success": True, - "image_url": image_url, - "message": "Image generated successfully!" + "message": "Image generated and sent to the user successfully!" }), ) @@ -248,8 +317,7 @@ async def run_tool( type="llmresult", content=json.dumps({ "success": True, - "image_url": image_url, - "message": f"Image generated with {fallback_model}" + "message": f"Image generated with {fallback_model} and sent to the user successfully!" 
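+                    # As in the primary path, the image itself was already pushed
+                    # to the frontend via send_image(); only this short status
+                    # string goes back into the LLM context.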
}), ) except Exception as fallback_error: diff --git a/ai_agents/agents/ten_packages/extension/openai_gpt_image_python/manifest.json b/ai_agents/agents/ten_packages/extension/openai_gpt_image_python/manifest.json index 778f22441f..a14144989c 100644 --- a/ai_agents/agents/ten_packages/extension/openai_gpt_image_python/manifest.json +++ b/ai_agents/agents/ten_packages/extension/openai_gpt_image_python/manifest.json @@ -142,7 +142,17 @@ } } } + }, + { + "name": "message", + "property": { + "properties": { + "text": { + "type": "string" + } + } + } } ] } -} +} \ No newline at end of file diff --git a/ai_agents/agents/ten_packages/extension/openai_gpt_image_python/openai_image_client.py b/ai_agents/agents/ten_packages/extension/openai_gpt_image_python/openai_image_client.py index 26d04eda0c..bbd19770a4 100644 --- a/ai_agents/agents/ten_packages/extension/openai_gpt_image_python/openai_image_client.py +++ b/ai_agents/agents/ten_packages/extension/openai_gpt_image_python/openai_image_client.py @@ -102,17 +102,32 @@ async def generate_image( """ # Build request parameters model = model_override or self.current_model + is_gpt_image_model = model.startswith("gpt-image") + + # GPT Image models use different quality values than DALL-E + # DALL-E: 'standard', 'hd' + # GPT Image: 'low', 'medium', 'high', 'auto' + requested_quality = quality or self.config.params.get("quality", "standard") + if is_gpt_image_model: + # Map DALL-E quality values to GPT Image values + quality_map = { + "standard": "auto", + "hd": "high", + } + requested_quality = quality_map.get(requested_quality, requested_quality) + request_params = { "model": model, "prompt": prompt, "size": self.config.params.get("size", "1024x1024"), - "quality": quality or self.config.params.get("quality", "standard"), + "quality": requested_quality, "n": 1, # Always generate 1 image } - # Add response_format if configured + # GPT Image models (gpt-image-1, gpt-image-1.5) don't support response_format + # Only add it for DALL-E models response_format = self.config.params.get("response_format", "url") - if response_format: + if response_format and not is_gpt_image_model: request_params["response_format"] = response_format self.ten_env.log_info( @@ -126,13 +141,14 @@ async def generate_image( response = await self.client.images.generate(**request_params) # Extract image URL or base64 data - if response_format == "b64_json": - # Handle base64 response (future feature) + # GPT Image models ONLY return base64 data, not URLs + if is_gpt_image_model or response_format == "b64_json": + # GPT Image models always return base64 image_data = response.data[0].b64_json # Convert to data URL image_url = f"data:image/png;base64,{image_data}" else: - # Standard URL response + # DALL-E models support URL response image_url = response.data[0].url # Optional: Save response for debugging diff --git a/ai_agents/playground/bun.lock b/ai_agents/playground/bun.lock index 2b61627fca..cc31dbe249 100644 --- a/ai_agents/playground/bun.lock +++ b/ai_agents/playground/bun.lock @@ -25,6 +25,7 @@ "axios": "^1.13.1", "class-variance-authority": "^0.7.1", "clsx": "^2.1.1", + "framer-motion": "^12.23.26", "lucide-react": "^0.546.0", "next": "16.0.1", "next-themes": "^0.4.6", @@ -721,6 +722,8 @@ "formdata-polyfill": ["formdata-polyfill@4.0.10", "", { "dependencies": { "fetch-blob": "^3.1.2" } }, "sha512-buewHzMvYL29jdeQTVILecSaZKnt/RJWjoZCF5OW60Z67/GmSLBkOFM7qh1PI3zFNtJbaZL5eQu1vLfazOwj4g=="], + "framer-motion": ["framer-motion@12.23.26", "", { "dependencies": { "motion-dom": 
"^12.23.23", "motion-utils": "^12.23.6", "tslib": "^2.4.0" }, "peerDependencies": { "@emotion/is-prop-valid": "*", "react": "^18.0.0 || ^19.0.0", "react-dom": "^18.0.0 || ^19.0.0" }, "optionalPeers": ["@emotion/is-prop-valid", "react", "react-dom"] }, "sha512-cPcIhgR42xBn1Uj+PzOyheMtZ73H927+uWPDVhUMqxy8UHt6Okavb6xIz9J/phFUHUj0OncR6UvMfJTXoc/LKA=="], + "fs.realpath": ["fs.realpath@1.0.0", "", {}, "sha512-OO0pH2lK6a0hZnAdau5ItzHPI6pUlvI7jMVnxUQRtw4owF2wk8lOSabtGDCTP4Ggrg2MbGnWO9X8K1t4+fGMDw=="], "function-bind": ["function-bind@1.1.2", "", {}, "sha512-7XHNxH7qX9xG5mIwxkhumTox/MIRNcOgDrxWsMt2pAr23WHp6MrRlN7FBSFpCpr+oVO0F744iUgR82nJMfG2SA=="], @@ -849,6 +852,10 @@ "mkdirp": ["mkdirp@1.0.4", "", { "bin": { "mkdirp": "bin/cmd.js" } }, "sha512-vVqVZQyf3WLx2Shd0qJ9xuvqgAyKPLAiqITEtqW0oIUjzo3PePDd6fW9iFz30ef7Ysp/oiWqbhszeGWW2T6Gzw=="], + "motion-dom": ["motion-dom@12.23.23", "", { "dependencies": { "motion-utils": "^12.23.6" } }, "sha512-n5yolOs0TQQBRUFImrRfs/+6X4p3Q4n1dUEqt/H58Vx7OW6RF+foWEgmTVDhIWJIMXOuNNL0apKH2S16en9eiA=="], + + "motion-utils": ["motion-utils@12.23.6", "", {}, "sha512-eAWoPgr4eFEOFfg2WjIsMoqJTW6Z8MTUCgn/GZ3VRpClWBdnbjryiA3ZSNLyxCTmCQx4RmYX6jX1iWHbenUPNQ=="], + "ms": ["ms@2.1.3", "", {}, "sha512-6FlzubTLZG3J2a/NVCAleEhjzq5oxgHyaCU9yYXvcLsvoVaHJq/s5xXI6/XXP6tz7R9xAOtHnSO/tXtF3WRTlA=="], "nanoid": ["nanoid@3.3.11", "", { "bin": { "nanoid": "bin/nanoid.cjs" } }, "sha512-N8SpfPUnUp1bK+PMYW8qSWdl9U+wwNWI4QKxOYDy9JAro3WMX7p2OeVRF9v+347pnakNevPmiHhNmZ2HbFA76w=="], diff --git a/ai_agents/playground/next-env.d.ts b/ai_agents/playground/next-env.d.ts index c4b7818fbb..9edff1c7ca 100644 --- a/ai_agents/playground/next-env.d.ts +++ b/ai_agents/playground/next-env.d.ts @@ -1,6 +1,6 @@ /// /// -import "./.next/dev/types/routes.d.ts"; +import "./.next/types/routes.d.ts"; // NOTE: This file should not be edited // see https://nextjs.org/docs/app/api-reference/config/typescript for more information. 
diff --git a/ai_agents/playground/package-lock.json b/ai_agents/playground/package-lock.json index 00c5d5f457..204d0280c7 100644 --- a/ai_agents/playground/package-lock.json +++ b/ai_agents/playground/package-lock.json @@ -23,11 +23,12 @@ "@radix-ui/react-tooltip": "^1.2.8", "@reduxjs/toolkit": "^2.9.2", "@trulience/react-sdk": "^1.0.98", - "agora-rtc-sdk-ng": "^4.24.0", + "agora-rtc-sdk-ng": "^4.23.0", "agora-rtm": "^2.2.3", "axios": "^1.13.1", "class-variance-authority": "^0.7.1", "clsx": "^2.1.1", + "framer-motion": "^12.23.26", "lucide-react": "^0.546.0", "next": "16.0.1", "next-themes": "^0.4.6", @@ -62,35 +63,32 @@ } }, "node_modules/@agora-js/media": { - "version": "4.24.0", - "resolved": "https://registry.npmjs.org/@agora-js/media/-/media-4.24.0.tgz", - "integrity": "sha512-foii2klr5+qonLznxN0ZZFejoxLt/W8do79wmIsADPZLw2uZjRP35m0lqUGiLXBKeQ8u3i4UygPzEdFaY26hrw==", - "license": "MIT", + "version": "4.23.0", + "resolved": "https://registry.npmjs.org/@agora-js/media/-/media-4.23.0.tgz", + "integrity": "sha512-rV0CGP5nhvd7XrzpCqN7Fud+W//c/+f0dMe7wPB+DS/ckeTVxSdg96cTfEJS9poeTdDUvfhHyi0iECLPfUjkKA==", "dependencies": { - "@agora-js/report": "4.24.0", - "@agora-js/shared": "4.24.0", + "@agora-js/report": "4.23.0", + "@agora-js/shared": "4.23.0", "agora-rte-extension": "^1.2.4", - "axios": "^1.8.3", + "axios": "^1.7.7", "webrtc-adapter": "8.2.0" } }, "node_modules/@agora-js/report": { - "version": "4.24.0", - "resolved": "https://registry.npmjs.org/@agora-js/report/-/report-4.24.0.tgz", - "integrity": "sha512-MYbtkdY1Ls0KW0iagUzrPzyvqMWlyCWSC5odEb1SQaraAl7DJeDUkf91a3wxKzrjVah+LCxFxsS4lCFDxvKgNA==", - "license": "MIT", + "version": "4.23.0", + "resolved": "https://registry.npmjs.org/@agora-js/report/-/report-4.23.0.tgz", + "integrity": "sha512-ti2HZc8ITRgbmukHRa+ZlH4ecCByHdQpPVT6v3vIn+ihvqrxs3A871iLe1jbjbeGNuOwqcaznSoRXOVsREeq2A==", "dependencies": { - "@agora-js/shared": "4.24.0", - "axios": "^1.8.3" + "@agora-js/shared": "4.23.0", + "axios": "^1.7.7" } }, "node_modules/@agora-js/shared": { - "version": "4.24.0", - "resolved": "https://registry.npmjs.org/@agora-js/shared/-/shared-4.24.0.tgz", - "integrity": "sha512-Vj67ZcTHZI+1ctWusrEPSSGLM3l6CFiAze/Bi8r7YHRMLivzhZR79nV6GiKvHS3muLAON2YAExznvjPIly6lcg==", - "license": "MIT", + "version": "4.23.0", + "resolved": "https://registry.npmjs.org/@agora-js/shared/-/shared-4.23.0.tgz", + "integrity": "sha512-gfcEJu/+vPvMKYcjyWqmQKo4d0r0yggl9AhktsTWjg6mhScv4Lt3B8JdGGXvlIP6R+horRHjxsqPCdCVs1N4/w==", "dependencies": { - "axios": "^1.8.3", + "axios": "^1.7.7", "ua-parser-js": "^0.7.34" } }, @@ -3722,16 +3720,15 @@ } }, "node_modules/agora-rtc-sdk-ng": { - "version": "4.24.0", - "resolved": "https://registry.npmjs.org/agora-rtc-sdk-ng/-/agora-rtc-sdk-ng-4.24.0.tgz", - "integrity": "sha512-2apG/07EtsuX21ncSF77q+dr6/kDgu9B/RpKtstCtaq46l4/Eraoecewi4zXRUCY3Im+8dzTIXx6jUwyPdxdHQ==", - "license": "MIT", + "version": "4.23.0", + "resolved": "https://registry.npmjs.org/agora-rtc-sdk-ng/-/agora-rtc-sdk-ng-4.23.0.tgz", + "integrity": "sha512-/DVTowwsU0PpwTT/xXOa9UAB3qlVpLty2Cxsn+B0mEKHbXTd4P4cgeLaSCbxQFqDhYQAqpwyClxC5ABYBal5yA==", "dependencies": { - "@agora-js/media": "4.24.0", - "@agora-js/report": "4.24.0", - "@agora-js/shared": "4.24.0", + "@agora-js/media": "4.23.0", + "@agora-js/report": "4.23.0", + "@agora-js/shared": "4.23.0", "agora-rte-extension": "^1.2.4", - "axios": "^1.8.3", + "axios": "^1.7.7", "formdata-polyfill": "^4.0.7", "pako": "^2.1.0", "ua-parser-js": "^0.7.34", @@ -3741,8 +3738,7 @@ "node_modules/agora-rte-extension": { "version": "1.2.4", 
"resolved": "https://registry.npmjs.org/agora-rte-extension/-/agora-rte-extension-1.2.4.tgz", - "integrity": "sha512-0ovZz1lbe30QraG1cU+ji7EnQ8aUu+Hf3F+a8xPml3wPOyUQEK6CTdxV9kMecr9t+fIDrGeW7wgJTsM1DQE7Nw==", - "license": "ISC" + "integrity": "sha512-0ovZz1lbe30QraG1cU+ji7EnQ8aUu+Hf3F+a8xPml3wPOyUQEK6CTdxV9kMecr9t+fIDrGeW7wgJTsM1DQE7Nw==" }, "node_modules/agora-rtm": { "version": "2.2.3", @@ -4579,6 +4575,32 @@ "node": ">=12.20.0" } }, + "node_modules/framer-motion": { + "version": "12.23.26", + "resolved": "https://registry.npmjs.org/framer-motion/-/framer-motion-12.23.26.tgz", + "integrity": "sha512-cPcIhgR42xBn1Uj+PzOyheMtZ73H927+uWPDVhUMqxy8UHt6Okavb6xIz9J/phFUHUj0OncR6UvMfJTXoc/LKA==", + "dependencies": { + "motion-dom": "^12.23.23", + "motion-utils": "^12.23.6", + "tslib": "^2.4.0" + }, + "peerDependencies": { + "@emotion/is-prop-valid": "*", + "react": "^18.0.0 || ^19.0.0", + "react-dom": "^18.0.0 || ^19.0.0" + }, + "peerDependenciesMeta": { + "@emotion/is-prop-valid": { + "optional": true + }, + "react": { + "optional": true + }, + "react-dom": { + "optional": true + } + } + }, "node_modules/fs.realpath": { "version": "1.0.0", "dev": true, @@ -5392,6 +5414,19 @@ "node": ">=10" } }, + "node_modules/motion-dom": { + "version": "12.23.23", + "resolved": "https://registry.npmjs.org/motion-dom/-/motion-dom-12.23.23.tgz", + "integrity": "sha512-n5yolOs0TQQBRUFImrRfs/+6X4p3Q4n1dUEqt/H58Vx7OW6RF+foWEgmTVDhIWJIMXOuNNL0apKH2S16en9eiA==", + "dependencies": { + "motion-utils": "^12.23.6" + } + }, + "node_modules/motion-utils": { + "version": "12.23.6", + "resolved": "https://registry.npmjs.org/motion-utils/-/motion-utils-12.23.6.tgz", + "integrity": "sha512-eAWoPgr4eFEOFfg2WjIsMoqJTW6Z8MTUCgn/GZ3VRpClWBdnbjryiA3ZSNLyxCTmCQx4RmYX6jX1iWHbenUPNQ==" + }, "node_modules/ms": { "version": "2.1.3", "dev": true, @@ -6017,8 +6052,7 @@ "node_modules/sdp": { "version": "3.2.1", "resolved": "https://registry.npmjs.org/sdp/-/sdp-3.2.1.tgz", - "integrity": "sha512-lwsAIzOPlH8/7IIjjz3K0zYBk7aBVVcvjMwt3M4fLxpjMYyy7i3I97SLHebgn4YBjirkzfp3RvRDWSKsh/+WFw==", - "license": "MIT" + "integrity": "sha512-lwsAIzOPlH8/7IIjjz3K0zYBk7aBVVcvjMwt3M4fLxpjMYyy7i3I97SLHebgn4YBjirkzfp3RvRDWSKsh/+WFw==" }, "node_modules/semver": { "version": "7.7.2", @@ -6299,7 +6333,6 @@ "url": "https://github.com/sponsors/faisalman" } ], - "license": "MIT", "bin": { "ua-parser-js": "script/cli.js" }, @@ -6454,7 +6487,6 @@ "version": "8.2.0", "resolved": "https://registry.npmjs.org/webrtc-adapter/-/webrtc-adapter-8.2.0.tgz", "integrity": "sha512-umxCMgedPAVq4Pe/jl3xmelLXLn4XZWFEMR5Iipb5wJ+k1xMX0yC4ZY9CueZUU1MjapFxai1tFGE7R/kotH6Ww==", - "license": "BSD-3-Clause", "dependencies": { "sdp": "^3.0.2" }, diff --git a/ai_agents/playground/package.json b/ai_agents/playground/package.json index 6f0ac8968c..e46256ea9d 100644 --- a/ai_agents/playground/package.json +++ b/ai_agents/playground/package.json @@ -33,6 +33,7 @@ "axios": "^1.13.1", "class-variance-authority": "^0.7.1", "clsx": "^2.1.1", + "framer-motion": "^12.23.26", "lucide-react": "^0.546.0", "next": "16.0.1", "next-themes": "^0.4.6", diff --git a/ai_agents/playground/src/app/global.css b/ai_agents/playground/src/app/global.css index f3a37a6327..32974ddccb 100644 --- a/ai_agents/playground/src/app/global.css +++ b/ai_agents/playground/src/app/global.css @@ -71,8 +71,9 @@ html, body { - background-color: #0f0f11; - font-family: "PingFang SC"; + color-scheme: light; + font-family: ui-rounded, "PingFang SC", system-ui, -apple-system, + BlinkMacSystemFont, "Segoe UI", sans-serif; height: 100%; } 
@@ -81,40 +82,38 @@ text-decoration: none; } - @media (prefers-color-scheme: dark) { - html { - color-scheme: dark; - } + html { + background-color: hsl(var(--background)); } } @layer base { :root { - --background: 0 0% 100%; - --foreground: 0 0% 3.9%; + --background: 43 100% 97%; + --foreground: 24 22% 12%; --card: 0 0% 100%; - --card-foreground: 0 0% 3.9%; + --card-foreground: 24 22% 12%; --popover: 0 0% 100%; - --popover-foreground: 0 0% 3.9%; - --primary: 0 0% 9%; - --primary-foreground: 0 0% 98%; - --secondary: 0 0% 96.1%; - --secondary-foreground: 0 0% 9%; - --muted: 0 0% 96.1%; - --muted-foreground: 0 0% 45.1%; - --accent: 0 0% 96.1%; - --accent-foreground: 0 0% 9%; + --popover-foreground: 24 22% 12%; + --primary: 20 92% 55%; + --primary-foreground: 0 0% 100%; + --secondary: 40 52% 92%; + --secondary-foreground: 24 22% 12%; + --muted: 40 35% 90%; + --muted-foreground: 24 10% 38%; + --accent: 190 70% 92%; + --accent-foreground: 24 22% 12%; --destructive: 0 84.2% 60.2%; --destructive-foreground: 0 0% 98%; - --border: 0 0% 89.8%; - --input: 0 0% 89.8%; - --ring: 0 0% 3.9%; + --border: 24 18% 82%; + --input: 24 18% 82%; + --ring: 20 92% 55%; --chart-1: 12 76% 61%; --chart-2: 173 58% 39%; --chart-3: 197 37% 24%; --chart-4: 43 74% 66%; --chart-5: 27 87% 67%; - --radius: 0.5rem; + --radius: 0.75rem; } .dark { --background: 0 0% 3.9%; @@ -153,6 +152,317 @@ } } +@layer utilities { + .doodle-paper { + background-color: hsl(var(--background)); + background-image: + radial-gradient( + 1200px 820px at 10% 10%, + rgba(255, 214, 165, 0.55), + transparent 65% + ), + radial-gradient( + 1000px 700px at 86% 18%, + rgba(168, 231, 255, 0.4), + transparent 62% + ), + radial-gradient( + 980px 740px at 28% 92%, + rgba(255, 168, 214, 0.26), + transparent 62% + ), + radial-gradient( + 820px 580px at 72% 72%, + rgba(255, 235, 140, 0.24), + transparent 60% + ), + linear-gradient(180deg, rgba(255, 255, 255, 0.85), rgba(255, 255, 255, 0.55)); + } + + .doodle-wash { + background-image: + radial-gradient(circle at 12% 22%, rgba(255, 110, 180, 0.16), transparent 52%), + radial-gradient(circle at 88% 18%, rgba(96, 211, 255, 0.18), transparent 48%), + radial-gradient(circle at 30% 82%, rgba(121, 235, 178, 0.16), transparent 52%), + radial-gradient(circle at 72% 82%, rgba(255, 210, 120, 0.14), transparent 50%); + mix-blend-mode: multiply; + filter: blur(18px); + opacity: 0.55; + } + + .doodle-tile { + background-image: 
url("data:image/svg+xml,%3Csvg%20xmlns%3D%27http%3A//www.w3.org/2000/svg%27%20width%3D%27320%27%20height%3D%27320%27%20viewBox%3D%270%200%20320%20320%27%20fill%3D%27none%27%20stroke%3D%27%23111%27%20stroke-width%3D%273%27%20stroke-linecap%3D%27round%27%20stroke-linejoin%3D%27round%27%3E%3Ccircle%20cx%3D%2732%27%20cy%3D%2732%27%20r%3D%2710%27/%3E%3Cpath%20d%3D%27M32%206v8M32%2050v8M6%2032h8M50%2032h8M14%2014l6%206M44%2044l6%206M50%2014l-6%206M14%2050l6-6%27/%3E%3Cpath%20d%3D%27M96%2020l6%2014%2014%206-14%206-6%2014-6-14-14-6%2014-6%206-14z%27/%3E%3Cpath%20d%3D%27M144%2044l4%2010%2010%204-10%204-4%2010-4-10-10-4%2010-4%204-10z%27/%3E%3Cpath%20d%3D%27M232%2022c-8-10-24-6-24%206%200%2014%2024%2026%2024%2026s24-12%2024-26c0-12-16-16-24-6z%27/%3E%3Cpath%20d%3D%27M68%20104c0-8%208-14%2016-10%206-10%2024-6%2022%208%2010%202%2012%2016%200%2018-8%202-28%202-38-2-8-4-6-14%200-14z%27/%3E%3Cpath%20d%3D%27M200%2088l30-8%2020%2018-8%2024-26%204-18-18%202-20z%27/%3E%3Cpath%20d%3D%27M34%20170c24-18%2044-12%2062-2%27/%3E%3Cpath%20d%3D%27M90%20158l12%204-8%2010%27/%3E%3Cpath%20d%3D%27M140%20150c8-10%2024-14%2036-4-8%2012-20%2018-36%2012%200-4%202-6%204-8%27/%3E%3Cpath%20d%3D%27M220%20164l10%2018%2020%204-16%2010-4%2020-10-16-20-4%2016-10%204-20z%27/%3E%3Cpath%20d%3D%27M80%20228c-10-6-22-4-28%206%2010%208%2022%2010%2030%206%27/%3E%3Cpath%20d%3D%27M160%20230c-6-16%204-34%2022-38-16%2010-18%2032-4%2044-12%204-22%200-18-6z%27/%3E%3Cpath%20d%3D%27M230%20236l0%2030%27/%3E%3Cpath%20d%3D%27M218%20252l24%200%27/%3E%3Cpath%20d%3D%27M252%20244l12%208-12%208%27/%3E%3C/svg%3E"); + background-repeat: repeat; + background-size: 320px 320px; + mix-blend-mode: multiply; + opacity: 0.16; + } + + .doodle-dream { + background-image: + radial-gradient(circle at 20% 20%, rgba(124, 255, 250, 0.38), transparent 55%), + radial-gradient(circle at 80% 25%, rgba(255, 122, 216, 0.3), transparent 55%), + radial-gradient(circle at 45% 75%, rgba(255, 204, 102, 0.26), transparent 60%), + radial-gradient(circle at 70% 80%, rgba(116, 198, 255, 0.26), transparent 65%); + filter: blur(22px) saturate(1.15); + transform: scale(1.08); + } + + .doodle-dream--classic { + filter: blur(24px) saturate(1.05); + } + + .doodle-dream--neon { + filter: blur(18px) saturate(1.45) contrast(1.06); + } + + .doodle-vignette { + background: radial-gradient( + closest-side at 50% 44%, + transparent 68%, + rgba(32, 16, 8, 0.14) + ); + mix-blend-mode: multiply; + opacity: 0.55; + } + + .doodle-board { + background-image: + linear-gradient(180deg, rgba(255, 255, 255, 0.74), rgba(255, 255, 255, 0.52)), + radial-gradient( + 900px 600px at 20% 0%, + rgba(255, 237, 213, 0.6), + transparent 62% + ), + radial-gradient( + 900px 600px at 80% 100%, + rgba(230, 245, 255, 0.58), + transparent 62% + ); + border: 1px solid rgba(0, 0, 0, 0.09); + box-shadow: 0 28px 70px rgba(34, 18, 10, 0.18); + backdrop-filter: blur(10px); + } + + .doodle-board-grid { + background-image: + repeating-linear-gradient( + 0deg, + rgba(0, 0, 0, 0.04) 0, + rgba(0, 0, 0, 0.04) 1px, + transparent 1px, + transparent 32px + ), + repeating-linear-gradient( + 90deg, + rgba(0, 0, 0, 0.04) 0, + rgba(0, 0, 0, 0.04) 1px, + transparent 1px, + transparent 32px + ); + opacity: 0.45; + } + + .doodle-palette { + border: 1px solid rgba(0, 0, 0, 0.11); + border-radius: 24px; + background: rgba(255, 255, 255, 0.72); + box-shadow: 0 18px 50px rgba(34, 18, 10, 0.18); + padding: 14px; + backdrop-filter: blur(12px); + } + + .doodle-logo { + display: inline-flex; + align-items: center; + gap: 10px; + border: 1px solid rgba(0, 
0, 0, 0.1); + background: rgba(255, 255, 255, 0.72); + padding: 10px 14px; + border-radius: 9999px; + box-shadow: 0 8px 24px rgba(34, 18, 10, 0.12); + backdrop-filter: blur(12px); + } + + .doodle-logo__mark { + width: 12px; + height: 12px; + border-radius: 4px; + background: linear-gradient(135deg, #f97316, #ff7ad8); + box-shadow: 0 0 0 2px rgba(0, 0, 0, 0.06); + } + + .doodle-logo__text { + font-weight: 800; + letter-spacing: 0.02em; + } + + .toy-board-frame { + border-radius: 56px; + padding: 18px 18px 14px; + background: linear-gradient(180deg, #4b86ff 0%, #2e59d9 60%, #254ab8 100%); + border: 1px solid rgba(255, 255, 255, 0.18); + box-shadow: + 0 26px 70px rgba(24, 25, 58, 0.25), + inset 0 10px 22px rgba(255, 255, 255, 0.18), + inset 0 -14px 30px rgba(0, 0, 0, 0.18); + position: relative; + z-index: 0; + } + + .toy-board-frame::before { + content: ""; + position: absolute; + inset: 12px 12px auto 12px; + height: 84px; + border-radius: 48px; + background: radial-gradient( + 700px 120px at 50% 0%, + rgba(255, 255, 255, 0.22), + transparent 70% + ); + pointer-events: none; + } + + .toy-board-frame::after { + content: ""; + position: absolute; + inset: 18px 22px 10px 22px; + border-radius: 54px; + background: rgba(16, 22, 56, 0.28); + transform: translateY(22px); + filter: blur(22px); + opacity: 0.65; + z-index: -1; + pointer-events: none; + } + + .toy-board-sticker { + height: 44px; + width: 44px; + border-radius: 9999px; + border: 1px solid rgba(0, 0, 0, 0.12); + box-shadow: + 0 10px 22px rgba(0, 0, 0, 0.18), + inset 0 10px 18px rgba(255, 255, 255, 0.28); + position: relative; + transition: transform 120ms ease, box-shadow 120ms ease; + } + + .toy-board-sticker:hover { + transform: translateY(-1px); + box-shadow: + 0 14px 26px rgba(0, 0, 0, 0.2), + inset 0 10px 18px rgba(255, 255, 255, 0.28); + } + + .toy-board-sticker__shine { + position: absolute; + inset: 9px 10px auto 10px; + height: 14px; + border-radius: 9999px; + background: rgba(255, 255, 255, 0.38); + transform: rotate(-14deg); + pointer-events: none; + } + + .toy-board-bezel { + border-radius: 48px; + padding: 12px; + background: linear-gradient(180deg, rgba(255, 255, 255, 0.88), rgba(233, 238, 248, 0.9)); + border: 1px solid rgba(0, 0, 0, 0.08); + box-shadow: + inset 0 16px 28px rgba(255, 255, 255, 0.5), + inset 0 -18px 26px rgba(0, 0, 0, 0.12); + } + + .toy-board-screen { + border-radius: 38px; + background: linear-gradient(180deg, rgba(248, 248, 248, 0.95), rgba(236, 236, 236, 0.95)); + border: 1px solid rgba(0, 0, 0, 0.1); + box-shadow: + inset 0 2px 0 rgba(255, 255, 255, 0.7), + inset 0 -18px 40px rgba(0, 0, 0, 0.18); + position: relative; + } + + .toy-board-pen-slot { + display: flex; + align-items: center; + justify-content: center; + } + + .toy-board-pen-slot__well { + width: 100%; + height: 100%; + border-radius: 44px; + background: linear-gradient(180deg, #2b56d3, #2244b6 75%, #1a3795); + border: 1px solid rgba(0, 0, 0, 0.14); + box-shadow: + inset 0 16px 24px rgba(255, 255, 255, 0.16), + inset 0 -18px 30px rgba(0, 0, 0, 0.22); + position: relative; + overflow: hidden; + } + + .toy-board-pen-slot__cord { + position: absolute; + left: 8px; + top: 34px; + width: 28px; + height: 10px; + border-radius: 9999px; + background: rgba(255, 255, 255, 0.22); + box-shadow: 0 6px 18px rgba(0, 0, 0, 0.18); + transform: rotate(6deg); + } + + .toy-board-pen-slot__stylus { + position: absolute; + inset: 0; + display: flex; + align-items: center; + justify-content: center; + padding-top: 10px; + } + + .toy-board-bottom { + position: 
relative; + margin-top: 14px; + height: 44px; + border-radius: 9999px; + background: linear-gradient(180deg, rgba(28, 54, 148, 0.55), rgba(12, 22, 70, 0.28)); + box-shadow: + inset 0 10px 18px rgba(255, 255, 255, 0.12), + inset 0 -14px 22px rgba(0, 0, 0, 0.25); + } + + .toy-board-bottom__rail { + position: absolute; + left: 16px; + right: 66px; + top: 16px; + height: 12px; + border-radius: 9999px; + background: rgba(11, 18, 32, 0.22); + box-shadow: inset 0 3px 8px rgba(0, 0, 0, 0.25); + } + + .toy-board-bottom__knob { + position: absolute; + right: 14px; + top: 8px; + height: 28px; + width: 44px; + border-radius: 9999px; + background: linear-gradient(180deg, #ffe16f, #ffb000); + border: 1px solid rgba(0, 0, 0, 0.18); + box-shadow: + 0 10px 18px rgba(0, 0, 0, 0.22), + inset 0 10px 14px rgba(255, 255, 255, 0.26); + } +} + /* Custom Scrollbar Styles */ ::-webkit-scrollbar { width: 10px; diff --git a/ai_agents/playground/src/app/layout.tsx b/ai_agents/playground/src/app/layout.tsx index 34c44e706a..43d75fef25 100644 --- a/ai_agents/playground/src/app/layout.tsx +++ b/ai_agents/playground/src/app/layout.tsx @@ -45,14 +45,14 @@ export default function RootLayout({ > */} {children} {/* */} - + diff --git a/ai_agents/playground/src/app/page.tsx b/ai_agents/playground/src/app/page.tsx index ba00217245..6d24891c36 100644 --- a/ai_agents/playground/src/app/page.tsx +++ b/ai_agents/playground/src/app/page.tsx @@ -1,99 +1,13 @@ "use client"; -import { IMicrophoneAudioTrack } from "agora-rtc-sdk-ng"; -import dynamic from "next/dynamic"; import React from "react"; -import { EMobileActiveTab, useAppSelector, useIsCompactLayout } from "@/common"; -import Avatar from "@/components/Agent/AvatarTrulience"; import AuthInitializer from "@/components/authInitializer"; -import Action from "@/components/Layout/Action"; -import Header from "@/components/Layout/Header"; -import { cn } from "@/lib/utils"; -import { type IRtcUser, IUserTracks } from "@/manager"; - -const DynamicRTCCard = dynamic(() => import("@/components/Dynamic/RTCCard"), { - ssr: false, -}); -const DynamicChatCard = dynamic(() => import("@/components/Chat/ChatCard"), { - ssr: false, -}); +import ImmersiveShell from "@/components/Doodler/ImmersiveShell"; export default function Home() { - const mobileActiveTab = useAppSelector( - (state) => state.global.mobileActiveTab - ); - const trulienceSettings = useAppSelector( - (state) => state.global.trulienceSettings - ); - - const isCompactLayout = useIsCompactLayout(); - const useTrulienceAvatar = trulienceSettings.enabled; - const avatarInLargeWindow = trulienceSettings.avatarDesktopLargeWindow; - const [remoteuser, setRemoteUser] = React.useState(); - - React.useEffect(() => { - const { rtcManager } = require("../manager/rtc/rtc"); - rtcManager.on("remoteUserChanged", onRemoteUserChanged); - return () => { - rtcManager.off("remoteUserChanged", onRemoteUserChanged); - }; - }, []); - - const onRemoteUserChanged = (user: IRtcUser) => { - if (useTrulienceAvatar) { - user.audioTrack?.stop(); - } - if (user.audioTrack) { - setRemoteUser(user); - } - }; - return ( -
-
- -
- - - {(!useTrulienceAvatar || isCompactLayout || !avatarInLargeWindow) && ( - - )} - - {useTrulienceAvatar && avatarInLargeWindow && ( -
- -
- )} -
-
+    <AuthInitializer>
+      <ImmersiveShell />
+    </AuthInitializer>
); } diff --git a/ai_agents/playground/src/components/Doodle/DoodlePanel.tsx b/ai_agents/playground/src/components/Doodle/DoodlePanel.tsx new file mode 100644 index 0000000000..5b6efea302 --- /dev/null +++ b/ai_agents/playground/src/components/Doodle/DoodlePanel.tsx @@ -0,0 +1,144 @@ +"use client"; + +import * as React from "react"; +import { useAppSelector } from "@/common"; +import { Button } from "@/components/ui/button"; +import { cn } from "@/lib/utils"; +import { rtmManager } from "@/manager/rtm"; +import { EMessageDataType, type IChatItem } from "@/types"; + +const STYLES = ["cartoon", "watercolor", "crayon", "pixel art"]; + +export default function DoodlePanel(props: { className?: string }) { + const { className } = props; + const chatItems = useAppSelector((state) => state.global.chatItems); + const options = useAppSelector((state) => state.global.options); + const agentConnected = useAppSelector((state) => state.global.agentConnected); + const rtmConnected = useAppSelector((state) => state.global.rtmConnected); + + const images = React.useMemo( + () => chatItems.filter((i) => i.data_type === EMessageDataType.IMAGE), + [chatItems] + ); + const [selectedIndex, setSelectedIndex] = React.useState( + images.length ? images.length - 1 : -1 + ); + React.useEffect(() => { + if (images.length) { + setSelectedIndex(images.length - 1); + } + }, [images.length]); + + const disableControls = + !options.channel || + !options.userId || + !options.appId || + !options.token || + !rtmConnected || + !agentConnected; + + const current: IChatItem | undefined = + selectedIndex >= 0 ? images[selectedIndex] : undefined; + + const [refine, setRefine] = React.useState(""); + + const handleRefineSubmit = (e: React.FormEvent) => { + e.preventDefault(); + if (!refine || disableControls) return; + rtmManager.sendText(refine); + setRefine(""); + }; + + const handleStyleClick = (style: string) => { + if (disableControls) return; + rtmManager.sendText(`Make the picture ${style} style`); + }; + + return ( +
+
+

Doodle Panel

+
+ {STYLES.map((s) => ( + + ))} +
+
+ +
+ {current ? ( + current doodle + ) : ( +

+ No doodles yet. Describe your idea to start! +

+ )} +
+ +
+ {images.map((img, idx) => ( + + ))} +
+ +
+ setRefine(e.target.value)} + className={cn( + "grow rounded-md border bg-background p-1.5 focus:outline-hidden focus:ring-1 focus:ring-ring", + { + ["cursor-not-allowed"]: disableControls, + } + )} + disabled={disableControls} + /> + +
+
+ ); +} diff --git a/ai_agents/playground/src/components/Doodler/AppShell.tsx b/ai_agents/playground/src/components/Doodler/AppShell.tsx new file mode 100644 index 0000000000..5739018602 --- /dev/null +++ b/ai_agents/playground/src/components/Doodler/AppShell.tsx @@ -0,0 +1,161 @@ +"use client"; + +import * as React from "react"; +import { useAppDispatch, useAppSelector } from "@/common"; +import { cn } from "@/lib/utils"; +import { + addChatItem, + setOptions, + setRoomConnected, +} from "@/store/reducers/global"; +import { EMessageDataType, EMessageType, type IChatItem } from "@/types"; +import DoodleCanvas from "./Canvas"; +import ControlsBar from "./ControlsBar"; +import LoadingAnimator from "./LoadingAnimator"; + +export default function AppShell() { + const dispatch = useAppDispatch(); + const options = useAppSelector((s) => s.global.options); + const [channel, setChannel] = React.useState( + options.channel || "voice_image_kids" + ); + const [userId, setUserId] = React.useState( + options.userId || Math.floor(100000 + Math.random() * 900000) + ); + const rtcRef = React.useRef(null); + + React.useEffect(() => { + let mounted = true; + import("@/manager/rtc/rtc").then((m) => { + if (mounted) rtcRef.current = m.rtcManager; + }); + return () => { + mounted = false; + }; + }, []); + + const connect = async () => { + const { apiStartService } = await import("@/common"); + await apiStartService({ + channel, + userId, + graphName: "voice_image_kids", + language: "en-US", + voiceType: "female", + }); + const rtc = rtcRef.current; + rtc.on("textChanged", (text: IChatItem) => dispatch(addChatItem(text))); + rtc.on("localTracksChanged", () => {}); + rtc.on("remoteUserChanged", () => {}); + await rtc.createMicrophoneAudioTrack(); + await rtc.join({ channel, userId }); + dispatch( + setOptions({ + ...options, + channel, + userId, + appId: rtc.appId ?? "", + token: rtc.token ?? "", + }) + ); + await rtc.publish(); + dispatch(setRoomConnected(true)); + }; + + const disconnect = async () => { + await rtcRef.current?.destroy(); + dispatch(setRoomConnected(false)); + }; + + return ( +
+
+

+ Doodler +

+
+ setChannel(e.target.value)} + aria-label="Channel" + className={cn( + "rounded-lg border border-[#2b2e35] bg-[#121316] px-3 py-2 text-sm text-white" + )} + /> + setUserId(Number(e.target.value))} + aria-label="User ID" + className={cn( + "rounded-lg border border-[#2b2e35] bg-[#121316] px-3 py-2 text-sm text-white" + )} + /> + + + + Kid Mode + +
+
+
+
+ + +
+ +
+
+ ); +} diff --git a/ai_agents/playground/src/components/Doodler/BoardStage.tsx b/ai_agents/playground/src/components/Doodler/BoardStage.tsx new file mode 100644 index 0000000000..0a20b73e91 --- /dev/null +++ b/ai_agents/playground/src/components/Doodler/BoardStage.tsx @@ -0,0 +1,228 @@ +"use client"; + +import * as React from "react"; +import { AnimatePresence, motion } from "framer-motion"; +import { cn } from "@/lib/utils"; +import type { DoodlePhase } from "./MagicCanvasBackground"; + +const STICKERS = [ + { bg: "bg-[#BFA2FF]", ring: "ring-[#5C3DDE]" }, + { bg: "bg-[#9EE7B2]", ring: "ring-[#1C8B43]" }, + { bg: "bg-[#9DD8FF]", ring: "ring-[#1D6FE2]" }, + { bg: "bg-[#FFB3C7]", ring: "ring-[#E72D6A]" }, +]; + +function SurfaceTexture() { + const patternId = React.useId(); + return ( + + + + + + + + + + ); +} + +function Stylus() { + return ( + + + + + + + ); +} + +function ToyStylusAnimator(props: { + phase?: DoodlePhase; + reducedMotion: boolean; +}) { + const { phase, reducedMotion } = props; + + if (reducedMotion) return null; + + const active = Boolean(phase && phase !== "idle"); + const looping = phase === "queued" || phase === "sketch" || phase === "color"; + const anim = React.useMemo(() => { + if (!looping) { + return { + left: "82%", + top: "62%", + rotate: 18, + }; + } + return { + left: ["86%", "44%", "70%", "52%", "78%"], + top: ["56%", "38%", "68%", "54%", "34%"], + rotate: [18, 6, 24, 10, 22], + }; + }, [looping]); + + return ( + + {active ? ( + + + + ) : null} + + ); +} + +export default function BoardStage(props: { + imageUrl?: string; + caption?: string; + className?: string; + overlay?: React.ReactNode; + phase?: DoodlePhase; + reducedMotion?: boolean; +}) { + const { imageUrl, caption, className, overlay, phase, reducedMotion = false } = + props; + const drawingActive = Boolean(phase && phase !== "idle"); + + return ( +
+
+
+
+ {STICKERS.map((s, idx) => ( + + ))} +
+ +
+
+ +
+ +
+ + {imageUrl ? ( + +
+ {caption +
+
+ ) : ( + +
+ + Say it out loud or type a prompt to start doodling. +
+

+ Your latest creation shows up here—like a magnetic doodle board. +

+
+ )} +
+
+ +
+ {overlay} + +
+
+
+ +
+
+
+
+ +
+
+
+
+ +
+
+
+
+
+
+ ); +} diff --git a/ai_agents/playground/src/components/Doodler/Canvas.tsx b/ai_agents/playground/src/components/Doodler/Canvas.tsx new file mode 100644 index 0000000000..3dd2609725 --- /dev/null +++ b/ai_agents/playground/src/components/Doodler/Canvas.tsx @@ -0,0 +1,38 @@ +"use client"; + +import * as React from "react"; +import { useAppSelector } from "@/common"; +import { cn } from "@/lib/utils"; +import { EMessageDataType, type IChatItem } from "@/types"; + +export default function DoodleCanvas() { + const chatItems = useAppSelector((s) => s.global.chatItems); + const images = React.useMemo( + () => chatItems.filter((i) => i.data_type === EMessageDataType.IMAGE), + [chatItems] + ); + const current: IChatItem | undefined = images.length + ? images[images.length - 1] + : undefined; + return ( +
+ {current ? ( + doodle + ) : ( +

+ Describe your idea to start! +

+ )} +
+ ); +} diff --git a/ai_agents/playground/src/components/Doodler/ControlsBar.tsx b/ai_agents/playground/src/components/Doodler/ControlsBar.tsx new file mode 100644 index 0000000000..9ef385226d --- /dev/null +++ b/ai_agents/playground/src/components/Doodler/ControlsBar.tsx @@ -0,0 +1,94 @@ +"use client"; + +import * as React from "react"; +import { useAppSelector } from "@/common"; +import { cn } from "@/lib/utils"; + +const STYLES = ["cartoon", "crayon", "watercolor"]; + +export default function ControlsBar() { + const options = useAppSelector((s) => s.global.options); + const agentConnected = useAppSelector((s) => s.global.agentConnected); + const rtmConnected = useAppSelector((s) => s.global.rtmConnected); + const disable = + !options.channel || !options.userId || !rtmConnected || !agentConnected; + const [val, setVal] = React.useState(""); + const rtmRef = React.useRef(null); + + React.useEffect(() => { + let mounted = true; + // Dynamically import to avoid SSR evaluating window-dependent code + import("@/manager/rtm") + .then((m) => { + if (mounted) rtmRef.current = m.rtmManager; + }) + .catch(() => {}); + return () => { + mounted = false; + }; + }, []); + + const submit = (e: React.FormEvent) => { + e.preventDefault(); + if (!val || disable) return; + rtmRef.current?.sendText(val); + setVal(""); + }; + + const styleClick = (s: string) => { + if (disable) return; + rtmRef.current?.sendText(`Make it ${s} style`); + }; + + return ( +
+
+ {STYLES.map((s) => ( + + ))} +
+
+ setVal(e.target.value)} + placeholder="Tell Doodler what to draw" + className={cn( + "grow rounded-xl border border-[#2b2e35] bg-[#15161a] p-3 text-white", + { ["cursor-not-allowed"]: disable } + )} + aria-label="Prompt input" + disabled={disable} + /> + +
+
+ Sounds can be toggled in Settings. +
+
+ ); +} diff --git a/ai_agents/playground/src/components/Doodler/FloatingDoodles.tsx b/ai_agents/playground/src/components/Doodler/FloatingDoodles.tsx new file mode 100644 index 0000000000..235bfc4a0e --- /dev/null +++ b/ai_agents/playground/src/components/Doodler/FloatingDoodles.tsx @@ -0,0 +1,697 @@ +"use client"; + +import * as React from "react"; +import { motion } from "framer-motion"; +import { cn } from "@/lib/utils"; + +type DoodleItem = { + key: string; + left: string; + top: string; + size: number; + rotate: number; + opacity: number; + color: string; + variant: + | "star" + | "cloud" + | "spark" + | "swirl" + | "heart" + | "bolt" + | "sun" + | "flower" + | "arrow" + | "gift" + | "moon" + | "leaf"; + duration: number; + delay: number; +}; + +function Star(props: { color: string }) { + const { color } = props; + return ( + + + + ); +} + +function Cloud(props: { color: string }) { + const { color } = props; + return ( + + + + ); +} + +function Spark(props: { color: string }) { + const { color } = props; + return ( + + + + + ); +} + +function Swirl(props: { color: string }) { + const { color } = props; + return ( + + + + ); +} + +function Heart(props: { color: string }) { + const { color } = props; + return ( + + + + ); +} + +function Bolt(props: { color: string }) { + const { color } = props; + return ( + + + + ); +} + +function Sun(props: { color: string }) { + const { color } = props; + return ( + + + + + ); +} + +function Flower(props: { color: string }) { + const { color } = props; + return ( + + + + + + + + + ); +} + +function Arrow(props: { color: string }) { + const { color } = props; + return ( + + + + + ); +} + +function Gift(props: { color: string }) { + const { color } = props; + return ( + + + + + + ); +} + +function Moon(props: { color: string }) { + const { color } = props; + return ( + + + + ); +} + +function Leaf(props: { color: string }) { + const { color } = props; + return ( + + + + + ); +} + +function DoodleSvg(props: { variant: DoodleItem["variant"]; color: string }) { + const { variant, color } = props; + switch (variant) { + case "star": + return ; + case "cloud": + return ; + case "spark": + return ; + case "heart": + return ; + case "bolt": + return ; + case "sun": + return ; + case "flower": + return ; + case "arrow": + return ; + case "gift": + return ; + case "moon": + return ; + case "leaf": + return ; + default: + return ; + } +} + +const INK = "rgba(18, 18, 18, 0.7)"; +const INK_SOFT = "rgba(18, 18, 18, 0.55)"; + +const DEFAULT_DOODLES: DoodleItem[] = [ + { + key: "star-1", + left: "10%", + top: "18%", + size: 64, + rotate: -12, + opacity: 0.32, + color: INK, + variant: "star", + duration: 18, + delay: 0, + }, + { + key: "cloud-1", + left: "22%", + top: "66%", + size: 92, + rotate: 6, + opacity: 0.24, + color: INK_SOFT, + variant: "cloud", + duration: 22, + delay: 1.2, + }, + { + key: "swirl-1", + left: "74%", + top: "24%", + size: 90, + rotate: 10, + opacity: 0.26, + color: INK_SOFT, + variant: "swirl", + duration: 20, + delay: 0.6, + }, + { + key: "spark-1", + left: "86%", + top: "62%", + size: 58, + rotate: 18, + opacity: 0.3, + color: INK, + variant: "spark", + duration: 16, + delay: 1.8, + }, + { + key: "heart-1", + left: "44%", + top: "14%", + size: 54, + rotate: -6, + opacity: 0.24, + color: INK_SOFT, + variant: "heart", + duration: 19, + delay: 0.3, + }, + { + key: "bolt-1", + left: "56%", + top: "78%", + size: 64, + rotate: -18, + opacity: 0.28, + color: INK, + variant: "bolt", + duration: 24, + delay: 2.2, + }, + { + key: "spark-2", + left: 
"6%", + top: "44%", + size: 52, + rotate: 8, + opacity: 0.26, + color: INK_SOFT, + variant: "spark", + duration: 17, + delay: 0.9, + }, + { + key: "cloud-2", + left: "78%", + top: "6%", + size: 82, + rotate: -8, + opacity: 0.2, + color: INK_SOFT, + variant: "cloud", + duration: 21, + delay: 1.5, + }, + { + key: "star-2", + left: "64%", + top: "44%", + size: 50, + rotate: 16, + opacity: 0.28, + color: INK, + variant: "star", + duration: 14, + delay: 0.4, + }, + { + key: "heart-2", + left: "34%", + top: "78%", + size: 48, + rotate: 12, + opacity: 0.24, + color: INK_SOFT, + variant: "heart", + duration: 18, + delay: 2.4, + }, + { + key: "swirl-2", + left: "30%", + top: "32%", + size: 72, + rotate: -14, + opacity: 0.22, + color: INK_SOFT, + variant: "swirl", + duration: 23, + delay: 1.1, + }, + { + key: "bolt-2", + left: "90%", + top: "38%", + size: 46, + rotate: 22, + opacity: 0.22, + color: INK, + variant: "bolt", + duration: 15, + delay: 2.8, + }, + { + key: "sun-1", + left: "6%", + top: "6%", + size: 72, + rotate: 8, + opacity: 0.26, + color: INK, + variant: "sun", + duration: 20, + delay: 0.7, + }, + { + key: "flower-1", + left: "62%", + top: "8%", + size: 70, + rotate: -6, + opacity: 0.22, + color: INK_SOFT, + variant: "flower", + duration: 22, + delay: 1.9, + }, + { + key: "arrow-1", + left: "18%", + top: "52%", + size: 80, + rotate: 6, + opacity: 0.24, + color: INK, + variant: "arrow", + duration: 19, + delay: 2.6, + }, + { + key: "gift-1", + left: "86%", + top: "20%", + size: 62, + rotate: 12, + opacity: 0.22, + color: INK_SOFT, + variant: "gift", + duration: 18, + delay: 1.3, + }, + { + key: "moon-1", + left: "8%", + top: "80%", + size: 70, + rotate: -8, + opacity: 0.24, + color: INK, + variant: "moon", + duration: 21, + delay: 0.8, + }, + { + key: "leaf-1", + left: "72%", + top: "70%", + size: 76, + rotate: 14, + opacity: 0.2, + color: INK_SOFT, + variant: "leaf", + duration: 24, + delay: 2.2, + }, + { + key: "sun-2", + left: "84%", + top: "78%", + size: 56, + rotate: -6, + opacity: 0.2, + color: INK_SOFT, + variant: "sun", + duration: 18, + delay: 1.6, + }, + { + key: "flower-2", + left: "16%", + top: "30%", + size: 60, + rotate: 10, + opacity: 0.22, + color: INK, + variant: "flower", + duration: 19, + delay: 2.1, + }, + { + key: "arrow-2", + left: "40%", + top: "6%", + size: 78, + rotate: -12, + opacity: 0.24, + color: INK, + variant: "arrow", + duration: 20, + delay: 0.5, + }, + { + key: "gift-2", + left: "6%", + top: "70%", + size: 54, + rotate: 8, + opacity: 0.2, + color: INK_SOFT, + variant: "gift", + duration: 21, + delay: 1.9, + }, + { + key: "moon-2", + left: "46%", + top: "88%", + size: 60, + rotate: 12, + opacity: 0.2, + color: INK_SOFT, + variant: "moon", + duration: 23, + delay: 2.5, + }, + { + key: "leaf-2", + left: "92%", + top: "50%", + size: 62, + rotate: -10, + opacity: 0.18, + color: INK_SOFT, + variant: "leaf", + duration: 19, + delay: 0.9, + }, + { + key: "spark-3", + left: "12%", + top: "54%", + size: 46, + rotate: 18, + opacity: 0.26, + color: INK, + variant: "spark", + duration: 16, + delay: 2.7, + }, + { + key: "star-3", + left: "52%", + top: "38%", + size: 44, + rotate: -8, + opacity: 0.22, + color: INK_SOFT, + variant: "star", + duration: 17, + delay: 1.4, + }, + { + key: "cloud-3", + left: "28%", + top: "10%", + size: 70, + rotate: 6, + opacity: 0.18, + color: INK_SOFT, + variant: "cloud", + duration: 22, + delay: 0.8, + }, + { + key: "bolt-3", + left: "68%", + top: "54%", + size: 52, + rotate: 16, + opacity: 0.24, + color: INK, + variant: "bolt", + 
duration: 18, + delay: 2.3, + }, + { + key: "heart-3", + left: "22%", + top: "86%", + size: 44, + rotate: -12, + opacity: 0.2, + color: INK_SOFT, + variant: "heart", + duration: 19, + delay: 2.9, + }, + { + key: "swirl-3", + left: "82%", + top: "32%", + size: 68, + rotate: 10, + opacity: 0.2, + color: INK_SOFT, + variant: "swirl", + duration: 24, + delay: 1.7, + }, +]; + +export default function FloatingDoodles(props: { reducedMotion: boolean }) { + const { reducedMotion } = props; + + return ( +
+ {DEFAULT_DOODLES.map((d) => { + const Comp = ( +
+ +
+ ); + + return reducedMotion ? ( +
+ {Comp} +
+ ) : ( + + {Comp} + + ); + })} +
+ ); +} diff --git a/ai_agents/playground/src/components/Doodler/ImmersiveShell.tsx b/ai_agents/playground/src/components/Doodler/ImmersiveShell.tsx new file mode 100644 index 0000000000..9b09c14249 --- /dev/null +++ b/ai_agents/playground/src/components/Doodler/ImmersiveShell.tsx @@ -0,0 +1,540 @@ +"use client"; + +import * as React from "react"; +import { motion } from "framer-motion"; +import { toast } from "sonner"; +import { useAppDispatch, useAppSelector, useMultibandTrackVolume } from "@/common"; +import { Button } from "@/components/ui/button"; +import { + Select, + SelectContent, + SelectItem, + SelectTrigger, +} from "@/components/ui/select"; +import { cn } from "@/lib/utils"; +import { + addChatItem, + setAgentConnected, + setOptions, + setRoomConnected, + setRtmConnected, +} from "@/store/reducers/global"; +import { EMessageDataType, EMessageType, type IChatItem } from "@/types"; +import BoardStage from "./BoardStage"; +import MagicCanvasBackground, { + type CreativeMode, + type DoodlePhase, +} from "./MagicCanvasBackground"; +import MagicPenAnimator from "./MagicPenAnimator"; +import MouseTrail from "./MouseTrail"; +import TranscriptPanel from "./TranscriptPanel"; +import type { IMicrophoneAudioTrack } from "agora-rtc-sdk-ng"; + +function usePrefersReducedMotion() { + const [reduced, setReduced] = React.useState(false); + React.useEffect(() => { + const mql = window.matchMedia("(prefers-reduced-motion: reduce)"); + setReduced(mql.matches); + const onChange = () => setReduced(mql.matches); + mql.addEventListener("change", onChange); + return () => mql.removeEventListener("change", onChange); + }, []); + return reduced; +} + +function getLatestImage(chatItems: IChatItem[]) { + const images = chatItems.filter((i) => i.data_type === EMessageDataType.IMAGE); + return images.length ? images[images.length - 1] : undefined; +} + +function getLastTime(chatItems: IChatItem[], predicate: (i: IChatItem) => boolean) { + for (let idx = chatItems.length - 1; idx >= 0; idx -= 1) { + const item = chatItems[idx]; + if (predicate(item)) return item.time ?? 
0; + } + return 0; +} + +export default function ImmersiveShell() { + const dispatch = useAppDispatch(); + const reducedMotion = usePrefersReducedMotion(); + + const options = useAppSelector((s) => s.global.options); + const roomConnected = useAppSelector((s) => s.global.roomConnected); + const agentConnected = useAppSelector((s) => s.global.agentConnected); + const rtmConnected = useAppSelector((s) => s.global.rtmConnected); + const chatItems = useAppSelector((s) => s.global.chatItems); + + const mode: CreativeMode = "classic"; + + const [channel, setChannel] = React.useState(options.channel || "voice_image_kids"); + const [userId, setUserId] = React.useState( + options.userId || Math.floor(100000 + Math.random() * 900000) + ); + + const rtcRef = React.useRef(null); + const rtmRef = React.useRef(null); + const [connecting, setConnecting] = React.useState(false); + const [micTrack, setMicTrack] = React.useState(); + const [micMediaTrack, setMicMediaTrack] = React.useState(); + const [micMuted, setMicMuted] = React.useState(false); + const [micDevices, setMicDevices] = React.useState< + { label: string; value: string; deviceId: string }[] + >([{ label: "Default microphone", value: "default", deviceId: "" }]); + const [micValue, setMicValue] = React.useState("default"); + const [boardHeight, setBoardHeight] = React.useState(null); + + React.useEffect(() => { + if (roomConnected) return; + if (options.channel) setChannel(options.channel); + if (options.userId) setUserId(options.userId); + }, [options.channel, options.userId, roomConnected]); + + React.useEffect(() => { + let mounted = true; + import("@/manager/rtc/rtc").then((m) => { + if (mounted) rtcRef.current = m.rtcManager; + }); + import("@/manager/rtm").then((m) => { + if (mounted) rtmRef.current = m.rtmManager; + }); + return () => { + mounted = false; + }; + }, []); + + const onTextChanged = React.useCallback( + (text: IChatItem) => dispatch(addChatItem(text)), + [dispatch] + ); + + const latestImage = React.useMemo(() => getLatestImage(chatItems), [chatItems]); + const lastUserTime = React.useMemo( + () => + getLastTime( + chatItems, + (i) => i.type === EMessageType.USER && i.data_type === EMessageDataType.TEXT + ), + [chatItems] + ); + const lastImageTime = latestImage?.time ?? 
0; + const isGenerating = lastUserTime > lastImageTime; + + const [phase, setPhase] = React.useState("idle"); + React.useEffect(() => { + if (isGenerating) { + setPhase("queued"); + const t1 = window.setTimeout(() => setPhase("sketch"), 450); + const t2 = window.setTimeout(() => setPhase("color"), 1550); + return () => { + window.clearTimeout(t1); + window.clearTimeout(t2); + }; + } + + if (lastImageTime > 0 && lastImageTime >= lastUserTime) { + setPhase("complete"); + const t = window.setTimeout(() => setPhase("idle"), 1700); + return () => window.clearTimeout(t); + } + + setPhase("idle"); + return; + }, [isGenerating, lastImageTime, lastUserTime]); + + const canConnect = channel.trim().length > 0 && userId > 0; + const controlsEnabled = roomConnected && agentConnected && rtmConnected; + const isBoardGenerating = + phase === "queued" || phase === "sketch" || phase === "color"; + const isConnected = roomConnected && agentConnected; + + const micBands = useMultibandTrackVolume(micMediaTrack, 10, 80, 520); + const micLevels = React.useMemo(() => { + return micBands.map((band) => { + if (!band.length) return 0; + let sum = 0; + for (let i = 0; i < band.length; i += 1) sum += band[i]; + return sum / band.length; + }); + }, [micBands]); + + React.useEffect(() => { + if (!micTrack) { + setMicMuted(false); + return; + } + micTrack.setMuted(micMuted); + }, [micMuted, micTrack]); + + React.useEffect(() => { + if (!micTrack) { + setMicDevices([{ label: "Default microphone", value: "default", deviceId: "" }]); + setMicValue("default"); + return; + } + let active = true; + const currentLabel = micTrack.getTrackLabel() || "Default microphone"; + const load = async () => { + try { + const mod = await import("agora-rtc-sdk-ng"); + if (!active) return; + const arr = await mod.default.getMicrophones(); + if (!active) return; + const items = arr.map((item, index) => { + const label = item.label?.trim() || `Microphone ${index + 1}`; + return { + label, + value: item.deviceId || label, + deviceId: item.deviceId, + }; + }); + setMicDevices([ + { label: "Default microphone", value: "default", deviceId: "" }, + ...items, + ]); + const found = items.find((item) => item.label === currentLabel); + setMicValue(found?.value ?? "default"); + } catch { + if (!active) return; + setMicDevices([{ label: "Default microphone", value: "default", deviceId: "" }]); + setMicValue("default"); + } + }; + load(); + return () => { + active = false; + }; + }, [micTrack]); + + const connect = async () => { + if (!canConnect) { + toast.error("Please enter a channel and user id."); + return; + } + + if (connecting) return; + setConnecting(true); + + try { + const { apiStartService } = await import("@/common"); + const startResp = await apiStartService({ + channel, + userId, + graphName: "voice_image_kids", + language: "en-US", + voiceType: "female", + }); + const { code, msg } = startResp || {}; + if (code != null && String(code) !== "0") { + throw new Error(msg || `Agent start failed (code=${code})`); + } + dispatch(setAgentConnected(true)); + + const rtc = rtcRef.current; + if (!rtc) { + throw new Error("RTC manager not ready yet. 
+      rtc.off("textChanged", onTextChanged);
+      rtc.on("textChanged", onTextChanged);
+      await rtc.createMicrophoneAudioTrack();
+      const track: IMicrophoneAudioTrack | undefined = rtc.localTracks?.audioTrack;
+      setMicTrack(track);
+      setMicMediaTrack(track?.getMediaStreamTrack());
+      await rtc.join({ channel, userId });
+
+      dispatch(
+        setOptions({
+          ...options,
+          channel,
+          userId,
+          appId: rtc.appId ?? "",
+          token: rtc.token ?? "",
+        })
+      );
+      await rtc.publish();
+      dispatch(setRoomConnected(true));
+
+      const rtm = rtmRef.current;
+      if (rtm?.init) {
+        await rtm.init({
+          channel,
+          userId,
+          appId: rtc.appId ?? "",
+          token: rtc.token ?? "",
+        });
+        const ok = Boolean(rtm?._client);
+        dispatch(setRtmConnected(ok));
+        if (!ok) {
+          toast.error("Connected, but messaging failed. Text prompts are disabled.");
+        }
+      }
+
+      toast.success("Doodle board connected.");
+    } catch (err: any) {
+      console.error(err);
+      try {
+        const { apiStopService } = await import("@/common");
+        await apiStopService(channel);
+      } catch {
+        // best-effort
+      }
+      toast.error(err?.message || "Failed to connect.");
+      dispatch(setAgentConnected(false));
+      dispatch(setRoomConnected(false));
+      dispatch(setRtmConnected(false));
+    } finally {
+      setConnecting(false);
+    }
+  };
+
+  const disconnect = async () => {
+    if (connecting) return;
+    setConnecting(true);
+    try {
+      const { apiStopService } = await import("@/common");
+      await apiStopService(channel);
+    } catch {
+      // best-effort
+    }
+    try {
+      await rtmRef.current?.destroy?.();
+      rtcRef.current?.off?.("textChanged", onTextChanged);
+      await rtcRef.current?.destroy?.();
+    } finally {
+      dispatch(setRtmConnected(false));
+      dispatch(setRoomConnected(false));
+      dispatch(setAgentConnected(false));
+      setMicMediaTrack(undefined);
+      setMicTrack(undefined);
+      toast.message("Disconnected.");
+      setConnecting(false);
+    }
+  };
+
+  const sendText = async (text: string) => {
+    const msg = text.trim();
+    if (!msg) return;
+    if (!controlsEnabled) {
+      toast.error("Connect the board first.");
+      return;
+    }
+    try {
+      await rtmRef.current?.sendText?.(msg);
+      dispatch(
+        addChatItem({
+          userId: options.userId || userId,
+          text: msg,
+          type: EMessageType.USER,
+          data_type: EMessageDataType.TEXT,
+          isFinal: true,
+          time: Date.now(),
+        })
+      );
+    } catch (err: any) {
+      console.error(err);
+      toast.error(err?.message || "Failed to send.");
+    }
+  };
+
+  const onMicChange = async (value: string) => {
+    setMicValue(value);
+    const target = micDevices.find((item) => item.value === value);
+    if (!target || !micTrack) return;
+    await micTrack.setDevice(target.deviceId);
+  };
+
+  const selectedMicLabel =
+    micDevices.find((item) => item.value === micValue)?.label ||
+    "Microphone";
+
+  const boardRef = React.useRef<HTMLDivElement | null>(null);
+  React.useEffect(() => {
+    if (!boardRef.current) return;
+    const node = boardRef.current;
+    let raf = 0;
+    const update = () => {
+      raf = window.requestAnimationFrame(() => {
+        const next = Math.round(node.getBoundingClientRect().height);
+        setBoardHeight((prev) => (prev === next ?
prev : next)); + }); + }; + update(); + const observer = new ResizeObserver(update); + observer.observe(node); + return () => { + observer.disconnect(); + if (raf) window.cancelAnimationFrame(raf); + }; + }, []); + + const boardPose = React.useMemo(() => { + if (reducedMotion) { + return { x: 0, rotateX: 0, rotateY: 0, scale: 1 }; + } + if (isBoardGenerating) { + return { x: -32, rotateX: 7, rotateY: -14, scale: 1.02 }; + } + return { x: 0, rotateX: 0, rotateY: 0, scale: 1 }; + }, [isBoardGenerating, reducedMotion]); + + const boardPoseTransition = React.useMemo(() => { + if (reducedMotion) return { duration: 0 }; + return { + type: "spring", + stiffness: isBoardGenerating ? 200 : 260, + damping: isBoardGenerating ? 24 : 26, + mass: 0.7, + }; + }, [isBoardGenerating, reducedMotion]); + + return ( +
+    <div className="relative min-h-screen overflow-hidden">
+      <MagicCanvasBackground phase={phase} mode={mode} reducedMotion={reducedMotion} />
+      <MouseTrail enabled={isConnected && !reducedMotion} mode={mode} />
+
+      <main className="relative z-10 mx-auto flex w-full max-w-6xl flex-col gap-6 px-4 py-8 lg:flex-row">
+        <motion.div
+          ref={boardRef}
+          animate={boardPose}
+          transition={boardPoseTransition}
+          style={{ transformPerspective: 900 }}
+          className="toy-board-screen relative min-h-[420px] flex-1 overflow-hidden"
+        >
+          {latestImage?.text ? (
+            <img
+              src={latestImage.text}
+              alt="Latest doodle"
+              className="absolute inset-0 h-full w-full object-contain"
+            />
+          ) : null}
+          <MagicPenAnimator
+            phase={phase}
+            mode={mode}
+            reducedMotion={reducedMotion}
+            showPen={isBoardGenerating}
+          />
+          <div className="pointer-events-none absolute bottom-4 left-4">
+            <LoadingAnimator />
+          </div>
+        </motion.div>
+
+        <aside className="flex w-full flex-col gap-4 lg:w-[360px]">
+          <div className="rounded-2xl border border-black/10 bg-white/80 p-4 shadow-sm backdrop-blur">
+            {!roomConnected ? (
+              <div className="flex flex-col gap-3">
+                <input
+                  value={channel}
+                  onChange={(e) => setChannel(e.target.value)}
+                  aria-label="Channel"
+                  className="rounded-lg border border-black/15 bg-white px-3 py-2 text-sm"
+                />
+                <input
+                  value={userId}
+                  onChange={(e) => setUserId(Number(e.target.value))}
+                  aria-label="User ID"
+                  className="rounded-lg border border-black/15 bg-white px-3 py-2 text-sm"
+                />
+              </div>
+            ) : (
+              <div className="flex flex-col gap-3">
+                <select
+                  value={micValue}
+                  onChange={(e) => onMicChange(e.target.value)}
+                  aria-label={selectedMicLabel}
+                  className="rounded-lg border border-black/15 bg-white px-3 py-2 text-sm"
+                >
+                  {micDevices.map((device) => (
+                    <option key={device.value} value={device.value}>
+                      {device.label}
+                    </option>
+                  ))}
+                </select>
+                <div className="flex h-8 items-end gap-1" aria-hidden>
+                  {micLevels.map((level, i) => (
+                    <span
+                      key={i}
+                      className="w-1.5 rounded-full bg-orange-400"
+                      style={{
+                        height: `${Math.max(8, Math.min(100, level * 300))}%`,
+                      }}
+                    />
+                  ))}
+                </div>
+                <button
+                  type="button"
+                  onClick={() => setMicMuted((m) => !m)}
+                  className="rounded-full border border-black/15 px-4 py-2 text-sm"
+                >
+                  {micMuted ? "Unmute mic" : "Mute mic"}
+                </button>
+              </div>
+            )}
+            <button
+              type="button"
+              onClick={roomConnected ? disconnect : connect}
+              disabled={connecting || (!roomConnected && !canConnect)}
+              className="mt-3 w-full rounded-full bg-orange-500 px-4 py-2 text-sm font-semibold text-white disabled:opacity-50"
+            >
+              {connecting ? "Working…" : roomConnected ? "Disconnect" : "Start doodling"}
+            </button>
+          </div>
+
+          <TranscriptPanel
+            className="min-h-[240px] flex-1"
+            disabled={!controlsEnabled}
+            onSend={sendText}
+            placeholder="Tell me what to doodle…"
+            style={boardHeight ? { maxHeight: boardHeight } : undefined}
+          />
+        </aside>
+      </main>
+    </div>
+ ); +} diff --git a/ai_agents/playground/src/components/Doodler/LoadingAnimator.tsx b/ai_agents/playground/src/components/Doodler/LoadingAnimator.tsx new file mode 100644 index 0000000000..41a6a07e51 --- /dev/null +++ b/ai_agents/playground/src/components/Doodler/LoadingAnimator.tsx @@ -0,0 +1,40 @@ +"use client"; + +import * as React from "react"; +import { useAppSelector } from "@/common"; +import { cn } from "@/lib/utils"; +import { EMessageDataType, EMessageType } from "@/types"; + +export default function LoadingAnimator() { + const items = useAppSelector((s) => s.global.chatItems); + const lastImageTime = React.useMemo(() => { + const img = [...items].reverse().find((i) => i.data_type === EMessageDataType.IMAGE); + return img?.time ?? 0; + }, [items]); + const lastUserTime = React.useMemo(() => { + const usr = [...items] + .reverse() + .find( + (i) => i.type === EMessageType.USER && i.data_type === EMessageDataType.TEXT + ); + return usr?.time ?? 0; + }, [items]); + const active = lastUserTime > lastImageTime; + return ( +
+    <div
+      className={cn(
+        "pointer-events-none flex items-center gap-2 transition-opacity duration-300",
+        active ? "opacity-100" : "opacity-0"
+      )}
+      aria-hidden={!active}
+    >
+      <span className="h-2.5 w-2.5 animate-bounce rounded-full bg-current" />
+      <span className="h-2.5 w-2.5 animate-bounce rounded-full bg-current [animation-delay:150ms]" />
+      <span className="h-2.5 w-2.5 animate-bounce rounded-full bg-current [animation-delay:300ms]" />
+      <span className="text-sm font-semibold">Doodling...</span>
+    </div>
+ ); +} diff --git a/ai_agents/playground/src/components/Doodler/MagicCanvasBackground.tsx b/ai_agents/playground/src/components/Doodler/MagicCanvasBackground.tsx new file mode 100644 index 0000000000..07ee01f098 --- /dev/null +++ b/ai_agents/playground/src/components/Doodler/MagicCanvasBackground.tsx @@ -0,0 +1,72 @@ +"use client"; + +import * as React from "react"; +import { motion } from "framer-motion"; +import { cn } from "@/lib/utils"; + +export type DoodlePhase = "idle" | "queued" | "sketch" | "color" | "complete"; +export type CreativeMode = "classic" | "neon"; + +export default function MagicCanvasBackground(props: { + phase: DoodlePhase; + mode: CreativeMode; + reducedMotion?: boolean; +}) { + const { phase, mode, reducedMotion = false } = props; + const isGenerating = phase === "queued" || phase === "sketch" || phase === "color"; + const showDream = phase === "complete"; + + return ( +
+    <div
+      className={cn(
+        "pointer-events-none fixed inset-0 overflow-hidden",
+        mode === "neon" ? "bg-[#05070F]" : "bg-[#FDF6E9]"
+      )}
+    >
+      <div className="doodle-tile absolute inset-0" />
+      <motion.div
+        className="absolute inset-0"
+        animate={{ opacity: isGenerating ? 1 : 0 }}
+        transition={{ duration: reducedMotion ? 0 : 0.6 }}
+        style={{
+          background:
+            mode === "neon"
+              ? "radial-gradient(circle at 50% 40%, hsla(180, 95%, 60%, 0.16), transparent 60%)"
+              : "radial-gradient(circle at 50% 40%, rgba(249, 115, 22, 0.14), transparent 60%)",
+        }}
+      />
+      <motion.div
+        className="doodle-dream absolute inset-0"
+        animate={{ opacity: showDream ? 1 : 0 }}
+        transition={{ duration: reducedMotion ? 0 : 0.8 }}
+      />
+    </div>
+  );
+}
diff --git a/ai_agents/playground/src/components/Doodler/MagicPenAnimator.tsx b/ai_agents/playground/src/components/Doodler/MagicPenAnimator.tsx
new file mode 100644
index 0000000000..04a8694655
--- /dev/null
+++ b/ai_agents/playground/src/components/Doodler/MagicPenAnimator.tsx
@@ -0,0 +1,232 @@
+"use client";
+
+import * as React from "react";
+import { AnimatePresence, motion } from "framer-motion";
+import { cn } from "@/lib/utils";
+import type { CreativeMode, DoodlePhase } from "./MagicCanvasBackground";
+
+function PenIcon(props: { mode: CreativeMode }) {
+  const { mode } = props;
+  const isNeon = mode === "neon";
+  const body = isNeon ? "#0B1220" : "#2B2116";
+  const accent = isNeon ? "#7CFFFA" : "#F97316";
+  return (
+    <svg width="44" height="44" viewBox="0 0 44 44" fill="none" aria-hidden>
+      <path d="M9 35l17-23 9 7-17 23-10 2 1-9z" fill={body} />
+      <path d="M26 12l9 7 4-6-9-7-4 6z" fill={accent} />
+      <path d="M9 35l1-9 8 7-9 2z" fill="#F4E3C2" />
+    </svg>
+  );
+}
+
+function scribbleStroke(mode: CreativeMode, phase: DoodlePhase) {
+  const isNeon = mode === "neon";
+  if (phase === "sketch") {
+    return {
+      color: isNeon ? "hsla(180, 95%, 60%, 0.75)" : "rgba(44, 33, 22, 0.45)",
+      width: isNeon ? 3.5 : 3,
+      dash: isNeon ? "10 10" : "9 10",
+      glow: isNeon,
+    };
+  }
+  if (phase === "color") {
+    return {
+      color: isNeon ? "hsla(280, 95%, 68%, 0.75)" : "rgba(249, 115, 22, 0.35)",
+      width: isNeon ? 4 : 3.4,
+      dash: "0",
+      glow: isNeon,
+    };
+  }
+  return {
+    color: isNeon ? "hsla(200, 95%, 60%, 0.7)" : "rgba(44, 33, 22, 0.35)",
+    width: isNeon ? 3.2 : 3,
+    dash: "10 10",
+    glow: isNeon,
+  };
+}
+
+export default function MagicPenAnimator(props: {
+  phase: DoodlePhase;
+  mode: CreativeMode;
+  reducedMotion: boolean;
+  showPen?: boolean;
+}) {
+  const { phase, mode, reducedMotion, showPen = true } = props;
+  const show = phase !== "idle";
+  const isNeon = mode === "neon";
+
+  const penAnimation = React.useMemo(() => {
+    if (phase === "queued") {
+      return {
+        x: ["-12%", "18%", "18%"],
+        y: ["-10%", "22%", "24%"],
+        rotate: [-10, 6, -3],
+      };
+    }
+    if (phase === "sketch") {
+      return {
+        x: ["18%", "64%", "42%", "76%", "30%", "58%"],
+        y: ["24%", "30%", "64%", "58%", "46%", "34%"],
+        rotate: [8, 22, -18, 12, -10, 18],
+      };
+    }
+    if (phase === "color") {
+      return {
+        x: ["58%", "38%", "70%", "52%"],
+        y: ["34%", "56%", "50%", "36%"],
+        rotate: [10, -6, 14, 6],
+      };
+    }
+    return {
+      x: ["52%", "66%", "110%"],
+      y: ["52%", "20%", "-30%"],
+      rotate: [8, 28, 48],
+    };
+  }, [phase]);
+
+  const penTransition = React.useMemo(() => {
+    if (phase === "queued") {
+      return { duration: 0.9, ease: [0.22, 1, 0.36, 1] };
+    }
+    if (phase === "sketch") {
+      return { duration: 1.8, ease: "easeInOut", repeat: Infinity };
+    }
+    if (phase === "color") {
+      return { duration: 2.4, ease: "easeInOut", repeat: Infinity };
+    }
+    return { duration: 0.9, ease: [0.22, 1, 0.36, 1] };
+  }, [phase]);
+
+  const stroke = scribbleStroke(mode, phase);
+
+  if (reducedMotion) return null;
+
+  return (
+    <AnimatePresence>
+      {show ? (
+        <motion.div
+          className="pointer-events-none absolute inset-0 z-20"
+          initial={{ opacity: 0 }}
+          animate={{ opacity: 1 }}
+          exit={{ opacity: 0 }}
+        >
+          <svg
+            className={cn("absolute inset-0 h-full w-full", isNeon && "mix-blend-screen")}
+            viewBox="0 0 100 100"
+            preserveAspectRatio="none"
+          >
+            {phase !== "complete" ? (
+              <>
+                <motion.path
+                  d="M16 32 C 32 18, 58 20, 72 34 S 86 60, 60 64 S 26 58, 30 42"
+                  fill="none"
+                  stroke={stroke.color}
+                  strokeWidth={stroke.width}
+                  strokeDasharray={stroke.dash}
+                  strokeLinecap="round"
+                  initial={{ pathLength: 0 }}
+                  animate={{ pathLength: 1 }}
+                  transition={{ duration: 1.8, ease: "easeInOut", repeat: Infinity }}
+                  style={stroke.glow ? { filter: `drop-shadow(0 0 6px ${stroke.color})` } : undefined}
+                />
+                <motion.path
+                  d="M24 68 C 40 76, 64 74, 78 58"
+                  fill="none"
+                  stroke={stroke.color}
+                  strokeWidth={stroke.width * 0.8}
+                  strokeDasharray={stroke.dash}
+                  strokeLinecap="round"
+                  initial={{ pathLength: 0 }}
+                  animate={{ pathLength: 1 }}
+                  transition={{ duration: 2.2, ease: "easeInOut", repeat: Infinity, delay: 0.2 }}
+                  style={stroke.glow ? { filter: `drop-shadow(0 0 6px ${stroke.color})` } : undefined}
+                />
+              </>
+            ) : (
+              <>
+                <motion.circle
+                  cx="50"
+                  cy="50"
+                  r="6"
+                  fill="none"
+                  stroke={stroke.color}
+                  strokeWidth={stroke.width}
+                  initial={{ scale: 0.4, opacity: 1 }}
+                  animate={{ scale: 3, opacity: 0 }}
+                  transition={{ duration: 0.8, ease: "easeOut" }}
+                />
+                {Array.from({ length: 12 }).map((_, i) => (
+                  <motion.circle
+                    key={i}
+                    cx={50 + 28 * Math.cos((i / 12) * Math.PI * 2)}
+                    cy={50 + 28 * Math.sin((i / 12) * Math.PI * 2)}
+                    r="1.6"
+                    fill={stroke.color}
+                    initial={{ scale: 0, opacity: 1 }}
+                    animate={{ scale: 1.4, opacity: 0 }}
+                    transition={{ duration: 0.7, ease: "easeOut", delay: i * 0.03 }}
+                  />
+                ))}
+              </>
+            )}
+          </svg>
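+
+          {/* Pen sprite: driven by the phase-keyed penAnimation / penTransition
+              keyframes above; hidden entirely when showPen is false. */}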
+          {showPen ? (
+            <motion.div
+              className="absolute inset-0"
+              animate={penAnimation}
+              transition={penTransition}
+            >
+              <PenIcon mode={mode} />
+            </motion.div>
+          ) : null}
+        </motion.div>
+      ) : null}
+    </AnimatePresence>
+  );
+}
diff --git a/ai_agents/playground/src/components/Doodler/MouseTrail.tsx b/ai_agents/playground/src/components/Doodler/MouseTrail.tsx
new file mode 100644
index 0000000000..133a7bd3a8
--- /dev/null
+++ b/ai_agents/playground/src/components/Doodler/MouseTrail.tsx
@@ -0,0 +1,152 @@
+"use client";
+
+import * as React from "react";
+
+type CreativeMode = "classic" | "neon";
+
+type Particle = {
+  x: number;
+  y: number;
+  vx: number;
+  vy: number;
+  r: number;
+  life: number;
+  ttl: number;
+  hue: number;
+};
+
+export default function MouseTrail(props: {
+  enabled: boolean;
+  mode: CreativeMode;
+}) {
+  const { enabled, mode } = props;
+  const canvasRef = React.useRef<HTMLCanvasElement | null>(null);
+  const particlesRef = React.useRef<Particle[]>([]);
+  const rafRef = React.useRef<number | null>(null);
+  const pointerRef = React.useRef<{ x: number; y: number; t: number } | null>(
+    null
+  );
+
+  React.useEffect(() => {
+    if (!enabled) {
+      particlesRef.current = [];
+      return;
+    }
+    const canvas = canvasRef.current;
+    if (!canvas) return;
+    const ctx = canvas.getContext("2d");
+    if (!ctx) return;
+
+    const dpr = () => Math.max(1, Math.min(2, window.devicePixelRatio || 1));
+
+    const resize = () => {
+      const ratio = dpr();
+      canvas.width = Math.floor(window.innerWidth * ratio);
+      canvas.height = Math.floor(window.innerHeight * ratio);
+      canvas.style.width = `${window.innerWidth}px`;
+      canvas.style.height = `${window.innerHeight}px`;
+      ctx.setTransform(ratio, 0, 0, ratio, 0, 0);
+    };
+
+    resize();
+    window.addEventListener("resize", resize);
+
+    const spawn = (x: number, y: number, speedX: number, speedY: number) => {
+      const ttl = mode === "neon" ? 520 : 340;
+      const r = mode === "neon" ? 2.2 : 1.6;
+      const hue = mode === "neon" ? 175 + Math.random() * 90 : 32;
+      particlesRef.current.push({
+        x,
+        y,
+        vx: speedX,
+        vy: speedY,
+        r: r + Math.random() * 1.2,
+        life: 0,
+        ttl,
+        hue,
+      });
+      if (particlesRef.current.length > 180) {
+        particlesRef.current.splice(0, particlesRef.current.length - 180);
+      }
+    };
+
+    const onMove = (e: PointerEvent) => {
+      const now = performance.now();
+      const last = pointerRef.current;
+      pointerRef.current = { x: e.clientX, y: e.clientY, t: now };
+      if (!last) return;
+      const dt = Math.max(8, now - last.t);
+      const dx = e.clientX - last.x;
+      const dy = e.clientY - last.y;
+      const vx = dx / dt;
+      const vy = dy / dt;
+      const dist = Math.hypot(dx, dy);
+      const count = Math.min(6, Math.max(1, Math.floor(dist / 18)));
+      for (let i = 0; i < count; i += 1) {
+        const t = (i + 1) / (count + 1);
+        spawn(last.x + dx * t, last.y + dy * t, vx * 10, vy * 10);
+      }
+    };
+
+    window.addEventListener("pointermove", onMove, { passive: true });
+
+    const tick = (now: number) => {
+      ctx.clearRect(0, 0, canvas.width, canvas.height);
+
+      const particles = particlesRef.current;
+      ctx.save();
+      ctx.globalCompositeOperation = mode === "neon" ? "lighter" : "source-over";
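+      // Neon mode composites additively ("lighter") so overlapping sparks
+      // brighten each other; classic mode keeps normal source-over ink.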
"lighter" : "source-over"; + + for (let i = particles.length - 1; i >= 0; i -= 1) { + const p = particles[i]; + p.life += 16; + const a = Math.max(0, 1 - p.life / p.ttl); + p.x += p.vx * 0.016; + p.y += p.vy * 0.016; + p.vx *= 0.92; + p.vy *= 0.92; + + if (a <= 0.02) { + particles.splice(i, 1); + continue; + } + + if (mode === "neon") { + ctx.shadowBlur = 14 * a; + ctx.shadowColor = `hsla(${p.hue}, 95%, 62%, ${0.65 * a})`; + ctx.fillStyle = `hsla(${p.hue}, 95%, 62%, ${0.55 * a})`; + } else { + ctx.shadowBlur = 0; + ctx.fillStyle = `rgba(54, 42, 28, ${0.18 * a})`; + } + + ctx.beginPath(); + ctx.arc(p.x, p.y, p.r, 0, Math.PI * 2); + ctx.fill(); + } + + ctx.restore(); + rafRef.current = window.requestAnimationFrame(tick); + }; + + rafRef.current = window.requestAnimationFrame(tick); + + return () => { + window.removeEventListener("resize", resize); + window.removeEventListener("pointermove", onMove); + if (rafRef.current) window.cancelAnimationFrame(rafRef.current); + rafRef.current = null; + particlesRef.current = []; + pointerRef.current = null; + }; + }, [enabled, mode]); + + return ( + + ); +} + diff --git a/ai_agents/playground/src/components/Doodler/TranscriptPanel.tsx b/ai_agents/playground/src/components/Doodler/TranscriptPanel.tsx new file mode 100644 index 0000000000..298400f56d --- /dev/null +++ b/ai_agents/playground/src/components/Doodler/TranscriptPanel.tsx @@ -0,0 +1,123 @@ +"use client"; + +import * as React from "react"; +import { useAppSelector, useAutoScroll } from "@/common"; +import { Button } from "@/components/ui/button"; +import { Input } from "@/components/ui/input"; +import { cn } from "@/lib/utils"; +import { EMessageDataType, EMessageType, type IChatItem } from "@/types"; + +function getTranscriptItems(items: IChatItem[]) { + return items.filter( + (i) => + i.data_type === EMessageDataType.TEXT && + typeof i.text === "string" && + i.text.trim().length > 0 + ); +} + +export default function TranscriptPanel(props: { + className?: string; + disabled?: boolean; + onSend?: (text: string) => Promise | void; + placeholder?: string; + style?: React.CSSProperties; +}) { + const { className, disabled = false, onSend, placeholder, style } = props; + const items = useAppSelector((s) => s.global.chatItems); + const transcript = React.useMemo(() => getTranscriptItems(items), [items]); + const containerRef = React.useRef(null); + const [value, setValue] = React.useState(""); + + useAutoScroll(containerRef); + + const canSend = !disabled && Boolean(onSend) && value.trim().length > 0; + + const onSubmit = async (e: React.FormEvent) => { + e.preventDefault(); + if (!canSend || !onSend) return; + await onSend(value); + setValue(""); + }; + + return ( +
+  return (
+    <div
+      className={cn(
+        "flex h-full flex-col overflow-hidden rounded-2xl border border-black/10 bg-white/80 shadow-sm backdrop-blur",
+        className
+      )}
+      style={style}
+    >
+      <div className="flex items-center justify-between border-b border-black/10 px-4 py-3">
+        <span className="text-sm font-semibold uppercase tracking-wide text-black/70">
+          Transcript
+        </span>
+        <span className="text-xs tabular-nums text-black/50">
+          {transcript.length ? `${transcript.length}` : "—"}
+        </span>
+      </div>
+
+      <div ref={containerRef} className="flex-1 overflow-y-auto px-4 py-3">
+        {transcript.length ? (
+          <div className="flex flex-col gap-3">
+            {transcript.map((item, idx) => {
+              const isUser = item.type === EMessageType.USER;
+              return (
+                <div
+                  key={`${item.time}-${idx}`}
+                  className={cn("flex", isUser ? "justify-end" : "justify-start")}
+                >
+                  <div
+                    className={cn(
+                      "max-w-[85%] rounded-2xl px-3 py-2",
+                      isUser ? "bg-orange-100" : "bg-black/5"
+                    )}
+                  >
+                    <div className="mb-1 flex items-center gap-2 text-[11px] font-semibold uppercase text-black/50">
+                      {isUser ? "You" : "Agent"}
+                      {item.isFinal === false ? (
+                        <span className="h-1.5 w-1.5 animate-pulse rounded-full bg-current" />
+                      ) : null}
+                    </div>
+                    <div className="whitespace-pre-wrap text-sm text-black/80">
+                      {item.text}
+                    </div>
+                  </div>
+                </div>
+              );
+            })}
+          </div>
+        ) : (
+          <div className="flex h-full items-center justify-center text-sm text-black/40">
+            Your transcript will show up here.
+          </div>
+        )}
+      </div>
+
+      {onSend ? (
+        <form
+          onSubmit={onSubmit}
+          className="flex items-center gap-2 border-t border-black/10 px-3 py-3"
+        >
+          <Input
+            value={value}
+            onChange={(e) => setValue(e.target.value)}
+            placeholder={placeholder ?? "Type a prompt…"}
+            className="h-11 bg-white/70"
+            disabled={disabled}
+          />
+          <Button type="submit" disabled={!canSend} className="h-11">
+            Send
+          </Button>
+        </form>
+      ) : null}
+    </div>
+ ); +} From f2898734492dc3ec3eb42fbc6a290130d536d794 Mon Sep 17 00:00:00 2001 From: Elliot Chen Date: Thu, 25 Dec 2025 18:26:40 +0800 Subject: [PATCH 4/7] feat: migrate voice-image-kids example to doodler --- .../examples/{voice-image-kids => doodler}/.env.example | 0 .../examples/{voice-image-kids => doodler}/Dockerfile | 0 .../examples/{voice-image-kids => doodler}/README.md | 8 ++++---- .../examples/{voice-image-kids => doodler}/Taskfile.yml | 0 .../examples/{voice-image-kids => doodler}/tenapp/go.mod | 0 .../examples/{voice-image-kids => doodler}/tenapp/main.go | 0 .../tenapp/manifest-lock.json | 0 .../{voice-image-kids => doodler}/tenapp/manifest.json | 0 .../{voice-image-kids => doodler}/tenapp/property.json | 0 .../tenapp/scripts/install_python_deps.sh | 0 .../{voice-image-kids => doodler}/tenapp/scripts/start.sh | 0 .../tenapp/ten_packages/extension/main_python/README.md | 0 .../tenapp/ten_packages/extension/main_python/__init__.py | 0 .../tenapp/ten_packages/extension/main_python/addon.py | 0 .../ten_packages/extension/main_python/agent/__init__.py | 0 .../ten_packages/extension/main_python/agent/agent.py | 0 .../extension/main_python/agent/decorators.py | 0 .../ten_packages/extension/main_python/agent/events.py | 0 .../ten_packages/extension/main_python/agent/llm_exec.py | 0 .../tenapp/ten_packages/extension/main_python/config.py | 0 .../ten_packages/extension/main_python/extension.py | 0 .../tenapp/ten_packages/extension/main_python/helper.py | 0 .../ten_packages/extension/main_python/manifest.json | 0 .../ten_packages/extension/main_python/property.json | 0 .../extension/openai_gpt_image_python/README.md | 2 +- 25 files changed, 5 insertions(+), 5 deletions(-) rename ai_agents/agents/examples/{voice-image-kids => doodler}/.env.example (100%) rename ai_agents/agents/examples/{voice-image-kids => doodler}/Dockerfile (100%) rename ai_agents/agents/examples/{voice-image-kids => doodler}/README.md (97%) rename ai_agents/agents/examples/{voice-image-kids => doodler}/Taskfile.yml (100%) rename ai_agents/agents/examples/{voice-image-kids => doodler}/tenapp/go.mod (100%) rename ai_agents/agents/examples/{voice-image-kids => doodler}/tenapp/main.go (100%) rename ai_agents/agents/examples/{voice-image-kids => doodler}/tenapp/manifest-lock.json (100%) rename ai_agents/agents/examples/{voice-image-kids => doodler}/tenapp/manifest.json (100%) rename ai_agents/agents/examples/{voice-image-kids => doodler}/tenapp/property.json (100%) rename ai_agents/agents/examples/{voice-image-kids => doodler}/tenapp/scripts/install_python_deps.sh (100%) rename ai_agents/agents/examples/{voice-image-kids => doodler}/tenapp/scripts/start.sh (100%) rename ai_agents/agents/examples/{voice-image-kids => doodler}/tenapp/ten_packages/extension/main_python/README.md (100%) rename ai_agents/agents/examples/{voice-image-kids => doodler}/tenapp/ten_packages/extension/main_python/__init__.py (100%) rename ai_agents/agents/examples/{voice-image-kids => doodler}/tenapp/ten_packages/extension/main_python/addon.py (100%) rename ai_agents/agents/examples/{voice-image-kids => doodler}/tenapp/ten_packages/extension/main_python/agent/__init__.py (100%) rename ai_agents/agents/examples/{voice-image-kids => doodler}/tenapp/ten_packages/extension/main_python/agent/agent.py (100%) rename ai_agents/agents/examples/{voice-image-kids => doodler}/tenapp/ten_packages/extension/main_python/agent/decorators.py (100%) rename ai_agents/agents/examples/{voice-image-kids => doodler}/tenapp/ten_packages/extension/main_python/agent/events.py 
(100%) rename ai_agents/agents/examples/{voice-image-kids => doodler}/tenapp/ten_packages/extension/main_python/agent/llm_exec.py (100%) rename ai_agents/agents/examples/{voice-image-kids => doodler}/tenapp/ten_packages/extension/main_python/config.py (100%) rename ai_agents/agents/examples/{voice-image-kids => doodler}/tenapp/ten_packages/extension/main_python/extension.py (100%) rename ai_agents/agents/examples/{voice-image-kids => doodler}/tenapp/ten_packages/extension/main_python/helper.py (100%) rename ai_agents/agents/examples/{voice-image-kids => doodler}/tenapp/ten_packages/extension/main_python/manifest.json (100%) rename ai_agents/agents/examples/{voice-image-kids => doodler}/tenapp/ten_packages/extension/main_python/property.json (100%) diff --git a/ai_agents/agents/examples/voice-image-kids/.env.example b/ai_agents/agents/examples/doodler/.env.example similarity index 100% rename from ai_agents/agents/examples/voice-image-kids/.env.example rename to ai_agents/agents/examples/doodler/.env.example diff --git a/ai_agents/agents/examples/voice-image-kids/Dockerfile b/ai_agents/agents/examples/doodler/Dockerfile similarity index 100% rename from ai_agents/agents/examples/voice-image-kids/Dockerfile rename to ai_agents/agents/examples/doodler/Dockerfile diff --git a/ai_agents/agents/examples/voice-image-kids/README.md b/ai_agents/agents/examples/doodler/README.md similarity index 97% rename from ai_agents/agents/examples/voice-image-kids/README.md rename to ai_agents/agents/examples/doodler/README.md index 29900a9ac3..479ae011f5 100644 --- a/ai_agents/agents/examples/voice-image-kids/README.md +++ b/ai_agents/agents/examples/doodler/README.md @@ -48,7 +48,7 @@ A delightful voice-to-image generation application built with TEN Framework. Kid ### 1. Clone and Navigate ```bash -cd ai_agents/agents/examples/voice-image-kids +cd ai_agents/agents/examples/doodler ``` ### 2. Set Environment Variables @@ -172,7 +172,7 @@ Edit `tenapp/property.json` to customize: ## Project Structure ``` -voice-image-kids/ +doodler/ ├── tenapp/ │ ├── property.json # Agent graph configuration │ ├── manifest.json # App metadata @@ -224,7 +224,7 @@ tman install ```bash cd ai_agents -docker build -f agents/examples/voice-image-kids/Dockerfile -t voice-image-kids . +docker build -f agents/examples/doodler/Dockerfile -t doodler . 
``` ### Run Container @@ -234,7 +234,7 @@ docker run --rm -it --env-file .env \ -p 8080:8080 \ -p 3000:3000 \ -p 49483:49483 \ - voice-image-kids + doodler ``` ### Access diff --git a/ai_agents/agents/examples/voice-image-kids/Taskfile.yml b/ai_agents/agents/examples/doodler/Taskfile.yml similarity index 100% rename from ai_agents/agents/examples/voice-image-kids/Taskfile.yml rename to ai_agents/agents/examples/doodler/Taskfile.yml diff --git a/ai_agents/agents/examples/voice-image-kids/tenapp/go.mod b/ai_agents/agents/examples/doodler/tenapp/go.mod similarity index 100% rename from ai_agents/agents/examples/voice-image-kids/tenapp/go.mod rename to ai_agents/agents/examples/doodler/tenapp/go.mod diff --git a/ai_agents/agents/examples/voice-image-kids/tenapp/main.go b/ai_agents/agents/examples/doodler/tenapp/main.go similarity index 100% rename from ai_agents/agents/examples/voice-image-kids/tenapp/main.go rename to ai_agents/agents/examples/doodler/tenapp/main.go diff --git a/ai_agents/agents/examples/voice-image-kids/tenapp/manifest-lock.json b/ai_agents/agents/examples/doodler/tenapp/manifest-lock.json similarity index 100% rename from ai_agents/agents/examples/voice-image-kids/tenapp/manifest-lock.json rename to ai_agents/agents/examples/doodler/tenapp/manifest-lock.json diff --git a/ai_agents/agents/examples/voice-image-kids/tenapp/manifest.json b/ai_agents/agents/examples/doodler/tenapp/manifest.json similarity index 100% rename from ai_agents/agents/examples/voice-image-kids/tenapp/manifest.json rename to ai_agents/agents/examples/doodler/tenapp/manifest.json diff --git a/ai_agents/agents/examples/voice-image-kids/tenapp/property.json b/ai_agents/agents/examples/doodler/tenapp/property.json similarity index 100% rename from ai_agents/agents/examples/voice-image-kids/tenapp/property.json rename to ai_agents/agents/examples/doodler/tenapp/property.json diff --git a/ai_agents/agents/examples/voice-image-kids/tenapp/scripts/install_python_deps.sh b/ai_agents/agents/examples/doodler/tenapp/scripts/install_python_deps.sh similarity index 100% rename from ai_agents/agents/examples/voice-image-kids/tenapp/scripts/install_python_deps.sh rename to ai_agents/agents/examples/doodler/tenapp/scripts/install_python_deps.sh diff --git a/ai_agents/agents/examples/voice-image-kids/tenapp/scripts/start.sh b/ai_agents/agents/examples/doodler/tenapp/scripts/start.sh similarity index 100% rename from ai_agents/agents/examples/voice-image-kids/tenapp/scripts/start.sh rename to ai_agents/agents/examples/doodler/tenapp/scripts/start.sh diff --git a/ai_agents/agents/examples/voice-image-kids/tenapp/ten_packages/extension/main_python/README.md b/ai_agents/agents/examples/doodler/tenapp/ten_packages/extension/main_python/README.md similarity index 100% rename from ai_agents/agents/examples/voice-image-kids/tenapp/ten_packages/extension/main_python/README.md rename to ai_agents/agents/examples/doodler/tenapp/ten_packages/extension/main_python/README.md diff --git a/ai_agents/agents/examples/voice-image-kids/tenapp/ten_packages/extension/main_python/__init__.py b/ai_agents/agents/examples/doodler/tenapp/ten_packages/extension/main_python/__init__.py similarity index 100% rename from ai_agents/agents/examples/voice-image-kids/tenapp/ten_packages/extension/main_python/__init__.py rename to ai_agents/agents/examples/doodler/tenapp/ten_packages/extension/main_python/__init__.py diff --git a/ai_agents/agents/examples/voice-image-kids/tenapp/ten_packages/extension/main_python/addon.py 
b/ai_agents/agents/examples/doodler/tenapp/ten_packages/extension/main_python/addon.py similarity index 100% rename from ai_agents/agents/examples/voice-image-kids/tenapp/ten_packages/extension/main_python/addon.py rename to ai_agents/agents/examples/doodler/tenapp/ten_packages/extension/main_python/addon.py diff --git a/ai_agents/agents/examples/voice-image-kids/tenapp/ten_packages/extension/main_python/agent/__init__.py b/ai_agents/agents/examples/doodler/tenapp/ten_packages/extension/main_python/agent/__init__.py similarity index 100% rename from ai_agents/agents/examples/voice-image-kids/tenapp/ten_packages/extension/main_python/agent/__init__.py rename to ai_agents/agents/examples/doodler/tenapp/ten_packages/extension/main_python/agent/__init__.py diff --git a/ai_agents/agents/examples/voice-image-kids/tenapp/ten_packages/extension/main_python/agent/agent.py b/ai_agents/agents/examples/doodler/tenapp/ten_packages/extension/main_python/agent/agent.py similarity index 100% rename from ai_agents/agents/examples/voice-image-kids/tenapp/ten_packages/extension/main_python/agent/agent.py rename to ai_agents/agents/examples/doodler/tenapp/ten_packages/extension/main_python/agent/agent.py diff --git a/ai_agents/agents/examples/voice-image-kids/tenapp/ten_packages/extension/main_python/agent/decorators.py b/ai_agents/agents/examples/doodler/tenapp/ten_packages/extension/main_python/agent/decorators.py similarity index 100% rename from ai_agents/agents/examples/voice-image-kids/tenapp/ten_packages/extension/main_python/agent/decorators.py rename to ai_agents/agents/examples/doodler/tenapp/ten_packages/extension/main_python/agent/decorators.py diff --git a/ai_agents/agents/examples/voice-image-kids/tenapp/ten_packages/extension/main_python/agent/events.py b/ai_agents/agents/examples/doodler/tenapp/ten_packages/extension/main_python/agent/events.py similarity index 100% rename from ai_agents/agents/examples/voice-image-kids/tenapp/ten_packages/extension/main_python/agent/events.py rename to ai_agents/agents/examples/doodler/tenapp/ten_packages/extension/main_python/agent/events.py diff --git a/ai_agents/agents/examples/voice-image-kids/tenapp/ten_packages/extension/main_python/agent/llm_exec.py b/ai_agents/agents/examples/doodler/tenapp/ten_packages/extension/main_python/agent/llm_exec.py similarity index 100% rename from ai_agents/agents/examples/voice-image-kids/tenapp/ten_packages/extension/main_python/agent/llm_exec.py rename to ai_agents/agents/examples/doodler/tenapp/ten_packages/extension/main_python/agent/llm_exec.py diff --git a/ai_agents/agents/examples/voice-image-kids/tenapp/ten_packages/extension/main_python/config.py b/ai_agents/agents/examples/doodler/tenapp/ten_packages/extension/main_python/config.py similarity index 100% rename from ai_agents/agents/examples/voice-image-kids/tenapp/ten_packages/extension/main_python/config.py rename to ai_agents/agents/examples/doodler/tenapp/ten_packages/extension/main_python/config.py diff --git a/ai_agents/agents/examples/voice-image-kids/tenapp/ten_packages/extension/main_python/extension.py b/ai_agents/agents/examples/doodler/tenapp/ten_packages/extension/main_python/extension.py similarity index 100% rename from ai_agents/agents/examples/voice-image-kids/tenapp/ten_packages/extension/main_python/extension.py rename to ai_agents/agents/examples/doodler/tenapp/ten_packages/extension/main_python/extension.py diff --git a/ai_agents/agents/examples/voice-image-kids/tenapp/ten_packages/extension/main_python/helper.py 
b/ai_agents/agents/examples/doodler/tenapp/ten_packages/extension/main_python/helper.py similarity index 100% rename from ai_agents/agents/examples/voice-image-kids/tenapp/ten_packages/extension/main_python/helper.py rename to ai_agents/agents/examples/doodler/tenapp/ten_packages/extension/main_python/helper.py diff --git a/ai_agents/agents/examples/voice-image-kids/tenapp/ten_packages/extension/main_python/manifest.json b/ai_agents/agents/examples/doodler/tenapp/ten_packages/extension/main_python/manifest.json similarity index 100% rename from ai_agents/agents/examples/voice-image-kids/tenapp/ten_packages/extension/main_python/manifest.json rename to ai_agents/agents/examples/doodler/tenapp/ten_packages/extension/main_python/manifest.json diff --git a/ai_agents/agents/examples/voice-image-kids/tenapp/ten_packages/extension/main_python/property.json b/ai_agents/agents/examples/doodler/tenapp/ten_packages/extension/main_python/property.json similarity index 100% rename from ai_agents/agents/examples/voice-image-kids/tenapp/ten_packages/extension/main_python/property.json rename to ai_agents/agents/examples/doodler/tenapp/ten_packages/extension/main_python/property.json diff --git a/ai_agents/agents/ten_packages/extension/openai_gpt_image_python/README.md b/ai_agents/agents/ten_packages/extension/openai_gpt_image_python/README.md index 32228aa8e3..05b16c176f 100644 --- a/ai_agents/agents/ten_packages/extension/openai_gpt_image_python/README.md +++ b/ai_agents/agents/ten_packages/extension/openai_gpt_image_python/README.md @@ -208,7 +208,7 @@ For Azure OpenAI, configure: ## Examples -See the [voice-image-kids](../../../examples/voice-image-kids/) example for a complete implementation. +See the [doodler](../../../examples/doodler/) example for a complete implementation. ## License From e45c8db6858e6a17ac145873dee45ed66b32b5bd Mon Sep 17 00:00:00 2001 From: Elliot Chen Date: Fri, 26 Dec 2025 18:32:44 +0800 Subject: [PATCH 5/7] feat: enhance doodle styling and interaction --- .../examples/doodler/tenapp/property.json | 8 +- .../openai_gpt_image_python/extension.py | 9 ++- .../openai_gpt_image_python/manifest.json | 2 +- ai_agents/playground/next-env.d.ts | 2 +- ai_agents/playground/src/app/global.css | 73 ++++++++++++++++++- .../src/components/Doodler/AppShell.tsx | 42 +++++------ .../src/components/Doodler/BoardStage.tsx | 51 +++++-------- .../src/components/Doodler/Canvas.tsx | 10 +-- .../src/components/Doodler/ControlsBar.tsx | 15 ++-- .../src/components/Doodler/ImmersiveShell.tsx | 12 +-- .../Doodler/MagicCanvasBackground.tsx | 4 +- .../components/Doodler/MagicPenAnimator.tsx | 36 ++++----- .../components/Doodler/TranscriptPanel.tsx | 18 +++-- 13 files changed, 164 insertions(+), 118 deletions(-) diff --git a/ai_agents/agents/examples/doodler/tenapp/property.json b/ai_agents/agents/examples/doodler/tenapp/property.json index 7d9029bc49..edfd457b74 100644 --- a/ai_agents/agents/examples/doodler/tenapp/property.json +++ b/ai_agents/agents/examples/doodler/tenapp/property.json @@ -44,8 +44,8 @@ "api_key": "${env:OPENAI_API_KEY}", "model": "${env:OPENAI_MODEL|gpt-4o-mini}", "max_tokens": 512, - "prompt": "You are a friendly AI art assistant for kids! 
When children describe what they want to draw, help them create amazing images.\n\nGuidelines:\n- Keep responses short, fun, and encouraging\n- When they describe an image idea, use the generate_image tool immediately\n- Make prompts detailed and vivid (add colors, mood, style details)\n- After generating, celebrate their creativity!\n- If the image can't be created, gently suggest something similar\n\nExample:\nKid: \"I want a purple dragon!\"\nYou: \"Ooh, a purple dragon! Let me create that for you!\" [calls generate_image with detailed prompt]", - "greeting": "Hi there! I'm your AI art buddy! Tell me what you'd like me to draw for you!", + "prompt": "You are a friendly AI art assistant for kids! When children describe what they want to draw, help them create amazing doodles.\n\nGuidelines:\n- Keep responses short, fun, and encouraging\n- When they describe an image idea, use the generate_image tool immediately\n- Make prompts detailed and vivid (add colors, mood, style details)\n- Always say \"drawing\" instead of \"generating\"\n- After drawing, celebrate their creativity!\n- If the image can't be created, gently suggest something similar\n\nExample:\nKid: \"I want a purple dragon!\"\nYou: \"Ooh, a purple dragon! Let me draw that for you!\" [calls generate_image with detailed prompt]", + "greeting": "Hey doodle buddy! Ready to draw together? Tell me what you want to doodle!", "max_memory_length": 10 } }, @@ -85,7 +85,7 @@ "addon": "main_python", "extension_group": "control", "property": { - "greeting": "Hi there! I'm your AI art buddy! Tell me what you'd like me to draw for you!" + "greeting": "Hey doodle buddy! Ready to draw together? Tell me what you want to doodle!" } }, { @@ -235,4 +235,4 @@ ] } } -} \ No newline at end of file +} diff --git a/ai_agents/agents/ten_packages/extension/openai_gpt_image_python/extension.py b/ai_agents/agents/ten_packages/extension/openai_gpt_image_python/extension.py index ab80de389f..1fe005d2e7 100644 --- a/ai_agents/agents/ten_packages/extension/openai_gpt_image_python/extension.py +++ b/ai_agents/agents/ten_packages/extension/openai_gpt_image_python/extension.py @@ -193,12 +193,13 @@ async def run_tool( }), ) doodle_modifier = ( - " in playful doodle style, hand-drawn, crayon-like, bold outlines, simple shapes, " - "kid-friendly and cheerful" + " in playful crayon doodle style on white paper, hand-drawn, bold uneven outlines, " + "simple shapes, flat colors, limited palette, minimal detail, no gradients, no 3D, " + "no realistic lighting, no photo realism, kid-friendly and cheerful" ) prompt = f"{prompt.strip()}{doodle_modifier}" - # Emit progress: queued → generating + # Emit progress: queued → drawing try: queued_msg = { "data_type": "raw", @@ -223,7 +224,7 @@ async def run_tool( generating_msg = { "data_type": "raw", "role": "assistant", - "text": json.dumps({"type": "progress", "data": {"phase": "generating", "pct": 50}}), + "text": json.dumps({"type": "progress", "data": {"phase": "drawing", "pct": 50}}), "text_ts": int(time.time() * 1000), "is_final": False, "stream_id": 100 diff --git a/ai_agents/agents/ten_packages/extension/openai_gpt_image_python/manifest.json b/ai_agents/agents/ten_packages/extension/openai_gpt_image_python/manifest.json index a14144989c..91927d5a6e 100644 --- a/ai_agents/agents/ten_packages/extension/openai_gpt_image_python/manifest.json +++ b/ai_agents/agents/ten_packages/extension/openai_gpt_image_python/manifest.json @@ -155,4 +155,4 @@ } ] } -} \ No newline at end of file +} diff --git 
a/ai_agents/playground/next-env.d.ts b/ai_agents/playground/next-env.d.ts
index 9edff1c7ca..c4b7818fbb 100644
--- a/ai_agents/playground/next-env.d.ts
+++ b/ai_agents/playground/next-env.d.ts
@@ -1,6 +1,6 @@
 /// <reference types="next" />
 /// <reference types="next/image-types/global" />
-import "./.next/types/routes.d.ts";
+import "./.next/dev/types/routes.d.ts";
 
 // NOTE: This file should not be edited
 // see https://nextjs.org/docs/app/api-reference/config/typescript for more information.
diff --git a/ai_agents/playground/src/app/global.css b/ai_agents/playground/src/app/global.css
index 32974ddccb..dc3883acf7 100644
--- a/ai_agents/playground/src/app/global.css
+++ b/ai_agents/playground/src/app/global.css
@@ -191,11 +191,68 @@
 }
 
 .doodle-tile {
-  background-image: url("data:image/svg+xml,%3Csvg%20xmlns%3D%27http%3A//www.w3.org/2000/svg%27%20width%3D%27320%27%20height%3D%27320%27%20viewBox%3D%270%200%20320%20320%27%20fill%3D%27none%27%20stroke%3D%27%23111%27%20stroke-width%3D%273%27%20stroke-linecap%3D%27round%27%20stroke-linejoin%3D%27round%27%3E%3Ccircle%20cx%3D%2732%27%20cy%3D%2732%27%20r%3D%2710%27/%3E%3Cpath%20d%3D%27M32%206v8M32%2050v8M6%2032h8M50%2032h8M14%2014l6%206M44%2044l6%206M50%2014l-6%206M14%2050l6-6%27/%3E%3Cpath%20d%3D%27M96%2020l6%2014%2014%206-14%206-6%2014-6-14-14-6%2014-6%206-14z%27/%3E%3Cpath%20d%3D%27M144%2044l4%2010%2010%204-10%204-4%2010-4-10-10-4%2010-4%204-10z%27/%3E%3Cpath%20d%3D%27M232%2022c-8-10-24-6-24%206%200%2014%2024%2026%2024%2026s24-12%2024-26c0-12-16-16-24-6z%27/%3E%3Cpath%20d%3D%27M68%20104c0-8%208-14%2016-10%206-10%2024-6%2022%208%2010%202%2012%2016%200%2018-8%202-28%202-38-2-8-4-6-14%200-14z%27/%3E%3Cpath%20d%3D%27M200%2088l30-8%2020%2018-8%2024-26%204-18-18%202-20z%27/%3E%3Cpath%20d%3D%27M34%20170c24-18%2044-12%2062-2%27/%3E%3Cpath%20d%3D%27M90%20158l12%204-8%2010%27/%3E%3Cpath%20d%3D%27M140%20150c8-10%2024-14%2036-4-8%2012-20%2018-36%2012%200-4%202-6%204-8%27/%3E%3Cpath%20d%3D%27M220%20164l10%2018%2020%204-16%2010-4%2020-10-16-20-4%2016-10%204-20z%27/%3E%3Cpath%20d%3D%27M80%20228c-10-6-22-4-28%206%2010%208%2022%2010%2030%206%27/%3E%3Cpath%20d%3D%27M160%20230c-6-16%204-34%2022-38-16%2010-18%2032-4%2044-12%204-22%200-18-6z%27/%3E%3Cpath%20d%3D%27M230%20236l0%2030%27/%3E%3Cpath%20d%3D%27M218%20252l24%200%27/%3E%3Cpath%20d%3D%27M252%20244l12%208-12%208%27/%3E%3C/svg%3E");
+  background-image:
+
url("data:image/svg+xml,%3Csvg%20xmlns%3D%27http%3A//www.w3.org/2000/svg%27%20width%3D%27320%27%20height%3D%27320%27%20viewBox%3D%270%200%20320%20320%27%20fill%3D%27none%27%20stroke%3D%27%23111%27%20stroke-width%3D%273%27%20stroke-linecap%3D%27round%27%20stroke-linejoin%3D%27round%27%3E%3Ccircle%20cx%3D%2732%27%20cy%3D%2732%27%20r%3D%2710%27/%3E%3Cpath%20d%3D%27M32%206v8M32%2050v8M6%2032h8M50%2032h8M14%2014l6%206M44%2044l6%206M50%2014l-6%206M14%2050l6-6%27/%3E%3Cpath%20d%3D%27M96%2020l6%2014%2014%206-14%206-6%2014-6-14-14-6%2014-6%206-14z%27/%3E%3Cpath%20d%3D%27M144%2044l4%2010%2010%204-10%204-4%2010-4-10-10-4%2010-4%204-10z%27/%3E%3Cpath%20d%3D%27M232%2022c-8-10-24-6-24%206%200%2014%2024%2026%2024%2026s24-12%2024-26c0-12-16-16-24-6z%27/%3E%3Cpath%20d%3D%27M68%20104c0-8%208-14%2016-10%206-10%2024-6%2022%208%2010%202%2012%2016%200%2018-8%202-28%202-38-2-8-4-6-14%200-14z%27/%3E%3Cpath%20d%3D%27M200%2088l30-8%2020%2018-8%2024-26%204-18-18%202-20z%27/%3E%3Cpath%20d%3D%27M34%20170c24-18%2044-12%2062-2%27/%3E%3Cpath%20d%3D%27M90%20158l12%204-8%2010%27/%3E%3Cpath%20d%3D%27M140%20150c8-10%2024-14%2036-4-8%2012-20%2018-36%2012%200-4%202-6%204-8%27/%3E%3Cpath%20d%3D%27M220%20164l10%2018%2020%204-16%2010-4%2020-10-16-20-4%2016-10%204-20z%27/%3E%3Cpath%20d%3D%27M80%20228c-10-6-22-4-28%206%2010%208%2022%2010%2030%206%27/%3E%3Cpath%20d%3D%27M160%20230c-6-16%204-34%2022-38-16%2010-18%2032-4%2044-12%204-22%200-18-6z%27/%3E%3Cpath%20d%3D%27M230%20236l0%2030%27/%3E%3Cpath%20d%3D%27M218%20252l24%200%27/%3E%3Cpath%20d%3D%27M252%20244l12%208-12%208%27/%3E%3C/svg%3E"); background-repeat: repeat; - background-size: 320px 320px; + background-size: 240px 240px; + background-position: 0 0; mix-blend-mode: multiply; - opacity: 0.16; + opacity: 0.24; + } + + .crayon-border { + position: relative; + border-radius: 22px; + border: 2px solid rgba(18, 18, 18, 0.55); + background-color: rgba(255, 255, 255, 0.85); + } + + .crayon-border::after { + content: ""; + position: absolute; + inset: -4px; + border-radius: inherit; + border: 2px dashed rgba(18, 18, 18, 0.2); + opacity: 0.6; + transform: rotate(-0.4deg); + pointer-events: none; + } + + .crayon-control { + position: relative; + border: 2px solid rgba(18, 18, 18, 0.55); + border-radius: 999px; + box-shadow: 0 3px 0 rgba(18, 18, 18, 0.08); + } + + .crayon-control::after { + content: ""; + position: absolute; + inset: -3px; + border-radius: inherit; + border: 2px dashed rgba(18, 18, 18, 0.2); + opacity: 0.55; + transform: rotate(-0.25deg); + pointer-events: none; + } + + .crayon-bubble { + position: relative; + border-radius: 18px; + border-width: 2px; + border-style: solid; + box-shadow: 0 3px 0 rgba(18, 18, 18, 0.08); + } + + .crayon-bubble::after { + content: ""; + position: absolute; + inset: -3px; + border-radius: inherit; + border: 2px dashed rgba(18, 18, 18, 0.18); + opacity: 0.55; + transform: rotate(0.3deg); + pointer-events: none; } .doodle-dream { @@ -386,6 +443,16 @@ position: relative; } + .toy-board-screen::after { + content: ""; + position: absolute; + inset: 10px; + border-radius: 30px; + border: 2px dashed rgba(17, 17, 17, 0.2); + box-shadow: 0 3px 0 rgba(18, 18, 18, 0.08); + pointer-events: none; + } + .toy-board-pen-slot { display: flex; align-items: center; diff --git a/ai_agents/playground/src/components/Doodler/AppShell.tsx b/ai_agents/playground/src/components/Doodler/AppShell.tsx index 5739018602..7753de22f6 100644 --- a/ai_agents/playground/src/components/Doodler/AppShell.tsx +++ b/ai_agents/playground/src/components/Doodler/AppShell.tsx @@ -1,27 
+1,19 @@
 "use client";
 
 import * as React from "react";
-import { useAppDispatch, useAppSelector } from "@/common";
 import { cn } from "@/lib/utils";
-import {
-  addChatItem,
-  setOptions,
-  setRoomConnected,
-} from "@/store/reducers/global";
-import { EMessageDataType, EMessageType, type IChatItem } from "@/types";
+import { useAppDispatch, useAppSelector } from "@/common";
+import { addChatItem, setOptions, setRoomConnected } from "@/store/reducers/global";
 import DoodleCanvas from "./Canvas";
 import ControlsBar from "./ControlsBar";
 import LoadingAnimator from "./LoadingAnimator";
+import { EMessageDataType, EMessageType, type IChatItem } from "@/types";
 
 export default function AppShell() {
   const dispatch = useAppDispatch();
   const options = useAppSelector((s) => s.global.options);
-  const [channel, setChannel] = React.useState(
-    options.channel || "voice_image_kids"
-  );
-  const [userId, setUserId] = React.useState(
-    options.userId || Math.floor(100000 + Math.random() * 900000)
-  );
+  const [channel, setChannel] = React.useState(options.channel || "voice_image_kids");
+  const [userId, setUserId] = React.useState(options.userId || Math.floor(100000 + Math.random() * 900000));
   const rtcRef = React.useRef(null);
   React.useEffect(() => {
@@ -72,12 +64,12 @@ export default function AppShell() {
         <input
           value={channel}
           onChange={(e) => setChannel(e.target.value)}
           aria-label="Channel"
-        className={cn(
-          "rounded-lg border border-[#2b2e35] bg-[#121316] px-3 py-2 text-sm text-white"
-        )}
+          className={cn(
+            "rounded-lg border border-[#2b2e35] bg-[#121316] px-3 py-2 text-sm text-white"
+          )}
         />
         <input
           value={userId}
           onChange={(e) => setUserId(Number(e.target.value))}
           aria-label="User ID"
-        className={cn(
-          "rounded-lg border border-[#2b2e35] bg-[#121316] px-3 py-2 text-sm text-white"
-        )}
+          className={cn(
+            "rounded-lg border border-[#2b2e35] bg-[#121316] px-3 py-2 text-sm text-white"
+          )}
         />
); } diff --git a/ai_agents/playground/src/components/Doodler/ImmersiveShell.tsx b/ai_agents/playground/src/components/Doodler/ImmersiveShell.tsx index 9b09c14249..bead74ed10 100644 --- a/ai_agents/playground/src/components/Doodler/ImmersiveShell.tsx +++ b/ai_agents/playground/src/components/Doodler/ImmersiveShell.tsx @@ -462,12 +462,12 @@ export default function ImmersiveShell() {