diff --git a/cmd/picoclaw/main.go b/cmd/picoclaw/main.go index c14ec58..e54ffe8 100644 --- a/cmd/picoclaw/main.go +++ b/cmd/picoclaw/main.go @@ -23,6 +23,7 @@ import ( "github.com/sipeed/picoclaw/pkg/channels" "github.com/sipeed/picoclaw/pkg/config" "github.com/sipeed/picoclaw/pkg/cron" + "github.com/sipeed/picoclaw/pkg/health" "github.com/sipeed/picoclaw/pkg/heartbeat" "github.com/sipeed/picoclaw/pkg/logger" "github.com/sipeed/picoclaw/pkg/providers" @@ -595,6 +596,20 @@ func gatewayCmd() { fmt.Println("⚠ Warning: No channels enabled") } + // Start health check server + healthServer := health.NewServer(health.Config{ + Port: cfg.Gateway.HealthPort, + Version: version, + BuildTime: "unknown", // Can be set via ldflags + AgentName: "picclaw", + }) + go func() { + if err := healthServer.Start(); err != nil { + fmt.Printf("Error starting health server: %v\n", err) + } + }() + fmt.Printf("✓ Health check endpoint: http://%s:%d/healthz\n", cfg.Gateway.Host, cfg.Gateway.HealthPort) + fmt.Printf("✓ Gateway started on %s:%d\n", cfg.Gateway.Host, cfg.Gateway.Port) fmt.Println("Press Ctrl+C to stop") diff --git a/docs/health-check.md b/docs/health-check.md new file mode 100644 index 0000000..bfa0535 --- /dev/null +++ b/docs/health-check.md @@ -0,0 +1,184 @@ +# Health Check Endpoint + +PicoClaw provides an HTTP health check endpoint for monitoring and orchestration. 
+ +## Endpoint + +``` +GET /healthz +``` + +## Configuration + +Add to `config.json`: + +```json +{ + "gateway": { + "host": "0.0.0.0", + "port": 18790, + "health_port": 9090 + } +} +``` + +Or via environment variable: + +```bash +export PICOCLAW_GATEWAY_HEALTH_PORT=9090 +``` + +## Response + +```json +{ + "status": "ok", + "uptime": "1h23m45s", + "version": "0.1.0", + "agent_name": "picclaw", + "go_version": "go1.24.0", + "build_time": "2026-02-16T08:00:00Z" +} +``` + +**HTTP 200 OK** - Agent is healthy and running + +## Usage + +### Local Development + +```bash +# Start the gateway +picoclaw gateway + +# Check health +curl http://localhost:9090/healthz +``` + +### Docker + +```bash +# Build +docker build -t picclaw . + +# Run with health check +docker run -d \ + --name picclaw \ + --health-cmd "curl -f http://localhost:9090/healthz || exit 1" \ + --health-interval 30s \ + --health-timeout 5s \ + --health-retries 3 \ + picclaw +``` + +### Kubernetes + +```yaml +apiVersion: v1 +kind: Pod +metadata: + name: picclaw +spec: + containers: + - name: picclaw + image: picclaw:latest + ports: + - containerPort: 9090 + name: health + livenessProbe: + httpGet: + path: /healthz + port: 9090 + initialDelaySeconds: 10 + periodSeconds: 30 + readinessProbe: + httpGet: + path: /healthz + port: 9090 + initialDelaySeconds: 5 + periodSeconds: 10 +``` + +### systemd + +```ini +[Unit] +Description=PicoClaw Agent +After=network.target + +[Service] +ExecStart=/usr/local/bin/picoclaw gateway +ExecStartPost=/bin/sleep 2 +ExecStartPost=/usr/bin/curl -f http://localhost:9090/healthz +Restart=always + +[Install] +WantedBy=multi-user.target +``` + +## Testing + +```bash +# Run tests +go test -v ./pkg/health + +# With coverage +go test -v -cover ./pkg/health + +# Expected output: +# === RUN TestNewServer +# --- PASS: TestNewServer (0.00s) +# === RUN TestHealthzEndpoint +# --- PASS: TestHealthzEndpoint (0.00s) +# ... 
+# PASS
+# coverage: <percent> of statements (Start is not exercised by unit tests)
+```
+
+## Monitoring
+
+### Prometheus
+
+`/healthz` returns JSON, not the Prometheus exposition format, so it cannot be
+used as a scrape `metrics_path`. Probe it through the blackbox exporter
+(`http_2xx` module) with the health URL as the probe target:
+
+```yaml
+      - targets: ['http://localhost:9090/healthz']
+```
+
+### Uptime Monitoring
+
+Services like UptimeRobot, Pingdom, or Datadog can use `/healthz` for availability checks.
+
+## Troubleshooting
+
+### Port already in use
+
+Change `health_port` in `config.json`. Note that 9090 is also the default port
+of a Prometheus server, so the two collide when run on the same host:
+
+```json
+{
+  "gateway": { "health_port": 9091 }
+}
+```
+
+### Health check fails
+
+1. Check if gateway is running: `ps aux | grep picoclaw`
+2. Check port binding: `netstat -tuln | grep 9090`
+3. Check logs: `tail -f ~/.picoclaw/picoclaw.log`
+4. Verify config: `cat ~/.picoclaw/config.json`
+
+### Method not allowed (405)
+
+Only `GET` requests are supported:
+
+```bash
+# ✅ Correct
+curl -X GET http://localhost:9090/healthz
+
+# ❌ Wrong
+curl -X POST http://localhost:9090/healthz
+```
diff --git a/pkg/config/config.go b/pkg/config/config.go
index 5b9c2b5..8a07814 100644
--- a/pkg/config/config.go
+++ b/pkg/config/config.go
@@ -104,8 +104,9 @@ type ProviderConfig struct {
 }
 
 type GatewayConfig struct {
-	Host string `json:"host" env:"PICOCLAW_GATEWAY_HOST"`
-	Port int    `json:"port" env:"PICOCLAW_GATEWAY_PORT"`
+	Host       string `json:"host" env:"PICOCLAW_GATEWAY_HOST"`
+	Port       int    `json:"port" env:"PICOCLAW_GATEWAY_PORT"`
+	HealthPort int    `json:"health_port" env:"PICOCLAW_GATEWAY_HEALTH_PORT"`
 }
 
 type WebSearchConfig struct {
@@ -185,8 +186,9 @@ func DefaultConfig() *Config {
 			Gemini: ProviderConfig{},
 		},
 		Gateway: GatewayConfig{
-			Host: "0.0.0.0",
-			Port: 18790,
+			Host:       "0.0.0.0",
+			Port:       18790,
+			HealthPort: 9090,
 		},
 		Tools: ToolsConfig{
 			Web: WebToolsConfig{
diff --git a/pkg/health/server.go b/pkg/health/server.go
new file mode 100644
index 0000000..d45613f
--- /dev/null
+++ b/pkg/health/server.go
@@ -0,0 +1,106 @@
+package health
+
+import (
+	"encoding/json"
+	"fmt"
+	"net/http"
+	"runtime"
+	"time"
+)
+
+// Timeouts applied to the health listener so that a stalled or malicious client
+// cannot pin a connection (and its goroutine) open forever.
+const (
+	readHeaderTimeout = 5 * time.Second
+	readTimeout       = 10 * time.Second
+	writeTimeout      = 10 * time.Second
+	idleTimeout       = 60 * time.Second
+)
+
+// Server provides HTTP health check endpoints.
+type Server struct {
+	port      int
+	startTime time.Time
+	version   string
+	buildTime string
+	agentName string
+}
+
+// Config holds health server configuration.
+type Config struct {
+	Port      int
+	Version   string
+	BuildTime string
+	AgentName string
+}
+
+// healthResponse is the JSON document served by /healthz.
+type healthResponse struct {
+	Status    string `json:"status"`
+	Uptime    string `json:"uptime"`
+	Version   string `json:"version"`
+	AgentName string `json:"agent_name"`
+	GoVersion string `json:"go_version"`
+	BuildTime string `json:"build_time"`
+}
+
+// NewServer creates a new health check server. Reported uptime is measured
+// from the moment NewServer is called, not from when Start is called.
+func NewServer(cfg Config) *Server {
+	return &Server{
+		port:      cfg.Port,
+		startTime: time.Now(),
+		version:   cfg.Version,
+		buildTime: cfg.BuildTime,
+		agentName: cfg.AgentName,
+	}
+}
+
+// Start registers /healthz and serves on the configured port on all
+// interfaces. It blocks for the lifetime of the listener and always returns a
+// non-nil error.
+func (s *Server) Start() error {
+	mux := http.NewServeMux()
+	mux.HandleFunc("/healthz", s.handleHealthz)
+
+	srv := &http.Server{
+		Addr:              fmt.Sprintf(":%d", s.port),
+		Handler:           mux,
+		ReadHeaderTimeout: readHeaderTimeout,
+		ReadTimeout:       readTimeout,
+		WriteTimeout:      writeTimeout,
+		IdleTimeout:       idleTimeout,
+	}
+	return srv.ListenAndServe()
+}
+
+// handleHealthz answers GET /healthz with a JSON status document and rejects
+// every other method with 405.
+func (s *Server) handleHealthz(w http.ResponseWriter, r *http.Request) {
+	if r.Method != http.MethodGet {
+		// A 405 response must advertise the methods that are allowed.
+		w.Header().Set("Allow", http.MethodGet)
+		http.Error(w, "Method not allowed", http.StatusMethodNotAllowed)
+		return
+	}
+
+	// Encode before touching the response: once the status line has been
+	// written it can no longer be replaced by a 500.
+	body, err := json.Marshal(healthResponse{
+		Status:    "ok",
+		Uptime:    time.Since(s.startTime).String(),
+		Version:   s.version,
+		AgentName: s.agentName,
+		GoVersion: runtime.Version(),
+		BuildTime: s.buildTime,
+	})
+	if err != nil {
+		http.Error(w, "Failed to encode response", http.StatusInternalServerError)
+		return
+	}
+
+	w.Header().Set("Content-Type", "application/json")
+	w.WriteHeader(http.StatusOK)
+	// A failed write means the client has gone away; nothing is left to report.
+	_, _ = w.Write(append(body, '\n'))
+}
diff --git a/pkg/health/server_test.go b/pkg/health/server_test.go
new file mode 100644
index 0000000..8a56285
--- /dev/null
+++ b/pkg/health/server_test.go
@@ -0,0 +1,235 @@
+package health
+
+import (
+	"encoding/json"
+	"net/http"
+	"net/http/httptest"
+	"testing"
+	"time"
+)
+
+func TestNewServer(t *testing.T) {
+	cfg := Config{
+		Port:      8080,
+		Version:   "1.0.0",
+		BuildTime: "2024-01-01",
+		AgentName: "test-agent",
+	}
+
+	server := NewServer(cfg)
+
+	if server == nil {
+		t.Fatal("NewServer returned nil")
+	}
+	if server.port != 8080 {
+		t.Errorf("Expected port 8080, got %d", server.port)
+	}
+	if server.version != "1.0.0" {
+		t.Errorf("Expected version 1.0.0, got %s", server.version)
+	}
+	if server.agentName != "test-agent" {
+		t.Errorf("Expected agent name test-agent, got %s", server.agentName)
+	}
+}
+
+func TestHealthzEndpoint(t *testing.T) {
+	cfg := Config{
+		Port:      8080,
+		Version:   "1.0.0",
+		BuildTime: "2024-01-01T00:00:00Z",
+		AgentName: "picclaw",
+	}
+
+	server := NewServer(cfg)
+
+	// Create test request
+	req := httptest.NewRequest(http.MethodGet, "/healthz", nil)
+	w := httptest.NewRecorder()
+
+	// Call handler
+	server.handleHealthz(w, req)
+
+	// Check status code
+	if w.Code != http.StatusOK {
+		t.Errorf("Expected status 200, got %d", w.Code)
+	}
+
+	// Check Content-Type
+	contentType := w.Header().Get("Content-Type")
+	if contentType != "application/json" {
+		t.Errorf("Expected Content-Type application/json, got %s", contentType)
+	}
+
+	// Parse response
+	var response map[string]interface{}
+	if err := json.NewDecoder(w.Body).Decode(&response); err != nil {
+		t.Fatalf("Failed to decode response: %v", err)
+	}
+
+	// Verify required fields
+	requiredFields := []string{"status", "uptime", "version", "agent_name", "go_version", "build_time"}
+	for _, field := range requiredFields {
+		if _, ok := response[field]; !ok {
+			t.Errorf("Missing required field: %s", field)
+		}
+	}
+
+	// Verify status
+	if response["status"] != "ok" {
+		t.Errorf("Expected status ok, got %v", response["status"])
+	}
+
+	// Verify version
+	if response["version"] != "1.0.0" {
+		t.Errorf("Expected version 1.0.0, got %v", response["version"])
+	}
+
+	// Verify agent_name
+	if response["agent_name"] != "picclaw" {
+		t.Errorf("Expected agent_name picclaw, got %v", response["agent_name"])
+	}
+
+	// Verify go_version is not empty
+	if goVersion, ok := response["go_version"].(string); !ok || goVersion == "" {
+		t.Error("go_version is missing or empty")
+	}
+}
+
+func TestHealthzMethodNotAllowed(t *testing.T) {
+	cfg := Config{
+		Port:      8080,
+		Version:   "1.0.0",
+		BuildTime: "2024-01-01",
+		AgentName: "picclaw",
+	}
+
+	server := NewServer(cfg)
+
+	// Test POST request (should be rejected)
+	req := httptest.NewRequest(http.MethodPost, "/healthz", nil)
+	w := httptest.NewRecorder()
+
+	server.handleHealthz(w, req)
+
+	if w.Code != http.StatusMethodNotAllowed {
+		t.Errorf("Expected status 405, got %d", w.Code)
+	}
+
+	if allow := w.Header().Get("Allow"); allow != http.MethodGet {
+		t.Errorf("Expected Allow header GET, got %q", allow)
+	}
+}
+
+func TestHealthzUptime(t *testing.T) {
+	cfg := Config{
+		Port:      8080,
+		Version:   "1.0.0",
+		BuildTime: "2024-01-01",
+		AgentName: "picclaw",
+	}
+
+	server := NewServer(cfg)
+
+	// Wait a bit to ensure uptime > 0
+	time.Sleep(100 * time.Millisecond)
+
+	req := httptest.NewRequest(http.MethodGet, "/healthz", nil)
+	w := httptest.NewRecorder()
+
+	server.handleHealthz(w, req)
+
+	var response map[string]interface{}
+	if err := json.NewDecoder(w.Body).Decode(&response); err != nil {
+		t.Fatalf("Failed to decode response: %v", err)
+	}
+
+	uptime, ok := response["uptime"].(string)
+	if !ok {
+		t.Fatal("uptime field is not a string")
+	}
+
+	if uptime == "" || uptime == "0s" {
+		t.Errorf("Expected uptime > 0, got %s", uptime)
+	}
+}
+
+func TestHealthzMultipleRequests(t *testing.T) {
+	cfg := Config{
+		Port:      8080,
+		Version:   "1.0.0",
+		BuildTime: "2024-01-01",
+		AgentName: "picclaw",
+	}
+
+	server := NewServer(cfg)
+
+	// Send multiple requests
+	for i := 0; i < 10; i++ {
+		req := httptest.NewRequest(http.MethodGet, "/healthz", nil)
+		w := httptest.NewRecorder()
+
+		server.handleHealthz(w, req)
+
+		if w.Code != http.StatusOK {
+			t.Errorf("Request %d: Expected status 200, got %d", i, w.Code)
+		}
+
+		var response map[string]interface{}
+		if err := json.NewDecoder(w.Body).Decode(&response); err != nil {
+			t.Fatalf("Request %d: Failed to decode response: %v", i, err)
+		}
+
+		if response["status"] != "ok" {
+			t.Errorf("Request %d: Expected status ok, got %v", i, response["status"])
+		}
+
+		time.Sleep(10 * time.Millisecond)
+	}
+}
+
+func TestHealthzResponseFields(t *testing.T) {
+	tests := []struct {
+		name      string
+		version   string
+		buildTime string
+		agentName string
+	}{
+		{"Production", "1.0.0", "2024-01-01T00:00:00Z", "picclaw"},
+		{"Development", "dev", "unknown", "picclaw-dev"},
+		{"Empty version", "", "", "test"},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			cfg := Config{
+				Port:      8080,
+				Version:   tt.version,
+				BuildTime: tt.buildTime,
+				AgentName: tt.agentName,
+			}
+
+			server := NewServer(cfg)
+			req := httptest.NewRequest(http.MethodGet, "/healthz", nil)
+			w := httptest.NewRecorder()
+
+			server.handleHealthz(w, req)
+
+			var response map[string]interface{}
+			if err := json.NewDecoder(w.Body).Decode(&response); err != nil {
+				t.Fatalf("Failed to decode response: %v", err)
+			}
+
+			if response["version"] != tt.version {
+				t.Errorf("Expected version %s, got %v", tt.version, response["version"])
+			}
+
+			if response["build_time"] != tt.buildTime {
+				t.Errorf("Expected build_time %s, got %v", tt.buildTime, response["build_time"])
+			}
+
+			if response["agent_name"] != tt.agentName {
+				t.Errorf("Expected agent_name %s, got %v", tt.agentName, response["agent_name"])
+			}
+		})
+	}
+}