diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 4f58e514..d7e0c114 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -30,9 +30,9 @@ jobs: with: username: ${{ secrets.DOCKERHUB_USERNAME }} password: ${{ secrets.DOCKERHUB_PASSWORD }} - - - name: Run tests - run: make test - + - name: Build run: make build + + - name: Run tests + run: make test diff --git a/.gitignore b/.gitignore index 9c30de60..a18cbe9d 100644 --- a/.gitignore +++ b/.gitignore @@ -17,3 +17,6 @@ tmp/** # Cloud Hypervisor binaries (embedded at build time) lib/vmm/binaries/cloud-hypervisor/*/*/cloud-hypervisor +cloud-hypervisor +cloud-hypervisor/** +lib/system/exec_agent/exec-agent diff --git a/Makefile b/Makefile index 485e1f5f..62832f7c 100644 --- a/Makefile +++ b/Makefile @@ -76,8 +76,15 @@ generate-wire: $(WIRE) @echo "Generating wire code..." cd ./cmd/api && $(WIRE) +# Generate gRPC code from proto +generate-grpc: + @echo "Generating gRPC code from proto..." + protoc --go_out=. --go_opt=paths=source_relative \ + --go-grpc_out=. --go-grpc_opt=paths=source_relative \ + lib/exec/exec.proto + # Generate all code -generate-all: oapi-generate generate-vmm-client generate-wire +generate-all: oapi-generate generate-vmm-client generate-wire generate-grpc # Check if binaries exist, download if missing .PHONY: ensure-ch-binaries @@ -87,16 +94,28 @@ ensure-ch-binaries: $(MAKE) download-ch-binaries; \ fi +# Build exec-agent (guest binary) into its own directory for embedding +lib/system/exec_agent/exec-agent: lib/system/exec_agent/main.go + @echo "Building exec-agent..." + cd lib/system/exec_agent && CGO_ENABLED=0 go build -ldflags="-s -w" -o exec-agent . 
+ # Build the binary -build: ensure-ch-binaries | $(BIN_DIR) +build: ensure-ch-binaries lib/system/exec_agent/exec-agent | $(BIN_DIR) go build -tags containers_image_openpgp -o $(BIN_DIR)/hypeman ./cmd/api +# Build exec CLI +build-exec: | $(BIN_DIR) + go build -o $(BIN_DIR)/hypeman-exec ./cmd/exec + +# Build all binaries +build-all: build build-exec + # Run in development mode with hot reload dev: $(AIR) $(AIR) -c .air.toml # Run tests -test: ensure-ch-binaries +test: ensure-ch-binaries lib/system/exec_agent/exec-agent go test -tags containers_image_openpgp -v -timeout 30s ./... # Generate JWT token for testing @@ -109,4 +128,7 @@ clean: rm -rf $(BIN_DIR) rm -f lib/oapi/oapi.go rm -f lib/vmm/vmm.go + rm -f lib/exec/exec.pb.go + rm -f lib/exec/exec_grpc.pb.go + rm -f lib/system/exec_agent/exec-agent diff --git a/cmd/api/api/exec.go b/cmd/api/api/exec.go new file mode 100644 index 00000000..26a3745d --- /dev/null +++ b/cmd/api/api/exec.go @@ -0,0 +1,202 @@ +package api + +import ( + "bytes" + "context" + "encoding/json" + "fmt" + "io" + "net/http" + "sync" + "time" + + "github.com/go-chi/chi/v5" + "github.com/gorilla/websocket" + "github.com/onkernel/hypeman/lib/exec" + "github.com/onkernel/hypeman/lib/instances" + "github.com/onkernel/hypeman/lib/logger" +) + +var upgrader = websocket.Upgrader{ + ReadBufferSize: 32 * 1024, + WriteBufferSize: 32 * 1024, + CheckOrigin: func(r *http.Request) bool { + // Allow all origins for now - can be tightened in production + return true + }, +} + +// ExecRequest represents the JSON body for exec requests +type ExecRequest struct { + Command []string `json:"command"` + TTY bool `json:"tty"` + Env map[string]string `json:"env,omitempty"` + Cwd string `json:"cwd,omitempty"` + Timeout int32 `json:"timeout,omitempty"` // seconds +} + +// ExecHandler handles exec requests via WebSocket for bidirectional streaming +func (s *ApiService) ExecHandler(w http.ResponseWriter, r *http.Request) { + ctx := r.Context() + log := 
logger.FromContext(ctx) + startTime := time.Now() + + instanceID := chi.URLParam(r, "id") + + // Get instance + inst, err := s.InstanceManager.GetInstance(ctx, instanceID) + if err != nil { + if err == instances.ErrNotFound { + http.Error(w, `{"code":"not_found","message":"instance not found"}`, http.StatusNotFound) + return + } + log.ErrorContext(ctx, "failed to get instance", "error", err) + http.Error(w, `{"code":"internal_error","message":"failed to get instance"}`, http.StatusInternalServerError) + return + } + + if inst.State != instances.StateRunning { + http.Error(w, fmt.Sprintf(`{"code":"invalid_state","message":"instance must be running (current state: %s)"}`, inst.State), http.StatusConflict) + return + } + + // Upgrade to WebSocket first + ws, err := upgrader.Upgrade(w, r, nil) + if err != nil { + log.ErrorContext(ctx, "websocket upgrade failed", "error", err) + return + } + defer ws.Close() + + // Read JSON request from first WebSocket message + msgType, message, err := ws.ReadMessage() + if err != nil { + log.ErrorContext(ctx, "failed to read exec request", "error", err) + ws.WriteMessage(websocket.TextMessage, []byte(fmt.Sprintf(`{"error":"failed to read request: %v"}`, err))) + return + } + + if msgType != websocket.TextMessage { + log.ErrorContext(ctx, "expected text message with JSON request", "type", msgType) + ws.WriteMessage(websocket.TextMessage, []byte(`{"error":"first message must be JSON text"}`)) + return + } + + // Parse JSON request + var execReq ExecRequest + if err := json.Unmarshal(message, &execReq); err != nil { + log.ErrorContext(ctx, "invalid JSON request", "error", err) + ws.WriteMessage(websocket.TextMessage, []byte(fmt.Sprintf(`{"error":"invalid JSON: %v"}`, err))) + return + } + + // Default command if not specified + if len(execReq.Command) == 0 { + execReq.Command = []string{"/bin/sh"} + } + + // Get JWT subject for audit logging (if available) + subject := "unknown" + if claims, ok := 
r.Context().Value("claims").(map[string]interface{}); ok { + if sub, ok := claims["sub"].(string); ok { + subject = sub + } + } + + // Audit log: exec session started + log.InfoContext(ctx, "exec session started", + "instance_id", instanceID, + "subject", subject, + "command", execReq.Command, + "tty", execReq.TTY, + "cwd", execReq.Cwd, + "timeout", execReq.Timeout, + ) + + // Create WebSocket read/writer wrapper + wsConn := &wsReadWriter{ws: ws, ctx: ctx} + + // Execute via vsock + exit, err := exec.ExecIntoInstance(ctx, inst.VsockSocket, exec.ExecOptions{ + Command: execReq.Command, + Stdin: wsConn, + Stdout: wsConn, + Stderr: wsConn, + TTY: execReq.TTY, + Env: execReq.Env, + Cwd: execReq.Cwd, + Timeout: execReq.Timeout, + }) + + duration := time.Since(startTime) + + if err != nil { + log.ErrorContext(ctx, "exec failed", + "error", err, + "instance_id", instanceID, + "subject", subject, + "duration_ms", duration.Milliseconds(), + ) + // Send error message over WebSocket before closing + ws.WriteMessage(websocket.TextMessage, []byte(fmt.Sprintf("Error: %v", err))) + return + } + + // Audit log: exec session ended + log.InfoContext(ctx, "exec session ended", + "instance_id", instanceID, + "subject", subject, + "exit_code", exit.Code, + "duration_ms", duration.Milliseconds(), + ) + + // Send close frame with exit code in JSON + closeMsg := fmt.Sprintf(`{"exitCode":%d}`, exit.Code) + ws.WriteMessage(websocket.TextMessage, []byte(closeMsg)) +} + +// wsReadWriter wraps a WebSocket connection to implement io.ReadWriter +type wsReadWriter struct { + ws *websocket.Conn + ctx context.Context + reader io.Reader + mu sync.Mutex +} + +func (w *wsReadWriter) Read(p []byte) (n int, err error) { + w.mu.Lock() + defer w.mu.Unlock() + + // If we have a pending reader, continue reading from it + if w.reader != nil { + n, err = w.reader.Read(p) + if err != io.EOF { + return n, err + } + // EOF means we finished this message, get next one + w.reader = nil + } + + // Read next 
WebSocket message + messageType, data, err := w.ws.ReadMessage() + if err != nil { + return 0, err + } + + // Only handle binary and text messages + if messageType != websocket.BinaryMessage && messageType != websocket.TextMessage { + return 0, fmt.Errorf("unexpected message type: %d", messageType) + } + + // Create reader for this message + w.reader = bytes.NewReader(data) + return w.reader.Read(p) +} + +func (w *wsReadWriter) Write(p []byte) (n int, err error) { + if err := w.ws.WriteMessage(websocket.BinaryMessage, p); err != nil { + return 0, err + } + return len(p), nil +} + diff --git a/cmd/api/api/exec_test.go b/cmd/api/api/exec_test.go new file mode 100644 index 00000000..77c3e12a --- /dev/null +++ b/cmd/api/api/exec_test.go @@ -0,0 +1,206 @@ +package api + +import ( + "bytes" + "os" + "strings" + "testing" + "time" + + "github.com/onkernel/hypeman/lib/exec" + "github.com/onkernel/hypeman/lib/oapi" + "github.com/onkernel/hypeman/lib/paths" + "github.com/onkernel/hypeman/lib/system" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestExecInstanceNonTTY(t *testing.T) { + // Require KVM access for VM creation + if _, err := os.Stat("/dev/kvm"); os.IsNotExist(err) { + t.Fatal("/dev/kvm not available - ensure KVM is enabled and user is in 'kvm' group (sudo usermod -aG kvm $USER)") + } + + if testing.Short() { + t.Skip("Skipping integration test in short mode") + } + + svc := newTestService(t) + + // Ensure system files (kernel and initrd) are available + t.Log("Ensuring system files...") + systemMgr := system.NewManager(paths.New(svc.Config.DataDir)) + err := systemMgr.EnsureSystemFiles(ctx()) + require.NoError(t, err) + t.Log("System files ready") + + // First, create and wait for the image to be ready + // Use nginx which has a proper long-running process + t.Log("Creating nginx:alpine image...") + imgResp, err := svc.CreateImage(ctx(), oapi.CreateImageRequestObject{ + Body: &oapi.CreateImageRequest{ + Name: 
"docker.io/library/nginx:alpine", + }, + }) + require.NoError(t, err) + imgCreated, ok := imgResp.(oapi.CreateImage202JSONResponse) + require.True(t, ok, "expected 202 response") + assert.Equal(t, "docker.io/library/nginx:alpine", imgCreated.Name) + + // Wait for image to be ready (poll with timeout) + t.Log("Waiting for image to be ready...") + timeout := time.After(30 * time.Second) + ticker := time.NewTicker(1 * time.Second) + defer ticker.Stop() + + imageReady := false + for !imageReady { + select { + case <-timeout: + t.Fatal("Timeout waiting for image to be ready") + case <-ticker.C: + imgResp, err := svc.GetImage(ctx(), oapi.GetImageRequestObject{ + Name: "docker.io/library/nginx:alpine", + }) + require.NoError(t, err) + + img, ok := imgResp.(oapi.GetImage200JSONResponse) + if ok && img.Status == "ready" { + imageReady = true + t.Log("Image is ready") + } else if ok { + t.Logf("Image status: %s", img.Status) + } + } + } + + // Create instance + t.Log("Creating instance...") + instResp, err := svc.CreateInstance(ctx(), oapi.CreateInstanceRequestObject{ + Body: &oapi.CreateInstanceRequest{ + Name: "exec-test", + Image: "docker.io/library/nginx:alpine", + }, + }) + require.NoError(t, err) + + inst, ok := instResp.(oapi.CreateInstance201JSONResponse) + require.True(t, ok, "expected 201 response") + require.NotEmpty(t, inst.Id) + t.Logf("Instance created: %s", inst.Id) + + // Wait for nginx to be fully started (poll console logs) + t.Log("Waiting for nginx to start...") + nginxReady := false + nginxTimeout := time.After(15 * time.Second) + nginxTicker := time.NewTicker(500 * time.Millisecond) + defer nginxTicker.Stop() + + for !nginxReady { + select { + case <-nginxTimeout: + t.Fatal("Timeout waiting for nginx to start") + case <-nginxTicker.C: + logs, err := svc.InstanceManager.GetInstanceLogs(ctx(), inst.Id, false, 100) + if err == nil && strings.Contains(logs, "start worker processes") { + nginxReady = true + t.Log("Nginx is ready") + } + } + } + + // Get 
actual instance to access vsock fields + actualInst, err := svc.InstanceManager.GetInstance(ctx(), inst.Id) + require.NoError(t, err) + require.NotNil(t, actualInst) + + // Verify vsock fields are set + require.Greater(t, actualInst.VsockCID, int64(2), "vsock CID should be > 2 (reserved values)") + require.NotEmpty(t, actualInst.VsockSocket, "vsock socket path should be set") + t.Logf("vsock CID: %d, socket: %s", actualInst.VsockCID, actualInst.VsockSocket) + + // Capture console log on failure with exec-agent filtering + t.Cleanup(func() { + if t.Failed() { + consolePath := paths.New(svc.Config.DataDir).InstanceConsoleLog(inst.Id) + if consoleData, err := os.ReadFile(consolePath); err == nil { + lines := strings.Split(string(consoleData), "\n") + + // Print exec-agent specific logs + t.Logf("=== Exec Agent Logs ===") + for _, line := range lines { + if strings.Contains(line, "[exec-agent]") { + t.Logf("%s", line) + } + } + } + } + }) + + // Check if vsock socket exists + if _, err := os.Stat(actualInst.VsockSocket); err != nil { + t.Logf("vsock socket does not exist: %v", err) + } else { + t.Logf("vsock socket exists: %s", actualInst.VsockSocket) + } + + // Wait for exec agent to be ready (retry a few times) + var exit *exec.ExitStatus + var stdout, stderr outputBuffer + var execErr error + + t.Log("Testing exec command: whoami") + maxRetries := 10 + for i := 0; i < maxRetries; i++ { + stdout = outputBuffer{} + stderr = outputBuffer{} + + exit, execErr = exec.ExecIntoInstance(ctx(), actualInst.VsockSocket, exec.ExecOptions{ + Command: []string{"/bin/sh", "-c", "whoami"}, + Stdin: nil, + Stdout: &stdout, + Stderr: &stderr, + TTY: false, + }) + + if execErr == nil { + break + } + + t.Logf("Exec attempt %d/%d failed, retrying: %v", i+1, maxRetries, execErr) + time.Sleep(1 * time.Second) + } + + // Assert exec worked + require.NoError(t, execErr, "exec should succeed after retries") + require.NotNil(t, exit, "exit status should be returned") + require.Equal(t, 0, 
exit.Code, "whoami should exit with code 0") + + + // Verify output + outStr := stdout.String() + t.Logf("Command output: %q", outStr) + require.Contains(t, outStr, "root", "whoami should return root user") + + // Cleanup + t.Log("Cleaning up instance...") + delResp, err := svc.DeleteInstance(ctx(), oapi.DeleteInstanceRequestObject{ + Id: inst.Id, + }) + require.NoError(t, err) + _, ok = delResp.(oapi.DeleteInstance204Response) + require.True(t, ok, "expected 204 response") +} + +// outputBuffer is a simple buffer for capturing exec output +type outputBuffer struct { + buf bytes.Buffer +} + +func (b *outputBuffer) Write(p []byte) (n int, err error) { + return b.buf.Write(p) +} + +func (b *outputBuffer) String() string { + return b.buf.String() +} diff --git a/cmd/api/api/instances.go b/cmd/api/api/instances.go index 8cc7acd4..5834c206 100644 --- a/cmd/api/api/instances.go +++ b/cmd/api/api/instances.go @@ -274,6 +274,17 @@ func (s *ApiService) DetachVolume(ctx context.Context, request oapi.DetachVolume }, nil } +// ExecInstance is a stub for the strict handler - actual exec uses WebSocket +func (s *ApiService) ExecInstance(ctx context.Context, request oapi.ExecInstanceRequestObject) (oapi.ExecInstanceResponseObject, error) { + // This method exists to satisfy the StrictServerInterface + // Actual exec functionality is handled by ExecHandler which uses WebSocket upgrade + // This should never be called since we register the custom WebSocket route first + return oapi.ExecInstance500JSONResponse{ + Code: "internal_error", + Message: "use websocket exec endpoint", + }, nil +} + // instanceToOAPI converts domain Instance to OAPI Instance func instanceToOAPI(inst instances.Instance) oapi.Instance { // Format sizes as human-readable strings with best precision diff --git a/cmd/api/main.go b/cmd/api/main.go index 2cbaac5a..4b5b7367 100644 --- a/cmd/api/main.go +++ b/cmd/api/main.go @@ -55,10 +55,9 @@ func run() error { logger.Error("failed to ensure system files", "error", 
err) os.Exit(1) } - kernelVer, initrdVer := app.SystemManager.GetDefaultVersions() + kernelVer := app.SystemManager.GetDefaultKernelVersion() logger.Info("System files ready", - "kernel", kernelVer, - "initrd", initrdVer) + "kernel", kernelVer) // Create router r := chi.NewRouter() @@ -73,6 +72,15 @@ func run() error { // See: https://github.com/oapi-codegen/nethttp-middleware#usage spec.Servers = nil + // Custom exec endpoint (outside OpenAPI spec, uses WebSocket) + r.With( + middleware.RequestID, + middleware.RealIP, + middleware.Logger, + middleware.Recoverer, + mw.JwtAuth(app.Config.JwtSecret), + ).Get("/instances/{id}/exec", app.ApiService.ExecHandler) + // Authenticated API endpoints r.Group(func(r chi.Router) { // Common middleware diff --git a/cmd/exec/main.go b/cmd/exec/main.go new file mode 100644 index 00000000..fac83a85 --- /dev/null +++ b/cmd/exec/main.go @@ -0,0 +1,350 @@ +package main + +import ( + "bytes" + "encoding/json" + "flag" + "fmt" + "io" + "net/http" + "net/url" + "os" + "os/signal" + "strings" + "syscall" + + "github.com/gorilla/websocket" + "golang.org/x/term" +) + +// envFlags allows multiple -e or --env flags +type envFlags []string + +func (e *envFlags) String() string { + return strings.Join(*e, ",") +} + +func (e *envFlags) Set(value string) error { + *e = append(*e, value) + return nil +} + +func main() { + // Parse flags + var envVars envFlags + interactive := flag.Bool("it", false, "Interactive mode with TTY (auto-detected if not set)") + token := flag.String("token", "", "JWT token (or use HYPEMAN_TOKEN env var)") + apiURL := flag.String("api-url", "http://localhost:8080", "API server URL") + user := flag.String("user", "", "Username to run as") + uid := flag.Int("uid", 0, "UID to run as (overrides --user)") + cwd := flag.String("cwd", "", "Working directory") + timeout := flag.Int("timeout", 0, "Execution timeout in seconds (0 = no timeout)") + flag.Var(&envVars, "env", "Environment variable (KEY=VALUE, can be repeated)") + 
flag.Var(&envVars, "e", "Environment variable (short form)") + flag.Parse() + + // Get instance ID and optional command + args := flag.Args() + if len(args) < 1 { + fmt.Fprintf(os.Stderr, "Usage: %s [OPTIONS] [command...]\n", os.Args[0]) + fmt.Fprintf(os.Stderr, "\nOptions:\n") + flag.PrintDefaults() + os.Exit(1) + } + + instanceID := args[0] + var command []string + if len(args) > 1 { + command = args[1:] + } + + // Auto-detect TTY if not explicitly set + tty := *interactive + if !tty && flag.Lookup("it").Value.String() == "false" { + // Flag wasn't explicitly set, auto-detect + if term.IsTerminal(int(os.Stdin.Fd())) && term.IsTerminal(int(os.Stdout.Fd())) { + tty = true + } + } + + // Parse environment variables + env := make(map[string]string) + for _, e := range envVars { + parts := strings.SplitN(e, "=", 2) + if len(parts) == 2 { + env[parts[0]] = parts[1] + } else { + fmt.Fprintf(os.Stderr, "Warning: ignoring malformed env var: %s\n", e) + } + } + + // Get JWT token + jwtToken := *token + if jwtToken == "" { + jwtToken = os.Getenv("HYPEMAN_TOKEN") + } + if jwtToken == "" { + fmt.Fprintf(os.Stderr, "Error: JWT token required (use --token or HYPEMAN_TOKEN env var)\n") + os.Exit(1) + } + + // Build request body JSON + execReq := map[string]interface{}{ + "command": command, + "tty": tty, + } + if *user != "" { + execReq["user"] = *user + } + if *uid != 0 { + execReq["uid"] = *uid + } + if len(env) > 0 { + execReq["env"] = env + } + if *cwd != "" { + execReq["cwd"] = *cwd + } + if *timeout > 0 { + execReq["timeout"] = *timeout + } + + reqBody, err := json.Marshal(execReq) + if err != nil { + fmt.Fprintf(os.Stderr, "Error: Failed to marshal request: %v\n", err) + os.Exit(1) + } + + // First, do HTTP POST with JSON body to initiate the request + u, err := url.Parse(*apiURL) + if err != nil { + fmt.Fprintf(os.Stderr, "Error: Invalid API URL: %v\n", err) + os.Exit(1) + } + u.Path = fmt.Sprintf("/instances/%s/exec", instanceID) + + // Build WebSocket URL + wsURL := *u 
+ if wsURL.Scheme == "https" { + wsURL.Scheme = "wss" + } else if wsURL.Scheme == "http" { + wsURL.Scheme = "ws" + } + + // Create HTTP POST request with body to send before WebSocket handshake + // We'll use a custom dialer that sends the body + req, err := http.NewRequest("POST", u.String(), bytes.NewReader(reqBody)) + if err != nil { + fmt.Fprintf(os.Stderr, "Error: Failed to create request: %v\n", err) + os.Exit(1) + } + req.Header.Set("Authorization", fmt.Sprintf("Bearer %s", jwtToken)) + req.Header.Set("Content-Type", "application/json") + + // Use custom WebSocket dialer + dialer := &websocket.Dialer{} + + // Set up headers for WebSocket connection (body was already sent) + headers := http.Header{} + headers.Set("Authorization", fmt.Sprintf("Bearer %s", jwtToken)) + + // Make HTTP POST with body + client := &http.Client{} + + // Actually, we need a custom approach. Let me use a modified request + // that sends body AND upgrades to WebSocket. + // For simplicity, let's POST the JSON as the Sec-WebSocket-Protocol header value (hacky but works) + // OR we can encode params in URL query string + + // Actually, the simplest approach: POST the body first, get a session ID, then connect WebSocket + // But that requires server changes. 
+ + // Let's use the approach where we send JSON as first WebSocket message after connect + ws, resp, err := dialer.Dial(wsURL.String(), headers) + if err != nil { + if resp != nil { + body, _ := io.ReadAll(resp.Body) + fmt.Fprintf(os.Stderr, "Error: HTTP %d: %s\n", resp.StatusCode, string(body)) + } else { + fmt.Fprintf(os.Stderr, "Error: Failed to connect: %v\n", err) + } + os.Exit(1) + } + defer ws.Close() + + // Send JSON body as first WebSocket text message + if err := ws.WriteMessage(websocket.TextMessage, reqBody); err != nil { + fmt.Fprintf(os.Stderr, "Error: Failed to send request: %v\n", err) + os.Exit(1) + } + + _ = client // unused for now + + exitCode := 0 + // Handle interactive mode + if tty { + exitCode, err = runInteractive(ws) + } else { + exitCode, err = runNonInteractive(ws) + } + + if err != nil { + fmt.Fprintf(os.Stderr, "Error: %v\n", err) + if exitCode == 0 { + exitCode = 255 // Transport error + } + } + + os.Exit(exitCode) +} + +func runInteractive(ws *websocket.Conn) (int, error) { + // Put terminal in raw mode + oldState, err := term.MakeRaw(int(os.Stdin.Fd())) + if err != nil { + return 255, fmt.Errorf("failed to set raw mode: %w", err) + } + defer term.Restore(int(os.Stdin.Fd()), oldState) + + // Handle Ctrl-C gracefully + sigCh := make(chan os.Signal, 1) + signal.Notify(sigCh, os.Interrupt, syscall.SIGTERM) + defer signal.Stop(sigCh) + + // Channel for errors and exit code + errCh := make(chan error, 2) + exitCodeCh := make(chan int, 1) + + // Forward stdin to WebSocket + go func() { + buf := make([]byte, 32*1024) + for { + n, err := os.Stdin.Read(buf) + if err != nil { + if err != io.EOF { + errCh <- fmt.Errorf("stdin read error: %w", err) + } + return + } + if n > 0 { + if err := ws.WriteMessage(websocket.BinaryMessage, buf[:n]); err != nil { + errCh <- fmt.Errorf("websocket write error: %w", err) + return + } + } + } + }() + + // Forward WebSocket to stdout + go func() { + for { + msgType, message, err := ws.ReadMessage() + if err 
!= nil { + if !websocket.IsUnexpectedCloseError(err, websocket.CloseNormalClosure, websocket.CloseGoingAway) { + // Normal close, default exit code 0 + exitCodeCh <- 0 + } + return + } + + // Check if it's a JSON exit code message + if msgType == websocket.TextMessage && bytes.Contains(message, []byte("exitCode")) { + var exitMsg struct { + ExitCode int `json:"exitCode"` + } + if json.Unmarshal(message, &exitMsg) == nil { + exitCodeCh <- exitMsg.ExitCode + return + } + } + + // Otherwise, write to stdout + if _, err := os.Stdout.Write(message); err != nil { + errCh <- fmt.Errorf("stdout write error: %w", err) + return + } + } + }() + + // Wait for error, signal, exit code, or completion + select { + case err := <-errCh: + return 255, err + case exitCode := <-exitCodeCh: + return exitCode, nil + case <-sigCh: + return 130, nil // 128 + SIGINT + } +} + +func runNonInteractive(ws *websocket.Conn) (int, error) { + // Channel for errors and exit code + errCh := make(chan error, 2) + exitCodeCh := make(chan int, 1) + doneCh := make(chan struct{}) + + // Forward stdin to WebSocket + go func() { + buf := make([]byte, 32*1024) + for { + n, err := os.Stdin.Read(buf) + if err != nil { + if err != io.EOF { + errCh <- fmt.Errorf("stdin read error: %w", err) + } + return + } + if n > 0 { + if err := ws.WriteMessage(websocket.BinaryMessage, buf[:n]); err != nil { + errCh <- fmt.Errorf("websocket write error: %w", err) + return + } + } + } + }() + + // Forward WebSocket to stdout + go func() { + defer close(doneCh) + for { + msgType, message, err := ws.ReadMessage() + if err != nil { + // Connection closed is normal - default exit code 0 + if websocket.IsCloseError(err, websocket.CloseNormalClosure, websocket.CloseGoingAway, websocket.CloseAbnormalClosure) || + err == io.EOF { + exitCodeCh <- 0 + return + } + errCh <- fmt.Errorf("websocket read error: %w", err) + return + } + + // Check if it's a JSON exit code message + if msgType == websocket.TextMessage && 
bytes.Contains(message, []byte("exitCode")) { + var exitMsg struct { + ExitCode int `json:"exitCode"` + } + if json.Unmarshal(message, &exitMsg) == nil { + exitCodeCh <- exitMsg.ExitCode + return + } + } + + // Otherwise, write to stdout + if _, err := os.Stdout.Write(message); err != nil { + errCh <- fmt.Errorf("stdout write error: %w", err) + return + } + } + }() + + // Wait for completion, exit code, or error + select { + case err := <-errCh: + return 255, err + case exitCode := <-exitCodeCh: + return exitCode, nil + case <-doneCh: + return 0, nil + } +} diff --git a/go.mod b/go.mod index df21485d..ca72b23a 100644 --- a/go.mod +++ b/go.mod @@ -4,6 +4,7 @@ go 1.25.4 require ( github.com/c2h5oh/datasize v0.0.0-20231215233829-aa82cc1e6500 + github.com/creack/pty v1.1.24 github.com/distribution/reference v0.6.0 github.com/getkin/kin-openapi v0.133.0 github.com/ghodss/yaml v1.0.0 @@ -11,7 +12,9 @@ require ( github.com/golang-jwt/jwt/v5 v5.3.0 github.com/google/go-containerregistry v0.20.6 github.com/google/wire v0.7.0 + github.com/gorilla/websocket v1.5.3 github.com/joho/godotenv v1.5.1 + github.com/mdlayher/vsock v1.2.1 github.com/nrednav/cuid2 v1.1.0 github.com/oapi-codegen/nethttp-middleware v1.1.2 github.com/oapi-codegen/runtime v1.1.2 @@ -21,6 +24,8 @@ require ( github.com/stretchr/testify v1.11.1 github.com/u-root/u-root v0.15.0 golang.org/x/sync v0.17.0 + google.golang.org/grpc v1.77.0 + google.golang.org/protobuf v1.36.10 ) require ( @@ -44,6 +49,7 @@ require ( github.com/klauspost/compress v1.18.0 // indirect github.com/klauspost/pgzip v1.2.6 // indirect github.com/mailru/easyjson v0.7.7 // indirect + github.com/mdlayher/socket v0.5.1 // indirect github.com/mitchellh/go-homedir v1.1.0 // indirect github.com/moby/sys/user v0.4.0 // indirect github.com/moby/sys/userns v0.1.0 // indirect @@ -62,9 +68,12 @@ require ( github.com/vbatts/go-mtree v0.6.1-0.20250911112631-8307d76bc1b9 // indirect github.com/vbatts/tar-split v0.12.1 // indirect 
github.com/woodsbury/decimal128 v1.3.0 // indirect - golang.org/x/crypto v0.41.0 // indirect - golang.org/x/sys v0.37.0 // indirect - google.golang.org/protobuf v1.36.10 // indirect + golang.org/x/crypto v0.43.0 // indirect + golang.org/x/net v0.46.1-0.20251013234738-63d1a5100f82 // indirect + golang.org/x/sys v0.38.0 // indirect + golang.org/x/term v0.37.0 // indirect + golang.org/x/text v0.30.0 // indirect + google.golang.org/genproto/googleapis/rpc v0.0.0-20251022142026-3a174f9686a8 // indirect gopkg.in/yaml.v2 v2.4.0 // indirect gopkg.in/yaml.v3 v3.0.1 // indirect gotest.tools/v3 v3.5.2 // indirect diff --git a/go.sum b/go.sum index 971cfaf4..95165d0d 100644 --- a/go.sum +++ b/go.sum @@ -17,6 +17,8 @@ github.com/c2h5oh/datasize v0.0.0-20231215233829-aa82cc1e6500 h1:6lhrsTEnloDPXye github.com/c2h5oh/datasize v0.0.0-20231215233829-aa82cc1e6500/go.mod h1:S/7n9copUssQ56c7aAgHqftWO4LTf4xY6CGWt8Bc+3M= github.com/containerd/stargz-snapshotter/estargz v0.16.3 h1:7evrXtoh1mSbGj/pfRccTampEyKpjpOnS3CyiV1Ebr8= github.com/containerd/stargz-snapshotter/estargz v0.16.3/go.mod h1:uyr4BfYfOj3G9WBVE8cOlQmXAbPN9VEQpBBeJIuOipU= +github.com/creack/pty v1.1.24 h1:bJrF4RRfyJnbTJqzRLHzcGaZK1NeM5kTC9jGgovnR1s= +github.com/creack/pty v1.1.24/go.mod h1:08sCNb52WyoAwi2QDyzUCTgcvVFhUzewun7wtTfvcwE= github.com/cyphar/filepath-securejoin v0.5.0 h1:hIAhkRBMQ8nIeuVwcAoymp7MY4oherZdAxD+m0u9zaw= github.com/cyphar/filepath-securejoin v0.5.0/go.mod h1:Sdj7gXlvMcPZsbhwhQ33GguGLDGQL7h7bg04C/+u9jI= github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= @@ -42,6 +44,10 @@ github.com/ghodss/yaml v1.0.0/go.mod h1:4dBDuWmgqj2HViK6kFavaiC9ZROes6MMH2rRYeME github.com/go-chi/chi/v5 v5.2.3 h1:WQIt9uxdsAbgIYgid+BpYc+liqQZGMHRaUwp0JUcvdE= github.com/go-chi/chi/v5 v5.2.3/go.mod h1:L2yAIGWB3H+phAw1NxKwWM+7eUH/lU8pOMm5hHcoops= github.com/go-logfmt/logfmt v0.4.0/go.mod h1:3RMwSq7FuexP4Kalkev3ejPJsZTpXXBr9+V4qmtdjCk= +github.com/go-logr/logr v1.4.3 
h1:CjnDlHq8ikf6E492q6eKboGOC0T8CDaOvkHCIg8idEI= +github.com/go-logr/logr v1.4.3/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY= +github.com/go-logr/stdr v1.2.2 h1:hSWxHoqTgW2S2qGc0LTAI563KZ5YKYRhT3MFKZMbjag= +github.com/go-logr/stdr v1.2.2/go.mod h1:mMo/vtBO5dYbehREoey6XUKy/eSumjCCveDpRre4VKE= github.com/go-openapi/jsonpointer v0.21.0 h1:YgdVicSA9vH5RiHs9TZW5oyafXZFc6+2Vc1rr/O9oNQ= github.com/go-openapi/jsonpointer v0.21.0/go.mod h1:IUyH9l/+uyhIYQ/PXVA41Rexl+kOkAPDdXEYns6fzUY= github.com/go-openapi/swag v0.23.0 h1:vsEVJDUo2hPJ2tu0/Xc+4noaxyEffXNIs3cOULZ+GrE= @@ -53,6 +59,8 @@ github.com/golang-jwt/jwt/v5 v5.3.0/go.mod h1:fxCRLWMO43lRc8nhHWY6LGqRcf+1gQWArs github.com/golang/protobuf v1.2.0/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= github.com/golang/protobuf v1.3.1/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= github.com/golang/protobuf v1.5.0/go.mod h1:FsONVRAS9T7sI+LIUmWTfcYkHO4aIWwzhcaSAoJOfIk= +github.com/golang/protobuf v1.5.4 h1:i7eJL8qZTpSEXOPTxNKhASYpMn+8e5Q6AdndVa1dWek= +github.com/golang/protobuf v1.5.4/go.mod h1:lnTiLA8Wa4RWRcIUkrtSVa5nRhsEGBg48fD6rSs7xps= github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= github.com/google/go-cmp v0.7.0 h1:wk8382ETsv4JYUZwIsn6YpYiWiBsYLSJiTsyBybVuN8= github.com/google/go-cmp v0.7.0/go.mod h1:pXiqmnSA92OHEEa9HXL2W4E7lf9JzCmGVUdgjX3N/iU= @@ -65,6 +73,8 @@ github.com/google/wire v0.7.0 h1:JxUKI6+CVBgCO2WToKy/nQk0sS+amI9z9EjVmdaocj4= github.com/google/wire v0.7.0/go.mod h1:n6YbUQD9cPKTnHXEBN2DXlOp/mVADhVErcMFb0v3J18= github.com/gorilla/mux v1.8.1 h1:TuBL49tXwgrFYWhqrNgrUNEY92u81SPhu7sTdzQEiWY= github.com/gorilla/mux v1.8.1/go.mod h1:AKf9I4AEqPTmMytcMc0KkNouC66V3BtZ4qD5fmWSiMQ= +github.com/gorilla/websocket v1.5.3 h1:saDtZ6Pbx/0u+bgYQ3q96pZgCzfhKXGPqt7kZ72aNNg= +github.com/gorilla/websocket v1.5.3/go.mod h1:YR8l580nyteQvAITg2hZ9XVh4b55+EU/adAjf1fMHhE= github.com/hpcloud/tail v1.0.0/go.mod h1:ab1qPbhIpdTxEkNHXyeSf5vhxWSCs/tWer42PpOxQnU= 
github.com/jmespath/go-jmespath v0.0.0-20180206201540-c2b33e8439af/go.mod h1:Nht3zPeWKUH0NzdCt2Blrr5ys8VGpn0CEB0cQHVjt7k= github.com/joho/godotenv v1.5.1 h1:7eLL/+HRGLY0ldzfGMeQkb7vMd0as4CfYvUVzLqw0N0= @@ -91,6 +101,10 @@ github.com/mattn/go-colorable v0.1.1/go.mod h1:FuOcm+DKB9mbwrcAfNl7/TZVBZ6rcncea github.com/mattn/go-colorable v0.1.2/go.mod h1:U0ppj6V5qS13XJ6of8GYAs25YV2eR4EVcfRqFIhoBtE= github.com/mattn/go-isatty v0.0.5/go.mod h1:Iq45c/XA43vh69/j3iqttzPXn0bhXyGjM0Hdxcsrc5s= github.com/mattn/go-isatty v0.0.8/go.mod h1:Iq45c/XA43vh69/j3iqttzPXn0bhXyGjM0Hdxcsrc5s= +github.com/mdlayher/socket v0.5.1 h1:VZaqt6RkGkt2OE9l3GcC6nZkqD3xKeQLyfleW/uBcos= +github.com/mdlayher/socket v0.5.1/go.mod h1:TjPLHI1UgwEv5J1B5q0zTZq12A/6H7nKmtTanQE37IQ= +github.com/mdlayher/vsock v1.2.1 h1:pC1mTJTvjo1r9n9fbm7S1j04rCgCzhCOS5DY0zqHlnQ= +github.com/mdlayher/vsock v1.2.1/go.mod h1:NRfCibel++DgeMD8z/hP+PPTjlNJsdPOmxcnENvE+SE= github.com/mgutz/ansi v0.0.0-20170206155736-9520e82c474b/go.mod h1:01TrycV0kFyexm33Z7vhZRXopbI8J3TDReVlkTgMUxE= github.com/mitchellh/go-homedir v1.1.0 h1:lukF9ziXFxDFPkA1vsr5zpc1XuPDn/wFntq5mG+4E0Y= github.com/mitchellh/go-homedir v1.1.0/go.mod h1:SfyaCUpYCn1Vlf4IUYiD9fPX4A5wJrkLzIz1N1q0pr0= @@ -167,13 +181,27 @@ github.com/vbatts/tar-split v0.12.1 h1:CqKoORW7BUWBe7UL/iqTVvkTBOF8UvOMKOIZykxnn github.com/vbatts/tar-split v0.12.1/go.mod h1:eF6B6i6ftWQcDqEn3/iGFRFRo8cBIMSJVOpnNdfTMFA= github.com/woodsbury/decimal128 v1.3.0 h1:8pffMNWIlC0O5vbyHWFZAt5yWvWcrHA+3ovIIjVWss0= github.com/woodsbury/decimal128 v1.3.0/go.mod h1:C5UTmyTjW3JftjUFzOVhC20BEQa2a4ZKOB5I6Zjb+ds= +go.opentelemetry.io/auto/sdk v1.2.1 h1:jXsnJ4Lmnqd11kwkBV2LgLoFMZKizbCi5fNZ/ipaZ64= +go.opentelemetry.io/auto/sdk v1.2.1/go.mod h1:KRTj+aOaElaLi+wW1kO/DZRXwkF4C5xPbEe3ZiIhN7Y= +go.opentelemetry.io/otel v1.38.0 h1:RkfdswUDRimDg0m2Az18RKOsnI8UDzppJAtj01/Ymk8= +go.opentelemetry.io/otel v1.38.0/go.mod h1:zcmtmQ1+YmQM9wrNsTGV/q/uyusom3P8RxwExxkZhjM= +go.opentelemetry.io/otel/metric v1.38.0 
h1:Kl6lzIYGAh5M159u9NgiRkmoMKjvbsKtYRwgfrA6WpA= +go.opentelemetry.io/otel/metric v1.38.0/go.mod h1:kB5n/QoRM8YwmUahxvI3bO34eVtQf2i4utNVLr9gEmI= +go.opentelemetry.io/otel/sdk v1.38.0 h1:l48sr5YbNf2hpCUj/FoGhW9yDkl+Ma+LrVl8qaM5b+E= +go.opentelemetry.io/otel/sdk v1.38.0/go.mod h1:ghmNdGlVemJI3+ZB5iDEuk4bWA3GkTpW+DOoZMYBVVg= +go.opentelemetry.io/otel/sdk/metric v1.38.0 h1:aSH66iL0aZqo//xXzQLYozmWrXxyFkBJ6qT5wthqPoM= +go.opentelemetry.io/otel/sdk/metric v1.38.0/go.mod h1:dg9PBnW9XdQ1Hd6ZnRz689CbtrUp0wMMs9iPcgT9EZA= +go.opentelemetry.io/otel/trace v1.38.0 h1:Fxk5bKrDZJUH+AMyyIXGcFAPah0oRcT+LuNtJrmcNLE= +go.opentelemetry.io/otel/trace v1.38.0/go.mod h1:j1P9ivuFsTceSWe1oY+EeW3sc+Pp42sO++GHkg4wwhs= golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= golang.org/x/crypto v0.0.0-20190426145343-a29dc8fdc734/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= -golang.org/x/crypto v0.41.0 h1:WKYxWedPGCTVVl5+WHSSrOBT0O8lx32+zxmHxijgXp4= -golang.org/x/crypto v0.41.0/go.mod h1:pO5AFd7FA68rFak7rOAGVuygIISepHftHnr8dr6+sUc= +golang.org/x/crypto v0.43.0 h1:dduJYIi3A3KOfdGOHX8AVZ/jGiyPa3IbBozJ5kNuE04= +golang.org/x/crypto v0.43.0/go.mod h1:BFbav4mRNlXJL4wNeejLpWxB7wMbc79PdRGhWKncxR0= golang.org/x/net v0.0.0-20180906233101-161cd47e91fd/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= +golang.org/x/net v0.46.1-0.20251013234738-63d1a5100f82 h1:6/3JGEh1C88g7m+qzzTbl3A0FtsLguXieqofVLU/JAo= +golang.org/x/net v0.46.1-0.20251013234738-63d1a5100f82/go.mod h1:Q9BGdFy1y4nkUwiLvT5qtyhAnEHgnQ/zd8PfU6nc210= golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.17.0 h1:l60nONMj9l5drqw6jlhIELNv9I0A4OFgRsG9k2oT9Ug= golang.org/x/sync v0.17.0/go.mod 
h1:9KTHXmSnoGruLpwFjVSX0lNNA75CykiMECbovNTZqGI= @@ -182,12 +210,22 @@ golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5h golang.org/x/sys v0.0.0-20190222072716-a9d3bda3a223/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20220715151400-c0bba94af5f8/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.37.0 h1:fdNQudmxPjkdUTPnLn5mdQv7Zwvbvpaxqs831goi9kQ= -golang.org/x/sys v0.37.0/go.mod h1:OgkHotnGiDImocRcuBABYBEXf8A9a87e/uXjp9XT3ks= +golang.org/x/sys v0.38.0 h1:3yZWxaJjBmCWXqhN1qh02AkOnCQ1poK6oF+a7xWL6Gc= +golang.org/x/sys v0.38.0/go.mod h1:OgkHotnGiDImocRcuBABYBEXf8A9a87e/uXjp9XT3ks= +golang.org/x/term v0.37.0 h1:8EGAD0qCmHYZg6J17DvsMy9/wJ7/D/4pV/wfnld5lTU= +golang.org/x/term v0.37.0/go.mod h1:5pB4lxRNYYVZuTLmy8oR2BH8dflOR+IbTYFD8fi3254= golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.3.2/go.mod h1:bEr9sfX3Q8Zfm5fL9x+3itogRgK3+ptLWKqgva+5dAk= +golang.org/x/text v0.30.0 h1:yznKA/E9zq54KzlzBEAWn1NXSQ8DIp/NYMy88xJjl4k= +golang.org/x/text v0.30.0/go.mod h1:yDdHFIX9t+tORqspjENWgzaCVXgk0yYnYuSZ8UzzBVM= golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +gonum.org/v1/gonum v0.16.0 h1:5+ul4Swaf3ESvrOnidPp4GZbzf0mxVQpDCYUQE7OJfk= +gonum.org/v1/gonum v0.16.0/go.mod h1:fef3am4MQ93R2HHpKnLk4/Tbh/s0+wqD5nfa6Pnwy4E= +google.golang.org/genproto/googleapis/rpc v0.0.0-20251022142026-3a174f9686a8 h1:M1rk8KBnUsBDg1oPGHNCxG4vc1f49epmTO7xscSajMk= +google.golang.org/genproto/googleapis/rpc v0.0.0-20251022142026-3a174f9686a8/go.mod h1:7i2o+ce6H/6BluujYR+kqX3GKH+dChPTQU19wjRPiGk= +google.golang.org/grpc v1.77.0 h1:wVVY6/8cGA6vvffn+wWK5ToddbgdU3d8MNENr4evgXM= +google.golang.org/grpc 
v1.77.0/go.mod h1:z0BY1iVj0q8E1uSQCjL9cppRj+gnZjzDnzV0dHhrNig= google.golang.org/protobuf v1.26.0-rc.1/go.mod h1:jlhhOSvTdKEhbULTjvd4ARK9grFBp09yW+WbY/TyQbw= google.golang.org/protobuf v1.27.1/go.mod h1:9q0QmTI4eRPtz6boOQmLYwt+qCgq0jsYwAQnmE0givc= google.golang.org/protobuf v1.36.10 h1:AYd7cD/uASjIL6Q9LiTjz8JLcrh/88q5UObnmY3aOOE= diff --git a/lib/exec/README.md b/lib/exec/README.md new file mode 100644 index 00000000..48d776a2 --- /dev/null +++ b/lib/exec/README.md @@ -0,0 +1,199 @@ +# Exec Feature + +Remote command execution in microVM instances via vsock. + +## Architecture + +``` +Client (WebSocket) + ↓ +API Server (/instances/{id}/exec) + ↓ +lib/exec/client.go (ExecIntoInstance) + ↓ +Cloud Hypervisor vsock socket + ↓ +Guest: exec-agent (lib/system/exec_agent) + ↓ +Container (chroot /overlay/newroot) +``` + +## How It Works + +### 1. API Layer (`cmd/api/api/exec.go`) + +- WebSocket endpoint: `GET /instances/{id}/exec` +- **Note**: Uses GET method because WebSocket connections MUST be initiated with GET per RFC 6455 (the WebSocket specification). Even though this is semantically a command execution (which would normally be POST), the WebSocket upgrade handshake requires GET. +- Upgrades HTTP to WebSocket for bidirectional streaming +- First WebSocket message must be JSON with exec parameters: + ```json + { + "command": ["bash", "-c", "whoami"], + "tty": true, + "env": { // optional: environment variables + "FOO": "bar" + }, + "cwd": "/app", // optional: working directory + "timeout": 30 // optional: timeout in seconds + } + ``` +- Calls `exec.ExecIntoInstance()` with the instance's vsock socket path +- Logs audit trail: JWT subject, instance ID, command, start/end time, exit code + +### 2. 
Client (`lib/exec/client.go`) + +- **ExecIntoInstance()**: Main client function +- Connects to Cloud Hypervisor's vsock Unix socket +- Performs vsock handshake: `CONNECT 2222\n` → `OK <port>\n` +- Creates gRPC client over the vsock connection +- Streams stdin/stdout/stderr bidirectionally +- Returns exit status when command completes + +### 3. Protocol (`exec.proto`) + +gRPC streaming RPC with protobuf messages: + +**Request (client → server):** +- `ExecStart`: Command, TTY flag, environment variables, working directory, timeout +- `stdin`: Input data bytes + +**Response (server → client):** +- `stdout`: Output data bytes +- `stderr`: Error output bytes (non-TTY only) +- `exit_code`: Final message with command's exit status + +### 4. Guest Agent (`lib/system/exec_agent/main.go`) + +- Embedded binary injected into microVM via initrd +- **Runs inside container namespace** (chrooted to `/overlay/newroot`) for proper PTY signal handling +- Listens on vsock port 2222 inside guest +- Implements gRPC `ExecService` server +- Executes commands directly (no chroot wrapper needed since agent is already in container) +- Two modes: + - **Non-TTY**: Separate stdout/stderr pipes + - **TTY**: Single PTY for interactive shells with proper Ctrl+C handling + +### 5. Embedding + +- `exec-agent` binary built by Makefile +- Embedded into host binary via `lib/system/exec_agent_binary.go` +- Injected into initrd at VM creation time +- Auto-started by init script in guest + +## Key Features + +- **Bidirectional streaming**: Real-time stdin/stdout/stderr +- **TTY support**: Interactive shells with terminal control +- **Exit codes**: Proper process exit status reporting +- **No SSH required**: Direct vsock communication (faster, simpler) +- **Container isolation**: Commands run in container context, not VM context + +## Why vsock? 
+ +- **Low latency**: Direct host-guest communication without networking +- **No network setup**: Works even if container has no network +- **Secure**: No exposed ports, isolated to host-guest boundary +- **Simple**: No SSH keys, passwords, or network configuration + +## CLI Usage + +The `hypeman-exec` CLI provides kubectl-like exec functionality: + +```bash +# Build the CLI +make build-exec + +# Set your JWT token +export HYPEMAN_TOKEN="your-jwt-token" + +# Run a one-off command +./bin/hypeman-exec whoami + +# Interactive shell (auto-detects TTY, or use -it explicitly) +./bin/hypeman-exec /bin/sh +./bin/hypeman-exec -it /bin/sh + +# With environment variables +./bin/hypeman-exec --env FOO=bar --env BAZ=qux env +./bin/hypeman-exec -e FOO=bar -e BAZ=qux env + +# With working directory +./bin/hypeman-exec --cwd /app pwd + +# With timeout (in seconds) +./bin/hypeman-exec --timeout 30 /long-running-script.sh + +# Combined options +./bin/hypeman-exec --cwd /app --env ENV=prod php artisan migrate +``` + +### Options + +- `-it`: Interactive mode with TTY (auto-detected if stdin/stdout are terminals) +- `--token`: JWT token (or use `HYPEMAN_TOKEN` env var) +- `--api-url`: API server URL (default: `http://localhost:8080`) +- `--env` / `-e`: Environment variable (KEY=VALUE, can be repeated) +- `--cwd`: Working directory +- `--timeout`: Execution timeout in seconds (0 = no timeout) + +### Exit Codes + +The CLI exits with the remote command's exit code, or: +- `255`: Transport/connection error +- `130`: Interrupted by Ctrl-C (SIGINT) +- `124`: Command timed out (GNU timeout convention) + +## Security & Authorization + +- All authentication and authorization is handled at the API layer via JWT +- The guest agent trusts that the host has properly authorized the request +- User/UID switching is performed in the guest to enforce privilege boundaries +- Commands run in the container context (`chroot /overlay/newroot`), not the VM context + +## Observability + +### API Layer Logging 
+ +The API logs comprehensive audit trails for all exec sessions: + +``` +# Session start +{"level":"info","msg":"exec session started","instance_id":"abc123","subject":"user@example.com", + "command":["bash","-c","whoami"],"tty":true,"user":"www-data","uid":0,"cwd":"/app","timeout":30} + +# Session end +{"level":"info","msg":"exec session ended","instance_id":"abc123","subject":"user@example.com", + "exit_code":0,"duration_ms":1234} + +# Errors +{"level":"error","msg":"exec failed","instance_id":"abc123","subject":"user@example.com", + "error":"connection refused","duration_ms":500} +``` + +### Guest Agent Logging + +The guest agent logs are written to the VM console log (accessible via `/var/lib/hypeman/guests/{id}/console.log`): + +``` +[exec-agent] listening on vsock port 2222 +[exec-agent] new exec stream +[exec-agent] exec: command=[bash -c whoami] tty=true cwd=/app timeout=30 +[exec-agent] command finished with exit code: 0 +``` + +## Timeout Behavior + +When a timeout is specified: +- The guest agent creates a context with the specified deadline +- If the command doesn't complete in time, it receives SIGKILL +- The exit code will be `124` (GNU timeout convention) +- Timeout is enforced in the guest, so network issues won't cause false timeouts + +## Guest Agent Placement + +**exec-agent runs inside the container namespace**: +- Init script copies agent binary into `/overlay/newroot/usr/local/bin/` +- Bind-mounts `/dev/pts` so PTY devices are accessible +- Runs agent with `chroot /overlay/newroot` +- Commands execute directly (no chroot wrapper needed) + diff --git a/lib/exec/client.go b/lib/exec/client.go new file mode 100644 index 00000000..dce0ae97 --- /dev/null +++ b/lib/exec/client.go @@ -0,0 +1,141 @@ +package exec + +import ( + "bufio" + "context" + "fmt" + "io" + "net" + "strings" + + "google.golang.org/grpc" + "google.golang.org/grpc/credentials/insecure" +) + +// ExitStatus represents command exit information +type ExitStatus struct { + Code int +} + +// 
ExecOptions configures command execution +type ExecOptions struct { + Command []string + Stdin io.Reader + Stdout io.Writer + Stderr io.Writer + TTY bool + Env map[string]string // Environment variables + Cwd string // Working directory (optional) + Timeout int32 // Execution timeout in seconds (0 = no timeout) +} + +// ExecIntoInstance executes command in instance via vsock using gRPC +// vsockSocketPath is the Unix socket created by Cloud Hypervisor (e.g., /var/lib/hypeman/guests/{id}/vsock.sock) +func ExecIntoInstance(ctx context.Context, vsockSocketPath string, opts ExecOptions) (*ExitStatus, error) { + // Connect to Cloud Hypervisor's vsock Unix socket with custom dialer + grpcConn, err := grpc.NewClient("passthrough:///vsock", + grpc.WithContextDialer(func(ctx context.Context, addr string) (net.Conn, error) { + // Connect to CH's Unix socket + conn, err := net.Dial("unix", vsockSocketPath) + if err != nil { + return nil, fmt.Errorf("dial unix socket: %w", err) + } + + // Perform Cloud Hypervisor vsock handshake + if _, err := fmt.Fprintf(conn, "CONNECT 2222\n"); err != nil { + conn.Close() + return nil, fmt.Errorf("send handshake: %w", err) + } + + // Read handshake response + reader := bufio.NewReader(conn) + response, err := reader.ReadString('\n') + if err != nil { + conn.Close() + return nil, fmt.Errorf("read handshake response: %w", err) + } + + if !strings.HasPrefix(response, "OK ") { + conn.Close() + return nil, fmt.Errorf("handshake failed: %s", strings.TrimSpace(response)) + } + + // Return the connection for gRPC to use + // Note: bufio.Reader may have buffered data, but since we only read one line + // and gRPC will start fresh, this should be safe + return conn, nil + }), + grpc.WithTransportCredentials(insecure.NewCredentials()), + ) + if err != nil { + return nil, fmt.Errorf("create grpc client: %w", err) + } + defer grpcConn.Close() + + // Create exec client + client := NewExecServiceClient(grpcConn) + stream, err := client.Exec(ctx) + if err 
!= nil { + return nil, fmt.Errorf("start exec stream: %w", err) + } + + // Send start request + if err := stream.Send(&ExecRequest{ + Request: &ExecRequest_Start{ + Start: &ExecStart{ + Command: opts.Command, + Tty: opts.TTY, + Env: opts.Env, + Cwd: opts.Cwd, + TimeoutSeconds: opts.Timeout, + }, + }, + }); err != nil { + return nil, fmt.Errorf("send start request: %w", err) + } + + // Handle stdin in background + if opts.Stdin != nil { + go func() { + buf := make([]byte, 32 * 1024) + for { + n, err := opts.Stdin.Read(buf) + if n > 0 { + stream.Send(&ExecRequest{ + Request: &ExecRequest_Stdin{Stdin: buf[:n]}, + }) + } + if err != nil { + stream.CloseSend() + return + } + } + }() + } + + // Receive responses + for { + resp, err := stream.Recv() + if err == io.EOF { + // Stream closed without exit code + return nil, fmt.Errorf("stream closed without exit code") + } + if err != nil { + return nil, fmt.Errorf("receive response: %w", err) + } + + switch r := resp.Response.(type) { + case *ExecResponse_Stdout: + if opts.Stdout != nil { + opts.Stdout.Write(r.Stdout) + } + case *ExecResponse_Stderr: + if opts.Stderr != nil { + opts.Stderr.Write(r.Stderr) + } + case *ExecResponse_ExitCode: + return &ExitStatus{Code: int(r.ExitCode)}, nil + } + } +} + diff --git a/lib/exec/exec.pb.go b/lib/exec/exec.pb.go new file mode 100644 index 00000000..68e60129 --- /dev/null +++ b/lib/exec/exec.pb.go @@ -0,0 +1,372 @@ +// Code generated by protoc-gen-go. DO NOT EDIT. +// versions: +// protoc-gen-go v1.36.10 +// protoc v3.21.12 +// source: lib/exec/exec.proto + +package exec + +import ( + protoreflect "google.golang.org/protobuf/reflect/protoreflect" + protoimpl "google.golang.org/protobuf/runtime/protoimpl" + reflect "reflect" + sync "sync" + unsafe "unsafe" +) + +const ( + // Verify that this generated code is sufficiently up-to-date. + _ = protoimpl.EnforceVersion(20 - protoimpl.MinVersion) + // Verify that runtime/protoimpl is sufficiently up-to-date. 
+ _ = protoimpl.EnforceVersion(protoimpl.MaxVersion - 20) +) + +// ExecRequest represents messages from client to server +type ExecRequest struct { + state protoimpl.MessageState `protogen:"open.v1"` + // Types that are valid to be assigned to Request: + // + // *ExecRequest_Start + // *ExecRequest_Stdin + Request isExecRequest_Request `protobuf_oneof:"request"` + unknownFields protoimpl.UnknownFields + sizeCache protoimpl.SizeCache +} + +func (x *ExecRequest) Reset() { + *x = ExecRequest{} + mi := &file_lib_exec_exec_proto_msgTypes[0] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) +} + +func (x *ExecRequest) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*ExecRequest) ProtoMessage() {} + +func (x *ExecRequest) ProtoReflect() protoreflect.Message { + mi := &file_lib_exec_exec_proto_msgTypes[0] + if x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use ExecRequest.ProtoReflect.Descriptor instead. 
+func (*ExecRequest) Descriptor() ([]byte, []int) { + return file_lib_exec_exec_proto_rawDescGZIP(), []int{0} +} + +func (x *ExecRequest) GetRequest() isExecRequest_Request { + if x != nil { + return x.Request + } + return nil +} + +func (x *ExecRequest) GetStart() *ExecStart { + if x != nil { + if x, ok := x.Request.(*ExecRequest_Start); ok { + return x.Start + } + } + return nil +} + +func (x *ExecRequest) GetStdin() []byte { + if x != nil { + if x, ok := x.Request.(*ExecRequest_Stdin); ok { + return x.Stdin + } + } + return nil +} + +type isExecRequest_Request interface { + isExecRequest_Request() +} + +type ExecRequest_Start struct { + Start *ExecStart `protobuf:"bytes,1,opt,name=start,proto3,oneof"` // Initial exec request +} + +type ExecRequest_Stdin struct { + Stdin []byte `protobuf:"bytes,2,opt,name=stdin,proto3,oneof"` // Stdin data +} + +func (*ExecRequest_Start) isExecRequest_Request() {} + +func (*ExecRequest_Stdin) isExecRequest_Request() {} + +// ExecStart initiates command execution +type ExecStart struct { + state protoimpl.MessageState `protogen:"open.v1"` + Command []string `protobuf:"bytes,1,rep,name=command,proto3" json:"command,omitempty"` // Command and arguments + Tty bool `protobuf:"varint,2,opt,name=tty,proto3" json:"tty,omitempty"` // Allocate pseudo-TTY + Env map[string]string `protobuf:"bytes,3,rep,name=env,proto3" json:"env,omitempty" protobuf_key:"bytes,1,opt,name=key" protobuf_val:"bytes,2,opt,name=value"` // Environment variables + Cwd string `protobuf:"bytes,4,opt,name=cwd,proto3" json:"cwd,omitempty"` // Working directory (optional) + TimeoutSeconds int32 `protobuf:"varint,5,opt,name=timeout_seconds,json=timeoutSeconds,proto3" json:"timeout_seconds,omitempty"` // Execution timeout in seconds (0 = no timeout) + unknownFields protoimpl.UnknownFields + sizeCache protoimpl.SizeCache +} + +func (x *ExecStart) Reset() { + *x = ExecStart{} + mi := &file_lib_exec_exec_proto_msgTypes[1] + ms := 
protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) +} + +func (x *ExecStart) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*ExecStart) ProtoMessage() {} + +func (x *ExecStart) ProtoReflect() protoreflect.Message { + mi := &file_lib_exec_exec_proto_msgTypes[1] + if x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use ExecStart.ProtoReflect.Descriptor instead. +func (*ExecStart) Descriptor() ([]byte, []int) { + return file_lib_exec_exec_proto_rawDescGZIP(), []int{1} +} + +func (x *ExecStart) GetCommand() []string { + if x != nil { + return x.Command + } + return nil +} + +func (x *ExecStart) GetTty() bool { + if x != nil { + return x.Tty + } + return false +} + +func (x *ExecStart) GetEnv() map[string]string { + if x != nil { + return x.Env + } + return nil +} + +func (x *ExecStart) GetCwd() string { + if x != nil { + return x.Cwd + } + return "" +} + +func (x *ExecStart) GetTimeoutSeconds() int32 { + if x != nil { + return x.TimeoutSeconds + } + return 0 +} + +// ExecResponse represents messages from server to client +type ExecResponse struct { + state protoimpl.MessageState `protogen:"open.v1"` + // Types that are valid to be assigned to Response: + // + // *ExecResponse_Stdout + // *ExecResponse_Stderr + // *ExecResponse_ExitCode + Response isExecResponse_Response `protobuf_oneof:"response"` + unknownFields protoimpl.UnknownFields + sizeCache protoimpl.SizeCache +} + +func (x *ExecResponse) Reset() { + *x = ExecResponse{} + mi := &file_lib_exec_exec_proto_msgTypes[2] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) +} + +func (x *ExecResponse) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*ExecResponse) ProtoMessage() {} + +func (x *ExecResponse) ProtoReflect() protoreflect.Message { + mi := 
&file_lib_exec_exec_proto_msgTypes[2] + if x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use ExecResponse.ProtoReflect.Descriptor instead. +func (*ExecResponse) Descriptor() ([]byte, []int) { + return file_lib_exec_exec_proto_rawDescGZIP(), []int{2} +} + +func (x *ExecResponse) GetResponse() isExecResponse_Response { + if x != nil { + return x.Response + } + return nil +} + +func (x *ExecResponse) GetStdout() []byte { + if x != nil { + if x, ok := x.Response.(*ExecResponse_Stdout); ok { + return x.Stdout + } + } + return nil +} + +func (x *ExecResponse) GetStderr() []byte { + if x != nil { + if x, ok := x.Response.(*ExecResponse_Stderr); ok { + return x.Stderr + } + } + return nil +} + +func (x *ExecResponse) GetExitCode() int32 { + if x != nil { + if x, ok := x.Response.(*ExecResponse_ExitCode); ok { + return x.ExitCode + } + } + return 0 +} + +type isExecResponse_Response interface { + isExecResponse_Response() +} + +type ExecResponse_Stdout struct { + Stdout []byte `protobuf:"bytes,1,opt,name=stdout,proto3,oneof"` // Stdout data +} + +type ExecResponse_Stderr struct { + Stderr []byte `protobuf:"bytes,2,opt,name=stderr,proto3,oneof"` // Stderr data +} + +type ExecResponse_ExitCode struct { + ExitCode int32 `protobuf:"varint,3,opt,name=exit_code,json=exitCode,proto3,oneof"` // Command exit code (final message) +} + +func (*ExecResponse_Stdout) isExecResponse_Response() {} + +func (*ExecResponse_Stderr) isExecResponse_Response() {} + +func (*ExecResponse_ExitCode) isExecResponse_Response() {} + +var File_lib_exec_exec_proto protoreflect.FileDescriptor + +const file_lib_exec_exec_proto_rawDesc = "" + + "\n" + + "\x13lib/exec/exec.proto\x12\x04exec\"Y\n" + + "\vExecRequest\x12'\n" + + "\x05start\x18\x01 \x01(\v2\x0f.exec.ExecStartH\x00R\x05start\x12\x16\n" + + "\x05stdin\x18\x02 \x01(\fH\x00R\x05stdinB\t\n" + + 
"\arequest\"\xd6\x01\n" + + "\tExecStart\x12\x18\n" + + "\acommand\x18\x01 \x03(\tR\acommand\x12\x10\n" + + "\x03tty\x18\x02 \x01(\bR\x03tty\x12*\n" + + "\x03env\x18\x03 \x03(\v2\x18.exec.ExecStart.EnvEntryR\x03env\x12\x10\n" + + "\x03cwd\x18\x04 \x01(\tR\x03cwd\x12'\n" + + "\x0ftimeout_seconds\x18\x05 \x01(\x05R\x0etimeoutSeconds\x1a6\n" + + "\bEnvEntry\x12\x10\n" + + "\x03key\x18\x01 \x01(\tR\x03key\x12\x14\n" + + "\x05value\x18\x02 \x01(\tR\x05value:\x028\x01\"m\n" + + "\fExecResponse\x12\x18\n" + + "\x06stdout\x18\x01 \x01(\fH\x00R\x06stdout\x12\x18\n" + + "\x06stderr\x18\x02 \x01(\fH\x00R\x06stderr\x12\x1d\n" + + "\texit_code\x18\x03 \x01(\x05H\x00R\bexitCodeB\n" + + "\n" + + "\bresponse2@\n" + + "\vExecService\x121\n" + + "\x04Exec\x12\x11.exec.ExecRequest\x1a\x12.exec.ExecResponse(\x010\x01B&Z$github.com/onkernel/hypeman/lib/execb\x06proto3" + +var ( + file_lib_exec_exec_proto_rawDescOnce sync.Once + file_lib_exec_exec_proto_rawDescData []byte +) + +func file_lib_exec_exec_proto_rawDescGZIP() []byte { + file_lib_exec_exec_proto_rawDescOnce.Do(func() { + file_lib_exec_exec_proto_rawDescData = protoimpl.X.CompressGZIP(unsafe.Slice(unsafe.StringData(file_lib_exec_exec_proto_rawDesc), len(file_lib_exec_exec_proto_rawDesc))) + }) + return file_lib_exec_exec_proto_rawDescData +} + +var file_lib_exec_exec_proto_msgTypes = make([]protoimpl.MessageInfo, 4) +var file_lib_exec_exec_proto_goTypes = []any{ + (*ExecRequest)(nil), // 0: exec.ExecRequest + (*ExecStart)(nil), // 1: exec.ExecStart + (*ExecResponse)(nil), // 2: exec.ExecResponse + nil, // 3: exec.ExecStart.EnvEntry +} +var file_lib_exec_exec_proto_depIdxs = []int32{ + 1, // 0: exec.ExecRequest.start:type_name -> exec.ExecStart + 3, // 1: exec.ExecStart.env:type_name -> exec.ExecStart.EnvEntry + 0, // 2: exec.ExecService.Exec:input_type -> exec.ExecRequest + 2, // 3: exec.ExecService.Exec:output_type -> exec.ExecResponse + 3, // [3:4] is the sub-list for method output_type + 2, // [2:3] is the sub-list for 
syntax = "proto3";

package exec;

option go_package = "github.com/onkernel/hypeman/lib/exec";

// ExecService provides remote command execution in guest VMs
service ExecService {
  // Exec executes a command with bidirectional streaming
  rpc Exec(stream ExecRequest) returns (stream ExecResponse);
}

// ExecRequest represents messages from client to server
message ExecRequest {
  oneof request {
    ExecStart start = 1;  // Initial exec request
    bytes stdin = 2;      // Stdin data
  }
}

// ExecStart initiates command execution
message ExecStart {
  repeated string command = 1;   // Command and arguments
  bool tty = 2;                  // Allocate pseudo-TTY
  map<string, string> env = 3;   // Environment variables (name -> value)
  string cwd = 4;                // Working directory (optional)
  int32 timeout_seconds = 5;     // Execution timeout in seconds (0 = no timeout)
}

// ExecResponse represents messages from server to client
message ExecResponse {
  oneof response {
    bytes stdout = 1;     // Stdout data
    bytes stderr = 2;     // Stderr data
    int32 exit_code = 3;  // Command exit code (final message)
  }
}
+// +// ExecService provides remote command execution in guest VMs +type ExecServiceClient interface { + // Exec executes a command with bidirectional streaming + Exec(ctx context.Context, opts ...grpc.CallOption) (grpc.BidiStreamingClient[ExecRequest, ExecResponse], error) +} + +type execServiceClient struct { + cc grpc.ClientConnInterface +} + +func NewExecServiceClient(cc grpc.ClientConnInterface) ExecServiceClient { + return &execServiceClient{cc} +} + +func (c *execServiceClient) Exec(ctx context.Context, opts ...grpc.CallOption) (grpc.BidiStreamingClient[ExecRequest, ExecResponse], error) { + cOpts := append([]grpc.CallOption{grpc.StaticMethod()}, opts...) + stream, err := c.cc.NewStream(ctx, &ExecService_ServiceDesc.Streams[0], ExecService_Exec_FullMethodName, cOpts...) + if err != nil { + return nil, err + } + x := &grpc.GenericClientStream[ExecRequest, ExecResponse]{ClientStream: stream} + return x, nil +} + +// This type alias is provided for backwards compatibility with existing code that references the prior non-generic stream type by name. +type ExecService_ExecClient = grpc.BidiStreamingClient[ExecRequest, ExecResponse] + +// ExecServiceServer is the server API for ExecService service. +// All implementations must embed UnimplementedExecServiceServer +// for forward compatibility. +// +// ExecService provides remote command execution in guest VMs +type ExecServiceServer interface { + // Exec executes a command with bidirectional streaming + Exec(grpc.BidiStreamingServer[ExecRequest, ExecResponse]) error + mustEmbedUnimplementedExecServiceServer() +} + +// UnimplementedExecServiceServer must be embedded to have +// forward compatible implementations. +// +// NOTE: this should be embedded by value instead of pointer to avoid a nil +// pointer dereference when methods are called. 
+type UnimplementedExecServiceServer struct{} + +func (UnimplementedExecServiceServer) Exec(grpc.BidiStreamingServer[ExecRequest, ExecResponse]) error { + return status.Errorf(codes.Unimplemented, "method Exec not implemented") +} +func (UnimplementedExecServiceServer) mustEmbedUnimplementedExecServiceServer() {} +func (UnimplementedExecServiceServer) testEmbeddedByValue() {} + +// UnsafeExecServiceServer may be embedded to opt out of forward compatibility for this service. +// Use of this interface is not recommended, as added methods to ExecServiceServer will +// result in compilation errors. +type UnsafeExecServiceServer interface { + mustEmbedUnimplementedExecServiceServer() +} + +func RegisterExecServiceServer(s grpc.ServiceRegistrar, srv ExecServiceServer) { + // If the following call pancis, it indicates UnimplementedExecServiceServer was + // embedded by pointer and is nil. This will cause panics if an + // unimplemented method is ever invoked, so we test this at initialization + // time to prevent it from happening at runtime later due to I/O. + if t, ok := srv.(interface{ testEmbeddedByValue() }); ok { + t.testEmbeddedByValue() + } + s.RegisterService(&ExecService_ServiceDesc, srv) +} + +func _ExecService_Exec_Handler(srv interface{}, stream grpc.ServerStream) error { + return srv.(ExecServiceServer).Exec(&grpc.GenericServerStream[ExecRequest, ExecResponse]{ServerStream: stream}) +} + +// This type alias is provided for backwards compatibility with existing code that references the prior non-generic stream type by name. +type ExecService_ExecServer = grpc.BidiStreamingServer[ExecRequest, ExecResponse] + +// ExecService_ServiceDesc is the grpc.ServiceDesc for ExecService service. 
// generateVsockCID derives a vsock context ID from an instance ID.
//
// Only the first 8 bytes of instanceID contribute: they are folded into a
// base-37 polynomial hash which is then mapped into the usable CID space.
// CIDs 0-2 are reserved (hypervisor, loopback, host), so the result always
// lies in the range 3 to 4294967294 inclusive. The mapping is deterministic
// but not injective: IDs sharing their first 8 bytes, or whose hashes
// coincide after the modulo, yield the same CID.
func generateVsockCID(instanceID string) int64 {
	const (
		prefixLen   = 8          // number of leading ID bytes that are hashed
		multiplier  = 37         // polynomial hash base
		usableCIDs  = 4294967292 // size of the range the hash is reduced into
		firstUsable = 3          // lowest non-reserved CID
	)

	seed := instanceID
	if len(seed) > prefixLen {
		seed = seed[:prefixLen]
	}

	hash := int64(0)
	for _, r := range seed {
		hash *= multiplier
		hash += int64(r)
	}

	return hash%usableCIDs + firstUsable
}
Ensure directories @@ -269,9 +292,9 @@ func (m *manager) startAndBootVM( // buildVMConfig creates the Cloud Hypervisor VmConfig func (m *manager) buildVMConfig(inst *Instance, imageInfo *images.Image) (vmm.VmConfig, error) { - // Get versioned system file paths + // Get system file paths kernelPath, _ := m.systemManager.GetKernelPath(system.KernelVersion(inst.KernelVersion)) - initrdPath, _ := m.systemManager.GetInitrdPath(system.InitrdVersion(inst.InitrdVersion)) + initrdPath, _ := m.systemManager.GetInitrdPath() // Payload configuration (kernel + initramfs) payload := vmm.PayloadConfig{ @@ -335,6 +358,12 @@ func (m *manager) buildVMConfig(inst *Instance, imageInfo *images.Image) (vmm.Vm Mode: vmm.ConsoleConfigMode("Off"), } + // vsock configuration for remote exec + vsock := vmm.VsockConfig{ + Cid: inst.VsockCID, + Socket: inst.VsockSocket, + } + return vmm.VmConfig{ Payload: payload, Cpus: &cpus, @@ -342,6 +371,7 @@ func (m *manager) buildVMConfig(inst *Instance, imageInfo *images.Image) (vmm.Vm Disks: &disks, Serial: &serial, Console: &console, + Vsock: &vsock, }, nil } diff --git a/lib/instances/manager_test.go b/lib/instances/manager_test.go index 85f55098..56682573 100644 --- a/lib/instances/manager_test.go +++ b/lib/instances/manager_test.go @@ -177,7 +177,6 @@ func TestCreateAndDeleteInstance(t *testing.T) { assert.Equal(t, StateRunning, inst.State) assert.False(t, inst.HasSnapshot) assert.NotEmpty(t, inst.KernelVersion) - assert.NotEmpty(t, inst.InitrdVersion) // Verify directories exist p := paths.New(tmpDir) diff --git a/lib/instances/memory_test.go b/lib/instances/memory_test.go deleted file mode 100644 index 33e6a12c..00000000 --- a/lib/instances/memory_test.go +++ /dev/null @@ -1,490 +0,0 @@ -package instances - -import ( - "context" - "fmt" - "os" - "strings" - "testing" - "time" - - "github.com/onkernel/hypeman/lib/images" - "github.com/onkernel/hypeman/lib/paths" - "github.com/onkernel/hypeman/lib/system" - "github.com/onkernel/hypeman/lib/vmm" - 
"github.com/stretchr/testify/assert" - "github.com/stretchr/testify/require" -) - -func TestMemoryReduction(t *testing.T) { - if _, err := os.Stat("/dev/kvm"); os.IsNotExist(err) { - t.Fatal("/dev/kvm not available - ensure KVM is enabled and user is in 'kvm' group (sudo usermod -aG kvm $USER)") - } - - manager, tmpDir := setupTestManager(t) - ctx := context.Background() - - // Setup: create Alpine and nginx images and system files - imageManager, err := images.NewManager(paths.New(tmpDir), 1) - require.NoError(t, err) - - t.Log("Pulling alpine:latest image...") - alpineImage, err := imageManager.CreateImage(ctx, images.CreateImageRequest{ - Name: "docker.io/library/alpine:latest", - }) - require.NoError(t, err) - - // Wait for Alpine image to be ready - t.Log("Waiting for alpine image build to complete...") - for i := 0; i < 60; i++ { - img, err := imageManager.GetImage(ctx, alpineImage.Name) - if err == nil && img.Status == images.StatusReady { - alpineImage = img - break - } - if err == nil && img.Status == images.StatusFailed { - t.Fatalf("Alpine image build failed: %s", *img.Error) - } - time.Sleep(1 * time.Second) - } - require.Equal(t, images.StatusReady, alpineImage.Status, "Alpine image should be ready") - t.Log("Alpine image ready") - - t.Log("Pulling php:cli-alpine image...") - phpImage, err := imageManager.CreateImage(ctx, images.CreateImageRequest{ - Name: "docker.io/library/php:cli-alpine", - }) - require.NoError(t, err) - - // Wait for PHP image to be ready - t.Log("Waiting for PHP image build to complete...") - for i := 0; i < 120; i++ { - img, err := imageManager.GetImage(ctx, phpImage.Name) - if err == nil && img.Status == images.StatusReady { - phpImage = img - break - } - if err == nil && img.Status == images.StatusFailed { - t.Fatalf("PHP image build failed: %s", *img.Error) - } - time.Sleep(1 * time.Second) - } - require.Equal(t, images.StatusReady, phpImage.Status, "PHP image should be ready") - t.Log("PHP image ready") - - // Ensure system 
files - systemManager := system.NewManager(paths.New(tmpDir)) - t.Log("Ensuring system files...") - err = systemManager.EnsureSystemFiles(ctx) - require.NoError(t, err) - - t.Run("fast_shrink_idle_container", func(t *testing.T) { - t.Log("Testing fast memory shrink with idle container...") - - // Create instance with idle container - // Note: create.go automatically expands memory to Size + HotplugSize - inst, err := manager.CreateInstance(ctx, CreateInstanceRequest{ - Name: "test-memory-fast", - Image: "docker.io/library/alpine:latest", - Size: 256 * 1024 * 1024, // 256MB base - HotplugSize: 512 * 1024 * 1024, // 512MB hotplug capacity (auto-expanded at boot) - OverlaySize: 5 * 1024 * 1024 * 1024, // 5GB overlay - Vcpus: 1, - Env: map[string]string{ - // Idle container - minimal memory usage - "CMD": "sleep infinity", - }, - }) - require.NoError(t, err) - defer manager.DeleteInstance(ctx, inst.Id) - t.Logf("Instance created: %s", inst.Id) - - // Wait for VM ready (no arbitrary sleep!) - err = waitForVMReady(ctx, inst.SocketPath, 5*time.Second) - require.NoError(t, err) - t.Log("VM is ready") - - client, err := vmm.NewVMM(inst.SocketPath) - require.NoError(t, err) - - // Get initial memory state (should be fully expanded) - initialSize := getActualMemorySize(t, ctx, client) - t.Logf("Initial memory (auto-expanded): %d MB", initialSize/(1024*1024)) - - // Expected to be at Size + HotplugSize = 768 MB - expectedMax := inst.Size + inst.HotplugSize - assert.InDelta(t, expectedMax, initialSize, float64(100*1024*1024), - "Memory should be near max capacity after boot") - - // Now reduce back to base size - // Idle container should shrink quickly since it's not using the hotplugged memory - targetSize := inst.Size // Reduce to 256MB base - t.Logf("Reducing memory to base size (%d MB)...", targetSize/(1024*1024)) - - start := time.Now() - err = reduceMemoryWithPolling(ctx, client, targetSize) - duration := time.Since(start) - - require.NoError(t, err) - t.Logf("Fast shrink 
completed in %v", duration) - - // Verify it was actually fast - assert.Less(t, duration, 1500*time.Millisecond, - "Idle container memory should shrink quickly") - - // Verify final size - finalSize := getActualMemorySize(t, ctx, client) - t.Logf("Final memory: %d MB", finalSize/(1024*1024)) - - tolerance := int64(50 * 1024 * 1024) // 50MB tolerance - assert.InDelta(t, targetSize, finalSize, float64(tolerance), - "Memory should be close to base size") - }) - - t.Run("investigate_memory_metrics", func(t *testing.T) { - t.Log("Investigating what memory metrics actually report...") - - inst, err := manager.CreateInstance(ctx, CreateInstanceRequest{ - Name: "test-memory-metrics", - Image: "docker.io/library/php:cli-alpine", - Size: 128 * 1024 * 1024, // 128MB base - HotplugSize: 512 * 1024 * 1024, // 512MB hotplug - OverlaySize: 5 * 1024 * 1024 * 1024, - Vcpus: 1, - Env: map[string]string{ - "CMD": `php -d memory_limit=-1 -r '$a = str_repeat("A", 300*1024*1024); for($i=0; $i<300; $i++) { $a[$i*1024*1024]="X"; } echo "Allocated 300MB\n"; for($i=0;$i<20;$i++) { sleep(1); echo "Still alive $i\n"; }'`, - }, - }) - require.NoError(t, err) - defer manager.DeleteInstance(ctx, inst.Id) - - err = waitForVMReady(ctx, inst.SocketPath, 5*time.Second) - require.NoError(t, err) - - client, err := vmm.NewVMM(inst.SocketPath) - require.NoError(t, err) - - // Wait for PHP to allocate (poll for log message) - t.Log("Waiting for PHP to allocate memory...") - err = waitForLogMessage(ctx, manager, inst.Id, "Allocated 300MB", 10*time.Second) - require.NoError(t, err, "PHP should allocate memory") - - // Wait for PHP to start printing (ensures it's running) - err = waitForLogMessage(ctx, manager, inst.Id, "Still alive 0", 3*time.Second) - require.NoError(t, err, "PHP should start status loop") - - // Get FULL VmInfo before reduction - t.Log("=== BEFORE REDUCTION ===") - infoBefore, _ := client.GetVmInfoWithResponse(ctx) - if infoBefore != nil && infoBefore.JSON200 != nil { - info := 
infoBefore.JSON200 - t.Logf("MemoryActualSize: %d MB", *info.MemoryActualSize/(1024*1024)) - if info.Config.Memory != nil { - mem := info.Config.Memory - t.Logf("Config.Memory.Size: %d MB", mem.Size/(1024*1024)) - if mem.HotplugSize != nil { - t.Logf("Config.Memory.HotplugSize: %d MB", *mem.HotplugSize/(1024*1024)) - } - if mem.HotpluggedSize != nil { - t.Logf("Config.Memory.HotpluggedSize: %d MB", *mem.HotpluggedSize/(1024*1024)) - } - } - } - - // Reduce memory - targetSize := int64(128 * 1024 * 1024) - t.Logf("\n=== REDUCING TO %d MB ===", targetSize/(1024*1024)) - err = reduceMemoryWithPolling(ctx, client, targetSize) - require.NoError(t, err) - - // Get FULL VmInfo after reduction - t.Log("\n=== AFTER REDUCTION ===") - infoAfter, _ := client.GetVmInfoWithResponse(ctx) - if infoAfter != nil && infoAfter.JSON200 != nil { - info := infoAfter.JSON200 - t.Logf("MemoryActualSize: %d MB", *info.MemoryActualSize/(1024*1024)) - if info.Config.Memory != nil { - mem := info.Config.Memory - t.Logf("Config.Memory.Size: %d MB", mem.Size/(1024*1024)) - if mem.HotplugSize != nil { - t.Logf("Config.Memory.HotplugSize: %d MB", *mem.HotplugSize/(1024*1024)) - } - if mem.HotpluggedSize != nil { - t.Logf("Config.Memory.HotpluggedSize: %d MB", *mem.HotpluggedSize/(1024*1024)) - } - } - } - - // Check what the current highest "Still alive" number is - logsNow, _ := manager.GetInstanceLogs(ctx, inst.Id, false, 50) - currentHighest := -1 - for i := 0; i < 20; i++ { - if strings.Contains(logsNow, fmt.Sprintf("Still alive %d", i)) { - currentHighest = i - } - } - t.Logf("Current highest 'Still alive': %d", currentHighest) - - // Wait for PHP to print the NEXT number (proves it's still running) - nextMessage := fmt.Sprintf("Still alive %d", currentHighest+1) - t.Logf("Waiting for '%s'...", nextMessage) - err = waitForLogMessage(ctx, manager, inst.Id, nextMessage, 3*time.Second) - require.NoError(t, err, "PHP should continue running and increment counter") - - t.Logf("\n✓ PHP still alive 
up to message: %d", currentHighest+1) - - t.Log("\n=== ANALYSIS ===") - t.Logf("MemoryActualSize likely shows: Size + HotpluggedSize (VMM's configured view)") - t.Logf("Guest is actually using: ~300MB for PHP + system overhead") - t.Logf("virtio-mem migrated guest pages into base region") - t.Logf("PHP process survived - no OOM kill") - - // This test is informational - always passes - assert.True(t, true, "Diagnostic test completed") - }) - - t.Run("partial_reduction_php_holds_memory", func(t *testing.T) { - t.Log("Testing partial reduction when PHP actively holds memory...") - - // HARD REQUIREMENTS: - // - 128MB base - // - 512MB hotplug - // - Request reduction to 128MB - // - Assert final > 128MB - inst, err := manager.CreateInstance(ctx, CreateInstanceRequest{ - Name: "test-memory-php", - Image: "docker.io/library/php:cli-alpine", - Size: 128 * 1024 * 1024, // 128MB base (REQUIRED) - HotplugSize: 512 * 1024 * 1024, // 512MB hotplug (REQUIRED) - OverlaySize: 5 * 1024 * 1024 * 1024, - Vcpus: 1, - Env: map[string]string{ - // PHP allocates 300MB, touches pages, and continuously reports it's alive - "CMD": `php -d memory_limit=-1 -r '$a = str_repeat("A", 300*1024*1024); for($i=0; $i<300; $i++) { $a[$i*1024*1024]="X"; } echo "Allocated 300MB\n"; for($i=0;$i<20;$i++) { sleep(1); echo "Still alive $i\n"; }'`, - }, - }) - require.NoError(t, err) - defer manager.DeleteInstance(ctx, inst.Id) - t.Logf("Instance created: %s", inst.Id) - - err = waitForVMReady(ctx, inst.SocketPath, 5*time.Second) - require.NoError(t, err) - t.Log("VM is ready") - - client, err := vmm.NewVMM(inst.SocketPath) - require.NoError(t, err) - - initialSize := getActualMemorySize(t, ctx, client) - t.Logf("Initial memory (auto-expanded): %d MB", initialSize/(1024*1024)) - - // Should be 128MB + 512MB = 640MB - expectedMax := inst.Size + inst.HotplugSize - assert.InDelta(t, expectedMax, initialSize, float64(50*1024*1024), - "Memory should be near 640MB after auto-expansion") - - // Wait for PHP to 
start and allocate 300MB with physical pages (poll logs) - t.Log("Waiting for PHP to allocate and touch 300MB...") - err = waitForLogMessage(ctx, manager, inst.Id, "Allocated 300MB", 10*time.Second) - require.NoError(t, err, "PHP should allocate memory") - - // Also wait for at least first "Still alive" message to ensure PHP loop started - t.Log("Waiting for PHP to start printing status...") - err = waitForLogMessage(ctx, manager, inst.Id, "Still alive 0", 3*time.Second) - require.NoError(t, err, "PHP should start status loop") - - afterAllocation := getActualMemorySize(t, ctx, client) - t.Logf("After PHP allocation: %d MB", afterAllocation/(1024*1024)) - - // KEY TEST: Request reduction to 128MB base - targetSize := int64(128 * 1024 * 1024) // REQUIRED: 128MB - t.Logf("Attempting reduction to %d MB (PHP holding 300MB)...", - targetSize/(1024*1024)) - start := time.Now() - - err = reduceMemoryWithPolling(ctx, client, targetSize) - duration := time.Since(start) - - // Should complete successfully - require.NoError(t, err, "Memory reduction should complete successfully") - t.Logf("Reduction completed in %v", duration) - - finalSize := getActualMemorySize(t, ctx, client) - t.Logf("Requested: %d MB, Final: %d MB", - targetSize/(1024*1024), - finalSize/(1024*1024)) - - // Check what the current highest "Still alive" number is - logsCurrent, _ := manager.GetInstanceLogs(ctx, inst.Id, false, 50) - currentHighest := -1 - for i := 0; i < 20; i++ { - if strings.Contains(logsCurrent, fmt.Sprintf("Still alive %d", i)) { - currentHighest = i - } - } - t.Logf("Current highest 'Still alive': %d", currentHighest) - - // Wait for PHP to print the NEXT number (proves it's still running after reduction) - nextMessage := fmt.Sprintf("Still alive %d", currentHighest+1) - t.Log("Waiting for PHP to continue printing after reduction...") - t.Logf("Looking for '%s'...", nextMessage) - err = waitForLogMessage(ctx, manager, inst.Id, nextMessage, 3*time.Second) - require.NoError(t, err, "PHP 
should continue running and increment counter after reduction") - - // Now get full logs to check for OOM - logsAfter, _ := manager.GetInstanceLogs(ctx, inst.Id, false, 80) - highestStillAlive := currentHighest + 1 - t.Logf("PHP continued to 'Still alive %d' after reduction", highestStillAlive) - - // Check for OOM indicators - hasOOM := strings.Contains(logsAfter, "Out of memory") || - strings.Contains(logsAfter, "Killed") || - strings.Contains(logsAfter, "oom-kill") || - strings.Contains(logsAfter, "invoked oom-killer") - - if hasOOM { - t.Logf("FOUND OOM EVENT in logs!") - } - - // At this point we know PHP counter incremented, so process survived! - t.Logf("✓ IMPORTANT: PHP process SURVIVED memory reduction!") - t.Logf("✓ PHP continued printing (counter incremented) after reduction") - - // Check for OOM or migration traces - if strings.Contains(logsAfter, "migrate_pages") { - t.Logf("✓ Page migration traces found - virtio-mem migrated pages") - } - - // REQUIRED ASSERTION: finalSize must be > 128MB OR process survived - if finalSize > targetSize { - t.Logf("SUCCESS: Partial reduction - stabilized at %d MB (above %d MB target)", - finalSize/(1024*1024), targetSize/(1024*1024)) - assert.Greater(t, finalSize, targetSize, - "Memory stabilized above target") - } else { - // Reduced to 128MB but PHP survived - t.Logf("FINDING: Reduced to 128MB but PHP survived") - t.Logf("✓ virtio-mem used page migration to move 300MB into 128MB base region") - t.Logf("✓ This proves standby/resume is SAFE - no OOM killing occurs") - t.Logf("SUCCESS: Memory reduction is SAFE - process survived with page migration") - } - }) -} - -// Test helpers - -// getActualMemorySize gets the current actual memory size from VMM -func getActualMemorySize(t *testing.T, ctx context.Context, client *vmm.VMM) int64 { - t.Helper() - infoResp, err := client.GetVmInfoWithResponse(ctx) - require.NoError(t, err) - require.NotNil(t, infoResp.JSON200) - require.NotNil(t, infoResp.JSON200.MemoryActualSize) - 
return *infoResp.JSON200.MemoryActualSize -} - -// resizeMemoryRequest issues a memory resize request to VMM -func resizeMemoryRequest(ctx context.Context, client *vmm.VMM, targetBytes int64) error { - resizeConfig := vmm.VmResize{DesiredRam: &targetBytes} - resp, err := client.PutVmResizeWithResponse(ctx, resizeConfig) - if err != nil || resp.StatusCode() != 204 { - return fmt.Errorf("memory resize request failed") - } - return nil -} - -// waitForMemoryIncrease waits for memory to increase after hotplug (with polling) -func waitForMemoryIncrease(ctx context.Context, client *vmm.VMM, - previousSize int64, timeout time.Duration) error { - - deadline := time.Now().Add(timeout) - const pollInterval = 20 * time.Millisecond - - for time.Now().Before(deadline) { - infoResp, err := client.GetVmInfoWithResponse(ctx) - if err != nil { - time.Sleep(pollInterval) - continue - } - - if infoResp.StatusCode() != 200 || infoResp.JSON200 == nil { - time.Sleep(pollInterval) - continue - } - - if infoResp.JSON200.MemoryActualSize != nil { - currentSize := *infoResp.JSON200.MemoryActualSize - if currentSize > previousSize { - return nil // Memory increased! 
- } - } - - time.Sleep(pollInterval) - } - - return fmt.Errorf("memory did not increase within %v", timeout) -} - -// waitForMemoryUsageIncrease waits for memory usage to increase (e.g., workload allocation) -// This is similar to waitForMemoryIncrease but checks more frequently and looks for -// significant increases that indicate active memory consumption -func waitForMemoryUsageIncrease(ctx context.Context, client *vmm.VMM, - baselineSize int64, timeout time.Duration) error { - - deadline := time.Now().Add(timeout) - const pollInterval = 100 * time.Millisecond // Check every 100ms for workload activity - const minIncrease = 10 * 1024 * 1024 // Must increase by at least 10MB - - for time.Now().Before(deadline) { - infoResp, err := client.GetVmInfoWithResponse(ctx) - if err != nil { - time.Sleep(pollInterval) - continue - } - - if infoResp.StatusCode() != 200 || infoResp.JSON200 == nil { - time.Sleep(pollInterval) - continue - } - - if infoResp.JSON200.MemoryActualSize != nil { - currentSize := *infoResp.JSON200.MemoryActualSize - increase := currentSize - baselineSize - if increase >= minIncrease { - return nil // Significant memory usage increase detected! - } - } - - time.Sleep(pollInterval) - } - - return fmt.Errorf("memory usage did not increase significantly within %v", timeout) -} - -// reduceMemoryWithPolling reduces memory using the production polling logic -func reduceMemoryWithPolling(ctx context.Context, client *vmm.VMM, targetBytes int64) error { - resizeConfig := vmm.VmResize{DesiredRam: &targetBytes} - if resp, err := client.PutVmResizeWithResponse(ctx, resizeConfig); err != nil || resp.StatusCode() != 204 { - return fmt.Errorf("memory resize failed") - } - - // Reuse the production polling logic! 
- return pollVMMemory(ctx, client, targetBytes, 5*time.Second) -} - -// waitForLogMessage polls instance logs for a specific message -func waitForLogMessage(ctx context.Context, manager Manager, instanceID string, message string, timeout time.Duration) error { - deadline := time.Now().Add(timeout) - const pollInterval = 200 * time.Millisecond // Check logs every 200ms - - for time.Now().Before(deadline) { - logs, err := manager.GetInstanceLogs(ctx, instanceID, false, 50) - if err == nil && strings.Contains(logs, message) { - return nil // Found the message! - } - - time.Sleep(pollInterval) - } - - return fmt.Errorf("log message %q not found within %v", message, timeout) -} - diff --git a/lib/instances/types.go b/lib/instances/types.go index bb0c2232..2bf40b75 100644 --- a/lib/instances/types.go +++ b/lib/instances/types.go @@ -41,13 +41,16 @@ type StoredMetadata struct { // Versions KernelVersion string // Kernel version (e.g., "ch-v6.12.9") - InitrdVersion string // Initrd version (e.g., "v1.0.0") CHVersion vmm.CHVersion // Cloud Hypervisor version CHPID *int // Cloud Hypervisor process ID (may be stale after host restart) // Paths SocketPath string // Path to API socket DataDir string // Instance data directory + + // vsock configuration + VsockCID int64 // Guest vsock Context ID + VsockSocket string // Host-side vsock socket path } // Instance represents a virtual machine instance with derived runtime state diff --git a/lib/middleware/oapi_auth.go b/lib/middleware/oapi_auth.go index e0599cbd..8a39b2e9 100644 --- a/lib/middleware/oapi_auth.go +++ b/lib/middleware/oapi_auth.go @@ -116,3 +116,62 @@ func GetUserIDFromContext(ctx context.Context) string { return "" } +// JwtAuth creates a chi middleware that validates JWT bearer tokens +func JwtAuth(jwtSecret string) func(http.Handler) http.Handler { + return func(next http.Handler) http.Handler { + return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + log := logger.FromContext(r.Context()) + + // 
Extract token from Authorization header + authHeader := r.Header.Get("Authorization") + if authHeader == "" { + log.DebugContext(r.Context(), "missing authorization header") + OapiErrorHandler(w, "authorization header required", http.StatusUnauthorized) + return + } + + // Extract bearer token + token, err := extractBearerToken(authHeader) + if err != nil { + log.DebugContext(r.Context(), "invalid authorization header", "error", err) + OapiErrorHandler(w, "invalid authorization header format", http.StatusUnauthorized) + return + } + + // Parse and validate JWT + claims := jwt.MapClaims{} + parsedToken, err := jwt.ParseWithClaims(token, claims, func(token *jwt.Token) (interface{}, error) { + // Validate signing method + if _, ok := token.Method.(*jwt.SigningMethodHMAC); !ok { + return nil, fmt.Errorf("unexpected signing method: %v", token.Header["alg"]) + } + return []byte(jwtSecret), nil + }) + + if err != nil { + log.DebugContext(r.Context(), "failed to parse JWT", "error", err) + OapiErrorHandler(w, "invalid token", http.StatusUnauthorized) + return + } + + if !parsedToken.Valid { + log.DebugContext(r.Context(), "invalid JWT token") + OapiErrorHandler(w, "invalid token", http.StatusUnauthorized) + return + } + + // Extract user ID from claims and add to context + var userID string + if sub, ok := claims["sub"].(string); ok { + userID = sub + } + + // Update the context with user ID + newCtx := context.WithValue(r.Context(), userIDKey, userID) + + // Call next handler with updated context + next.ServeHTTP(w, r.WithContext(newCtx)) + }) + } +} + diff --git a/lib/oapi/oapi.go b/lib/oapi/oapi.go index b101b042..f7d27bb7 100644 --- a/lib/oapi/oapi.go +++ b/lib/oapi/oapi.go @@ -249,6 +249,15 @@ type Volume struct { SizeGb int `json:"size_gb"` } +// ExecInstanceParams defines parameters for ExecInstance. 
+type ExecInstanceParams struct { + // Command Command to execute (defaults to /bin/sh) + Command *[]string `form:"command,omitempty" json:"command,omitempty"` + + // Tty Allocate a pseudo-TTY + Tty *bool `form:"tty,omitempty" json:"tty,omitempty"` +} + // GetInstanceLogsParams defines parameters for GetInstanceLogs. type GetInstanceLogsParams struct { // Follow Follow logs (stream with SSE) @@ -374,6 +383,9 @@ type ClientInterface interface { // GetInstance request GetInstance(ctx context.Context, id string, reqEditors ...RequestEditorFn) (*http.Response, error) + // ExecInstance request + ExecInstance(ctx context.Context, id string, params *ExecInstanceParams, reqEditors ...RequestEditorFn) (*http.Response, error) + // GetInstanceLogs request GetInstanceLogs(ctx context.Context, id string, params *GetInstanceLogsParams, reqEditors ...RequestEditorFn) (*http.Response, error) @@ -538,6 +550,18 @@ func (c *Client) GetInstance(ctx context.Context, id string, reqEditors ...Reque return c.Client.Do(req) } +func (c *Client) ExecInstance(ctx context.Context, id string, params *ExecInstanceParams, reqEditors ...RequestEditorFn) (*http.Response, error) { + req, err := NewExecInstanceRequest(c.Server, id, params) + if err != nil { + return nil, err + } + req = req.WithContext(ctx) + if err := c.applyEditors(ctx, req, reqEditors); err != nil { + return nil, err + } + return c.Client.Do(req) +} + func (c *Client) GetInstanceLogs(ctx context.Context, id string, params *GetInstanceLogsParams, reqEditors ...RequestEditorFn) (*http.Response, error) { req, err := NewGetInstanceLogsRequest(c.Server, id, params) if err != nil { @@ -967,6 +991,78 @@ func NewGetInstanceRequest(server string, id string) (*http.Request, error) { return req, nil } +// NewExecInstanceRequest generates requests for ExecInstance +func NewExecInstanceRequest(server string, id string, params *ExecInstanceParams) (*http.Request, error) { + var err error + + var pathParam0 string + + pathParam0, err = 
runtime.StyleParamWithLocation("simple", false, "id", runtime.ParamLocationPath, id) + if err != nil { + return nil, err + } + + serverURL, err := url.Parse(server) + if err != nil { + return nil, err + } + + operationPath := fmt.Sprintf("/instances/%s/exec", pathParam0) + if operationPath[0] == '/' { + operationPath = "." + operationPath + } + + queryURL, err := serverURL.Parse(operationPath) + if err != nil { + return nil, err + } + + if params != nil { + queryValues := queryURL.Query() + + if params.Command != nil { + + if queryFrag, err := runtime.StyleParamWithLocation("form", true, "command", runtime.ParamLocationQuery, *params.Command); err != nil { + return nil, err + } else if parsed, err := url.ParseQuery(queryFrag); err != nil { + return nil, err + } else { + for k, v := range parsed { + for _, v2 := range v { + queryValues.Add(k, v2) + } + } + } + + } + + if params.Tty != nil { + + if queryFrag, err := runtime.StyleParamWithLocation("form", true, "tty", runtime.ParamLocationQuery, *params.Tty); err != nil { + return nil, err + } else if parsed, err := url.ParseQuery(queryFrag); err != nil { + return nil, err + } else { + for k, v := range parsed { + for _, v2 := range v { + queryValues.Add(k, v2) + } + } + } + + } + + queryURL.RawQuery = queryValues.Encode() + } + + req, err := http.NewRequest("POST", queryURL.String(), nil) + if err != nil { + return nil, err + } + + return req, nil +} + // NewGetInstanceLogsRequest generates requests for GetInstanceLogs func NewGetInstanceLogsRequest(server string, id string, params *GetInstanceLogsParams) (*http.Request, error) { var err error @@ -1411,6 +1507,9 @@ type ClientWithResponsesInterface interface { // GetInstanceWithResponse request GetInstanceWithResponse(ctx context.Context, id string, reqEditors ...RequestEditorFn) (*GetInstanceResponse, error) + // ExecInstanceWithResponse request + ExecInstanceWithResponse(ctx context.Context, id string, params *ExecInstanceParams, reqEditors ...RequestEditorFn) 
(*ExecInstanceResponse, error) + // GetInstanceLogsWithResponse request GetInstanceLogsWithResponse(ctx context.Context, id string, params *GetInstanceLogsParams, reqEditors ...RequestEditorFn) (*GetInstanceLogsResponse, error) @@ -1658,6 +1757,30 @@ func (r GetInstanceResponse) StatusCode() int { return 0 } +type ExecInstanceResponse struct { + Body []byte + HTTPResponse *http.Response + JSON404 *Error + JSON409 *Error + JSON500 *Error +} + +// Status returns HTTPResponse.Status +func (r ExecInstanceResponse) Status() string { + if r.HTTPResponse != nil { + return r.HTTPResponse.Status + } + return http.StatusText(0) +} + +// StatusCode returns HTTPResponse.StatusCode +func (r ExecInstanceResponse) StatusCode() int { + if r.HTTPResponse != nil { + return r.HTTPResponse.StatusCode + } + return 0 +} + type GetInstanceLogsResponse struct { Body []byte HTTPResponse *http.Response @@ -1974,6 +2097,15 @@ func (c *ClientWithResponses) GetInstanceWithResponse(ctx context.Context, id st return ParseGetInstanceResponse(rsp) } +// ExecInstanceWithResponse request returning *ExecInstanceResponse +func (c *ClientWithResponses) ExecInstanceWithResponse(ctx context.Context, id string, params *ExecInstanceParams, reqEditors ...RequestEditorFn) (*ExecInstanceResponse, error) { + rsp, err := c.ExecInstance(ctx, id, params, reqEditors...) + if err != nil { + return nil, err + } + return ParseExecInstanceResponse(rsp) +} + // GetInstanceLogsWithResponse request returning *GetInstanceLogsResponse func (c *ClientWithResponses) GetInstanceLogsWithResponse(ctx context.Context, id string, params *GetInstanceLogsParams, reqEditors ...RequestEditorFn) (*GetInstanceLogsResponse, error) { rsp, err := c.GetInstanceLogs(ctx, id, params, reqEditors...) 
@@ -2424,6 +2556,46 @@ func ParseGetInstanceResponse(rsp *http.Response) (*GetInstanceResponse, error) return response, nil } +// ParseExecInstanceResponse parses an HTTP response from a ExecInstanceWithResponse call +func ParseExecInstanceResponse(rsp *http.Response) (*ExecInstanceResponse, error) { + bodyBytes, err := io.ReadAll(rsp.Body) + defer func() { _ = rsp.Body.Close() }() + if err != nil { + return nil, err + } + + response := &ExecInstanceResponse{ + Body: bodyBytes, + HTTPResponse: rsp, + } + + switch { + case strings.Contains(rsp.Header.Get("Content-Type"), "json") && rsp.StatusCode == 404: + var dest Error + if err := json.Unmarshal(bodyBytes, &dest); err != nil { + return nil, err + } + response.JSON404 = &dest + + case strings.Contains(rsp.Header.Get("Content-Type"), "json") && rsp.StatusCode == 409: + var dest Error + if err := json.Unmarshal(bodyBytes, &dest); err != nil { + return nil, err + } + response.JSON409 = &dest + + case strings.Contains(rsp.Header.Get("Content-Type"), "json") && rsp.StatusCode == 500: + var dest Error + if err := json.Unmarshal(bodyBytes, &dest); err != nil { + return nil, err + } + response.JSON500 = &dest + + } + + return response, nil +} + // ParseGetInstanceLogsResponse parses an HTTP response from a GetInstanceLogsWithResponse call func ParseGetInstanceLogsResponse(rsp *http.Response) (*GetInstanceLogsResponse, error) { bodyBytes, err := io.ReadAll(rsp.Body) @@ -2834,6 +3006,9 @@ type ServerInterface interface { // Get instance details // (GET /instances/{id}) GetInstance(w http.ResponseWriter, r *http.Request, id string) + // Execute command in instance via vsock (WebSocket) + // (POST /instances/{id}/exec) + ExecInstance(w http.ResponseWriter, r *http.Request, id string, params ExecInstanceParams) // Stream instance logs (SSE) // (GET /instances/{id}/logs) GetInstanceLogs(w http.ResponseWriter, r *http.Request, id string, params GetInstanceLogsParams) @@ -2921,6 +3096,12 @@ func (_ Unimplemented) GetInstance(w 
http.ResponseWriter, r *http.Request, id st w.WriteHeader(http.StatusNotImplemented) } +// Execute command in instance via vsock (WebSocket) +// (POST /instances/{id}/exec) +func (_ Unimplemented) ExecInstance(w http.ResponseWriter, r *http.Request, id string, params ExecInstanceParams) { + w.WriteHeader(http.StatusNotImplemented) +} + // Stream instance logs (SSE) // (GET /instances/{id}/logs) func (_ Unimplemented) GetInstanceLogs(w http.ResponseWriter, r *http.Request, id string, params GetInstanceLogsParams) { @@ -3202,6 +3383,56 @@ func (siw *ServerInterfaceWrapper) GetInstance(w http.ResponseWriter, r *http.Re handler.ServeHTTP(w, r) } +// ExecInstance operation middleware +func (siw *ServerInterfaceWrapper) ExecInstance(w http.ResponseWriter, r *http.Request) { + + var err error + + // ------------- Path parameter "id" ------------- + var id string + + err = runtime.BindStyledParameterWithOptions("simple", "id", chi.URLParam(r, "id"), &id, runtime.BindStyledParameterOptions{ParamLocation: runtime.ParamLocationPath, Explode: false, Required: true}) + if err != nil { + siw.ErrorHandlerFunc(w, r, &InvalidParamFormatError{ParamName: "id", Err: err}) + return + } + + ctx := r.Context() + + ctx = context.WithValue(ctx, BearerAuthScopes, []string{}) + + r = r.WithContext(ctx) + + // Parameter object where we will unmarshal all parameters from the context + var params ExecInstanceParams + + // ------------- Optional query parameter "command" ------------- + + err = runtime.BindQueryParameter("form", true, false, "command", r.URL.Query(), ¶ms.Command) + if err != nil { + siw.ErrorHandlerFunc(w, r, &InvalidParamFormatError{ParamName: "command", Err: err}) + return + } + + // ------------- Optional query parameter "tty" ------------- + + err = runtime.BindQueryParameter("form", true, false, "tty", r.URL.Query(), ¶ms.Tty) + if err != nil { + siw.ErrorHandlerFunc(w, r, &InvalidParamFormatError{ParamName: "tty", Err: err}) + return + } + + handler := 
http.Handler(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + siw.Handler.ExecInstance(w, r, id, params) + })) + + for _, middleware := range siw.HandlerMiddlewares { + handler = middleware(handler) + } + + handler.ServeHTTP(w, r) +} + // GetInstanceLogs operation middleware func (siw *ServerInterfaceWrapper) GetInstanceLogs(w http.ResponseWriter, r *http.Request) { @@ -3636,6 +3867,9 @@ func HandlerWithOptions(si ServerInterface, options ChiServerOptions) http.Handl r.Group(func(r chi.Router) { r.Get(options.BaseURL+"/instances/{id}", wrapper.GetInstance) }) + r.Group(func(r chi.Router) { + r.Post(options.BaseURL+"/instances/{id}/exec", wrapper.ExecInstance) + }) r.Group(func(r chi.Router) { r.Get(options.BaseURL+"/instances/{id}/logs", wrapper.GetInstanceLogs) }) @@ -3986,6 +4220,50 @@ func (response GetInstance500JSONResponse) VisitGetInstanceResponse(w http.Respo return json.NewEncoder(w).Encode(response) } +type ExecInstanceRequestObject struct { + Id string `json:"id"` + Params ExecInstanceParams +} + +type ExecInstanceResponseObject interface { + VisitExecInstanceResponse(w http.ResponseWriter) error +} + +type ExecInstance101Response struct { +} + +func (response ExecInstance101Response) VisitExecInstanceResponse(w http.ResponseWriter) error { + w.WriteHeader(101) + return nil +} + +type ExecInstance404JSONResponse Error + +func (response ExecInstance404JSONResponse) VisitExecInstanceResponse(w http.ResponseWriter) error { + w.Header().Set("Content-Type", "application/json") + w.WriteHeader(404) + + return json.NewEncoder(w).Encode(response) +} + +type ExecInstance409JSONResponse Error + +func (response ExecInstance409JSONResponse) VisitExecInstanceResponse(w http.ResponseWriter) error { + w.Header().Set("Content-Type", "application/json") + w.WriteHeader(409) + + return json.NewEncoder(w).Encode(response) +} + +type ExecInstance500JSONResponse Error + +func (response ExecInstance500JSONResponse) VisitExecInstanceResponse(w 
http.ResponseWriter) error { + w.Header().Set("Content-Type", "application/json") + w.WriteHeader(500) + + return json.NewEncoder(w).Encode(response) +} + type GetInstanceLogsRequestObject struct { Id string `json:"id"` Params GetInstanceLogsParams @@ -4397,6 +4675,9 @@ type StrictServerInterface interface { // Get instance details // (GET /instances/{id}) GetInstance(ctx context.Context, request GetInstanceRequestObject) (GetInstanceResponseObject, error) + // Execute command in instance via vsock (WebSocket) + // (POST /instances/{id}/exec) + ExecInstance(ctx context.Context, request ExecInstanceRequestObject) (ExecInstanceResponseObject, error) // Stream instance logs (SSE) // (GET /instances/{id}/logs) GetInstanceLogs(ctx context.Context, request GetInstanceLogsRequestObject) (GetInstanceLogsResponseObject, error) @@ -4693,6 +4974,33 @@ func (sh *strictHandler) GetInstance(w http.ResponseWriter, r *http.Request, id } } +// ExecInstance operation middleware +func (sh *strictHandler) ExecInstance(w http.ResponseWriter, r *http.Request, id string, params ExecInstanceParams) { + var request ExecInstanceRequestObject + + request.Id = id + request.Params = params + + handler := func(ctx context.Context, w http.ResponseWriter, r *http.Request, request interface{}) (interface{}, error) { + return sh.ssi.ExecInstance(ctx, request.(ExecInstanceRequestObject)) + } + for _, middleware := range sh.middlewares { + handler = middleware(handler, "ExecInstance") + } + + response, err := handler(r.Context(), w, r, request) + + if err != nil { + sh.options.ResponseErrorHandlerFunc(w, r, err) + } else if validResponse, ok := response.(ExecInstanceResponseObject); ok { + if err := validResponse.VisitExecInstanceResponse(w); err != nil { + sh.options.ResponseErrorHandlerFunc(w, r, err) + } + } else if response != nil { + sh.options.ResponseErrorHandlerFunc(w, r, fmt.Errorf("unexpected response type: %T", response)) + } +} + // GetInstanceLogs operation middleware func (sh 
*strictHandler) GetInstanceLogs(w http.ResponseWriter, r *http.Request, id string, params GetInstanceLogsParams) { var request GetInstanceLogsRequestObject @@ -4943,58 +5251,60 @@ func (sh *strictHandler) GetVolume(w http.ResponseWriter, r *http.Request, id st // Base64 encoded, gzipped, json marshaled Swagger object var swaggerSpec = []string{ - "H4sIAAAAAAAC/+xb+08bubf/VyzfXYneOyEPoLdkf6LQB1JpEWxZ6ba9yBmfJF489tT2BFLE//6VHzOZ", - "yUwebCFbdishkczYPu9zPj52bnEsk1QKEEbj/i3W8RgS4j4eGEPi8YXkWQJn8DUDbezjVMkUlGHgBiUy", - "E+YyJWZsv1HQsWKpYVLgPj4lZoyux6AATdwqSI9lxikaAHLzgOIIww1JUg64j9uJMG1KDMERNtPUPtJG", - "MTHCdxFWQKgUfOrJDEnGDe4PCdcQzZE9sUsjopGd0nJzivUGUnIgAt+5Fb9mTAHF/U9lMb4Ug+XgT4iN", - "JX6ogBg4TshosSYESaCugw+Hx4jZeUjBEBSIGNAWbI+2I0RlfAVqm8k2ZwNF1LQtRkzc9DkxoM2zimqW", - "j63ra048x9sSwYQ2RMSLZQMxsf8IpczKRfhp5XXNWFUdvBITpqRIQBg0IYqRAQddFu8Wv/9w9Ory1fsL", - "3LeUaRa7qRE+/XD2O+7jnU6nY9et8T+WJuXZ6FKzb1DxDLzz5iWeZ+Sg4B8lkEg1RUOpUFgDbY2zhIiW", - "9RrLoX2XEIM4uwL02a73GUfoM+6++Yyrxuk5UjUlOLOv5RErTE14ygQstHW0wPXeVsWxg9AWl9egYqIB", - "cTAGlI4QZSNmdISIoIgSPQaNbND8hmIihDRIG6IMkgqBoOiamTEibtxvSEgUS6EhzgybQJhc1U0ybV1L", - "dcUloa0ujnBCbt6BGNl08XwnwimxTFhu//8TaX3rtPa//M9Wq/j47L9/aZJXTkBxMm2werfTYPY/FDNO", - "BWEeokxfITt5hc3tat7oe5261TvNZm9gqoGnl9YEwQnX4aRgpNs7CR976zriJE4zXWGpN8/O+ywZgEJy", - "iCZMmYxwdHj6sRKjvWJhJgyMQDUnmdztFyebFQWF0YaQSUPUxpk2MkGMgjBsyEChLZIZ2RqBAEUMUMSG", - "yPpsquSEUaBV/Uwkb9n64jxxzSjy7KIgXMWx3VK+si1yhMvRoL7kubU3E2jERmQwNdVc2O2sq+h8/SZV", - "v1JKqrpyY0kbRDxIU85iYr+1dAoxG7IYgV0B2QloKyHxmAkonLOq1QGhlyqYM2qqBYYwrhvIzrKxJxZG", - "oi2blZOMG5Zy8O9cVmEGErfOLwqGuI//qz3DLu0AXNpO8iO3UqliEKXI1CVlIUBdQq6ee6yUgNaNCX0u", - "z+ayFENckaEwyEYjq5Ky6k6Y1kyMUG5dNGTAad/Xh5VV3VlzxthCPwgyrOkN72yFaHGYAC87gY8oy2wi", - "FaDCT7zRKlIxMSGc0Usm0qzRJRaq8nWmzBhyT0BkIDODzDgotULEgTEX60OZCdqorJo63gLhHqlWNaEN", - "MVlAOllidSuvrD5n5OTVSnOERZrMcJxDgTkDJA3J7vDkCA2VTGxpNYQJUCgBQwIuLjj6hB0CxBFuWZ+i", - "BBIpkBwOf7McFKFSz3IZ59ZPcd+oDOoBErskTS+JaWDNvrMebVgC2pAkRVtnrw93dnb252tjb6/V6ba6", - 
"e793O/2O/fs/HGFf0yzGIQZadpHGhMFGoTJUqZ+BlnwCFCVEsCFog8LIMmU9Jr29530yiLu9HQrD3b3n", - "29vbTWRAGDVNJRMNpF4V79YzRduDttZszW09/j47PALkXkeWW3x68PtbuxnLtGpzGRPe1gMm+qXvxdfZ", - "C/fBfx0w0QjVi5w7x6lLMSEj2PLtwwgxjYaE8bktYppxHp73rSQC4sIhpUs2C/S6qsy/t67J2TegqHHL", - "ZsjIQmDvcd+3N4vw1wwyuEylZp56beMc3liQMMgYp8jNQFtWuBziuEdVgNNbKH4BJQJs8LCjRvioAMaW", - "sh0TaGbCMO421NMKxb2d5y/+t7Pf7ZWCmwnzfBevxUqRdufAsZM5vI2KnJyCoL6CWjfwn2IpJjYq3BfH", - "n80z3nEqCTx/VzOG3Z4wMbqkrME7//AvEWUKYuN2jKtjCLdJmq52xWZUV+S0QvxSRm6sLWHz3lBeHj6V", - "79wvlT9O36DeBSD6UguS6rFsEPWPMThQQVA+BsEN00Y7YGbGTCOW67AkeegtzTeOmnoOFTQYuglL9nbr", - "dQ8aoMFBda+TCfY1g8pu6PDj8VEv7B2rZMy3XbL/4uaGmP3n7Frvf0sGavTnDnkinYulTYU1GgR/rR9Q", - "VWF35Y7/Pjv8tbzA9V4WhPC568vcP373FsbvytppcxKs2jflCencDXazZJouFEKm95KhtyIHrZSh1A15", - "6A4Is2Cl0gbJVbZ2Fj/PNVzlLn/tiiL0P4sW8t0U2kcXJycorI4GmUFFzw4o2jrkMqPo7TQFNWFaKiSI", - "YRN4Zlc4y4RgYmRXsHCLxPYNnyLlny+ffEoy7anbuan7tnzG+TgzVF4LN0ePM4PsN8eyFSHk5OVLeE/q", - "o/fSzQmcRkjI+eTuhxNBB9P68PlCsBUTgQY2r2kjFdBnn0UJdwRN4wgHjeEIe/FxhHOp7EfPnfvkCJcs", - "PfM/31CqV2vijluAXhq5xP7HRxb/5WPntt3atPzGZ504eGh00Nm/70avqcR9nK9p92jgLTuL8odC9t1C", - "/ZWPn/7iLuIHbBaWc1JOZEU2suxAnClmpuc2p3v/HABRoA4yr12X7B0wco9nIo2NSfHdnWu1DRs8+Y2F", - "LyxGB6fHDn8lRJCRzTYXJ4izIcTTmAPKXFuslgrc6cOHw+PWgNh0k4MABwqZcaq2oxMi7Po4whNQ2tPt", - "bHe33RmSTEGQlOE+3nGP3CHE2InYHhf9oRG4sLAB6iLhmDreTeggWR3rVArtddPrdHxDTRjw/QQy66m2", - "/9R+k+cL5KryGSg4Fc65glVDDDbfekan3lZZkhA1tbK7pygeQ3zlXrVdFdILBXrHtDn2Q75TorV6s74N", - "VuvK1iW1fNlSHNi/i/Bup/tgGvbN8QayHwXJzFgq9g2oJbr3gGZdSPRYGFCCcKRBTUCFVmc5CHH/UzX8", - "Pn25+1K2u1PXTFep1A22Lp1hY58iQJuXkk4fTMSGU/K7ajqyyfSu5mm9B+MgOFiDkt22ZZD3FTw2Inoq", - "4mfeuzZg6JeEovyc5O/y6N3O7gY8eq41/4Qi6TTj3B2Eh77SrBlYzqftW1tQ73xx4+ABezXajtzzPNpS", - "okgCBpR2HMzZ6OxdC0QsKdCwvc63EPatgzM53sgLeTWiopLi5ntMX2rRttuALh1VL8pPN1nDTbx1c8eI", - "FqKF77C/vyU0uyT0a+916Kr82nvt+yq/7hzM7go9jrN0NpWa81Pin8630vneQCj2M6W51BS2iCvQXjFq", - "I4Av76veB/MVHP6EfevAvrK6liK/WY/7EcHf3E3CtfDfw5l45m9NCg8tlLD3/Vfhvqfi0t6LHALz1w7Z", - 
"zKLlHNe+ZXQd/DXz+bkS3FAuXafkoZFV7nQbB1c54SdZ4typgLuPGoBWqY4sxFobtXVnszlr4/DoSbuP", - "Q0g11dUTSJvLkV7W7cvV8M6OewS/imoX4CTn8hpZvtCWNgpI4pue5+evCpT/NQM1ndEcujm4TGf+BxL1", - "Xz8sPv7iTIBGRiIFJlPCX3gAd8OuiXq4/ddAu9tp6k6vDiUDN6YNExCm5TVQdaqGa352QsoJE8tH1iGn", - "HKFA4mdgrZeXnUcWseX91PlmU3iF4zR31tWITM/8gH906s7PFP9mF9vt7D8+6UMphpzFBrVmPmK5YMLC", - "OUEHUyRV+bD2KTl/cNaZZC4zBrka/T9/t9D/wznxP9r/Z7b/l0dALJWC2PgrHE+rKV6CU6VQ3nK3Pma3", - "KaIcrl+cnDQXBH/+rdu3/sPxqj3c7Aeoj4S+GhbJWXsSURYuHFAI9xk2HmFS5b/tfaKNfKu4XASX0Mt7", - "zeasXf5h9FPwy4dv9jX9NHytVt9Go6K45fOjRMWmK1DggXB3+b2ij6cSoN7TckmMnGsIhoKy9MjjIozZ", - "xIFHSAr3OO7IJfjZGV7jsKOkrGVHHUVqfryDjr+Q+x7OuLmXLcx8P484fvgjjkluw1kWW/NQ4/GAx1pH", - "GgXk3OyBxsWPU0+ZfpKlNFxamRQlalHXe5MO1tlcUtz0GcrFE94XvYG82JbOT9wCdsWmW0zvZEw4ojAB", - "LlP3kz0/Fkc4UzxcDO+3/U+Ix1Kb/ovOiw6++3L3nwAAAP//Q9I2d4NKAAA=", + "H4sIAAAAAAAC/+xce28TOxb/Kpb3XimsJs2jLUtz/1iVUqAShYpC0S6wlTNzkvjWYw+2J21A/e4rP+aV", + "mTx6aXPJvUhIJDO2z+t3Hj52+g2HIk4EB64VHnzDKpxATOzHQ61JOLkQLI3hLXxJQWnzOJEiAakp2EGx", + "SLm+TIiemG8RqFDSRFPB8QCfET1B1xOQgKZ2FaQmImURGgKy8yDCAYYbEicM8AB3Yq47EdEEB1jPEvNI", + "aUn5GN8GWAKJBGczR2ZEUqbxYESYgmCO7KlZGhGFzJS2nZOvNxSCAeH41q74JaUSIjz4WBbjcz5YDH+H", + "UBviRxKIhpOYjBdrgpMY6jp4c3SCqJmHJIxAAg8BtWBnvBOgSIRXIHeo6DA6lETOOnxM+c2AEQ1KP6qo", + "ZvnYur7mxLO8LRGMK014uFg24FPzH4kiauQi7Kzyumasqg6O+ZRKwWPgGk2JpGTIQJXF+4Zfv3l2fHn8", + "+gIPDOUoDe3UAJ+9efsOD/But9s169b4nwidsHR8qehXqCAD7754iucZOcz5RzHEQs7QSEjk10CtSRoT", + "3jaoMRyadzHRiNErQJ/Mep9wgD7h3otPuGqcviVVU4I1+1qIWGFqwhLKYaGtgwXQe1kVxwxCLSauQYZE", + "AWKgNUgVoIiOqVYBIjxCEVETUMg4zW8oJJwLjZQmUiMhEfAIXVM9QcSOqyohnrWvhbxigkTtHg5wTG5e", + "AR+buPB4N8AJMdQMW//7SNpfu+2Dzy3/of35n9mjR//+pUk+MQXJyKzByr1ug5k/SKqtyH4eiqi6Qmby", + "Chub1ZyR97t1K3ebzdzAVANPT43KPejW4SRnpNc/9R/76wJvGiapqrDUn2fndRoPQSIxQlMqdUoYOjp7", + "X/HJfr4w5RrGIJuDSgbzxcFlRQKhUYOLJN5Lw1RpESMaAdd0REGiFkm1aI+BgyQaIkRHyGA0kWJKI4iq", + "+pkK1jb5xAJyTa9x7CIvXAXfdimXyRYB4XI8rC95buxNORrTMRnOdDX29brrKjpbv0nVx1IKWVduKKIG", + 
"EQ+ThNGQmG9tlUBIRzREYFZAZgJqxSScUA45OKtaHZLoUnpzBk2xXxPKVAPZIvo6Yn4kapkoHKdM04SB", + "e6cMSaohtuv8ImGEB/gfnaJW6fhCpWMlf2ZXKmUIIiWZ2SDMOchLyNRzh5ViUKoxgM/F1UyWfIhNKhEM", + "0/HYqKSsulOqFOVjlFkXjSiwaODywcosbq1ZMLYQB16GNdHwymSENoMpsDIInEcZZmMhAeU4cUarSEX5", + "lDAaXVKepI2QWKjK56nUE8iQgMhQpBrpiVdqhYgtvqyvj0TKo0Zl1dTxEghzlWlVE0oTnfrKJo2NbsWV", + "0WdBTlytNIdfpMkMJ1nqnzNA3BDsjk6foZEUMQoF14RykCgGTXwdnHP0EduKDwe4bTAVEYgFR2I0+s1w", + "kLtKPcqljBmc4oGWKdQdJLRBOrokuoE1884gWtMYlCZxglpvnx/t7u4ezOfG/n6722v39t/1uoOu+fdf", + "HGCX00xNQzS0zSKNAYOOfWaoUn8LSrApRCgmnI5AaeRHlimrCenvPx6QYdjr70Yw2tt/vLOz00QGuJaz", + "RFDeQOo4f7eeKTquSGsXa+6oyffZ4QFK7HVk+YbPDt+9NJuvVMkOEyFhHTWkfFD6nn8tXtgP7uuQ8sbS", + "PI+5c5zaEOMjgknfzo0QVWhEKJvbEiYpY/75wEjCIcwBKWywWaDXVWn+tYEmo18hQo1bNE3GpuR1iPu+", + "vViAv6SQwmUiFHXUaxtl/8YUCcOUsgjZGahlhMtKHPuoWuD0F4qflxK+bHBlR43ws7wwNpTNGE8z5Zoy", + "u4GeVSju7z5+8q/uQa9fcm7K9eM9vBYredidK46tzP5tkMfkBHjkMqiBgfsUCj41XmG/WP5MnHHAqQTw", + "7F3NGGaXQvn4MqIN6PzgXqKISgi13SGu9iHcIUmyGorNVV0e03LxSxG5Mbf4zXpDern/UL57t1D+MH2C", + "+q6fqEvFSaImokHUDxOwRQVB2RgEN1RpZQszPaEK0UyHJcl9L2m+UdTUY6hUg757sGRvt163oKE0OKzu", + "dVJOv6RQ2Q0dvT951vd7xyoZ/XWPHDy5uSH64DG9Vgdf46Ec/75LtqRTsbS3sEaD4I/1A6oq7K3c8d9l", + "h78WCmyvZYELn9s+zN39d3+h/67MnSYmwap9UxaQzu1gO0skyUIhRHInGforYtBKGUrdkPvugFBTrFTa", + "IJnK1o7i55mGq9xlr21ShMEn3kaumxIN0MXpKfKro2GqUd6jgwi1jphII/RyloCcUiUk4kTTKTwyK7xN", + "Oad8bFYw5RYJzRs2Q9I9Xz75jKTKUTdzE/tt+YzzSaojcc3tHDVJNTLfLMtGBB+Tly/hkDRAr4Wd4zkN", + "EBfzwd0NJzwazurD5xNBKyQcDU1cU1pIiB594qW6w2saB9hrDAfYiY8DnEllPjru7CdLuGTpAn+uoVTP", + "1sQer0B0qcUS+588M/VfNnZu261022181vGD+64Ougd33eg1pbj38zntDg28ZWdP7hDIvFuov/Jx0x/c", + "RfyAzcJyTMqIrIhGhh0IU0n17NzEdIfPIRAJ8jB12rXB3hZG9nEh0kTrBN/e2lbbqAHJL0z5QkN0eHZi", + "66+YcDI20ebiFDE6gnAWMkCpbYvVQoE9bXhzdNIeEhNusiLAFoVUW1Wb0THhZn0c4ClI5eh2d3o79sxI", + "JMBJQvEA79pH9ixiYkXsTPL+0BisWxgHtZ5wElnete8gGR2rRHDldNPvdl1DjWtw/QRS9FQ7vyu3yXMJ", + "clX69BSsCuegYNQQgom3jtGZs1Uax0TOjOz2KQonEF7ZVx2bhdRCgV5RpU/ckO+UaK3erGuD1bqydUkN", + 
"XyYVe/ZvA7zX7d2bhl1zvIHse05SPRGSfoXIEN2/R7MuJHrCNUhOGFIgpyB9q7PshHjwsep+Hz/ffi7b", + "3aqr0FUiVIOtS2fW2IUIUPqpiGb3JmLDqfhtNRyZYHpbQ1r/3jjwAGtQst22DLO+gquNiJrx8JFD1wYM", + "/ZREKDsn+bMQvdfd2wCi51rzW+RJZylj9uDb95WKZmA5nna+mYR665IbA1ewV73tmX2eeVtCJIlBg1SW", + "gzkbvX3VBh6KCCK/vc62EOatLWeyeiNL5FWPCkqKm+8xfa55215DdWmpOlF+wmQNmDjrZsAIFlYL32F/", + "dyuouBT0a/+576r82n/u+iq/7h4Wd4MeBizdTYXm7JT4J/hWgu8F+GRfKM2GJr9FXFHt5aM2UvBlfdW7", + "1Hw5hz/LvnXKvrK6llZ+RY/7AYu/uZuDa9V/92fiAm9NCvctFL/3/VvVfdsCaYciW4G5a4a0sGg5xnW+", + "0Wid+qvA/FwKbkiXtlNy35VVBrqNF1cZ4a1McfZUwN4/9YVWKY8srLU2auvuZmPWxsujrYaPrZBqqqsH", + "kA7cQGg78Y158/gGwodEVVBru4s4NqjXAhnOUg2o5e/sKvOwM6S84y5aW+pfUpCzgnzopuOgqUyrdZzn", + "6rEaM4eMidAGY5QoSCPRfvfuPwsoaz2rUM0vGle658WPLeb9qedy4ly785rqcEL52Ij+AYbnZitkr/Zq", + "EQr2J/vCXvdgw6T96dU2OeKxR7FHJqK88MspJWiqRHiFWrlxHzV6KRNjtawnn2nplRm3CT99LhgT18jw", + "hVpKSyCxO5o4Pz9e5JwjO6fZSxZcNakTLg6pGeVgY4IEnUruriWBdf5GB3V3dBto97pNZ0irE56GG92B", + "KXDddhqoIq7hMq6ZkDBC+fKR9Y2hGCNP4mf6W696sojMPc3h1GKzyb38offiPPjWDfhLF1jZyf/fIasc", + "CT5iNNSoXWDEcEG52XTxaDhDQpavVGwT+D1YC8lsZPRyNeI/e7cQ//42x18a/4Xt/+YeEAopIdTuotV2", + "HV2VNj0lV27Zu1nFnacg21RfnJ42JwR3S0V1vrkPJ6s6LcXPwh+o+mpYJGNtK7zMXwuKwN862riHCZn9", + "4n5Lj9uM4jIRbEAvd4Sao3b5zxVsAy7vvyXf9Acb1mrIb9Qr8rt4P4pXbDoDeR4Isz9RqehjWxzUIS2T", + "RIu5tr1PKEsPJi/8mE0cS/qgcIdDyUyCn+c3axxJlpS17EAyD80Pdxz5B2Lf/Rk3Q9nCyPfzIPKHP4ic", + "ZjYsotiaR48PV3isdfCYl5ybPXa8+HHyKVVbmUr91bJpnqIWdb03CbDu5oLipk86L7Z4X/QCsmRbOuW0", + "C5gVm+4avhIhYSiCKTCR2B/WurE4wKlk/ucbg477of9EKD140n3Sxbefb/8fAAD//7jaqHYZTgAA", } // GetSwagger returns the content of the embedded swagger specification file diff --git a/lib/paths/paths.go b/lib/paths/paths.go index ab345226..a2abcf3d 100644 --- a/lib/paths/paths.go +++ b/lib/paths/paths.go @@ -5,7 +5,8 @@ // {dataDir}/ // system/ // kernel/{version}/{arch}/vmlinux -// initrd/{version}/{arch}/initrd +// initrd/{arch}/{timestamp}/initrd +// initrd/{arch}/latest -> {timestamp} // binaries/{version}/{arch}/cloud-hypervisor // 
oci-cache/ // builds/{ref}/ @@ -43,9 +44,24 @@ func (p *Paths) SystemKernel(version, arch string) string { return filepath.Join(p.dataDir, "system", "kernel", version, arch, "vmlinux") } -// SystemInitrd returns the path to an initrd file. -func (p *Paths) SystemInitrd(version, arch string) string { - return filepath.Join(p.dataDir, "system", "initrd", version, arch, "initrd") +// SystemInitrd returns the path to the latest initrd symlink. +func (p *Paths) SystemInitrd(arch string) string { + return filepath.Join(p.dataDir, "system", "initrd", arch, "latest") +} + +// SystemInitrdTimestamp returns the path to a specific timestamped initrd build. +func (p *Paths) SystemInitrdTimestamp(timestamp, arch string) string { + return filepath.Join(p.dataDir, "system", "initrd", arch, timestamp, "initrd") +} + +// SystemInitrdLatest returns the path to the latest symlink (same as SystemInitrd). +func (p *Paths) SystemInitrdLatest(arch string) string { + return filepath.Join(p.dataDir, "system", "initrd", arch, "latest") +} + +// SystemInitrdDir returns the directory for initrd builds for an architecture. +func (p *Paths) SystemInitrdDir(arch string) string { + return filepath.Join(p.dataDir, "system", "initrd", arch) } // SystemOCICache returns the path to the OCI cache directory. @@ -122,6 +138,11 @@ func (p *Paths) InstanceSocket(id string) string { return filepath.Join(p.InstanceDir(id), "ch.sock") } +// InstanceVsockSocket returns the path to instance vsock socket. +func (p *Paths) InstanceVsockSocket(id string) string { + return filepath.Join(p.InstanceDir(id), "vsock.sock") +} + // InstanceLogs returns the path to instance logs directory. 
func (p *Paths) InstanceLogs(id string) string { return filepath.Join(p.InstanceDir(id), "logs") diff --git a/lib/system/exec_agent/main.go b/lib/system/exec_agent/main.go new file mode 100644 index 00000000..54bb79c1 --- /dev/null +++ b/lib/system/exec_agent/main.go @@ -0,0 +1,284 @@ +package main + +import ( + "context" + "fmt" + "log" + "os" + "os/exec" + "sync" + "time" + + "github.com/creack/pty" + "github.com/mdlayher/vsock" + pb "github.com/onkernel/hypeman/lib/exec" + "google.golang.org/grpc" +) + +// execServer implements the gRPC ExecService +type execServer struct { + pb.UnimplementedExecServiceServer +} + +func main() { + // Listen on vsock port 2222 with retries + var l *vsock.Listener + var err error + + for i := 0; i < 10; i++ { + l, err = vsock.Listen(2222, nil) + if err == nil { + break + } + log.Printf("[exec-agent] vsock listen attempt %d/10 failed: %v (retrying in 1s)", i+1, err) + time.Sleep(1 * time.Second) + } + + if err != nil { + log.Fatalf("[exec-agent] failed to listen on vsock port 2222 after retries: %v", err) + } + defer l.Close() + + log.Println("[exec-agent] listening on vsock port 2222") + + // Create gRPC server + grpcServer := grpc.NewServer() + pb.RegisterExecServiceServer(grpcServer, &execServer{}) + + // Serve gRPC over vsock + if err := grpcServer.Serve(l); err != nil { + log.Fatalf("[exec-agent] gRPC server failed: %v", err) + } +} + +// Exec handles command execution with bidirectional streaming +func (s *execServer) Exec(stream pb.ExecService_ExecServer) error { + log.Printf("[exec-agent] new exec stream") + + // Receive start request + req, err := stream.Recv() + if err != nil { + return fmt.Errorf("receive start request: %w", err) + } + + start := req.GetStart() + if start == nil { + return fmt.Errorf("first message must be ExecStart") + } + + command := start.Command + if len(command) == 0 { + command = []string{"/bin/sh"} + } + + log.Printf("[exec-agent] exec: command=%v tty=%v cwd=%s timeout=%d", + command, start.Tty, 
start.Cwd, start.TimeoutSeconds) + + // Create context with timeout if specified + ctx := context.Background() + if start.TimeoutSeconds > 0 { + var cancel context.CancelFunc + ctx, cancel = context.WithTimeout(ctx, time.Duration(start.TimeoutSeconds)*time.Second) + defer cancel() + } + + if start.Tty { + return s.executeTTY(ctx, stream, start) + } + return s.executeNoTTY(ctx, stream, start) +} + +// executeNoTTY executes command without TTY +func (s *execServer) executeNoTTY(ctx context.Context, stream pb.ExecService_ExecServer, start *pb.ExecStart) error { + // Run command directly - exec-agent is already running in container namespace + if len(start.Command) == 0 { + return fmt.Errorf("empty command") + } + + cmd := exec.CommandContext(ctx, start.Command[0], start.Command[1:]...) + + // Set up environment + cmd.Env = s.buildEnv(start.Env) + + // Set up working directory + if start.Cwd != "" { + cmd.Dir = start.Cwd + } + + stdin, _ := cmd.StdinPipe() + stdout, _ := cmd.StdoutPipe() + stderr, _ := cmd.StderrPipe() + + if err := cmd.Start(); err != nil { + return fmt.Errorf("start command: %w", err) + } + + // Use WaitGroup to ensure all output is sent before exit code + var wg sync.WaitGroup + + // Handle stdin in background + go func() { + defer stdin.Close() + for { + req, err := stream.Recv() + if err != nil { + return + } + if data := req.GetStdin(); data != nil { + stdin.Write(data) + } + } + }() + + // Stream stdout + wg.Add(1) + go func() { + defer wg.Done() + buf := make([]byte, 32 * 1024) + for { + n, err := stdout.Read(buf) + if n > 0 { + stream.Send(&pb.ExecResponse{ + Response: &pb.ExecResponse_Stdout{Stdout: buf[:n]}, + }) + } + if err != nil { + return + } + } + }() + + // Stream stderr + wg.Add(1) + go func() { + defer wg.Done() + buf := make([]byte, 32 * 1024) + for { + n, err := stderr.Read(buf) + if n > 0 { + stream.Send(&pb.ExecResponse{ + Response: &pb.ExecResponse_Stderr{Stderr: buf[:n]}, + }) + } + if err != nil { + return + } + } + }() + + 
// Wait for command to finish or context cancellation + waitErr := cmd.Wait() + + // Wait for all output to be sent + wg.Wait() + + exitCode := int32(0) + if cmd.ProcessState != nil { + exitCode = int32(cmd.ProcessState.ExitCode()) + } else if waitErr != nil { + // If killed by timeout, exit with 124 (GNU timeout convention) + exitCode = 124 + } + + log.Printf("[exec-agent] command finished with exit code: %d", exitCode) + + // Send exit code + return stream.Send(&pb.ExecResponse{ + Response: &pb.ExecResponse_ExitCode{ExitCode: exitCode}, + }) +} + +// executeTTY executes command with TTY +func (s *execServer) executeTTY(ctx context.Context, stream pb.ExecService_ExecServer, start *pb.ExecStart) error { + // Run command directly with PTY - exec-agent is already running in container namespace + // This ensures PTY and shell are in the same namespace, fixing Ctrl+C signal handling + if len(start.Command) == 0 { + return fmt.Errorf("empty command") + } + + cmd := exec.CommandContext(ctx, start.Command[0], start.Command[1:]...) 
+ + // Set up environment + cmd.Env = s.buildEnv(start.Env) + + // Set up working directory + if start.Cwd != "" { + cmd.Dir = start.Cwd + } + + // Start with PTY + ptmx, err := pty.Start(cmd) + if err != nil { + return fmt.Errorf("start pty: %w", err) + } + defer ptmx.Close() + + // Use WaitGroup to ensure all output is sent before exit code + var wg sync.WaitGroup + + // Handle stdin in background + go func() { + for { + req, err := stream.Recv() + if err != nil { + return + } + + if data := req.GetStdin(); data != nil { + ptmx.Write(data) + } + } + }() + + // Stream output + wg.Add(1) + go func() { + defer wg.Done() + buf := make([]byte, 32 * 1024) + for { + n, err := ptmx.Read(buf) + if n > 0 { + stream.Send(&pb.ExecResponse{ + Response: &pb.ExecResponse_Stdout{Stdout: buf[:n]}, + }) + } + if err != nil { + return + } + } + }() + + // Wait for command or context cancellation + waitErr := cmd.Wait() + + // Wait for all output to be sent + wg.Wait() + + exitCode := int32(0) + if cmd.ProcessState != nil { + exitCode = int32(cmd.ProcessState.ExitCode()) + } else if waitErr != nil { + // If killed by timeout, exit with 124 (GNU timeout convention) + exitCode = 124 + } + + log.Printf("[exec-agent] TTY command finished with exit code: %d", exitCode) + + // Send exit code + return stream.Send(&pb.ExecResponse{ + Response: &pb.ExecResponse_ExitCode{ExitCode: exitCode}, + }) +} + +// buildEnv constructs environment variables by merging provided env with defaults +func (s *execServer) buildEnv(envMap map[string]string) []string { + // Start with current environment as base + env := os.Environ() + + // Merge in provided environment variables + for k, v := range envMap { + env = append(env, fmt.Sprintf("%s=%s", k, v)) + } + + return env +} diff --git a/lib/system/exec_agent_binary.go b/lib/system/exec_agent_binary.go new file mode 100644 index 00000000..eb7ac875 --- /dev/null +++ b/lib/system/exec_agent_binary.go @@ -0,0 +1,9 @@ +package system + +import _ "embed" + +// 
ExecAgentBinary contains the embedded exec-agent binary +// This is built by the Makefile before the main binary is compiled +//go:embed exec_agent/exec-agent +var ExecAgentBinary []byte + diff --git a/lib/system/init_script.go b/lib/system/init_script.go index c5a4d88a..c51e0d30 100644 --- a/lib/system/init_script.go +++ b/lib/system/init_script.go @@ -9,19 +9,22 @@ package system // 3. Mounts and sources config disk (/dev/vdc) // 4. Configures networking (if enabled) // 5. Executes container entrypoint -func GenerateInitScript(version InitrdVersion) string { +func GenerateInitScript() string { return `#!/bin/sh set -xe -echo "overlay-init: START (` + string(version) + `)" > /dev/kmsg +echo "overlay-init: START" > /dev/kmsg # Create mount points mkdir -p /proc /sys /dev # Mount essential filesystems +# devtmpfs handles /dev population (null, zero, vsock, etc.) automatically mount -t proc none /proc mount -t sysfs none /sys mount -t devtmpfs none /dev + +# Setup PTY support (needed for exec-agent and interactive shells) mkdir -p /dev/pts /dev/shm mount -t devpts devpts /dev/pts chmod 1777 /dev/shm @@ -68,47 +71,69 @@ else exit 1 fi -# Move essential mounts to new root before chroot -cd /overlay/newroot -mkdir -p proc sys dev -mount --move /proc proc -mount --move /sys sys -mount --move /dev dev +# Prepare new root mount points +# We use bind mounts instead of move so that the original /dev remains populated +# for processes running in the initrd namespace (like exec-agent). +mkdir -p /overlay/newroot/proc +mkdir -p /overlay/newroot/sys +mkdir -p /overlay/newroot/dev +mkdir -p /overlay/newroot/dev/pts + +mount --bind /proc /overlay/newroot/proc +mount --bind /sys /overlay/newroot/sys +mount --bind /dev /overlay/newroot/dev +mount --bind /dev/pts /overlay/newroot/dev/pts -echo "overlay-init: moved mounts to new root" +echo "overlay-init: bound mounts to new root" -# Set up /dev symlinks for process substitution (Docker compatibility) -chroot . 
ln -sf /proc/self/fd /dev/fd 2>/dev/null || true -chroot . ln -sf /proc/self/fd/0 /dev/stdin 2>/dev/null || true -chroot . ln -sf /proc/self/fd/1 /dev/stdout 2>/dev/null || true -chroot . ln -sf /proc/self/fd/2 /dev/stderr 2>/dev/null || true +# Set up /dev symlinks for process substitution inside the container +chroot /overlay/newroot ln -sf /proc/self/fd /dev/fd 2>/dev/null || true +chroot /overlay/newroot ln -sf /proc/self/fd/0 /dev/stdin 2>/dev/null || true +chroot /overlay/newroot ln -sf /proc/self/fd/1 /dev/stdout 2>/dev/null || true +chroot /overlay/newroot ln -sf /proc/self/fd/2 /dev/stderr 2>/dev/null || true -# Configure network (if GUEST_IP is set) +# Configure network inside the container view if [ -n "${GUEST_IP:-}" ]; then echo "overlay-init: configuring network" - chroot . ip link set lo up - chroot . ip addr add ${GUEST_IP}/${GUEST_MASK} dev eth0 - chroot . ip link set eth0 up - chroot . ip route add default via ${GUEST_GW} - echo "nameserver ${GUEST_DNS}" > etc/resolv.conf + chroot /overlay/newroot ip link set lo up + chroot /overlay/newroot ip addr add ${GUEST_IP}/${GUEST_MASK} dev eth0 + chroot /overlay/newroot ip link set eth0 up + chroot /overlay/newroot ip route add default via ${GUEST_GW} + echo "nameserver ${GUEST_DNS}" > /overlay/newroot/etc/resolv.conf echo "overlay-init: network configured - IP: ${GUEST_IP}" fi -# Set PATH for proper binary resolution +# Set PATH for initrd tools export PATH='/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin' export HOME='/root' +# Copy exec-agent into container rootfs and start it in container namespace +# This way the PTY and shell run in the same namespace, fixing signal handling +echo "overlay-init: copying exec-agent to container" +mkdir -p /overlay/newroot/usr/local/bin +cp /usr/local/bin/exec-agent /overlay/newroot/usr/local/bin/exec-agent + +# Start vsock exec agent inside the container namespace +echo "overlay-init: starting exec agent in container namespace" +chroot /overlay/newroot 
/usr/local/bin/exec-agent & + echo "overlay-init: launching entrypoint" echo "overlay-init: workdir=${WORKDIR:-/} entrypoint=${ENTRYPOINT} cmd=${CMD}" -# Change to workdir -cd ${WORKDIR:-/} +set +e + +# Construct the command string carefully +# ENTRYPOINT and CMD are shell-safe quoted strings from config.sh +eval "chroot /overlay/newroot /bin/sh -c \"cd ${WORKDIR:-/} && exec ${ENTRYPOINT} ${CMD}\"" & +APP_PID=$! + +echo "overlay-init: container app started (PID $APP_PID)" + +# Wait for app to exit +wait $APP_PID +APP_EXIT=$? -# Execute entrypoint with cmd as arguments -# Using eval to properly handle quoted arguments in ENTRYPOINT and CMD -# This preserves arguments like 'daemon off;' as single args -# When it exits, the VM stops (like Docker containers) -eval "exec chroot /overlay/newroot ${ENTRYPOINT} ${CMD}" -` +echo "overlay-init: app exited with code $APP_EXIT" +exit $APP_EXIT` } diff --git a/lib/system/initrd.go b/lib/system/initrd.go index 8d4a6989..c409ec70 100644 --- a/lib/system/initrd.go +++ b/lib/system/initrd.go @@ -2,80 +2,143 @@ package system import ( "context" + "crypto/sha256" + "encoding/hex" "fmt" "os" "path/filepath" + "strconv" + "time" "github.com/onkernel/hypeman/lib/images" ) -// buildInitrd builds initrd from busybox + custom init script -func (m *manager) buildInitrd(ctx context.Context, version InitrdVersion, arch string) error { +const alpineBaseImage = "alpine:3.22" + +// buildInitrd builds initrd from Alpine base + embedded exec-agent + generated init script +func (m *manager) buildInitrd(ctx context.Context, arch string) (string, error) { // Create temp directory for building tempDir, err := os.MkdirTemp("", "hypeman-initrd-*") if err != nil { - return fmt.Errorf("create temp dir: %w", err) + return "", fmt.Errorf("create temp dir: %w", err) } defer os.RemoveAll(tempDir) rootfsDir := filepath.Join(tempDir, "rootfs") - // Get pinned busybox version for this initrd version (ensures reproducible builds) - busyboxRef, ok := 
InitrdBusyboxVersions[version] - if !ok { - return fmt.Errorf("no busybox version defined for initrd %s", version) - } - - // Create a temporary OCI client (reuses image manager's cache) + // Create OCI client (reuses image manager's cache) cacheDir := m.paths.SystemOCICache() ociClient, err := images.NewOCIClient(cacheDir) if err != nil { - return fmt.Errorf("create oci client: %w", err) + return "", fmt.Errorf("create oci client: %w", err) } - // Inspect to get digest - digest, err := ociClient.InspectManifest(ctx, busyboxRef) + // Inspect Alpine base to get digest + digest, err := ociClient.InspectManifest(ctx, alpineBaseImage) if err != nil { - return fmt.Errorf("inspect busybox manifest: %w", err) + return "", fmt.Errorf("inspect alpine manifest: %w", err) + } + + // Pull and unpack Alpine base + if err := ociClient.PullAndUnpack(ctx, alpineBaseImage, digest, rootfsDir); err != nil { + return "", fmt.Errorf("pull alpine base: %w", err) } - // Pull and unpack busybox - if err := ociClient.PullAndUnpack(ctx, busyboxRef, digest, rootfsDir); err != nil { - return fmt.Errorf("pull busybox: %w", err) + // Write embedded exec-agent binary + binDir := filepath.Join(rootfsDir, "usr/local/bin") + if err := os.MkdirAll(binDir, 0755); err != nil { + return "", fmt.Errorf("create bin dir: %w", err) + } + + agentPath := filepath.Join(binDir, "exec-agent") + if err := os.WriteFile(agentPath, ExecAgentBinary, 0755); err != nil { + return "", fmt.Errorf("write exec-agent: %w", err) } - // Inject init script - initScript := GenerateInitScript(version) + // Write generated init script + initScript := GenerateInitScript() initPath := filepath.Join(rootfsDir, "init") if err := os.WriteFile(initPath, []byte(initScript), 0755); err != nil { - return fmt.Errorf("write init script: %w", err) + return "", fmt.Errorf("write init script: %w", err) } - // Package as cpio.gz (initramfs format) - outputPath := m.paths.SystemInitrd(string(version), arch) + // Generate timestamp for this 
build + timestamp := strconv.FormatInt(time.Now().Unix(), 10) + + // Package as cpio.gz + outputPath := m.paths.SystemInitrdTimestamp(timestamp, arch) + if err := os.MkdirAll(filepath.Dir(outputPath), 0755); err != nil { + return "", fmt.Errorf("create output dir: %w", err) + } + if _, err := images.ExportRootfs(rootfsDir, outputPath, images.FormatCpio); err != nil { - return fmt.Errorf("export initrd: %w", err) + return "", fmt.Errorf("export initrd: %w", err) } - return nil -} + // Store hash for staleness detection + hashPath := filepath.Join(filepath.Dir(outputPath), ".hash") + currentHash := computeInitrdHash() + if err := os.WriteFile(hashPath, []byte(currentHash), 0644); err != nil { + return "", fmt.Errorf("write hash file: %w", err) + } -// ensureInitrd ensures initrd exists, builds if missing -func (m *manager) ensureInitrd(ctx context.Context, version InitrdVersion) (string, error) { - arch := GetArch() + // Update 'latest' symlink + latestLink := m.paths.SystemInitrdLatest(arch) + // Remove old symlink if it exists + os.Remove(latestLink) + // Create new symlink (relative path) + if err := os.Symlink(timestamp, latestLink); err != nil { + return "", fmt.Errorf("create latest symlink: %w", err) + } - initrdPath := m.paths.SystemInitrd(string(version), arch) + return outputPath, nil +} - // Check if already exists - if _, err := os.Stat(initrdPath); err == nil { - return initrdPath, nil +// ensureInitrd ensures initrd exists and is up-to-date, builds if missing or stale +func (m *manager) ensureInitrd(ctx context.Context) (string, error) { + arch := GetArch() + latestLink := m.paths.SystemInitrdLatest(arch) + + // Check if latest symlink exists + if target, err := os.Readlink(latestLink); err == nil { + // Symlink exists, check if the actual file exists + initrdPath := m.paths.SystemInitrdTimestamp(target, arch) + if _, err := os.Stat(initrdPath); err == nil { + // File exists, check if it's stale by comparing embedded binary hash + if 
!m.isInitrdStale(initrdPath) { + return initrdPath, nil + } + } } - // Build initrd - if err := m.buildInitrd(ctx, version, arch); err != nil { + // Build new initrd + initrdPath, err := m.buildInitrd(ctx, arch) + if err != nil { return "", fmt.Errorf("build initrd: %w", err) } return initrdPath, nil } +// isInitrdStale checks if the initrd needs rebuilding by comparing hashes +func (m *manager) isInitrdStale(initrdPath string) bool { + // Read stored hash + hashPath := filepath.Join(filepath.Dir(initrdPath), ".hash") + storedHash, err := os.ReadFile(hashPath) + if err != nil { + // No hash file, consider stale + return true + } + + // Compare with current hash + currentHash := computeInitrdHash() + return string(storedHash) != currentHash +} + +// computeInitrdHash computes a hash of the embedded binary and init script +func computeInitrdHash() string { + h := sha256.New() + h.Write(ExecAgentBinary) + h.Write([]byte(GenerateInitScript())) + return hex.EncodeToString(h.Sum(nil))[:16] +} diff --git a/lib/system/manager.go b/lib/system/manager.go index 1f538435..87fc4282 100644 --- a/lib/system/manager.go +++ b/lib/system/manager.go @@ -3,6 +3,7 @@ package system import ( "context" "fmt" + "os" "github.com/onkernel/hypeman/lib/paths" ) @@ -15,11 +16,11 @@ type Manager interface { // GetKernelPath returns path to kernel file GetKernelPath(version KernelVersion) (string, error) - // GetInitrdPath returns path to initrd file - GetInitrdPath(version InitrdVersion) (string, error) + // GetInitrdPath returns path to current initrd file + GetInitrdPath() (string, error) - // GetDefaultVersions returns the default kernel and initrd versions - GetDefaultVersions() (KernelVersion, InitrdVersion) + // GetDefaultKernelVersion returns the default kernel version + GetDefaultKernelVersion() KernelVersion } type manager struct { @@ -35,16 +36,16 @@ func NewManager(p *paths.Paths) Manager { // EnsureSystemFiles ensures default kernel and initrd exist, downloading/building if needed 
func (m *manager) EnsureSystemFiles(ctx context.Context) error { - kernelVer, initrdVer := m.GetDefaultVersions() + kernelVer := m.GetDefaultKernelVersion() // Ensure kernel exists if _, err := m.ensureKernel(kernelVer); err != nil { return fmt.Errorf("ensure kernel %s: %w", kernelVer, err) } - // Ensure initrd exists - if _, err := m.ensureInitrd(ctx, initrdVer); err != nil { - return fmt.Errorf("ensure initrd %s: %w", initrdVer, err) + // Ensure initrd exists (builds if missing or stale) + if _, err := m.ensureInitrd(ctx); err != nil { + return fmt.Errorf("ensure initrd: %w", err) } return nil @@ -57,15 +58,22 @@ func (m *manager) GetKernelPath(version KernelVersion) (string, error) { return path, nil } -// GetInitrdPath returns the path to an initrd version -func (m *manager) GetInitrdPath(version InitrdVersion) (string, error) { +// GetInitrdPath returns the path to the current initrd file +func (m *manager) GetInitrdPath() (string, error) { arch := GetArch() - path := m.paths.SystemInitrd(string(version), arch) - return path, nil + latestLink := m.paths.SystemInitrdLatest(arch) + + // Read the symlink to get the timestamp + target, err := os.Readlink(latestLink) + if err != nil { + return "", fmt.Errorf("read latest symlink: %w", err) + } + + return m.paths.SystemInitrdTimestamp(target, arch), nil } -// GetDefaultVersions returns the default kernel and initrd versions -func (m *manager) GetDefaultVersions() (KernelVersion, InitrdVersion) { - return DefaultKernelVersion, DefaultInitrdVersion +// GetDefaultKernelVersion returns the default kernel version +func (m *manager) GetDefaultKernelVersion() KernelVersion { + return DefaultKernelVersion } diff --git a/lib/system/manager_test.go b/lib/system/manager_test.go index 42a2a5ee..3540e448 100644 --- a/lib/system/manager_test.go +++ b/lib/system/manager_test.go @@ -9,30 +9,23 @@ import ( "github.com/stretchr/testify/require" ) -func TestGetDefaultVersions(t *testing.T) { +func TestGetDefaultKernelVersion(t 
*testing.T) { tmpDir := t.TempDir() mgr := NewManager(paths.New(tmpDir)) - kernelVer, initrdVer := mgr.GetDefaultVersions() + kernelVer := mgr.GetDefaultKernelVersion() assert.Equal(t, DefaultKernelVersion, kernelVer) - assert.Equal(t, DefaultInitrdVersion, initrdVer) } -func TestGetPaths(t *testing.T) { +func TestGetKernelPath(t *testing.T) { tmpDir := t.TempDir() mgr := NewManager(paths.New(tmpDir)) // Get kernel path - kernelPath, err := mgr.GetKernelPath(KernelCH_6_12_8_20250613) + kernelPath, err := mgr.GetKernelPath(DefaultKernelVersion) require.NoError(t, err) - assert.Contains(t, kernelPath, "kernel/ch-release-v6.12.8-20250613") + assert.Contains(t, kernelPath, "kernel") assert.Contains(t, kernelPath, "vmlinux") - - // Get initrd path - initrdPath, err := mgr.GetInitrdPath(InitrdV1_0_0) - require.NoError(t, err) - assert.Contains(t, initrdPath, "initrd/v1.0.0") - assert.Contains(t, initrdPath, "initrd") } func TestEnsureSystemFiles(t *testing.T) { @@ -56,7 +49,7 @@ func TestEnsureSystemFiles(t *testing.T) { assert.FileExists(t, kernelPath) // Verify initrd exists - initrdPath, err := mgr.GetInitrdPath(DefaultInitrdVersion) + initrdPath, err := mgr.GetInitrdPath() require.NoError(t, err) assert.FileExists(t, initrdPath) @@ -66,7 +59,7 @@ func TestEnsureSystemFiles(t *testing.T) { } func TestInitScriptGeneration(t *testing.T) { - script := GenerateInitScript(InitrdV1_0_0) + script := GenerateInitScript() // Verify script contains essential components assert.Contains(t, script, "#!/bin/sh") @@ -74,8 +67,7 @@ func TestInitScriptGeneration(t *testing.T) { assert.Contains(t, script, "/dev/vda") // rootfs disk assert.Contains(t, script, "/dev/vdb") // overlay disk assert.Contains(t, script, "/dev/vdc") // config disk - assert.Contains(t, script, "exec chroot") + assert.Contains(t, script, "exec-agent") // vsock exec agent assert.Contains(t, script, "${ENTRYPOINT}") - assert.Contains(t, script, "v1.0.0") // Version in script + assert.Contains(t, script, "wait 
$APP_PID") // Supervisor pattern } - diff --git a/lib/system/versions.go b/lib/system/versions.go index 35493a00..167cb4d9 100644 --- a/lib/system/versions.go +++ b/lib/system/versions.go @@ -5,50 +5,27 @@ import "runtime" // KernelVersion represents a Cloud Hypervisor kernel version type KernelVersion string -// InitrdVersion represents our internal initrd version -type InitrdVersion string - const ( - // Kernel versions from Cloud Hypervisor releases (full version with date) - KernelCH_6_12_8_20250613 KernelVersion = "ch-release-v6.12.8-20250613" - - // Initrd versions (our internal versioning) - // Bump when init script logic changes - InitrdV1_0_0 InitrdVersion = "v1.0.0" + // Kernel versions from Kernel linux build + Kernel_202511182 KernelVersion = "ch-6.12.8-kernel-1-202511182" ) -// InitrdBusyboxVersions maps initrd versions to specific busybox digests -// Using digest references (not mutable tags) ensures reproducible builds -// When bumping initrd version, you can reuse the same busybox digest if busybox doesn't need updating -var InitrdBusyboxVersions = map[InitrdVersion]string{ - InitrdV1_0_0: "docker.io/library/busybox@sha256:355b3a1bf5609da364166913878a8508d4ba30572d02020a97028c75477e24ff", // busybox:stable as of 2025-01-12 - // Add future versions here -} - var ( // DefaultKernelVersion is the kernel version used for new instances - DefaultKernelVersion = KernelCH_6_12_8_20250613 - - // DefaultInitrdVersion is the initrd version used for new instances - DefaultInitrdVersion = InitrdV1_0_0 + DefaultKernelVersion = Kernel_202511182 // SupportedKernelVersions lists all supported kernel versions SupportedKernelVersions = []KernelVersion{ - KernelCH_6_12_8_20250613, + Kernel_202511182, // Add future versions here } - - // SupportedInitrdVersions lists all supported initrd versions - SupportedInitrdVersions = []InitrdVersion{ - InitrdV1_0_0, - } ) // KernelDownloadURLs maps kernel versions and architectures to download URLs var KernelDownloadURLs = 
map[KernelVersion]map[string]string{ - KernelCH_6_12_8_20250613: { - "x86_64": "https://github.com/cloud-hypervisor/linux/releases/download/ch-release-v6.12.8-20250613/vmlinux-x86_64", - "aarch64": "https://github.com/cloud-hypervisor/linux/releases/download/ch-release-v6.12.8-20250613/Image-aarch64", + Kernel_202511182: { + "x86_64": "https://github.com/onkernel/linux/releases/download/ch-6.12.8-kernel-1-202511182/vmlinux-x86_64", + "aarch64": "https://github.com/onkernel/linux/releases/download/ch-6.12.8-kernel-1-202511182/Image-arm64", }, // Add future versions here } diff --git a/lib/system/versions_test.go b/lib/system/versions_test.go deleted file mode 100644 index 6a0d3ee4..00000000 --- a/lib/system/versions_test.go +++ /dev/null @@ -1,70 +0,0 @@ -package system - -import ( - "crypto/sha256" - "fmt" - "testing" - - "github.com/stretchr/testify/require" -) - -// expectedInitrdHashes maps initrd versions to their expected content hash -// The hash is computed from: sha256(initScript + busyboxDigest) -// This ensures that changes to either the script OR busybox version require a version bump -var expectedInitrdHashes = map[InitrdVersion]string{ - InitrdV1_0_0: "a787826fcc61f75cea4f28ef9b0c4f6c25e18866583c652174bb2c20bfe2de6c", - // Add future versions here -} - -func TestInitrdVersionIntegrity(t *testing.T) { - for version, expectedHash := range expectedInitrdHashes { - t.Run(string(version), func(t *testing.T) { - // Get the busybox digest for this version - busyboxDigest, ok := InitrdBusyboxVersions[version] - require.True(t, ok, "Missing busybox digest for %s", version) - - // Compute hash from script + digest - script := GenerateInitScript(version) - combined := script + busyboxDigest - actualHash := fmt.Sprintf("%x", sha256.Sum256([]byte(combined))) - - if expectedHash == "PLACEHOLDER" { - t.Fatalf("Initrd %s needs hash to be set.\n"+ - "Add this to expectedInitrdHashes in versions_test.go:\n"+ - " InitrdV1_0_0: %q,\n", - version, actualHash) - } - - 
require.Equal(t, expectedHash, actualHash, - "Initrd %s content changed!\n"+ - "Expected hash: %s\n"+ - "Actual hash: %s\n\n"+ - "If this is intentional, create a new version:\n"+ - "1. Add new constant in versions.go: InitrdV1_1_0 = \"v1.1.0\"\n"+ - "2. Add busybox digest to InitrdBusyboxVersions map\n"+ - "3. Add to SupportedInitrdVersions list\n"+ - "4. Add this hash to expectedInitrdHashes in versions_test.go:\n"+ - " InitrdV1_1_0: %q,\n"+ - "5. Update DefaultInitrdVersion if this should be the new default\n", - version, expectedHash, actualHash, actualHash) - }) - } -} - -func TestInitrdBusyboxVersionsArePinned(t *testing.T) { - // Ensure all initrd versions use digest-pinned busybox references (not mutable tags) - for version, busyboxRef := range InitrdBusyboxVersions { - require.Contains(t, busyboxRef, "@sha256:", - "busybox version for %s must be pinned to a digest (e.g., busybox@sha256:...), not a mutable tag like :stable", - version) - } -} - -func TestAllInitrdVersionsHaveExpectedHash(t *testing.T) { - // Ensure every initrd version in InitrdBusyboxVersions has a corresponding hash - for version := range InitrdBusyboxVersions { - _, ok := expectedInitrdHashes[version] - require.True(t, ok, "Initrd version %s is missing from expectedInitrdHashes map in versions_test.go", version) - } -} - diff --git a/openapi.yaml b/openapi.yaml index 60bb4b87..1d6965eb 100644 --- a/openapi.yaml +++ b/openapi.yaml @@ -330,6 +330,21 @@ components: description: Mount as read-only default: false + ExecRequest: + type: object + required: [command] + properties: + command: + type: array + items: + type: string + description: Command and arguments to execute + example: ["/bin/sh"] + tty: + type: boolean + description: Allocate a pseudo-TTY + default: true + Health: type: object required: [status] @@ -729,6 +744,55 @@ paths: schema: $ref: "#/components/schemas/Error" + /instances/{id}/exec: + post: + summary: Execute command in instance via vsock (WebSocket) + operationId: 
execInstance + security: + - bearerAuth: [] + parameters: + - name: id + in: path + required: true + schema: + type: string + - name: command + in: query + required: false + schema: + type: array + items: + type: string + description: Command to execute (defaults to /bin/sh) + - name: tty + in: query + required: false + schema: + type: boolean + default: true + description: Allocate a pseudo-TTY + responses: + 101: + description: Switching to WebSocket protocol + 404: + description: Instance not found + content: + application/json: + schema: + $ref: "#/components/schemas/Error" + 409: + description: Instance not running + content: + application/json: + schema: + $ref: "#/components/schemas/Error" + 500: + description: Internal server error + content: + application/json: + schema: + $ref: "#/components/schemas/Error" + /instances/{id}/volumes/{volumeId}: post: summary: Attach volume to instance