From ac6bd97e77d792676788200426fcfde72af2dbf7 Mon Sep 17 00:00:00 2001 From: Steven Miller Date: Wed, 10 Dec 2025 13:54:42 -0500 Subject: [PATCH 01/14] Operational logs over API: hypeman.log, vmm.log --- cmd/api/api/exec.go | 6 +- cmd/api/api/exec_test.go | 2 +- cmd/api/api/instances.go | 20 +++- cmd/api/main.go | 3 +- cmd/api/wire_gen.go | 4 +- lib/instances/create.go | 2 +- lib/instances/logs.go | 35 +++++- lib/instances/manager.go | 23 ++-- lib/instances/manager_test.go | 14 +-- lib/instances/query.go | 8 +- lib/instances/storage.go | 6 +- lib/logger/README.md | 15 +++ lib/logger/instance_handler.go | 164 ++++++++++++++++++++++++++ lib/network/allocate.go | 6 +- lib/network/derive.go | 3 +- lib/oapi/oapi.go | 205 ++++++++++++++++++++------------- lib/paths/paths.go | 16 ++- lib/providers/providers.go | 16 ++- lib/vmm/client.go | 34 +++--- openapi.yaml | 19 ++- 20 files changed, 454 insertions(+), 147 deletions(-) create mode 100644 lib/logger/instance_handler.go diff --git a/cmd/api/api/exec.go b/cmd/api/api/exec.go index 39e823c5..e234f4cf 100644 --- a/cmd/api/api/exec.go +++ b/cmd/api/api/exec.go @@ -105,7 +105,7 @@ func (s *ApiService) ExecHandler(w http.ResponseWriter, r *http.Request) { // Audit log: exec session started log.InfoContext(ctx, "exec session started", - "instance_id", instanceID, + "id", instanceID, "subject", subject, "command", execReq.Command, "tty", execReq.TTY, @@ -133,7 +133,7 @@ func (s *ApiService) ExecHandler(w http.ResponseWriter, r *http.Request) { if err != nil { log.ErrorContext(ctx, "exec failed", "error", err, - "instance_id", instanceID, + "id", instanceID, "subject", subject, "duration_ms", duration.Milliseconds(), ) @@ -148,7 +148,7 @@ func (s *ApiService) ExecHandler(w http.ResponseWriter, r *http.Request) { // Audit log: exec session ended log.InfoContext(ctx, "exec session ended", - "instance_id", instanceID, + "id", instanceID, "subject", subject, "exit_code", exit.Code, "duration_ms", duration.Milliseconds(), diff --git a/cmd/api/api/exec_test.go b/cmd/api/api/exec_test.go index 8e5992f2..129e2a92 100644 --- a/cmd/api/api/exec_test.go +++ b/cmd/api/api/exec_test.go @@ -91,7 +91,7 @@ func TestExecInstanceNonTTY(t *testing.T) { // Capture console log on failure with exec-agent filtering t.Cleanup(func() { if t.Failed() { - consolePath := paths.New(svc.Config.DataDir).InstanceConsoleLog(inst.Id) + consolePath := paths.New(svc.Config.DataDir).InstanceAppLog(inst.Id) if consoleData, err := os.ReadFile(consolePath); err == nil { lines := strings.Split(string(consoleData), "\n") diff --git a/cmd/api/api/instances.go b/cmd/api/api/instances.go index 8a599db3..602b622e 100644 --- a/cmd/api/api/instances.go +++ b/cmd/api/api/instances.go @@ -466,6 +466,19 @@ func (s *ApiService) GetInstanceLogs(ctx context.Context, request oapi.GetInstan follow = *request.Params.Follow } + // Map source parameter to LogSource type (default to app) + source := instances.LogSourceApp + if request.Params.Source != nil { + switch *request.Params.Source { + case oapi.App: + source = instances.LogSourceApp + case oapi.Vmm: + source = instances.LogSourceVMM + case oapi.Hypeman: + source = instances.LogSourceHypeman + } + } + // Resolve to get the actual instance ID resolved, err := s.InstanceManager.GetInstance(ctx, request.Id) if err != nil { @@ -488,7 +501,7 @@ func (s *ApiService) GetInstanceLogs(ctx context.Context, request oapi.GetInstan } } - logChan, err := s.InstanceManager.StreamInstanceLogs(ctx, resolved.Id, tail, follow) + logChan, err := 
s.InstanceManager.StreamInstanceLogs(ctx, resolved.Id, tail, follow, source) if err != nil { switch { case errors.Is(err, instances.ErrTailNotFound): @@ -496,6 +509,11 @@ func (s *ApiService) GetInstanceLogs(ctx context.Context, request oapi.GetInstan Code: "dependency_missing", Message: "tail command not found on server - required for log streaming", }, nil + case errors.Is(err, instances.ErrLogNotFound): + return oapi.GetInstanceLogs404JSONResponse{ + Code: "log_not_found", + Message: "requested log file does not exist yet", + }, nil default: return oapi.GetInstanceLogs500JSONResponse{ Code: "internal_error", diff --git a/cmd/api/main.go b/cmd/api/main.go index 13d3ca84..fc269678 100644 --- a/cmd/api/main.go +++ b/cmd/api/main.go @@ -226,7 +226,8 @@ func run() error { } // Inject logger into request context for handlers to use - r.Use(mw.InjectLogger(accessLogger)) + // Use app logger (not accessLogger) so the instance log handler is included + r.Use(mw.InjectLogger(logger)) // Access logger AFTER otelchi so trace context is available r.Use(mw.AccessLogger(accessLogger)) diff --git a/cmd/api/wire_gen.go b/cmd/api/wire_gen.go index 79bfc61f..5a94276c 100644 --- a/cmd/api/wire_gen.go +++ b/cmd/api/wire_gen.go @@ -29,10 +29,10 @@ import ( // initializeApp is the injector function func initializeApp() (*application, func(), error) { - logger := providers.ProvideLogger() - context := providers.ProvideContext(logger) config := providers.ProvideConfig() paths := providers.ProvidePaths(config) + logger := providers.ProvideLogger(paths) + context := providers.ProvideContext(logger) manager, err := providers.ProvideImageManager(paths, config) if err != nil { return nil, nil, err diff --git a/lib/instances/create.go b/lib/instances/create.go index b82e421d..546e163c 100644 --- a/lib/instances/create.go +++ b/lib/instances/create.go @@ -619,7 +619,7 @@ func (m *manager) buildVMConfig(inst *Instance, imageInfo *images.Image, netConf // Serial console configuration serial := vmm.ConsoleConfig{ Mode: vmm.ConsoleConfigMode("File"), - File: ptr(m.paths.InstanceConsoleLog(inst.Id)), + File: ptr(m.paths.InstanceAppLog(inst.Id)), } // Console off (we use serial) diff --git a/lib/instances/logs.go b/lib/instances/logs.go index 1b89acf1..7431b219 100644 --- a/lib/instances/logs.go +++ b/lib/instances/logs.go @@ -12,14 +12,29 @@ import ( "github.com/onkernel/hypeman/lib/logger" ) +// LogSource represents a log source type +type LogSource string + +const ( + // LogSourceApp is the guest application log (serial console) + LogSourceApp LogSource = "app" + // LogSourceVMM is the Cloud Hypervisor VMM log + LogSourceVMM LogSource = "vmm" + // LogSourceHypeman is the hypeman operations log + LogSourceHypeman LogSource = "hypeman" +) + // ErrTailNotFound is returned when the tail command is not available var ErrTailNotFound = fmt.Errorf("tail command not found: required for log streaming") -// StreamInstanceLogs streams instance console logs +// ErrLogNotFound is returned when the requested log file doesn't exist +var ErrLogNotFound = fmt.Errorf("log file not found") + +// streamInstanceLogs streams instance logs from the specified source // Returns last N lines, then continues following if follow=true -func (m *manager) streamInstanceLogs(ctx context.Context, id string, tail int, follow bool) (<-chan string, error) { +func (m *manager) streamInstanceLogs(ctx context.Context, id string, tail int, follow bool, source LogSource) (<-chan string, error) { log := logger.FromContext(ctx) - log.DebugContext(ctx, "starting 
log stream", "id", id, "tail", tail, "follow", follow) + log.DebugContext(ctx, "starting log stream", "id", id, "tail", tail, "follow", follow, "source", source) // Verify tail command is available if _, err := exec.LookPath("tail"); err != nil { @@ -30,7 +45,19 @@ func (m *manager) streamInstanceLogs(ctx context.Context, id string, tail int, f return nil, err } - logPath := m.paths.InstanceConsoleLog(id) + // Determine log path based on source + var logPath string + switch source { + case LogSourceApp: + logPath = m.paths.InstanceAppLog(id) + case LogSourceVMM: + logPath = m.paths.InstanceVMMLog(id) + case LogSourceHypeman: + logPath = m.paths.InstanceHypemanLog(id) + default: + // Default to app log for backwards compatibility + logPath = m.paths.InstanceAppLog(id) + } // Build tail command args := []string{"-n", strconv.Itoa(tail)} diff --git a/lib/instances/manager.go b/lib/instances/manager.go index efbe2d85..bce2d04e 100644 --- a/lib/instances/manager.go +++ b/lib/instances/manager.go @@ -26,7 +26,7 @@ type Manager interface { RestoreInstance(ctx context.Context, id string) (*Instance, error) StopInstance(ctx context.Context, id string) (*Instance, error) StartInstance(ctx context.Context, id string) (*Instance, error) - StreamInstanceLogs(ctx context.Context, id string, tail int, follow bool) (<-chan string, error) + StreamInstanceLogs(ctx context.Context, id string, tail int, follow bool, source LogSource) (<-chan string, error) RotateLogs(ctx context.Context, maxBytes int64, maxFiles int) error AttachVolume(ctx context.Context, id string, volumeId string, req AttachVolumeRequest) (*Instance, error) DetachVolume(ctx context.Context, id string, volumeId string) (*Instance, error) @@ -197,15 +197,15 @@ func (m *manager) GetInstance(ctx context.Context, idOrName string) (*Instance, return nil, ErrNotFound } -// StreamInstanceLogs streams instance console logs +// StreamInstanceLogs streams instance logs from the specified source // Returns last N lines, then continues following if follow=true -func (m *manager) StreamInstanceLogs(ctx context.Context, id string, tail int, follow bool) (<-chan string, error) { +func (m *manager) StreamInstanceLogs(ctx context.Context, id string, tail int, follow bool, source LogSource) (<-chan string, error) { // Note: No lock held during streaming - we read from the file continuously // and the file is append-only, so this is safe - return m.streamInstanceLogs(ctx, id, tail, follow) + return m.streamInstanceLogs(ctx, id, tail, follow, source) } -// RotateLogs rotates console logs for all instances that exceed maxBytes +// RotateLogs rotates all instance logs (app, vmm, hypeman) that exceed maxBytes func (m *manager) RotateLogs(ctx context.Context, maxBytes int64, maxFiles int) error { instances, err := m.listInstances(ctx) if err != nil { @@ -214,9 +214,16 @@ func (m *manager) RotateLogs(ctx context.Context, maxBytes int64, maxFiles int) var lastErr error for _, inst := range instances { - logPath := m.paths.InstanceConsoleLog(inst.Id) - if err := rotateLogIfNeeded(logPath, maxBytes, maxFiles); err != nil { - lastErr = err // Continue with other instances, but track error + // Rotate all three log types + logPaths := []string{ + m.paths.InstanceAppLog(inst.Id), + m.paths.InstanceVMMLog(inst.Id), + m.paths.InstanceHypemanLog(inst.Id), + } + for _, logPath := range logPaths { + if err := rotateLogIfNeeded(logPath, maxBytes, maxFiles); err != nil { + lastErr = err // Continue with other logs, but track error + } } } return lastErr diff --git 
a/lib/instances/manager_test.go b/lib/instances/manager_test.go index 7e0b2dcf..d4633bfe 100644 --- a/lib/instances/manager_test.go +++ b/lib/instances/manager_test.go @@ -692,7 +692,7 @@ func TestBasicEndToEnd(t *testing.T) { }() // Append marker to console log file - consoleLogPath := p.InstanceConsoleLog(inst.Id) + consoleLogPath := p.InstanceAppLog(inst.Id) f, err := os.OpenFile(consoleLogPath, os.O_APPEND|os.O_WRONLY, 0644) require.NoError(t, err) _, err = fmt.Fprintln(f, marker) @@ -899,12 +899,12 @@ func TestStandbyAndRestore(t *testing.T) { t.Logf(" - %s (size: %d bytes)", entry.Name(), info.Size()) } - // DEBUG: Check console.log file size before restore - consoleLogPath := filepath.Join(tmpDir, "guests", inst.Id, "logs", "console.log") + // DEBUG: Check app.log file size before restore + consoleLogPath := filepath.Join(tmpDir, "guests", inst.Id, "logs", "app.log") var consoleLogSizeBefore int64 if info, err := os.Stat(consoleLogPath); err == nil { consoleLogSizeBefore = info.Size() - t.Logf("DEBUG: console.log size before restore: %d bytes", consoleLogSizeBefore) + t.Logf("DEBUG: app.log size before restore: %d bytes", consoleLogSizeBefore) } // Restore instance @@ -914,13 +914,13 @@ func TestStandbyAndRestore(t *testing.T) { assert.Equal(t, StateRunning, inst.State) t.Log("Instance restored and running") - // DEBUG: Check console.log file size after restore + // DEBUG: Check app.log file size after restore if info, err := os.Stat(consoleLogPath); err == nil { consoleLogSizeAfter := info.Size() - t.Logf("DEBUG: console.log size after restore: %d bytes", consoleLogSizeAfter) + t.Logf("DEBUG: app.log size after restore: %d bytes", consoleLogSizeAfter) t.Logf("DEBUG: File size diff: %d bytes", consoleLogSizeAfter-consoleLogSizeBefore) if consoleLogSizeAfter < consoleLogSizeBefore { - t.Logf("DEBUG: WARNING! console.log was TRUNCATED (lost %d bytes)", consoleLogSizeBefore-consoleLogSizeAfter) + t.Logf("DEBUG: WARNING! 
app.log was TRUNCATED (lost %d bytes)", consoleLogSizeBefore-consoleLogSizeAfter) } } diff --git a/lib/instances/query.go b/lib/instances/query.go index 154895b9..28acc32c 100644 --- a/lib/instances/query.go +++ b/lib/instances/query.go @@ -36,7 +36,7 @@ func (m *manager) deriveState(ctx context.Context, stored *StoredMetadata) state // Failed to create client - this is unexpected if socket exists errMsg := fmt.Sprintf("failed to create VMM client: %v", err) log.WarnContext(ctx, "failed to determine instance state", - "instance_id", stored.Id, + "id", stored.Id, "socket", stored.SocketPath, "error", err, ) @@ -48,7 +48,7 @@ func (m *manager) deriveState(ctx context.Context, stored *StoredMetadata) state // Socket exists but VMM is unreachable - this is unexpected errMsg := fmt.Sprintf("failed to query VMM: %v", err) log.WarnContext(ctx, "failed to query VMM state", - "instance_id", stored.Id, + "id", stored.Id, "socket", stored.SocketPath, "error", err, ) @@ -60,7 +60,7 @@ func (m *manager) deriveState(ctx context.Context, stored *StoredMetadata) state body := string(resp.Body) errMsg := fmt.Sprintf("VMM returned error (status %d): %s", resp.StatusCode(), body) log.WarnContext(ctx, "VMM returned error response", - "instance_id", stored.Id, + "id", stored.Id, "socket", stored.SocketPath, "status_code", resp.StatusCode(), "body", body, @@ -82,7 +82,7 @@ func (m *manager) deriveState(ctx context.Context, stored *StoredMetadata) state // Unknown CH state - log and return Unknown errMsg := fmt.Sprintf("unexpected VMM state: %s", resp.JSON200.State) log.WarnContext(ctx, "VMM returned unexpected state", - "instance_id", stored.Id, + "id", stored.Id, "vmm_state", resp.JSON200.State, ) return stateResult{State: StateUnknown, Error: &errMsg} diff --git a/lib/instances/storage.go b/lib/instances/storage.go index 35efee99..1332137a 100644 --- a/lib/instances/storage.go +++ b/lib/instances/storage.go @@ -15,9 +15,10 @@ import ( // overlay.raw # Configurable sparse overlay disk (default 10GB) // config.ext4 # Read-only config disk (generated) // ch.sock # Cloud Hypervisor API socket -// ch-stdout.log # CH process output // logs/ -// console.log # Serial console output +// app.log # Guest application log (serial console output) +// vmm.log # Cloud Hypervisor VMM log (stdout+stderr combined) +// hypeman.log # Hypeman operations log (actions taken on this instance) // snapshots/ // snapshot-latest/ # Snapshot directory // config.json @@ -141,4 +142,3 @@ func (m *manager) listMetadataFiles() ([]string, error) { return metaFiles, nil } - diff --git a/lib/logger/README.md b/lib/logger/README.md index 4996bfeb..9ac813d1 100644 --- a/lib/logger/README.md +++ b/lib/logger/README.md @@ -8,6 +8,7 @@ Structured logging with per-subsystem log levels and OpenTelemetry trace context - Automatic trace_id/span_id injection when OTel is active - Context-based logger propagation - JSON output format +- Per-instance log files via `InstanceLogHandler` ## Configuration @@ -38,6 +39,20 @@ log = logger.FromContext(ctx) log.InfoContext(ctx, "instance created", "id", instanceID) ``` +## Per-Instance Logging + +The `InstanceLogHandler` automatically writes logs with an `"id"` attribute to per-instance `hypeman.log` files. This provides an operations audit trail for each VM. 
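+Records without an `id` attribute pass through to the wrapped handler only; matching records are additionally written to that instance's file.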
+ +```go +// Wrap any handler with instance logging +handler := logger.NewInstanceLogHandler(baseHandler, func(id string) string { + return paths.InstanceHypemanLog(id) +}) + +// Logs with "id" attribute are automatically written to that instance's hypeman.log +log.InfoContext(ctx, "starting VM", "id", instanceID) +``` + ## Output When OTel tracing is active, logs include trace context: diff --git a/lib/logger/instance_handler.go b/lib/logger/instance_handler.go new file mode 100644 index 00000000..f599b60f --- /dev/null +++ b/lib/logger/instance_handler.go @@ -0,0 +1,164 @@ +// Package logger provides structured logging with subsystem-specific levels +// and OpenTelemetry trace context integration. +package logger + +import ( + "context" + "fmt" + "log/slog" + "os" + "path/filepath" + "sync" + "time" +) + +// InstanceLogHandler wraps an slog.Handler and additionally writes logs +// that have an "id" attribute to a per-instance hypeman.log file. +// This provides automatic per-instance logging without manual instrumentation. +type InstanceLogHandler struct { + slog.Handler + logPathFunc func(id string) string // returns path to hypeman.log for an instance + mu sync.Mutex + fileCache map[string]*os.File +} + +// NewInstanceLogHandler creates a new handler that wraps the given handler +// and writes instance-related logs to per-instance log files. +// logPathFunc should return the path to hypeman.log for a given instance ID. +func NewInstanceLogHandler(wrapped slog.Handler, logPathFunc func(id string) string) *InstanceLogHandler { + return &InstanceLogHandler{ + Handler: wrapped, + logPathFunc: logPathFunc, + fileCache: make(map[string]*os.File), + } +} + +// Handle processes a log record, passing it to the wrapped handler and +// optionally writing to a per-instance log file if "id" attribute is present. +func (h *InstanceLogHandler) Handle(ctx context.Context, r slog.Record) error { + // Always pass to wrapped handler first + if err := h.Handler.Handle(ctx, r); err != nil { + return err + } + + // Check for instance ID in attributes + var instanceID string + r.Attrs(func(a slog.Attr) bool { + if a.Key == "id" { + instanceID = a.Value.String() + return false // stop iteration + } + return true + }) + + // If instance ID found, also write to per-instance log + if instanceID != "" { + h.writeToInstanceLog(instanceID, r) + } + + return nil +} + +// writeToInstanceLog writes a log record to the instance's hypeman.log file. +func (h *InstanceLogHandler) writeToInstanceLog(instanceID string, r slog.Record) { + logPath := h.logPathFunc(instanceID) + if logPath == "" { + return + } + + // Get or create file handle + h.mu.Lock() + f, ok := h.fileCache[instanceID] + if !ok { + // Ensure directory exists + dir := filepath.Dir(logPath) + if err := os.MkdirAll(dir, 0755); err != nil { + h.mu.Unlock() + return // silently skip if can't create directory + } + + var err error + f, err = os.OpenFile(logPath, os.O_CREATE|os.O_WRONLY|os.O_APPEND, 0644) + if err != nil { + h.mu.Unlock() + return // silently skip if can't open file + } + h.fileCache[instanceID] = f + } + h.mu.Unlock() + + // Format log line: timestamp LEVEL message key=value key=value... 
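+	// e.g. "2025-12-10T14:46:14-05:00 INFO starting VM" (values illustrative; the "id" attr itself is excluded below)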
+ timestamp := r.Time.Format(time.RFC3339) + level := r.Level.String() + msg := r.Message + + // Collect attributes (excluding "id" since it's implicit) + var attrs []string + r.Attrs(func(a slog.Attr) bool { + if a.Key != "id" { + attrs = append(attrs, fmt.Sprintf("%s=%v", a.Key, a.Value)) + } + return true + }) + + // Build log line + line := fmt.Sprintf("%s %s %s", timestamp, level, msg) + for _, attr := range attrs { + line += " " + attr + } + line += "\n" + + // Write to file (best effort, don't block on errors) + h.mu.Lock() + f.WriteString(line) + h.mu.Unlock() +} + +// Enabled reports whether the handler handles records at the given level. +func (h *InstanceLogHandler) Enabled(ctx context.Context, level slog.Level) bool { + return h.Handler.Enabled(ctx, level) +} + +// WithAttrs returns a new handler with the given attributes. +func (h *InstanceLogHandler) WithAttrs(attrs []slog.Attr) slog.Handler { + return &InstanceLogHandler{ + Handler: h.Handler.WithAttrs(attrs), + logPathFunc: h.logPathFunc, + fileCache: h.fileCache, + mu: sync.Mutex{}, + } +} + +// WithGroup returns a new handler with the given group name. +func (h *InstanceLogHandler) WithGroup(name string) slog.Handler { + return &InstanceLogHandler{ + Handler: h.Handler.WithGroup(name), + logPathFunc: h.logPathFunc, + fileCache: h.fileCache, + mu: sync.Mutex{}, + } +} + +// CloseInstanceLog closes and removes a cached file handle for an instance. +// Call this when an instance is deleted. +func (h *InstanceLogHandler) CloseInstanceLog(instanceID string) { + h.mu.Lock() + defer h.mu.Unlock() + + if f, ok := h.fileCache[instanceID]; ok { + f.Close() + delete(h.fileCache, instanceID) + } +} + +// CloseAll closes all cached file handles. +// Call this during shutdown. +func (h *InstanceLogHandler) CloseAll() { + h.mu.Lock() + defer h.mu.Unlock() + + for id, f := range h.fileCache { + f.Close() + delete(h.fileCache, id) + } +} diff --git a/lib/network/allocate.go b/lib/network/allocate.go index b3f35900..ddd95f6b 100644 --- a/lib/network/allocate.go +++ b/lib/network/allocate.go @@ -62,7 +62,7 @@ func (m *manager) CreateAllocation(ctx context.Context, req AllocateRequest) (*N m.recordTAPOperation(ctx, "create") log.InfoContext(ctx, "allocated network", - "instance_id", req.InstanceID, + "id", req.InstanceID, "instance_name", req.InstanceName, "network", "default", "ip", ip, @@ -115,7 +115,7 @@ func (m *manager) RecreateAllocation(ctx context.Context, instanceID string) err m.recordTAPOperation(ctx, "create") log.InfoContext(ctx, "recreated network for restore", - "instance_id", instanceID, + "id", instanceID, "network", "default", "tap", alloc.TAPDevice) @@ -145,7 +145,7 @@ func (m *manager) ReleaseAllocation(ctx context.Context, alloc *Allocation) erro } log.InfoContext(ctx, "released network", - "instance_id", alloc.InstanceID, + "id", alloc.InstanceID, "network", alloc.Network, "ip", alloc.IP) diff --git a/lib/network/derive.go b/lib/network/derive.go index 7d40900b..19af42a5 100644 --- a/lib/network/derive.go +++ b/lib/network/derive.go @@ -25,7 +25,7 @@ func (m *manager) deriveAllocation(ctx context.Context, instanceID string) (*All // 1. 
Load instance metadata to get instance name and network status meta, err := m.loadInstanceMetadata(instanceID) if err != nil { - log.DebugContext(ctx, "failed to load instance metadata", "instance_id", instanceID, "error", err) + log.DebugContext(ctx, "failed to load instance metadata", "id", instanceID, "error", err) return nil, err } @@ -183,4 +183,3 @@ func fileExists(path string) bool { _, err := os.Stat(path) return err == nil } - diff --git a/lib/oapi/oapi.go b/lib/oapi/oapi.go index 690aa379..4709a156 100644 --- a/lib/oapi/oapi.go +++ b/lib/oapi/oapi.go @@ -54,6 +54,13 @@ const ( Unknown InstanceState = "Unknown" ) +// Defines values for GetInstanceLogsParamsSource. +const ( + App GetInstanceLogsParamsSource = "app" + Hypeman GetInstanceLogsParamsSource = "hypeman" + Vmm GetInstanceLogsParamsSource = "vmm" +) + // AttachVolumeRequest defines model for AttachVolumeRequest. type AttachVolumeRequest struct { // MountPath Path where volume should be mounted @@ -382,8 +389,17 @@ type GetInstanceLogsParams struct { // Follow Continue streaming new lines after initial output Follow *bool `form:"follow,omitempty" json:"follow,omitempty"` + + // Source Log source to stream: + // - app: Guest application logs (serial console output) + // - vmm: Cloud Hypervisor VMM logs (hypervisor stdout+stderr) + // - hypeman: Hypeman operations log (actions taken on this instance) + Source *GetInstanceLogsParamsSource `form:"source,omitempty" json:"source,omitempty"` } +// GetInstanceLogsParamsSource defines parameters for GetInstanceLogs. +type GetInstanceLogsParamsSource string + // CreateVolumeMultipartBody defines parameters for CreateVolume. type CreateVolumeMultipartBody struct { // Content tar.gz archive file containing the volume content @@ -1414,6 +1430,22 @@ func NewGetInstanceLogsRequest(server string, id string, params *GetInstanceLogs } + if params.Source != nil { + + if queryFrag, err := runtime.StyleParamWithLocation("form", true, "source", runtime.ParamLocationQuery, *params.Source); err != nil { + return nil, err + } else if parsed, err := url.ParseQuery(queryFrag); err != nil { + return nil, err + } else { + for k, v := range parsed { + for _, v2 := range v { + queryValues.Add(k, v2) + } + } + } + + } + queryURL.RawQuery = queryValues.Encode() } @@ -4365,6 +4397,14 @@ func (siw *ServerInterfaceWrapper) GetInstanceLogs(w http.ResponseWriter, r *htt return } + // ------------- Optional query parameter "source" ------------- + + err = runtime.BindQueryParameter("form", true, false, "source", r.URL.Query(), ¶ms.Source) + if err != nil { + siw.ErrorHandlerFunc(w, r, &InvalidParamFormatError{ParamName: "source", Err: err}) + return + } + handler := http.Handler(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { siw.Handler.GetInstanceLogs(w, r, id, params) })) @@ -6597,87 +6637,90 @@ func (sh *strictHandler) GetVolume(w http.ResponseWriter, r *http.Request, id st // Base64 encoded, gzipped, json marshaled Swagger object var swaggerSpec = []string{ - "H4sIAAAAAAAC/+xdC2/buJb+Kwfae7HOQn4kaXtbXywWmaTtZNC0QdJm9m7TzdDSsc0pRaok5cQN8t8X", - "fEiWLPmRaeI22wAF6lh8nffHw0P5OohEkgqOXKugfx2oaIwJsR/3tCbR+EywLMET/JKh0ubrVIoUpaZo", - "GyUi4/oiJXps/opRRZKmmgoe9INjosdwOUaJMLGjgBqLjMUwQLD9MA7CAK9IkjIM+kE34bobE02CMNDT", - "1HyltKR8FNyEgUQSC86mbpohyZgO+kPCFIZz0x6ZoYEoMF3atk8x3kAIhoQHN3bELxmVGAf9j2UyPhWN", - "xeBPjLSZfF8i0XiYkNFiTnCSYJ0H7/YPgZp+IHGIEnmE0MLOqBNCLKLPKDtUdBkdSCKnXT6i/KrPiEal", - "tyqsWd62zq858uzalhDGRxKVuiVpv2YJ4W3DZDJgCKYRtJi4RBkRhcBQa5QqhJiOqFYhEB5DTNQYFRih", - 
"/BMiwrnQoDSRGoQE5DFcUj0GYttVOZBM2ySlbeqWGoRBQq7eIB8ZxXu2GwYpMdOZdf3vR9L+2mu/+NTy", - "H9qf/iP/auu//taoXBlzlFYpPBGZpnwE9jEMhQQ9pgpma6AaE9vvbxKHQT/4t+7MmrrelLo5dzOGZq6E", - "8kPXbbtYCZGSTJulli9umfSUJjxarJnIJ+Y/EsfUEEbYceVxjRtVJrzkEyoFT5BrmBBJjbBVWTTXwdt3", - "By8vXr49C/pm5jiLbNcwOH538j7oB7u9Xs+MW1v/WOiUZaMLRb9ixa6D3de/BPML2SvWDwkmQk6tRPwY", - "0BpX1XEoZEI0MPoZ4dyMdx6EcB5svz4Pqoq1Y6eqMcEa7Vr2vMJQCUspx4WWGv4o1nUp5GcmSNzevmPj", - "4qjN2HUS37oHEAk+pKNMEvO9NzME6tU6CGvqbDgSVxRGy6wWB34fox6jBC2A2FBWDGm+MlP47pCvsMQR", - "N2BD1KgpsZigZGTaoMTbvQYt/l1SbSXq+0FM1WcwnVeosBnN6fDTXl2Je81a3LCohjX9YjTK29Q6KykW", - "sr1z5D/urGtXkyjNVGVJO/PLeZslA5QghjChUmeEwf7xh4rL2SkGplzjCKUd2WKMBjfuIIwqKYKXf6EP", - "RENkfKnRP02t113LtbuRLeAoObil3tz5lcXefAXeonGDT0q9W4wypUUCNEau6ZCihBbJtGiPkKMkGmOg", - "QzBOIZViQmOMqxKbCNY28Mt6gDXdlFsueOIqDsUO5YSySDUvRoP6kKdGAymHER2RwVRXg812ry76Zkbn", - "4zex+qWUQtaZG4m4gcS9NGU0ssrRVilGdEgjQDMCmA7QSkg0phwLc6lydUDiC+nFGTYFW00oa9DaUrhz", - "k/mW0DIeMsmYpilD90xtrauxlvIDO1JdY8OAco7yAnP23GKkBJVqjJhzgSynpWhiHX6Mg2w0Miwps+6I", - "KmXxl5cuDCmyuO8C8ErQa6U5W9hCPfA0rKkNb0wIbjOcICsrgbMos9hESIRCT5zQKlRRPiGMxheUp1mj", - "Sixk5atM2ojmBgUyEJm2jswJrDyJ3atYWx+KjMeNzKqx41ckzG3kqpxQmujMx94sMbwVnw0/Z9OJzyvF", - "4QdpEsNhjrXmBJA0OLv9owMYSpEY1KAJ5SghQU38trFY0cfAbpCCMGgbnYoJJoKDGA7/aVZQmErdy2WM", - "GT2dQwCFgdgwgfEF0Q1LK4cQpUmSQuvk1f7u7u6L+Wi987Td225vP32/3ev3zL//CcLARVkDIonGto9D", - "dYdBRz4yzG1WUAk2wRgSwukQlQbfsjyzGpOdp8/6ZBBt7+zGOHzy9Fmn02maBrmW01RQ3jDVy+LZeqLo", - "OlTcno3ZUeNvk8M97GnWoeU6ON57/2vQD7qZkl0mIsK6akB5v/R38efsgf3g/hxQ3rgXKnzu3Eqti/Ee", - "wYRvZ0ZAFQwJZXMZlDRjzH/fN5RwjAqFFNbZLODrqjD/1qgmo18xhsaMhiYjs8dwGvdtqYsw+JJhhhep", - "UNTNXssr+ScGJAwyymKwPaBliMshjv2qCnB2FpJfQpEWNjjYUZv4oIDqZmbTxs+ZcU2ZzTdNKzM+3X32", - "/B+9F9s7JeOmXD97Eqy1lMLtzsF1S7N/GhY+OUUeuwhq1MB9igSfGKuwf9j1GT/jFKfiwPNnNWGYjRHl", - "o4uYNmjn7+4hxFRipO2WfLUNBV2SpqtVsRnVFT6tIL/kkRtji0/W1KPLd/fkTVh+r4rXM06/ZFhC9NXZ", - "341++/Lf6vgff25/eXN29q/J698O3tJ/nbHjd9+UaFieePuu2bOlWyxqvGEla7auehwRHTUAn7FQegHX", - "/BOzlUxM5w7sEw4D7J/zNryhGiVhfTgPSEo7npmdSCTnAbTwikTa9QLBwQwFYyQxyi3T+dglW0zn63yH", - "ejM/RjzlJKERSM/kAVEYm9FUNohFQijfOufn3I8FOSEKMuVkHENEUp1JNBKBKJNsCgNJIuPTXH56NnkI", - "1yRNb7bOuR4TDXilpaEgJVIrs003EDSfwQrar6oD7w04dc0xhglhGSqILKPOeRE/YrMEM4gmcoS6U+zL", - "Ld7vnPOKQjYzpUkxUyF1JdXwvBc2yBFMOyNIRpVGDkUSiiqrvNDKE0XPexXzf957vno7WujQEvWz2l0/", - "W8mVcg37cApsp3bO+GKsdbr6sMT6G2cj8Ov798eGDeb/U8gHmvGiEHFLcDYFYvbFqOByjBw0s5jE5+a2", - "Gk5cwsBJd02C3rvGphtTq+l4aSeG929OQaNMKHf+uxUZdg7N9h3B5kKoUplRRUpgb//o5VZnjcMhy9ti", - "/Uvk+L6gcC5rk+cya37E9Zhlogx/Qzg8CA2c8hY6A1od4x9eCQnMOZiZXffhg8JqUsuKSkg4PPDxmU1z", - "0z73Xv082MpHTOc9RR9OCnxHiqXY+F6x92LImV3aYc/570YxMruDr40eVtdqLC3fv3jXRpjRMQ0+d2JD", - "8WJXsNz8GzhubV7w+VTz7Wy7nKM2kzWrxkz2945Adm+HQO7nZKh+zkPUheIkVWPRQGqepyeQtwG8okpX", - "MENdQN4N1N1M/VSp6vDdedGSdPd650N/AbxBa//D4cGOT6dXp9Ffn5AXz6+uiH7xjF6qF1+TgRz9uUse", - "yNnU0tOkbz0S8hBjvROhJtUq+xl3BIDxXz4ECgOaNsheKTriGMPhMZA4NtGgvCHNh68KffvFTmf72fPO", - "dq/X2e6tsz1PSLRk7qO9/fUn7+24DUufDPpR3MfhN6QHvNhcQCDskkwVnOch+zxwGKEEDkpK6cP6WgnK", - "+lnbXztam5PCysOz2xyWreU97KnsAtd/ak9sb+/3ny70+yulajbTuBqYOSM6tY3zXhe3SVwhRCJjMf93", - "DQNjeQ6qYewRpULtNMW1pQo+8M9cXPIq6S5/Yez3S4ZyCmdHR5Vsl8RhpmxmbA3CRZoulINIbyWGnRXh", - "d+VqSmejmzgPnfeEJf9656ef5a15fnzjtG6NLXpZ72rU5I+d0tjttztJjftGM8CPDoNMQ1EQYVRun4ks", - "hl+nKcoJVUKC2TNM0CLik4xzykdmBBszIvOETUG675d3PiZG/fK+qf1reY/TcaZjccltHzXONJi/7JIN", - "CR4OLR/CaXIf3grbx680NO5/Dle55oTHg2m9+TwGa7nduoHnWkiM7WTeLPvwqjDFwpi98bYU+o/OQ/hj", - "PnuEueVgvM9bemkFYeC5HoSBY2EQBjlnzEdHof1kFx+EgV9ISW9m1uTUsw67ndonecXj3PEeVdpYWpRJ", - 
"aXBtqTG0MEn1NE8w59azdTtz2SsGbDp/vestQe/FXSQlPyzNQv4/KRsoe6h8kpW+qSbThVv/iybGHh7M", - "Y1u33/HlslW0OneIrHTbHeM1HiEvKct19bHmWZ52G2Xz54S3KMVtLLkaY8ly3O5+Vou7agu3YJN9YSVU", - "oqy0ksWyceHpG+uWqcoLlv8iyzwiXZ3Hcs4QUpTtQiVyOGs86KWk9tjJM8gx1rDgPw2maM67LUfNR+Sq", - "mMHiWaJgrvzL0THL9NgCsK0OnOR1GXSYD2GX0anC62YIvH5Bd65VdWEsq/DOAVCj4Xn/s8SjLbKtOeWc", - "zREuLyI3rgujTFI9PTUBwanhAIlEuZc5NbSRwhJhv55NbnO5Nze2QGco6uS8Ro6SRrB3fGi1JCGcjIzI", - "zo6A0SFG04ihT8XVQIQtCn23f9h2Zwj5zt1mcqi2DDGtE8LN+EEYTFAqN2+vs9Oxpb0iRU5SGvSD3c52", - "x+xkDRssid1xUVXic6PGDm0kO4zt2rWvOzGcVangyvFmp9dzZThce89KZpVY3T+VOxp20XVV7PUzWBbO", - "hQ3DBpcZcAt12FVlSULk1NBuv4VojNFn+6hr8ataSJCBEIeuyTdStN4BmUXTdfxdozSHNn75N2HwpLd9", - "Zxx2JXUN037gJNNjIelXjM2kT+9QrAsnPeQaJScMFMoJSl8gVTbCoP+xan4fP918KsvdsmvGq1SoBlmX", - "LoYEzjGg0r+IeHpnJDZcPbmpOiHjcW9qmrZzZyvwCtbAZJtrHOTVCG5XRdSUR1tOuzYg6F9InJ8QfDeN", - "ftJ7sgGNnivoe0CWdJwxZu8n+GqUWQlR2Z92rw34vnHBjaHb6let7cB+n1tbSiRJzK5T2RXMyejkTRt5", - "JGKDThzrfPLBPPXw0e1NioqBikWFJcbNQ4BPNWt70oDt7ayOlEc1WUNNnHRzxQgXooVvkL+DsLObd3/f", - "eeWPQv6+88odhvx9d292Ae9+lKW3Kdec15Y/Kt9K5XuNPtjPmGZdkzvXX4X2ilYbAXy+gOo2kK9Y4CPq", - "Wwf1ldm1FPgVtWz3CP2ql3PXAn93J+BC2Zq4bR/lCfafDPK9uP9J9wUfMhppaOca6fbqNkVowxlhtm43", - "z9Xb67C+LIdyyBQ+JNPzqS9aaFzZ/3avabwONiwMcik6yFX38CD0FVeuTiqVOKRXzYHfpnjuGiP6dWwc", - "Jfp5K6F6Izq9lwzoKBOZKpfF2AI7VLNLbRUH/NDw6yw8L0SwP7CW9jYZOjYOUB/1/p6g87xAnfN2Bxir", - "wHPeajPg2dc43Ao95yt8RM9roecSu5aj56Le5D7hc/XtKBvHz7m+NTHcH/D9jAj6gaFSwn2Oe1YlVfVx", - "awPUWQ3r8tjvdePwAGyhz6Lk5f3AUj/55nFpfnfhIeaQbMGgfQ9PjgRnsWYxFPzR9KG3Wd+3eQj4kFXs", - "dfmuUDPYso6oy8SoDLvmK1slkmR2rQIiwZVgCKYXEAWndoHtU+QaXk4MdZ1zfoI6k1zZ8hdGlIa3wChH", - "BS3DNikYwxgGU/jDrOoPKNR5y9714SD8a2rY9JybHpRnqEDZtVA+Ao6XfkA6hD+GgjFxactJ/nDXfhba", - "zhtD63eyn3Bxsa6jRQuQlnHu6hTad3HYeW3x8mxi/56Q2VRFUcx2r7F67bqeLrI8bWQpGWp7D4JqShiI", - "TLt3jzQtxHG+eSmLKrZWuxGNV7qLRpfabn1Vg5rnax2Mi5EnDFqnpy+3Hh3GmjHJsqywdGvhnoENbsNX", - "+dp6uUbkfuIa/PRhKy+H/s5quPk8dGkV1F7U4PFgamU7qzN/SAbiFXpGmXXTnq5GG8mfLbQRX57+09vI", - "TD9+ciuJhLQ36VV+c+rhVO6U4GbJ3Fv2UsvsskiYb3nOjo62FhmNu4y90GTk417IF9H99DHF3vN5eNbi", - "Lm6SgoBlmaKuabTMHkT6aA7+wtdj8HiQwcOmwwpqWiNJIhxmzN5vjO2l3ia78LdWu9fuw+GqpOrsNwp+", - "mBSAv1yyapqcwAdhlJ6mGN11tM3bpCju/zzQ4lb74mlPgt1jlNPDzVGg/AscP4923/1JYNMvmax1DrhR", - "28qvev4wtrXpyOfXkBe1lfnxUMzcaVpOiRZzGLD0SoaF9RD+7QwbqYbwruUWtRA5BY/HxmtUQpSYlTv4", - "ptu+CojN1LvmHTjN0lRIrUBfCkhEjMq+1eK303dvYSDiaR+KfhzcOwm8wvnL5P7t42YPRb+i6XtkK4zM", - "9mQoZFIaIO+ZSmynIs2YfVeGLTP1PHbBioAmsjP6CkRGYzrBhhOZ8u8X3GtJx7wjD4MkJ69ryLOvEKgO", - "Ov9m92ItVXlUaYQhZZi/zJbykeWt51c+ROm1CgPKiZyu+06F+R9tmBRh9SH+ZsMRuaJJlhQvR379C7T8", - "ez/tK/7tDxfQYaFTeBUhxsqWMm992+87hIU4Gy5db7TWJ/emCyP8d6zzgZb/2QEwIjYRP1dyLQQwIke4", - "9dNU03tbmxXTHx7MldI/wAqlSa59M5yxZk3SehuMNXH/fdQjFZvPzVYjnf04mLj0JpgHWBI/KWDmojKo", - "H0sFe5sLCZsufzp7wDmU15hD6lLpkx3AjNikMG9ERBjEOEEmUvsqItc2CINMMv9ilX7X/XDHWChtX/sb", - "3Hy6+b8AAAD//zUiAl4YdQAA", + "H4sIAAAAAAAC/+xdC3PTyJb+K6e0d2qdXfmRBLjgW1tbmQSYTBFIEcjsXcyGtnRs99DqFt0tJ4bKf9/q", + "h2TJkh8ZEkMuVFFFbPXrvL8+fVr+EkQiSQVHrlXQ/xKoaIIJsX8eaE2iyblgWYKv8VOGSpuvUylSlJqi", + "bZSIjOuLlOiJ+RSjiiRNNRU86AenRE/gcoISYWpHATURGYthiGD7YRyEAV6RJGUY9INuwnU3JpoEYaBn", + "qflKaUn5OLgOA4kkFpzN3DQjkjEd9EeEKQwXpj0xQwNRYLq0bZ9ivKEQDAkPru2InzIqMQ7678pkvC8a", + "i+GfGGkz+aFEovE4IePlnOAkwToPXh0eAzX9QOIIJfIIoYWdcSeEWEQfUXao6DI6lETOunxM+VWfEY1K", + "71RYs7ptnV8L5Nm1rSCMjyUqdUPSfssSwtuGyWTIEEwjaDFxiTIiCoGh1ihVCDEdU61CIDyGmKgJKjBC", + "+QdEhHOhQWkiNQgJyGO4pHoCxLarciCZtUlK29QtNQiDhFy9QD42ivdoPwxSYqYz6/q/d6T9udd+8r7l", + "/2i//4/8q53//lujcmXMUVql8LXINOVjsI9hJCToCVUwXwPVmNh+f5M4CvrBv3Xn1tT1ptTNuZsxNHMl", 
+ "lB+7brvFSoiUZNYstXxxq6SnNOHRcs1EPjX/kTimhjDCTiuPa9yoMuEpn1IpeIJcw5RIaoStyqL5Erx8", + "dfT04unL86BvZo6zyHYNg9NXr98E/WC/1+uZcWvrnwidsmx8oehnrNh1sP/812BxIQfF+iHBRMiZlYgf", + "A1qTqjqOhEyIBkY/IgzMeIMghEGw+3wQVBVrz05VY4I12o3seY2hEpZSjkstNfxerOtSyI9MkLi9e8vG", + "xVGbseskvnQPIBJ8RMeZJOZ7b2YI1Kt1ENbU2XAkriiMllktDvwxQT1BCVoAsaGsGNJ8Zabw3SFfYYkj", + "bsCGqFFTYjFFycisQYl3ew1a/Iek2krU94OYqo9gOq9RYTOa0+GHvboS95q1uGFRDWv61WiUt6lNVlIs", + "ZHfvxP+5t6ldTaM0U5Ul7S0u52WWDFGCGMGUSp0RBoenbysuZ68YmHKNY5R2ZIsxGty4gzCqpAhe/oU+", + "EA2R8aVG/zS1Xncj1+5GtoCj5OBWenPnV5Z78zV4i8YNPin1bjHKlBYJ0Bi5piOKElok06I9Ro6SaIyB", + "jsA4hVSKKY0xrkpsKljbwC/rATZ0U2654ImrOBQ7lBPKMtW8GA/rQ54ZDaQcxnRMhjNdDTa7vbromxmd", + "j9/E6qdSCllnbiTiBhIP0pTRyCpHW6UY0RGNAM0IYDpAKyHRhHIszKXK1SGJL6QXZ9gUbDWhrEFrS+HO", + "TeZbQst4yCRjmqYM3TO1s6nGWsqP7Eh1jQ0DyjnKC8zZc4ORElSqMWIuBLKclqKJdfgxDrPx2LCkzLoT", + "qpTFX166MKLI4r4LwGtBr5XmfGFL9cDTsKE2vDAhuM1wiqysBM6izGITIREKPXFCq1BF+ZQwGl9QnmaN", + "KrGUlc8yaSOaGxTIUGTaOjInsPIkdq9ibX0kMh43MqvGjt+QMLeRq3JCaaIzH3uzxPBWfDT8nE8nPq4V", + "hx+kSQzHOdZaEEDS4OwOT45gJEViUIMmlKOEBDXx28ZiRe8Cu0EKwqBtdCommAgOYjT6h1lBYSp1L5cx", + "ZvR0AQEUBmLDBMYXRDcsrRxClCZJCq3Xzw739/efLEbrvYft3m579+Gb3V6/Z/79bxAGLsoaEEk0tn0c", + "qjsMOvaRYWGzgkqwKcaQEE5HqDT4luWZ1YTsPXzUJ8Nod28/xtGDh486nU7TNMi1nKWC8oapnhbPNhNF", + "16Hi9nzMjpp8nRzuYE+zCS1fgtODN78F/aCbKdllIiKsq4aU90ufi4/zB/YP93FIeeNeqPC5Cyu1LsZ7", + "BBO+nRkBVTAilC1kUNKMMf9931DCMSoUUlhns4Sv68L8S6OajH7GGBozGpqMzR7DadzXpS7C4FOGGV6k", + "QlE3ey2v5J8YkDDMKIvB9oCWIS6HOParKsDZW0p+CUVa2OBgR23iowKqm5lNGz9nxjVlNt80q8z4cP/R", + "47/3nuzulYybcv3oQbDRUgq3uwDXLc3+aVj45BR57CKoUQP3VyT41FiF/WDXZ/yMU5yKA8+f1YRhNkaU", + "jy9i2qCdf7iHEFOJkbZb8vU2FHRJmq5XxWZUV/i0gvySR26MLT5ZU48u39yTN2H5gypezzj9lGEJ0Vdn", + "fzX+/dP/qNO//7n76cX5+T+nz38/ekn/ec5OX31VomF14u2bZs9WbrGo8YaVrNmm6nFCdNQAfCZC6SVc", + "80/MVjIxnTtwSDgMsT/gbXhBNUrC+jAISEo7npmdSCSDAFp4RSLteoHgYIaCCZIY5Y7pfOqSLabzl3yH", + "er04RjzjJKERSM/kIVEYm9FUNoxFQijfGfAB92NBToiCTDkZxxCRVGcSjUQgyiSbwVCSyPg0l5+eTx7C", + "F5Km1zsDridEA15paShIidTKbNMNBM1nsIL2q+rAGwNOXXOMYUpYhgoiy6gBL+JHbJZgBtFEjlF3in25", + "xfudAa8oZDNTmhQzFVJXUg2Pe2GDHMG0M4JkVGnkUCShqLLKC608UfS4VzH/x73H67ejhQ6tUD+r3fWz", + "lVwpN7APp8B2aueMLyZap+sPS6y/cTYCv715c2rYYP4/g3ygOS8KEbcEZzMgZl+MCi4nyEEzi0l8bm6n", + "4cQlDJx0NyTojWtsujG1no6ndmJ48+IMNMqEcue/W5Fh58hs3xFsLoQqlRlVpAQODk+e7nQ2OByyvC3W", + "v0KObwoKF7I2eS6z5kdcj3kmyvA3hOOj0MApb6FzoNUx/uGZkMCcg5nbdR/eKqwmtayohITjIx+f2Sw3", + "7YH36oNgJx8xXfQUfXhd4DtSLMXG94q9F0PO7dIOO+B/GMXI7A6+NnpYXauxtHz/4l0bYUbHNPjciQ3F", + "y13BavNv4Li1ecEXU803s+1yjtpM1qwac9nfOQLZvxkCuZuTofo5D1EXipNUTUQDqXmenkDeBvCKKl3B", + "DHUBeTdQdzP1U6Wqw3fnRSvS3ZudD/0F8Aatw7fHR3s+nV6dRn9+QJ48vroi+skjeqmefE6GcvznPrkn", + "Z1MrT5O+9kjIQ4zNToSaVKvsZ9wRAMZ/+RAoDGjaIHul6JhjDMenQOLYRIPyhjQfvir03Sd7nd1Hjzu7", + "vV5nt7fJ9jwh0Yq5Tw4ON5+8t+c2LH0y7EdxH0dfkR7wYnMBgbBLMlMwyEP2IHAYoQQOSkrpw/pGCcr6", + "WdtfO1pbkMLaw7ObHJZt5D3sqewS139mT2xv7vcfLvX7a6VqNtO4Hpg5IzqzjfNeFzdJXCFEImMx/3cN", + "Q2N5Dqph7BGlQu00xbWlCt7yj1xc8irpLn9h7PdThnIG5ycnlWyXxFGmbGZsA8JFmi6Vg0hvJIa9NeF3", + "7WpKZ6PbOA9d9IQl/3rrp5/lrXl+fOO0boMtelnvatTkj53S2O23O0mN+0YzwI8Ow0xDURBhVO6QiSyG", + "32YpyilVQoLZM0zRIuLXGeeUj80INmZE5gmbgXTfr+58Soz65X1T+2l1j7NJpmNxyW0fNck0mE92yYYE", + "D4dWD+E0uQ8vhe3jVxoa97+Aq1xzwuPhrN58EYO13G7dwHMtJMZ2Mm+WfXhWmGJhzN54Wwr9n85D+GM+", + "e4S542C8z1t6aQVh4LkehIFjYRAGOWfMn45C+5ddfBAGfiElvZlbk1PPOux2ap/kFY8Lx3tUaWNpUSal", + "wbWlxtDCJNWzPMGcW8/OzczloBiw6fz1trcEvSe3kZR8uzIL+S9SNlD2UPkka31TTaZLt/4XTYw9PlrE", + "tm6/48tlq2h14RBZ6bY7xms8Ql5RluvqY82zPO02zhbPCW9QittYcjXBkuW43f28FnfdFm7JJvvCSqhE", + 
"WWkly2XjwtNX1i1TlRcs/0WWeUS6Po/lnCGkKNuFSuRw1njQS0ntsZNnkGOsYcF/GUzRnHdbjZpPyFUx", + "g8WzRMFC+ZejY57psQVgOx14nddl0FE+hF1GpwqvmyHw5gXduVbVhbGqwjsHQI2G5/3PCo+2zLYWlHM+", + "R7i6iNy4LowySfXszAQEp4ZDJBLlQebU0EYKS4T9ej65zeVeX9sCnZGok/McOUoawcHpsdWShHAyNiI7", + "PwFGRxjNIoY+FVcDEbYo9NXhcdudIeQ7d5vJodoyxLROCDfjB2EwRancvL3OXseW9ooUOUlp0A/2O7sd", + "s5M1bLAkdidFVYnPjRo7tJHsOLZr177uxHBWpYIrx5u9Xs+V4XDtPSuZV2J1/1TuaNhF13Wx189gWbgQ", + "NgwbXGbALdRhV5UlCZEzQ7v9FqIJRh/to67Fr2opQQZCHLsmX0nRZgdkFk3X8XeN0hza+OVfh8GD3u6t", + "cdiV1DVM+5aTTE+EpJ8xNpM+vEWxLp30mGuUnDBQKKcofYFU2QiD/ruq+b17f/2+LHfLrjmvUqEaZF26", + "GBI4x4BK/yri2a2R2HD15LrqhIzHva5p2t6trcArWAOTba5xmFcjuF0VUTMe7Tjt2oKgfyVxfkLwzTT6", + "Qe/BFjR6oaDvHlnSacaYvZ/gq1HmJURlf9r9YsD3tQtuDN1Wv2ptR/b73NpSIklidp3KrmBBRq9ftJFH", + "IjboxLHOJx/MUw8f3d6kqBioWFRYYtwiBHhfs7YHDdjezupI+akmG6iJk26uGOFStPAV8ncQdn7z7pe9", + "Z/4o5Je9Z+4w5Jf9g/kFvLtRlt62XHNeW/5T+dYq33P0wX7ONOua3Ln+OrRXtNoK4PMFVDeBfMUCf6K+", + "TVBfmV0rgV9Ry3aH0K96OXcj8Hd7Ai6UrYnb9lGeYP/BIN+Tu5/0UPARo5GGdq6Rbq9uU4Q2nBFm63bz", + "XL29DuvLciiHTOF9Mj2f+qKFxpX9b/cLjTfBhoVBrkQHueoeH4W+4srVSaUSR/SqOfDbFM9tY0S/jq2j", + "RD9vJVRvRacPkiEdZyJT5bIYW2CHan6preKA7xt+nYfnpQj2O9bS3jZDx9YB6k+9vyPovChQ57zdAcY6", + "8Jy32g549jUON0LP+Qp/oueN0HOJXavRc1Fvcpfwufp2lK3j51zfmhjuD/h+RAR9z1Ap4T7HPa+Sqvq4", + "jQHqvIZ1dez3unF8BLbQZ1ny8m5gqZ98+7g0v7twH3NItmDQvocnR4LzWLMcCn5v+tDbru/bPgS8zyr2", + "vHxXqBlsWUfUZWJchl2Lla0SSTK/VgGmNRAFZ3Zh7TPkGp5ODVWdAc8vFn1QIpMRfoBCUUELUMgw0nA5", + "odHEjGO/s+PbUswPJE0/FBfodvrw3F7mKXHXTd5SKClhEAmuBHMljR+mSfKhXy9YOD85sZ1sm4krTfjQ", + "h7xIobAxZVoN+IC/Rp1JriwVjCgNL4FRjgpaRuBSMIYxDGfwwfCzRN+OvaVkRnS3Y9hswE0PyjNUnkrK", + "x8Dx0g9IR/BhJBgTl7YQ5oO7sLTU6l8YKX0jyw+Xlxk7WrQAaRnnLn2hfYuIndeWXc8n9m84mU9VlPPs", + "9hrr7r7UE12Wp40sJSNtb3BQbfRDZNq9NaVpIY7zzUtZWmtWf8HLGJymL6gySdNN1dcv02rxNElW6DC0", + "JvMvlY5Fpv9T6RiluwvstXuZckOLRO6DJh/dzdXKfSlXaNvEKkdhM6sCdz8/r891n6ZJEoSBX09Dve0G", + "kUTjle6icSttx9aqT10csL4fM5KxHaF1dvZ052fM2BCWWJZVnb1nYEPk8IXetmSycfP22jX44ZFLXhH/", + "jdVw+0cRpVVQe1eHx8OZle38qsF9MhCv0HPKbLzzdDXaSP5sqY34Gwo/vI3M9eMHt5JISPsyBZVfnrs/", + "xVulHUfJ3Fv2XtP8vlCY73rPT052lhmNu4+/1GTkz+2wr6P84WOKvep1/6zF3d0lBQGrkoVd02iVPYj0", + "pzn4O38/g8e9DB42I1pQ0xpLEuEoY/aKa2zvdTfZhb+43P3i/jhel1ef/0zFd5NL8feL1k2TE3gvjNLT", + "FKO7kbh9mxTFFbB7Wt9s3z3uSbB7jPIJQXMUKP8Iy4+j3bd/GNz0YzYbHQVv1bby277fjW1tO/L5NeR1", + "jWV+3Bczd5qWU6LFAgYsvZVjaUmMf0HHVgpivGu5QTlMTsHPyoENimFKzModfNOFbwXEHnm45h04y9JU", + "SK1AXwpIRIzKHkH8fvbqJQxFPOtD0Y+Dey2FVzj/PgH/Anqzh6Kf0fQ9sUVmZnsyEjIpDZD3TCW2U5Fm", + "zL4uxVYaex67YEVAE9kZfwYiowmdYsPRVvknLO60qmfRkYdBkpPXNeTZt0hUB118uX+xlqo8qjTCiDLM", + "32dM+djy1vMrH6L0Zo0h5UTONn2txuLvdkyLsHoff7bjhFzRJEuK92M//xVa/tWv9lce7G9X0FGhU3gV", + "IcbKHljtfN1PfISFOBvu3W+13Cv3pksj/Dcs9YKW/+UJMCI2ET9Xci0EMCLHuPPDXKjwtja/T3F8tHCb", + "4h4WqU1z7ZvjjA3L0jbbYGyI+++iJK3YfG63IO38+8HEpZcB3cNbEdMCZi6rhPu+VLC3vZCw7Qq483uc", + "Q3mOOaQuVb/ZAcyITQrzQkSEQYxTZCK1b6NybYMwyCTz79bpd91vt0yE0vbNz8H1++v/DwAA///8wcL2", + "G3cAAA==", } // GetSwagger returns the content of the embedded swagger specification file diff --git a/lib/paths/paths.go b/lib/paths/paths.go index c9e6596a..65e66a97 100644 --- a/lib/paths/paths.go +++ b/lib/paths/paths.go @@ -160,9 +160,19 @@ func (p *Paths) InstanceLogs(id string) string { return filepath.Join(p.InstanceDir(id), "logs") } -// InstanceConsoleLog returns the path to instance console log file. -func (p *Paths) InstanceConsoleLog(id string) string { - return filepath.Join(p.InstanceLogs(id), "console.log") +// InstanceAppLog returns the path to instance application log (guest serial console). 
+func (p *Paths) InstanceAppLog(id string) string { + return filepath.Join(p.InstanceLogs(id), "app.log") +} + +// InstanceVMMLog returns the path to instance VMM log (Cloud Hypervisor stdout+stderr). +func (p *Paths) InstanceVMMLog(id string) string { + return filepath.Join(p.InstanceLogs(id), "vmm.log") +} + +// InstanceHypemanLog returns the path to instance hypeman operations log. +func (p *Paths) InstanceHypemanLog(id string) string { + return filepath.Join(p.InstanceLogs(id), "hypeman.log") } // InstanceSnapshots returns the path to instance snapshots directory. diff --git a/lib/providers/providers.go b/lib/providers/providers.go index f6046ff7..62523063 100644 --- a/lib/providers/providers.go +++ b/lib/providers/providers.go @@ -21,11 +21,21 @@ import ( "go.opentelemetry.io/otel" ) -// ProvideLogger provides a structured logger with subsystem-specific levels -func ProvideLogger() *slog.Logger { +// ProvideLogger provides a structured logger with subsystem-specific levels. +// Wraps with InstanceLogHandler to automatically write logs with "id" attribute +// to per-instance hypeman.log files. +func ProvideLogger(p *paths.Paths) *slog.Logger { cfg := logger.NewConfig() otelHandler := hypemanotel.GetGlobalLogHandler() - return logger.NewSubsystemLogger(logger.SubsystemAPI, cfg, otelHandler) + baseLogger := logger.NewSubsystemLogger(logger.SubsystemAPI, cfg, otelHandler) + + // Wrap the handler with instance log handler for per-instance logging + logPathFunc := func(id string) string { + return p.InstanceHypemanLog(id) + } + instanceHandler := logger.NewInstanceLogHandler(baseLogger.Handler(), logPathFunc) + + return slog.New(instanceHandler) } // ProvideContext provides a context with logger attached diff --git a/lib/vmm/client.go b/lib/vmm/client.go index e7da17a2..322dac88 100644 --- a/lib/vmm/client.go +++ b/lib/vmm/client.go @@ -117,33 +117,29 @@ func StartProcessWithArgs(ctx context.Context, p *paths.Paths, version CHVersion Setpgid: true, // Create new process group } - // Redirect stdout/stderr to log files (process won't block on I/O) + // Redirect stdout/stderr to combined VMM log file (process won't block on I/O) instanceDir := filepath.Dir(socketPath) - stdoutFile, err := os.OpenFile( - filepath.Join(instanceDir, "ch-stdout.log"), - os.O_CREATE|os.O_WRONLY|os.O_APPEND, - 0644, - ) - if err != nil { - return 0, fmt.Errorf("create stdout log: %w", err) + logsDir := filepath.Join(instanceDir, "logs") + if err := os.MkdirAll(logsDir, 0755); err != nil { + return 0, fmt.Errorf("create logs directory: %w", err) } - // Note: These defers close the parent's file descriptors after cmd.Start(). - // The child process receives duplicated file descriptors during fork/exec, - // so it can continue writing to the log files even after we close them here. - defer stdoutFile.Close() - stderrFile, err := os.OpenFile( - filepath.Join(instanceDir, "ch-stderr.log"), + vmmLogFile, err := os.OpenFile( + filepath.Join(logsDir, "vmm.log"), os.O_CREATE|os.O_WRONLY|os.O_APPEND, 0644, ) if err != nil { - return 0, fmt.Errorf("create stderr log: %w", err) + return 0, fmt.Errorf("create vmm log: %w", err) } - defer stderrFile.Close() - - cmd.Stdout = stdoutFile - cmd.Stderr = stderrFile + // Note: This defer closes the parent's file descriptor after cmd.Start(). + // The child process receives a duplicated file descriptor during fork/exec, + // so it can continue writing to the log file even after we close it here. 
+ defer vmmLogFile.Close() + + // Both stdout and stderr go to the same file + cmd.Stdout = vmmLogFile + cmd.Stderr = vmmLogFile if err := cmd.Start(); err != nil { return 0, fmt.Errorf("start cloud-hypervisor: %w", err) diff --git a/openapi.yaml b/openapi.yaml index 6984a1ff..19e39ee1 100644 --- a/openapi.yaml +++ b/openapi.yaml @@ -930,7 +930,12 @@ paths: get: summary: Stream instance logs (SSE) description: | - Streams instance console logs as Server-Sent Events. + Streams instance logs as Server-Sent Events. + Use the `source` parameter to select which log to stream: + - `app` (default): Guest application logs (serial console) + - `vmm`: Cloud Hypervisor VMM logs + - `hypeman`: Hypeman operations log + Returns the last N lines (controlled by `tail` parameter), then optionally continues streaming new lines if `follow=true`. operationId: getInstanceLogs @@ -957,6 +962,18 @@ paths: type: boolean default: false description: Continue streaming new lines after initial output + - name: source + in: query + required: false + schema: + type: string + enum: [app, vmm, hypeman] + default: app + description: | + Log source to stream: + - app: Guest application logs (serial console output) + - vmm: Cloud Hypervisor VMM logs (hypervisor stdout+stderr) + - hypeman: Hypeman operations log (actions taken on this instance) responses: 200: description: Log stream (SSE) From 713620e3e6b0d17a5e526e339658d59a9d0c3f71 Mon Sep 17 00:00:00 2001 From: Steven Miller Date: Wed, 10 Dec 2025 13:57:11 -0500 Subject: [PATCH 02/14] Fix test --- cmd/api/api/exec_test.go | 3 ++- lib/instances/manager_test.go | 4 ++-- lib/vmm/client_test.go | 25 +++++++++++-------------- 3 files changed, 15 insertions(+), 17 deletions(-) diff --git a/cmd/api/api/exec_test.go b/cmd/api/api/exec_test.go index 129e2a92..d40f48a7 100644 --- a/cmd/api/api/exec_test.go +++ b/cmd/api/api/exec_test.go @@ -8,6 +8,7 @@ import ( "time" "github.com/onkernel/hypeman/lib/exec" + "github.com/onkernel/hypeman/lib/instances" "github.com/onkernel/hypeman/lib/oapi" "github.com/onkernel/hypeman/lib/paths" "github.com/onkernel/hypeman/lib/system" @@ -280,7 +281,7 @@ func TestExecWithDebianMinimal(t *testing.T) { // collectTestLogs collects logs from an instance (non-streaming) func collectTestLogs(t *testing.T, svc *ApiService, instanceID string, n int) string { - logChan, err := svc.InstanceManager.StreamInstanceLogs(ctx(), instanceID, n, false) + logChan, err := svc.InstanceManager.StreamInstanceLogs(ctx(), instanceID, n, false, instances.LogSourceApp) if err != nil { return "" } diff --git a/lib/instances/manager_test.go b/lib/instances/manager_test.go index d4633bfe..68dc37c2 100644 --- a/lib/instances/manager_test.go +++ b/lib/instances/manager_test.go @@ -123,7 +123,7 @@ func waitForLogMessage(ctx context.Context, mgr *manager, instanceID, message st // collectLogs gets the last N lines of logs (non-streaming) func collectLogs(ctx context.Context, mgr *manager, instanceID string, n int) (string, error) { - logChan, err := mgr.StreamInstanceLogs(ctx, instanceID, n, false) + logChan, err := mgr.StreamInstanceLogs(ctx, instanceID, n, false, LogSourceApp) if err != nil { return "", err } @@ -672,7 +672,7 @@ func TestBasicEndToEnd(t *testing.T) { streamCtx, streamCancel := context.WithCancel(ctx) defer streamCancel() - logChan, err := manager.StreamInstanceLogs(streamCtx, inst.Id, 10, true) + logChan, err := manager.StreamInstanceLogs(streamCtx, inst.Id, 10, true, LogSourceApp) require.NoError(t, err) // Create unique marker diff --git 
a/lib/vmm/client_test.go b/lib/vmm/client_test.go index 2bde9a2a..e88cf609 100644 --- a/lib/vmm/client_test.go +++ b/lib/vmm/client_test.go @@ -161,15 +161,15 @@ func TestStartProcessCreatesLogFiles(t *testing.T) { require.NoError(t, err) assert.Greater(t, pid, 0) - // Verify log files exist - they are created and accessible by the daemon - stdoutLog := filepath.Join(tmpDir, "ch-stdout.log") - stderrLog := filepath.Join(tmpDir, "ch-stderr.log") + // Verify logs directory and vmm.log file exist + logsDir := filepath.Join(tmpDir, "logs") + vmmLog := filepath.Join(logsDir, "vmm.log") - _, err = os.Stat(stdoutLog) - require.NoError(t, err, "stdout log should exist") + _, err = os.Stat(logsDir) + require.NoError(t, err, "logs directory should exist") - _, err = os.Stat(stderrLog) - require.NoError(t, err, "stderr log should exist") + _, err = os.Stat(vmmLog) + require.NoError(t, err, "vmm.log should exist") // Verify the daemon is running and responsive client, err := NewVMM(socketPath) @@ -179,16 +179,13 @@ func TestStartProcessCreatesLogFiles(t *testing.T) { require.NoError(t, err) assert.Equal(t, 200, pingResp.StatusCode()) - // Read log files - with verbose mode, Cloud Hypervisor writes to logs - stdoutContent, err := os.ReadFile(stdoutLog) - require.NoError(t, err) - stderrContent, err := os.ReadFile(stderrLog) + // Read log file - with verbose mode, Cloud Hypervisor writes to logs + vmmContent, err := os.ReadFile(vmmLog) require.NoError(t, err) // Verify that logs contain output (proves daemon can write after parent closed files) - totalLogSize := len(stdoutContent) + len(stderrContent) - assert.Greater(t, totalLogSize, 0, - "Cloud Hypervisor daemon should write logs even after parent closed the file descriptors") + assert.Greater(t, len(vmmContent), 0, + "Cloud Hypervisor daemon should write logs even after parent closed the file descriptor") // Cleanup client.ShutdownVMMWithResponse(ctx) From f2525b78eae42060ae8985cd554acf3628061a43 Mon Sep 17 00:00:00 2001 From: Steven Miller Date: Wed, 10 Dec 2025 14:14:52 -0500 Subject: [PATCH 03/14] Review instance log handler --- lib/logger/instance_handler.go | 89 +++++++++++++++++++--------------- 1 file changed, 49 insertions(+), 40 deletions(-) diff --git a/lib/logger/instance_handler.go b/lib/logger/instance_handler.go index f599b60f..20a25c5b 100644 --- a/lib/logger/instance_handler.go +++ b/lib/logger/instance_handler.go @@ -15,11 +15,21 @@ import ( // InstanceLogHandler wraps an slog.Handler and additionally writes logs // that have an "id" attribute to a per-instance hypeman.log file. // This provides automatic per-instance logging without manual instrumentation. +// +// Implementation follows the slog handler guide for shared state across +// WithAttrs/WithGroup: https://pkg.go.dev/golang.org/x/example/slog-handler-guide type InstanceLogHandler struct { slog.Handler logPathFunc func(id string) string // returns path to hypeman.log for an instance - mu sync.Mutex - fileCache map[string]*os.File + state *sharedState // shared across all handlers derived via WithAttrs/WithGroup +} + +// sharedState holds state that must be shared across all handler instances +// derived from the same parent via WithAttrs/WithGroup. +// Using a pointer ensures all derived handlers share the same mutex and file cache. 
+type sharedState struct { + mu sync.Mutex + fileCache map[string]*os.File } // NewInstanceLogHandler creates a new handler that wraps the given handler @@ -29,7 +39,9 @@ func NewInstanceLogHandler(wrapped slog.Handler, logPathFunc func(id string) str return &InstanceLogHandler{ Handler: wrapped, logPathFunc: logPathFunc, - fileCache: make(map[string]*os.File), + state: &sharedState{ + fileCache: make(map[string]*os.File), + }, } } @@ -66,28 +78,7 @@ func (h *InstanceLogHandler) writeToInstanceLog(instanceID string, r slog.Record return } - // Get or create file handle - h.mu.Lock() - f, ok := h.fileCache[instanceID] - if !ok { - // Ensure directory exists - dir := filepath.Dir(logPath) - if err := os.MkdirAll(dir, 0755); err != nil { - h.mu.Unlock() - return // silently skip if can't create directory - } - - var err error - f, err = os.OpenFile(logPath, os.O_CREATE|os.O_WRONLY|os.O_APPEND, 0644) - if err != nil { - h.mu.Unlock() - return // silently skip if can't open file - } - h.fileCache[instanceID] = f - } - h.mu.Unlock() - - // Format log line: timestamp LEVEL message key=value key=value... + // Format log line outside the lock: timestamp LEVEL message key=value key=value... timestamp := r.Time.Format(time.RFC3339) level := r.Level.String() msg := r.Message @@ -108,10 +99,28 @@ func (h *InstanceLogHandler) writeToInstanceLog(instanceID string, r slog.Record } line += "\n" - // Write to file (best effort, don't block on errors) - h.mu.Lock() + // Get or create file handle and write (single lock acquisition) + h.state.mu.Lock() + defer h.state.mu.Unlock() + + f, ok := h.state.fileCache[instanceID] + if !ok { + // Ensure directory exists + dir := filepath.Dir(logPath) + if err := os.MkdirAll(dir, 0755); err != nil { + return // silently skip if can't create directory + } + + var err error + f, err = os.OpenFile(logPath, os.O_CREATE|os.O_WRONLY|os.O_APPEND, 0644) + if err != nil { + return // silently skip if can't open file + } + h.state.fileCache[instanceID] = f + } + + // Write to file (best effort) f.WriteString(line) - h.mu.Unlock() } // Enabled reports whether the handler handles records at the given level. @@ -120,45 +129,45 @@ func (h *InstanceLogHandler) Enabled(ctx context.Context, level slog.Level) bool } // WithAttrs returns a new handler with the given attributes. +// The new handler shares the same state (mutex and file cache) as the parent. func (h *InstanceLogHandler) WithAttrs(attrs []slog.Attr) slog.Handler { return &InstanceLogHandler{ Handler: h.Handler.WithAttrs(attrs), logPathFunc: h.logPathFunc, - fileCache: h.fileCache, - mu: sync.Mutex{}, + state: h.state, // same pointer = shared mutex and cache } } // WithGroup returns a new handler with the given group name. +// The new handler shares the same state (mutex and file cache) as the parent. func (h *InstanceLogHandler) WithGroup(name string) slog.Handler { return &InstanceLogHandler{ Handler: h.Handler.WithGroup(name), logPathFunc: h.logPathFunc, - fileCache: h.fileCache, - mu: sync.Mutex{}, + state: h.state, // same pointer = shared mutex and cache } } // CloseInstanceLog closes and removes a cached file handle for an instance. // Call this when an instance is deleted. 
func (h *InstanceLogHandler) CloseInstanceLog(instanceID string) { - h.mu.Lock() - defer h.mu.Unlock() + h.state.mu.Lock() + defer h.state.mu.Unlock() - if f, ok := h.fileCache[instanceID]; ok { + if f, ok := h.state.fileCache[instanceID]; ok { f.Close() - delete(h.fileCache, instanceID) + delete(h.state.fileCache, instanceID) } } // CloseAll closes all cached file handles. // Call this during shutdown. func (h *InstanceLogHandler) CloseAll() { - h.mu.Lock() - defer h.mu.Unlock() + h.state.mu.Lock() + defer h.state.mu.Unlock() - for id, f := range h.fileCache { + for id, f := range h.state.fileCache { f.Close() - delete(h.fileCache, id) + delete(h.state.fileCache, id) } } From 5c328756454d6a8fa1e5defc27022f363fe4c4ea Mon Sep 17 00:00:00 2001 From: Steven Miller Date: Wed, 10 Dec 2025 14:18:44 -0500 Subject: [PATCH 04/14] Use log not found error --- lib/instances/logs.go | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/lib/instances/logs.go b/lib/instances/logs.go index 7431b219..a022081a 100644 --- a/lib/instances/logs.go +++ b/lib/instances/logs.go @@ -59,6 +59,11 @@ func (m *manager) streamInstanceLogs(ctx context.Context, id string, tail int, f logPath = m.paths.InstanceAppLog(id) } + // Check if log file exists before starting tail + if _, err := os.Stat(logPath); os.IsNotExist(err) { + return nil, ErrLogNotFound + } + // Build tail command args := []string{"-n", strconv.Itoa(tail)} if follow { From 3dd1f9d401e7159374e75c511198c9e6f2fafda8 Mon Sep 17 00:00:00 2001 From: Steven Miller Date: Wed, 10 Dec 2025 14:36:06 -0500 Subject: [PATCH 05/14] Don't delete taps when instance state is unknown --- cmd/api/main.go | 29 +++++++++++------------------ 1 file changed, 11 insertions(+), 18 deletions(-) diff --git a/cmd/api/main.go b/cmd/api/main.go index fc269678..f6ae3c1a 100644 --- a/cmd/api/main.go +++ b/cmd/api/main.go @@ -146,10 +146,18 @@ func run() error { "kernel", kernelVer) // Initialize network manager (creates default network if needed) - // Get running instance IDs for TAP cleanup - runningIDs := getRunningInstanceIDs(app) + // Get instance IDs that might have a running VMM for TAP cleanup safety. + // Include Unknown state: we couldn't confirm their state, but they might still + // have a running VMM. Better to leave a stale TAP than crash a running VM. 
+ allInstances, _ := app.InstanceManager.ListInstances(app.Ctx) + var preserveTAPs []string + for _, inst := range allInstances { + if inst.State == instances.StateRunning || inst.State == instances.StateUnknown { + preserveTAPs = append(preserveTAPs, inst.Id) + } + } logger.Info("Initializing network manager...") - if err := app.NetworkManager.Initialize(app.Ctx, runningIDs); err != nil { + if err := app.NetworkManager.Initialize(app.Ctx, preserveTAPs); err != nil { logger.Error("failed to initialize network manager", "error", err) return fmt.Errorf("initialize network manager: %w", err) } @@ -355,21 +363,6 @@ func run() error { return err } -// getRunningInstanceIDs returns IDs of instances currently in Running state -func getRunningInstanceIDs(app *application) []string { - allInstances, err := app.InstanceManager.ListInstances(app.Ctx) - if err != nil { - return nil - } - var running []string - for _, inst := range allInstances { - if inst.State == instances.StateRunning { - running = append(running, inst.Id) - } - } - return running -} - // checkKVMAccess verifies KVM is available and the user has permission to use it func checkKVMAccess() error { f, err := os.OpenFile("/dev/kvm", os.O_RDWR, 0) From e826b2333ec46a49fcb278e1b173e444daaf9347 Mon Sep 17 00:00:00 2001 From: Steven Miller Date: Wed, 10 Dec 2025 14:39:55 -0500 Subject: [PATCH 06/14] Review fixes 1. Exec endpoint now has logger injection: 2. Simplified InstanceLogHandler - no more file caching: - Removed `sharedState` struct with mutex and fileCache - Each write now opens, writes, closes the file - No cleanup methods needed = no leak possible - Simpler code, no shared state complexity The performance impact is negligible since instance operations (start, stop, standby, etc.) are infrequent. The tradeoff of slightly more I/O vs. guaranteed no leaks and simpler code is worth it. --- cmd/api/main.go | 1 + lib/logger/instance_handler.go | 72 ++++++---------------------------- 2 files changed, 13 insertions(+), 60 deletions(-) diff --git a/cmd/api/main.go b/cmd/api/main.go index f6ae3c1a..fb480a48 100644 --- a/cmd/api/main.go +++ b/cmd/api/main.go @@ -207,6 +207,7 @@ func run() error { middleware.RequestID, middleware.RealIP, middleware.Recoverer, + mw.InjectLogger(logger), mw.AccessLogger(accessLogger), mw.JwtAuth(app.Config.JwtSecret), ).Get("/instances/{id}/exec", app.ApiService.ExecHandler) diff --git a/lib/logger/instance_handler.go b/lib/logger/instance_handler.go index 20a25c5b..2012bddf 100644 --- a/lib/logger/instance_handler.go +++ b/lib/logger/instance_handler.go @@ -8,7 +8,6 @@ import ( "log/slog" "os" "path/filepath" - "sync" "time" ) @@ -21,15 +20,6 @@ import ( type InstanceLogHandler struct { slog.Handler logPathFunc func(id string) string // returns path to hypeman.log for an instance - state *sharedState // shared across all handlers derived via WithAttrs/WithGroup -} - -// sharedState holds state that must be shared across all handler instances -// derived from the same parent via WithAttrs/WithGroup. -// Using a pointer ensures all derived handlers share the same mutex and file cache. 
-type sharedState struct { - mu sync.Mutex - fileCache map[string]*os.File } // NewInstanceLogHandler creates a new handler that wraps the given handler @@ -39,9 +29,6 @@ func NewInstanceLogHandler(wrapped slog.Handler, logPathFunc func(id string) str return &InstanceLogHandler{ Handler: wrapped, logPathFunc: logPathFunc, - state: &sharedState{ - fileCache: make(map[string]*os.File), - }, } } @@ -72,13 +59,14 @@ func (h *InstanceLogHandler) Handle(ctx context.Context, r slog.Record) error { } // writeToInstanceLog writes a log record to the instance's hypeman.log file. +// Opens and closes the file for each write to avoid file handle leaks. func (h *InstanceLogHandler) writeToInstanceLog(instanceID string, r slog.Record) { logPath := h.logPathFunc(instanceID) if logPath == "" { return } - // Format log line outside the lock: timestamp LEVEL message key=value key=value... + // Format log line: timestamp LEVEL message key=value key=value... timestamp := r.Time.Format(time.RFC3339) level := r.Level.String() msg := r.Message @@ -99,27 +87,19 @@ func (h *InstanceLogHandler) writeToInstanceLog(instanceID string, r slog.Record } line += "\n" - // Get or create file handle and write (single lock acquisition) - h.state.mu.Lock() - defer h.state.mu.Unlock() - - f, ok := h.state.fileCache[instanceID] - if !ok { - // Ensure directory exists - dir := filepath.Dir(logPath) - if err := os.MkdirAll(dir, 0755); err != nil { - return // silently skip if can't create directory - } + // Ensure directory exists + dir := filepath.Dir(logPath) + if err := os.MkdirAll(dir, 0755); err != nil { + return // silently skip if can't create directory + } - var err error - f, err = os.OpenFile(logPath, os.O_CREATE|os.O_WRONLY|os.O_APPEND, 0644) - if err != nil { - return // silently skip if can't open file - } - h.state.fileCache[instanceID] = f + // Open, write, close (no caching = no leak) + f, err := os.OpenFile(logPath, os.O_CREATE|os.O_WRONLY|os.O_APPEND, 0644) + if err != nil { + return // silently skip if can't open file } + defer f.Close() - // Write to file (best effort) f.WriteString(line) } @@ -129,45 +109,17 @@ func (h *InstanceLogHandler) Enabled(ctx context.Context, level slog.Level) bool } // WithAttrs returns a new handler with the given attributes. -// The new handler shares the same state (mutex and file cache) as the parent. func (h *InstanceLogHandler) WithAttrs(attrs []slog.Attr) slog.Handler { return &InstanceLogHandler{ Handler: h.Handler.WithAttrs(attrs), logPathFunc: h.logPathFunc, - state: h.state, // same pointer = shared mutex and cache } } // WithGroup returns a new handler with the given group name. -// The new handler shares the same state (mutex and file cache) as the parent. func (h *InstanceLogHandler) WithGroup(name string) slog.Handler { return &InstanceLogHandler{ Handler: h.Handler.WithGroup(name), logPathFunc: h.logPathFunc, - state: h.state, // same pointer = shared mutex and cache - } -} - -// CloseInstanceLog closes and removes a cached file handle for an instance. -// Call this when an instance is deleted. -func (h *InstanceLogHandler) CloseInstanceLog(instanceID string) { - h.state.mu.Lock() - defer h.state.mu.Unlock() - - if f, ok := h.state.fileCache[instanceID]; ok { - f.Close() - delete(h.state.fileCache, instanceID) - } -} - -// CloseAll closes all cached file handles. -// Call this during shutdown. 
-func (h *InstanceLogHandler) CloseAll() {
-	h.state.mu.Lock()
-	defer h.state.mu.Unlock()
-
-	for id, f := range h.state.fileCache {
-		f.Close()
-		delete(h.state.fileCache, id)
-	}
-}
-
 }
 }

From 6af82cc78cff823e14e787b1bd464af36efb53b4 Mon Sep 17 00:00:00 2001
From: Steven Miller
Date: Wed, 10 Dec 2025 14:46:14 -0500
Subject: [PATCH 07/14] add logs

---
 lib/logger/instance_handler.go | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)

diff --git a/lib/logger/instance_handler.go b/lib/logger/instance_handler.go
index 2012bddf..56aeebd3 100644
--- a/lib/logger/instance_handler.go
+++ b/lib/logger/instance_handler.go
@@ -90,17 +90,23 @@ func (h *InstanceLogHandler) writeToInstanceLog(instanceID string, r slog.Record
 	// Ensure directory exists
 	dir := filepath.Dir(logPath)
 	if err := os.MkdirAll(dir, 0755); err != nil {
-		return // silently skip if can't create directory
+		// Use package-level slog (not our handler) to avoid recursion.
+		// No "id" attr means this won't trigger writeToInstanceLog.
+		slog.Warn("failed to create instance log directory", "path", dir, "error", err)
+		return
 	}
 
 	// Open, write, close (no caching = no leak)
 	f, err := os.OpenFile(logPath, os.O_CREATE|os.O_WRONLY|os.O_APPEND, 0644)
 	if err != nil {
-		return // silently skip if can't open file
+		slog.Warn("failed to open instance log file", "path", logPath, "error", err)
+		return
 	}
 	defer f.Close()
 
-	f.WriteString(line)
+	if _, err := f.WriteString(line); err != nil {
+		slog.Warn("failed to write to instance log file", "path", logPath, "error", err)
+	}
 }
 
 // Enabled reports whether the handler handles records at the given level.

From c92b5458268800eee08e8efe98437d86b72cdec2 Mon Sep 17 00:00:00 2001
From: Steven Miller
Date: Wed, 10 Dec 2025 14:59:53 -0500
Subject: [PATCH 08/14] Fix which id

---
 cmd/api/api/exec.go | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/cmd/api/api/exec.go b/cmd/api/api/exec.go
index e234f4cf..c557dcf8 100644
--- a/cmd/api/api/exec.go
+++ b/cmd/api/api/exec.go
@@ -105,7 +105,7 @@ func (s *ApiService) ExecHandler(w http.ResponseWriter, r *http.Request) {
 
 	// Audit log: exec session started
 	log.InfoContext(ctx, "exec session started",
-		"id", instanceID,
+		"id", inst.Id,
 		"subject", subject,
 		"command", execReq.Command,
 		"tty", execReq.TTY,
@@ -133,7 +133,7 @@ func (s *ApiService) ExecHandler(w http.ResponseWriter, r *http.Request) {
 	if err != nil {
 		log.ErrorContext(ctx, "exec failed",
 			"error", err,
-			"id", instanceID,
+			"id", inst.Id,
 			"subject", subject,
 			"duration_ms", duration.Milliseconds(),
 		)
@@ -148,7 +148,7 @@ func (s *ApiService) ExecHandler(w http.ResponseWriter, r *http.Request) {
 
 	// Audit log: exec session ended
 	log.InfoContext(ctx, "exec session ended",
-		"id", instanceID,
+		"id", inst.Id,
 		"subject", subject,
 		"exit_code", exit.Code,
 		"duration_ms", duration.Milliseconds(),

From db6c89f4b7ddc3df329208aa8ad61b4d36759dfd Mon Sep 17 00:00:00 2001
From: Steven Miller
Date: Wed, 10 Dec 2025 15:10:52 -0500
Subject: [PATCH 09/14] Review fixes

---
 cmd/api/main.go                | 15 +++++++++++----
 lib/logger/instance_handler.go | 27 ++++++++++++++++++++++++++-
 lib/network/bridge.go          | 12 ++++++++----
 3 files changed, 45 insertions(+), 9 deletions(-)

diff --git a/cmd/api/main.go b/cmd/api/main.go
index fb480a48..18ef5a24 100644
--- a/cmd/api/main.go
+++ b/cmd/api/main.go
@@ -149,11 +149,18 @@ func run() error {
 	// Get instance IDs that might have a running VMM for TAP cleanup safety.
 	// Include Unknown state: we couldn't confirm their state, but they might still
 	// have a running VMM.
Better to leave a stale TAP than crash a running VM. - allInstances, _ := app.InstanceManager.ListInstances(app.Ctx) var preserveTAPs []string - for _, inst := range allInstances { - if inst.State == instances.StateRunning || inst.State == instances.StateUnknown { - preserveTAPs = append(preserveTAPs, inst.Id) + allInstances, err := app.InstanceManager.ListInstances(app.Ctx) + if err != nil { + // On error, skip TAP cleanup entirely to avoid crashing running VMs. + // Pass nil to Initialize to skip cleanup. + logger.Warn("failed to list instances for TAP cleanup, skipping cleanup", "error", err) + preserveTAPs = nil + } else { + for _, inst := range allInstances { + if inst.State == instances.StateRunning || inst.State == instances.StateUnknown { + preserveTAPs = append(preserveTAPs, inst.Id) + } } } logger.Info("Initializing network manager...") diff --git a/lib/logger/instance_handler.go b/lib/logger/instance_handler.go index 56aeebd3..677946dd 100644 --- a/lib/logger/instance_handler.go +++ b/lib/logger/instance_handler.go @@ -20,6 +20,7 @@ import ( type InstanceLogHandler struct { slog.Handler logPathFunc func(id string) string // returns path to hypeman.log for an instance + preAttrs []slog.Attr // attrs added via WithAttrs (needed to find "id") } // NewInstanceLogHandler creates a new handler that wraps the given handler @@ -40,8 +41,16 @@ func (h *InstanceLogHandler) Handle(ctx context.Context, r slog.Record) error { return err } - // Check for instance ID in attributes + // Check for instance ID in pre-bound attrs first (from WithAttrs) var instanceID string + for _, a := range h.preAttrs { + if a.Key == "id" { + instanceID = a.Value.String() + break + } + } + + // Then check record attrs (overrides pre-bound if present) r.Attrs(func(a slog.Attr) bool { if a.Key == "id" { instanceID = a.Value.String() @@ -72,7 +81,13 @@ func (h *InstanceLogHandler) writeToInstanceLog(instanceID string, r slog.Record msg := r.Message // Collect attributes (excluding "id" since it's implicit) + // Include both pre-bound attrs and record attrs var attrs []string + for _, a := range h.preAttrs { + if a.Key != "id" { + attrs = append(attrs, fmt.Sprintf("%s=%v", a.Key, a.Value)) + } + } r.Attrs(func(a slog.Attr) bool { if a.Key != "id" { attrs = append(attrs, fmt.Sprintf("%s=%v", a.Key, a.Value)) @@ -115,17 +130,27 @@ func (h *InstanceLogHandler) Enabled(ctx context.Context, level slog.Level) bool } // WithAttrs returns a new handler with the given attributes. +// Tracks attrs locally so we can find "id" even when added via With(). func (h *InstanceLogHandler) WithAttrs(attrs []slog.Attr) slog.Handler { + // Combine existing pre-attrs with new ones + newPreAttrs := make([]slog.Attr, len(h.preAttrs), len(h.preAttrs)+len(attrs)) + copy(newPreAttrs, h.preAttrs) + newPreAttrs = append(newPreAttrs, attrs...) + return &InstanceLogHandler{ Handler: h.Handler.WithAttrs(attrs), logPathFunc: h.logPathFunc, + preAttrs: newPreAttrs, } } // WithGroup returns a new handler with the given group name. func (h *InstanceLogHandler) WithGroup(name string) slog.Handler { + // Note: We don't track groups for "id" lookup since instance IDs + // should always be at the top level, not nested in groups. 
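+	// The derived handler still shares logPathFunc and preAttrs, so records
+	// carrying a top-level "id" attribute keep routing to the per-instance log.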
return &InstanceLogHandler{ Handler: h.Handler.WithGroup(name), logPathFunc: h.logPathFunc, + preAttrs: h.preAttrs, } } diff --git a/lib/network/bridge.go b/lib/network/bridge.go index 45db556e..75c2b456 100644 --- a/lib/network/bridge.go +++ b/lib/network/bridge.go @@ -430,7 +430,7 @@ func (m *manager) createTAPDevice(tapName, bridgeName string, isolated bool) err // This allows Cloud Hypervisor (running as current user) to access the TAP uid := os.Getuid() gid := os.Getgid() - + tap := &netlink.Tuntap{ LinkAttrs: netlink.LinkAttrs{ Name: tapName, @@ -535,10 +535,17 @@ func (m *manager) queryNetworkState(bridgeName string) (*Network, error) { // CleanupOrphanedTAPs removes TAP devices that aren't used by any running instance. // runningInstanceIDs is a list of instance IDs that currently have a running VMM. +// Pass nil to skip cleanup entirely (used when we couldn't determine running instances). // Returns the number of TAPs deleted. func (m *manager) CleanupOrphanedTAPs(ctx context.Context, runningInstanceIDs []string) int { log := logger.FromContext(ctx) + // If nil, skip cleanup entirely to avoid accidentally deleting TAPs for running VMs + if runningInstanceIDs == nil { + log.DebugContext(ctx, "skipping TAP cleanup (nil instance list)") + return 0 + } + // Build set of expected TAP names for running instances expectedTAPs := make(map[string]bool) for _, id := range runningInstanceIDs { @@ -578,6 +585,3 @@ func (m *manager) CleanupOrphanedTAPs(ctx context.Context, runningInstanceIDs [] return deleted } - - - From 390d6c6fc432cea5d01d66ba038900a04c3ec3ab Mon Sep 17 00:00:00 2001 From: Steven Miller Date: Wed, 10 Dec 2025 15:26:18 -0500 Subject: [PATCH 10/14] run cleanup when no vm --- cmd/api/main.go | 2 ++ 1 file changed, 2 insertions(+) diff --git a/cmd/api/main.go b/cmd/api/main.go index 18ef5a24..1b502b89 100644 --- a/cmd/api/main.go +++ b/cmd/api/main.go @@ -157,6 +157,8 @@ func run() error { logger.Warn("failed to list instances for TAP cleanup, skipping cleanup", "error", err) preserveTAPs = nil } else { + // Initialize to empty slice (not nil) so cleanup runs even with no running VMs + preserveTAPs = []string{} for _, inst := range allInstances { if inst.State == instances.StateRunning || inst.State == instances.StateUnknown { preserveTAPs = append(preserveTAPs, inst.Id) From b6c7c37728a24d2f8b5c885253150b92b5fb797c Mon Sep 17 00:00:00 2001 From: Steven Miller Date: Wed, 10 Dec 2025 16:29:10 -0500 Subject: [PATCH 11/14] Move resource id, partial id, name resolution to middleware --- cmd/api/api/api_test.go | 65 ++++++++--- cmd/api/api/exec.go | 20 +--- cmd/api/api/exec_test.go | 4 +- cmd/api/api/images.go | 45 +++---- cmd/api/api/images_test.go | 49 ++++---- cmd/api/api/ingress.go | 54 ++------- cmd/api/api/instances.go | 206 +++++---------------------------- cmd/api/api/instances_test.go | 23 ++-- cmd/api/api/registry_test.go | 41 ++++--- cmd/api/api/resolvers.go | 104 +++++++++++++++++ cmd/api/api/volumes.go | 61 ++-------- cmd/api/api/volumes_test.go | 21 ++-- cmd/api/main.go | 5 + lib/instances/manager.go | 2 +- lib/instances/query.go | 6 +- lib/logger/instance_handler.go | 11 +- lib/middleware/README.md | 21 ++++ lib/middleware/resolve.go | 192 ++++++++++++++++++++++++++++++ 18 files changed, 511 insertions(+), 419 deletions(-) create mode 100644 cmd/api/api/resolvers.go create mode 100644 lib/middleware/README.md create mode 100644 lib/middleware/resolve.go diff --git a/cmd/api/api/api_test.go b/cmd/api/api/api_test.go index 9f010002..8aaa4064 100644 --- 
a/cmd/api/api/api_test.go +++ b/cmd/api/api/api_test.go @@ -11,6 +11,7 @@ import ( "github.com/onkernel/hypeman/cmd/api/config" "github.com/onkernel/hypeman/lib/images" "github.com/onkernel/hypeman/lib/instances" + mw "github.com/onkernel/hypeman/lib/middleware" "github.com/onkernel/hypeman/lib/network" "github.com/onkernel/hypeman/lib/oapi" "github.com/onkernel/hypeman/lib/paths" @@ -98,6 +99,36 @@ func ctx() context.Context { return context.Background() } +// ctxWithInstance creates a context with a resolved instance (simulates ResolveResource middleware) +func ctxWithInstance(svc *ApiService, idOrName string) context.Context { + inst, err := svc.InstanceManager.GetInstance(ctx(), idOrName) + if err != nil { + return ctx() // Let handler deal with the error + } + return mw.WithResolvedInstance(ctx(), inst.Id, inst) +} + +// ctxWithVolume creates a context with a resolved volume (simulates ResolveResource middleware) +func ctxWithVolume(svc *ApiService, idOrName string) context.Context { + vol, err := svc.VolumeManager.GetVolume(ctx(), idOrName) + if err != nil { + vol, err = svc.VolumeManager.GetVolumeByName(ctx(), idOrName) + } + if err != nil { + return ctx() + } + return mw.WithResolvedVolume(ctx(), vol.Id, vol) +} + +// ctxWithImage creates a context with a resolved image (simulates ResolveResource middleware) +func ctxWithImage(svc *ApiService, name string) context.Context { + img, err := svc.ImageManager.GetImage(ctx(), name) + if err != nil { + return ctx() + } + return mw.WithResolvedImage(ctx(), img.Name, img) +} + // createAndWaitForImage creates an image and waits for it to be ready. // Returns the image name on success, or fails the test on error/timeout. func createAndWaitForImage(t *testing.T, svc *ApiService, imageName string, timeout time.Duration) string { @@ -117,24 +148,26 @@ func createAndWaitForImage(t *testing.T, svc *ApiService, imageName string, time t.Log("Waiting for image to be ready...") deadline := time.Now().Add(timeout) for time.Now().Before(deadline) { - imgResp, err := svc.GetImage(ctx(), oapi.GetImageRequestObject{ - Name: imageName, - }) - require.NoError(t, err) - - img, ok := imgResp.(oapi.GetImage200JSONResponse) - if ok { - switch img.Status { - case "ready": - t.Log("Image is ready") - return imgCreated.Name - case "failed": - t.Fatalf("Image build failed: %v", img.Error) - default: - t.Logf("Image status: %s", img.Status) + // Get image from manager (may fail during pending/pulling, that's OK) + img, err := svc.ImageManager.GetImage(ctx(), imageName) + if err != nil { + time.Sleep(100 * time.Millisecond) + continue + } + + switch img.Status { + case "ready": + t.Log("Image is ready") + return imgCreated.Name + case "failed": + errMsg := "" + if img.Error != nil { + errMsg = *img.Error } + t.Fatalf("Image build failed: %v", errMsg) } - time.Sleep(1 * time.Second) + // Still pending/pulling/converting, poll again + time.Sleep(100 * time.Millisecond) } t.Fatalf("Timeout waiting for image %s to be ready", imageName) diff --git a/cmd/api/api/exec.go b/cmd/api/api/exec.go index c557dcf8..cc6dc29f 100644 --- a/cmd/api/api/exec.go +++ b/cmd/api/api/exec.go @@ -10,11 +10,11 @@ import ( "sync" "time" - "github.com/go-chi/chi/v5" "github.com/gorilla/websocket" "github.com/onkernel/hypeman/lib/exec" "github.com/onkernel/hypeman/lib/instances" "github.com/onkernel/hypeman/lib/logger" + mw "github.com/onkernel/hypeman/lib/middleware" ) var upgrader = websocket.Upgrader{ @@ -36,24 +36,14 @@ type ExecRequest struct { } // ExecHandler handles exec requests via 
WebSocket for bidirectional streaming +// Note: Resolution is handled by ResolveResource middleware func (s *ApiService) ExecHandler(w http.ResponseWriter, r *http.Request) { ctx := r.Context() - log := logger.FromContext(ctx) startTime := time.Now() + log := logger.FromContext(ctx) - instanceID := chi.URLParam(r, "id") - - // Get instance - inst, err := s.InstanceManager.GetInstance(ctx, instanceID) - if err != nil { - if err == instances.ErrNotFound { - http.Error(w, `{"code":"not_found","message":"instance not found"}`, http.StatusNotFound) - return - } - log.ErrorContext(ctx, "failed to get instance", "error", err) - http.Error(w, `{"code":"internal_error","message":"failed to get instance"}`, http.StatusInternalServerError) - return - } + // Get instance resolved by middleware + inst := mw.GetResolvedInstance[instances.Instance](ctx) if inst.State != instances.StateRunning { http.Error(w, fmt.Sprintf(`{"code":"invalid_state","message":"instance must be running (current state: %s)"}`, inst.State), http.StatusConflict) diff --git a/cmd/api/api/exec_test.go b/cmd/api/api/exec_test.go index d40f48a7..219d2ea7 100644 --- a/cmd/api/api/exec_test.go +++ b/cmd/api/api/exec_test.go @@ -153,7 +153,7 @@ func TestExecInstanceNonTTY(t *testing.T) { // Cleanup t.Log("Cleaning up instance...") - delResp, err := svc.DeleteInstance(ctx(), oapi.DeleteInstanceRequestObject{ + delResp, err := svc.DeleteInstance(ctxWithInstance(svc, inst.Id), oapi.DeleteInstanceRequestObject{ Id: inst.Id, }) require.NoError(t, err) @@ -212,7 +212,7 @@ func TestExecWithDebianMinimal(t *testing.T) { // Cleanup on exit t.Cleanup(func() { t.Log("Cleaning up instance...") - svc.DeleteInstance(ctx(), oapi.DeleteInstanceRequestObject{Id: inst.Id}) + svc.DeleteInstance(ctxWithInstance(svc, inst.Id), oapi.DeleteInstanceRequestObject{Id: inst.Id}) }) // Get actual instance to access vsock fields diff --git a/cmd/api/api/images.go b/cmd/api/api/images.go index 68f4cc96..0c2842a2 100644 --- a/cmd/api/api/images.go +++ b/cmd/api/api/images.go @@ -6,6 +6,7 @@ import ( "github.com/onkernel/hypeman/lib/images" "github.com/onkernel/hypeman/lib/logger" + mw "github.com/onkernel/hypeman/lib/middleware" "github.com/onkernel/hypeman/lib/oapi" ) @@ -60,46 +61,26 @@ func (s *ApiService) CreateImage(ctx context.Context, request oapi.CreateImageRe return oapi.CreateImage202JSONResponse(imageToOAPI(*img)), nil } +// GetImage gets image details by name +// Note: Resolution is handled by ResolveResource middleware func (s *ApiService) GetImage(ctx context.Context, request oapi.GetImageRequestObject) (oapi.GetImageResponseObject, error) { - log := logger.FromContext(ctx) - - img, err := s.ImageManager.GetImage(ctx, request.Name) - if err != nil { - switch { - case errors.Is(err, images.ErrInvalidName), errors.Is(err, images.ErrNotFound): - return oapi.GetImage404JSONResponse{ - Code: "not_found", - Message: "image not found", - }, nil - default: - log.ErrorContext(ctx, "failed to get image", "error", err, "name", request.Name) - return oapi.GetImage500JSONResponse{ - Code: "internal_error", - Message: "failed to get image", - }, nil - } - } + img := mw.GetResolvedImage[images.Image](ctx) return oapi.GetImage200JSONResponse(imageToOAPI(*img)), nil } +// DeleteImage deletes an image by name +// Note: Resolution is handled by ResolveResource middleware func (s *ApiService) DeleteImage(ctx context.Context, request oapi.DeleteImageRequestObject) (oapi.DeleteImageResponseObject, error) { + img := mw.GetResolvedImage[images.Image](ctx) log := 
logger.FromContext(ctx) - err := s.ImageManager.DeleteImage(ctx, request.Name) + err := s.ImageManager.DeleteImage(ctx, img.Name) if err != nil { - switch { - case errors.Is(err, images.ErrInvalidName), errors.Is(err, images.ErrNotFound): - return oapi.DeleteImage404JSONResponse{ - Code: "not_found", - Message: "image not found", - }, nil - default: - log.ErrorContext(ctx, "failed to delete image", "error", err, "name", request.Name) - return oapi.DeleteImage500JSONResponse{ - Code: "internal_error", - Message: "failed to delete image", - }, nil - } + log.ErrorContext(ctx, "failed to delete image", "error", err) + return oapi.DeleteImage500JSONResponse{ + Code: "internal_error", + Message: "failed to delete image", + }, nil } return oapi.DeleteImage204Response{}, nil } diff --git a/cmd/api/api/images_test.go b/cmd/api/api/images_test.go index 168578d3..9d5f0590 100644 --- a/cmd/api/api/images_test.go +++ b/cmd/api/api/images_test.go @@ -26,15 +26,10 @@ func TestListImages_Empty(t *testing.T) { func TestGetImage_NotFound(t *testing.T) { svc := newTestService(t) - resp, err := svc.GetImage(ctx(), oapi.GetImageRequestObject{ - Name: "non-existent:latest", - }) - require.NoError(t, err) - - notFound, ok := resp.(oapi.GetImage404JSONResponse) - require.True(t, ok, "expected 404 response") - assert.Equal(t, "not_found", notFound.Code) - assert.Equal(t, "image not found", notFound.Message) + // With middleware, not-found would be handled before reaching handler. + // For this test, we call the manager directly to verify the error. + _, err := svc.ImageManager.GetImage(ctx(), "non-existent:latest") + require.Error(t, err) } func TestCreateImage_Async(t *testing.T) { @@ -69,7 +64,7 @@ func TestCreateImage_Async(t *testing.T) { img := oapi.Image(acceptedResp) require.Equal(t, "docker.io/library/alpine:latest", img.Name) require.NotEmpty(t, img.Digest, "digest should be populated immediately") - t.Logf("Image created: name=%s, digest=%s, initial_status=%s, queue_position=%v", + t.Logf("Image created: name=%s, digest=%s, initial_status=%s, queue_position=%v", img.Name, img.Digest, img.Status, img.QueuePosition) // Construct digest reference for polling: repository@digest @@ -81,9 +76,9 @@ func TestCreateImage_Async(t *testing.T) { t.Log("Polling for completion...") lastStatus := img.Status lastQueuePos := getQueuePos(img.QueuePosition) - + for i := 0; i < 3000; i++ { - getResp, err := svc.GetImage(ctx, oapi.GetImageRequestObject{Name: digestRef}) + getResp, err := svc.GetImage(ctxWithImage(svc, digestRef), oapi.GetImageRequestObject{Name: digestRef}) require.NoError(t, err) imgResp, ok := getResp.(oapi.GetImage200JSONResponse) @@ -93,16 +88,16 @@ func TestCreateImage_Async(t *testing.T) { currentImg := oapi.Image(imgResp) currentQueuePos := getQueuePos(currentImg.QueuePosition) - + // Log when status or queue position changes if currentImg.Status != lastStatus || currentQueuePos != lastQueuePos { t.Logf("Update: status=%s, queue_position=%v", currentImg.Status, formatQueuePos(currentImg.QueuePosition)) - + // Queue position should only decrease (never increase) if lastQueuePos > 0 && currentQueuePos > lastQueuePos { t.Errorf("Queue position increased: %d -> %d", lastQueuePos, currentQueuePos) } - + lastStatus = currentImg.Status lastQueuePos = currentQueuePos } @@ -194,7 +189,7 @@ func TestCreateImage_Idempotent(t *testing.T) { Body: &oapi.CreateImageRequest{Name: imageName}, }) require.NoError(t, err) - + accepted1, ok := resp1.(oapi.CreateImage202JSONResponse) require.True(t, ok, "expected 202 
response") img1 := oapi.Image(accepted1) @@ -211,17 +206,17 @@ func TestCreateImage_Idempotent(t *testing.T) { Body: &oapi.CreateImageRequest{Name: imageName}, }) require.NoError(t, err) - + accepted2, ok := resp2.(oapi.CreateImage202JSONResponse) require.True(t, ok, "expected 202 response") img2 := oapi.Image(accepted2) require.Equal(t, imageName, img2.Name) require.Equal(t, img1.Digest, img2.Digest, "should have same digest") - + // Log actual status to see what's happening - t.Logf("Second call: digest=%s, status=%s, queue_position=%v, error=%v", + t.Logf("Second call: digest=%s, status=%s, queue_position=%v, error=%v", img2.Digest, img2.Status, formatQueuePos(img2.QueuePosition), img2.Error) - + // If it failed, we need to see why if img2.Status == oapi.ImageStatus(images.StatusFailed) { if img2.Error != nil { @@ -229,7 +224,7 @@ func TestCreateImage_Idempotent(t *testing.T) { } t.Fatal("Build failed - this is the root cause of test failures") } - + require.Equal(t, oapi.ImageStatus(images.StatusPending), img2.Status) require.NotNil(t, img2.QueuePosition, "should have queue position") require.Equal(t, 1, *img2.QueuePosition, "should still be at position 1") @@ -243,14 +238,14 @@ func TestCreateImage_Idempotent(t *testing.T) { // Wait for build to complete - poll by digest (tag symlink doesn't exist until status=ready) t.Log("Waiting for build to complete...") for i := 0; i < 3000; i++ { - getResp, err := svc.GetImage(ctx, oapi.GetImageRequestObject{Name: digestRef}) + getResp, err := svc.GetImage(ctxWithImage(svc, digestRef), oapi.GetImageRequestObject{Name: digestRef}) require.NoError(t, err) imgResp, ok := getResp.(oapi.GetImage200JSONResponse) require.True(t, ok, "expected 200 response") currentImg := oapi.Image(imgResp) - + if currentImg.Status == oapi.ImageStatus(images.StatusReady) { t.Log("Build complete!") break @@ -273,7 +268,7 @@ func TestCreateImage_Idempotent(t *testing.T) { Body: &oapi.CreateImageRequest{Name: imageName}, }) require.NoError(t, err) - + accepted3, ok := resp3.(oapi.CreateImage202JSONResponse) require.True(t, ok, "expected 202 response") img3 := oapi.Image(accepted3) @@ -282,9 +277,9 @@ func TestCreateImage_Idempotent(t *testing.T) { require.Nil(t, img3.QueuePosition, "ready image should have no queue position") require.NotNil(t, img3.SizeBytes) require.Greater(t, *img3.SizeBytes, int64(0)) - t.Logf("Third call: status=%s, queue_position=%v, size=%d", + t.Logf("Third call: status=%s, queue_position=%v, size=%d", img3.Status, formatQueuePos(img3.QueuePosition), *img3.SizeBytes) - + t.Log("Idempotency test passed!") } @@ -301,5 +296,3 @@ func formatQueuePos(pos *int) string { } return fmt.Sprintf("%d", *pos) } - - diff --git a/cmd/api/api/ingress.go b/cmd/api/api/ingress.go index bdac447d..ea35cb10 100644 --- a/cmd/api/api/ingress.go +++ b/cmd/api/api/ingress.go @@ -6,6 +6,7 @@ import ( "github.com/onkernel/hypeman/lib/ingress" "github.com/onkernel/hypeman/lib/logger" + mw "github.com/onkernel/hypeman/lib/middleware" "github.com/onkernel/hypeman/lib/oapi" ) @@ -119,58 +120,25 @@ func (s *ApiService) CreateIngress(ctx context.Context, request oapi.CreateIngre } // GetIngress gets ingress details by ID, name, or ID prefix +// Note: Resolution is handled by ResolveResource middleware func (s *ApiService) GetIngress(ctx context.Context, request oapi.GetIngressRequestObject) (oapi.GetIngressResponseObject, error) { - log := logger.FromContext(ctx) - - ing, err := s.IngressManager.Get(ctx, request.Id) - if err != nil { - switch { - case errors.Is(err, 
ingress.ErrNotFound): - return oapi.GetIngress404JSONResponse{ - Code: "not_found", - Message: "ingress not found", - }, nil - case errors.Is(err, ingress.ErrAmbiguousName): - return oapi.GetIngress409JSONResponse{ - Code: "ambiguous_identifier", - Message: "identifier matches multiple ingresses, please use a more specific ID or name", - }, nil - default: - log.ErrorContext(ctx, "failed to get ingress", "error", err, "id", request.Id) - return oapi.GetIngress500JSONResponse{ - Code: "internal_error", - Message: "failed to get ingress", - }, nil - } - } - + ing := mw.GetResolvedIngress[ingress.Ingress](ctx) return oapi.GetIngress200JSONResponse(ingressToOAPI(*ing)), nil } // DeleteIngress deletes an ingress by ID, name, or ID prefix +// Note: Resolution is handled by ResolveResource middleware func (s *ApiService) DeleteIngress(ctx context.Context, request oapi.DeleteIngressRequestObject) (oapi.DeleteIngressResponseObject, error) { + ing := mw.GetResolvedIngress[ingress.Ingress](ctx) log := logger.FromContext(ctx) - err := s.IngressManager.Delete(ctx, request.Id) + err := s.IngressManager.Delete(ctx, ing.ID) if err != nil { - switch { - case errors.Is(err, ingress.ErrNotFound): - return oapi.DeleteIngress404JSONResponse{ - Code: "not_found", - Message: "ingress not found", - }, nil - case errors.Is(err, ingress.ErrAmbiguousName): - return oapi.DeleteIngress409JSONResponse{ - Code: "ambiguous_identifier", - Message: "identifier matches multiple ingresses, please use a more specific ID or name", - }, nil - default: - log.ErrorContext(ctx, "failed to delete ingress", "error", err, "id", request.Id) - return oapi.DeleteIngress500JSONResponse{ - Code: "internal_error", - Message: "failed to delete ingress", - }, nil - } + log.ErrorContext(ctx, "failed to delete ingress", "error", err) + return oapi.DeleteIngress500JSONResponse{ + Code: "internal_error", + Message: "failed to delete ingress", + }, nil } return oapi.DeleteIngress204Response{}, nil diff --git a/cmd/api/api/instances.go b/cmd/api/api/instances.go index 602b622e..96abc5a1 100644 --- a/cmd/api/api/instances.go +++ b/cmd/api/api/instances.go @@ -10,6 +10,7 @@ import ( "github.com/c2h5oh/datasize" "github.com/onkernel/hypeman/lib/instances" "github.com/onkernel/hypeman/lib/logger" + mw "github.com/onkernel/hypeman/lib/middleware" "github.com/onkernel/hypeman/lib/network" "github.com/onkernel/hypeman/lib/oapi" "github.com/samber/lo" @@ -172,64 +173,22 @@ func (s *ApiService) CreateInstance(ctx context.Context, request oapi.CreateInst // GetInstance gets instance details // The id parameter can be an instance ID, name, or ID prefix +// Note: Resolution is handled by ResolveResource middleware func (s *ApiService) GetInstance(ctx context.Context, request oapi.GetInstanceRequestObject) (oapi.GetInstanceResponseObject, error) { - log := logger.FromContext(ctx) - - inst, err := s.InstanceManager.GetInstance(ctx, request.Id) - if err != nil { - switch { - case errors.Is(err, instances.ErrNotFound): - return oapi.GetInstance404JSONResponse{ - Code: "not_found", - Message: "instance not found", - }, nil - case errors.Is(err, instances.ErrAmbiguousName): - return oapi.GetInstance404JSONResponse{ - Code: "ambiguous", - Message: "multiple instances match, use full instance ID", - }, nil - default: - log.ErrorContext(ctx, "failed to get instance", "error", err, "id", request.Id) - return oapi.GetInstance500JSONResponse{ - Code: "internal_error", - Message: "failed to get instance", - }, nil - } - } + inst := 
mw.GetResolvedInstance[instances.Instance](ctx) return oapi.GetInstance200JSONResponse(instanceToOAPI(*inst)), nil } // DeleteInstance stops and deletes an instance // The id parameter can be an instance ID, name, or ID prefix +// Note: Resolution is handled by ResolveResource middleware func (s *ApiService) DeleteInstance(ctx context.Context, request oapi.DeleteInstanceRequestObject) (oapi.DeleteInstanceResponseObject, error) { + inst := mw.GetResolvedInstance[instances.Instance](ctx) log := logger.FromContext(ctx) - // Resolve to get the actual instance ID - inst, err := s.InstanceManager.GetInstance(ctx, request.Id) - if err != nil { - switch { - case errors.Is(err, instances.ErrNotFound): - return oapi.DeleteInstance404JSONResponse{ - Code: "not_found", - Message: "instance not found", - }, nil - case errors.Is(err, instances.ErrAmbiguousName): - return oapi.DeleteInstance404JSONResponse{ - Code: "ambiguous", - Message: "multiple instances match, use full instance ID", - }, nil - default: - log.ErrorContext(ctx, "failed to get instance", "error", err, "id", request.Id) - return oapi.DeleteInstance500JSONResponse{ - Code: "internal_error", - Message: "failed to get instance", - }, nil - } - } - - err = s.InstanceManager.DeleteInstance(ctx, inst.Id) + err := s.InstanceManager.DeleteInstance(ctx, inst.Id) if err != nil { - log.ErrorContext(ctx, "failed to delete instance", "error", err, "id", inst.Id) + log.ErrorContext(ctx, "failed to delete instance", "error", err) return oapi.DeleteInstance500JSONResponse{ Code: "internal_error", Message: "failed to delete instance", @@ -240,33 +199,12 @@ func (s *ApiService) DeleteInstance(ctx context.Context, request oapi.DeleteInst // StandbyInstance puts an instance in standby (pause, snapshot, delete VMM) // The id parameter can be an instance ID, name, or ID prefix +// Note: Resolution is handled by ResolveResource middleware func (s *ApiService) StandbyInstance(ctx context.Context, request oapi.StandbyInstanceRequestObject) (oapi.StandbyInstanceResponseObject, error) { + inst := mw.GetResolvedInstance[instances.Instance](ctx) log := logger.FromContext(ctx) - // Resolve to get the actual instance ID - resolved, err := s.InstanceManager.GetInstance(ctx, request.Id) - if err != nil { - switch { - case errors.Is(err, instances.ErrNotFound): - return oapi.StandbyInstance404JSONResponse{ - Code: "not_found", - Message: "instance not found", - }, nil - case errors.Is(err, instances.ErrAmbiguousName): - return oapi.StandbyInstance404JSONResponse{ - Code: "ambiguous", - Message: "multiple instances match, use full instance ID", - }, nil - default: - log.ErrorContext(ctx, "failed to get instance", "error", err, "id", request.Id) - return oapi.StandbyInstance500JSONResponse{ - Code: "internal_error", - Message: "failed to get instance", - }, nil - } - } - - inst, err := s.InstanceManager.StandbyInstance(ctx, resolved.Id) + result, err := s.InstanceManager.StandbyInstance(ctx, inst.Id) if err != nil { switch { case errors.Is(err, instances.ErrInvalidState): @@ -275,45 +213,24 @@ func (s *ApiService) StandbyInstance(ctx context.Context, request oapi.StandbyIn Message: err.Error(), }, nil default: - log.ErrorContext(ctx, "failed to standby instance", "error", err, "id", resolved.Id) + log.ErrorContext(ctx, "failed to standby instance", "error", err) return oapi.StandbyInstance500JSONResponse{ Code: "internal_error", Message: "failed to standby instance", }, nil } } - return oapi.StandbyInstance200JSONResponse(instanceToOAPI(*inst)), nil + return 
oapi.StandbyInstance200JSONResponse(instanceToOAPI(*result)), nil } // RestoreInstance restores an instance from standby // The id parameter can be an instance ID, name, or ID prefix +// Note: Resolution is handled by ResolveResource middleware func (s *ApiService) RestoreInstance(ctx context.Context, request oapi.RestoreInstanceRequestObject) (oapi.RestoreInstanceResponseObject, error) { + inst := mw.GetResolvedInstance[instances.Instance](ctx) log := logger.FromContext(ctx) - // Resolve to get the actual instance ID - resolved, err := s.InstanceManager.GetInstance(ctx, request.Id) - if err != nil { - switch { - case errors.Is(err, instances.ErrNotFound): - return oapi.RestoreInstance404JSONResponse{ - Code: "not_found", - Message: "instance not found", - }, nil - case errors.Is(err, instances.ErrAmbiguousName): - return oapi.RestoreInstance404JSONResponse{ - Code: "ambiguous", - Message: "multiple instances match, use full instance ID", - }, nil - default: - log.ErrorContext(ctx, "failed to get instance", "error", err, "id", request.Id) - return oapi.RestoreInstance500JSONResponse{ - Code: "internal_error", - Message: "failed to get instance", - }, nil - } - } - - inst, err := s.InstanceManager.RestoreInstance(ctx, resolved.Id) + result, err := s.InstanceManager.RestoreInstance(ctx, inst.Id) if err != nil { switch { case errors.Is(err, instances.ErrInvalidState): @@ -322,45 +239,24 @@ func (s *ApiService) RestoreInstance(ctx context.Context, request oapi.RestoreIn Message: err.Error(), }, nil default: - log.ErrorContext(ctx, "failed to restore instance", "error", err, "id", resolved.Id) + log.ErrorContext(ctx, "failed to restore instance", "error", err) return oapi.RestoreInstance500JSONResponse{ Code: "internal_error", Message: "failed to restore instance", }, nil } } - return oapi.RestoreInstance200JSONResponse(instanceToOAPI(*inst)), nil + return oapi.RestoreInstance200JSONResponse(instanceToOAPI(*result)), nil } // StopInstance gracefully stops a running instance // The id parameter can be an instance ID, name, or ID prefix +// Note: Resolution is handled by ResolveResource middleware func (s *ApiService) StopInstance(ctx context.Context, request oapi.StopInstanceRequestObject) (oapi.StopInstanceResponseObject, error) { + inst := mw.GetResolvedInstance[instances.Instance](ctx) log := logger.FromContext(ctx) - // Resolve to get the actual instance ID - resolved, err := s.InstanceManager.GetInstance(ctx, request.Id) - if err != nil { - switch { - case errors.Is(err, instances.ErrNotFound): - return oapi.StopInstance404JSONResponse{ - Code: "not_found", - Message: "instance not found", - }, nil - case errors.Is(err, instances.ErrAmbiguousName): - return oapi.StopInstance404JSONResponse{ - Code: "ambiguous", - Message: "multiple instances match, use full instance ID", - }, nil - default: - log.ErrorContext(ctx, "failed to get instance", "error", err, "id", request.Id) - return oapi.StopInstance500JSONResponse{ - Code: "internal_error", - Message: "failed to get instance", - }, nil - } - } - - inst, err := s.InstanceManager.StopInstance(ctx, resolved.Id) + result, err := s.InstanceManager.StopInstance(ctx, inst.Id) if err != nil { switch { case errors.Is(err, instances.ErrInvalidState): @@ -369,45 +265,24 @@ func (s *ApiService) StopInstance(ctx context.Context, request oapi.StopInstance Message: err.Error(), }, nil default: - log.ErrorContext(ctx, "failed to stop instance", "error", err, "id", resolved.Id) + log.ErrorContext(ctx, "failed to stop instance", "error", err) return 
oapi.StopInstance500JSONResponse{ Code: "internal_error", Message: "failed to stop instance", }, nil } } - return oapi.StopInstance200JSONResponse(instanceToOAPI(*inst)), nil + return oapi.StopInstance200JSONResponse(instanceToOAPI(*result)), nil } // StartInstance starts a stopped instance // The id parameter can be an instance ID, name, or ID prefix +// Note: Resolution is handled by ResolveResource middleware func (s *ApiService) StartInstance(ctx context.Context, request oapi.StartInstanceRequestObject) (oapi.StartInstanceResponseObject, error) { + inst := mw.GetResolvedInstance[instances.Instance](ctx) log := logger.FromContext(ctx) - // Resolve to get the actual instance ID - resolved, err := s.InstanceManager.GetInstance(ctx, request.Id) - if err != nil { - switch { - case errors.Is(err, instances.ErrNotFound): - return oapi.StartInstance404JSONResponse{ - Code: "not_found", - Message: "instance not found", - }, nil - case errors.Is(err, instances.ErrAmbiguousName): - return oapi.StartInstance404JSONResponse{ - Code: "ambiguous", - Message: "multiple instances match, use full instance ID", - }, nil - default: - log.ErrorContext(ctx, "failed to get instance", "error", err, "id", request.Id) - return oapi.StartInstance500JSONResponse{ - Code: "internal_error", - Message: "failed to get instance", - }, nil - } - } - - inst, err := s.InstanceManager.StartInstance(ctx, resolved.Id) + result, err := s.InstanceManager.StartInstance(ctx, inst.Id) if err != nil { switch { case errors.Is(err, instances.ErrInvalidState): @@ -416,14 +291,14 @@ func (s *ApiService) StartInstance(ctx context.Context, request oapi.StartInstan Message: err.Error(), }, nil default: - log.ErrorContext(ctx, "failed to start instance", "error", err, "id", resolved.Id) + log.ErrorContext(ctx, "failed to start instance", "error", err) return oapi.StartInstance500JSONResponse{ Code: "internal_error", Message: "failed to start instance", }, nil } } - return oapi.StartInstance200JSONResponse(instanceToOAPI(*inst)), nil + return oapi.StartInstance200JSONResponse(instanceToOAPI(*result)), nil } // logsStreamResponse implements oapi.GetInstanceLogsResponseObject with proper SSE flushing @@ -455,7 +330,10 @@ func (r logsStreamResponse) VisitGetInstanceLogsResponse(w http.ResponseWriter) // With follow=false (default), streams last N lines then closes // With follow=true, streams last N lines then continues following new output // The id parameter can be an instance ID, name, or ID prefix +// Note: Resolution is handled by ResolveResource middleware func (s *ApiService) GetInstanceLogs(ctx context.Context, request oapi.GetInstanceLogsRequestObject) (oapi.GetInstanceLogsResponseObject, error) { + inst := mw.GetResolvedInstance[instances.Instance](ctx) + tail := 100 if request.Params.Tail != nil { tail = *request.Params.Tail @@ -479,29 +357,7 @@ func (s *ApiService) GetInstanceLogs(ctx context.Context, request oapi.GetInstan } } - // Resolve to get the actual instance ID - resolved, err := s.InstanceManager.GetInstance(ctx, request.Id) - if err != nil { - switch { - case errors.Is(err, instances.ErrNotFound): - return oapi.GetInstanceLogs404JSONResponse{ - Code: "not_found", - Message: "instance not found", - }, nil - case errors.Is(err, instances.ErrAmbiguousName): - return oapi.GetInstanceLogs404JSONResponse{ - Code: "ambiguous", - Message: "multiple instances match, use full instance ID", - }, nil - default: - return oapi.GetInstanceLogs500JSONResponse{ - Code: "internal_error", - Message: "failed to get instance", - }, nil - } 
- } - - logChan, err := s.InstanceManager.StreamInstanceLogs(ctx, resolved.Id, tail, follow, source) + logChan, err := s.InstanceManager.StreamInstanceLogs(ctx, inst.Id, tail, follow, source) if err != nil { switch { case errors.Is(err, instances.ErrTailNotFound): diff --git a/cmd/api/api/instances_test.go b/cmd/api/api/instances_test.go index ce1801e6..82f38861 100644 --- a/cmd/api/api/instances_test.go +++ b/cmd/api/api/instances_test.go @@ -26,14 +26,10 @@ func TestListInstances_Empty(t *testing.T) { func TestGetInstance_NotFound(t *testing.T) { svc := newTestService(t) - resp, err := svc.GetInstance(ctx(), oapi.GetInstanceRequestObject{ - Id: "non-existent", - }) - require.NoError(t, err) - - notFound, ok := resp.(oapi.GetInstance404JSONResponse) - require.True(t, ok, "expected 404 response") - assert.Equal(t, "not_found", notFound.Code) + // With middleware, not-found would be handled before reaching handler. + // For this test, we call the manager directly to verify the error type. + _, err := svc.InstanceManager.GetInstance(ctx(), "non-existent") + require.Error(t, err) } func TestCreateInstance_ParsesHumanReadableSizes(t *testing.T) { @@ -174,7 +170,7 @@ func TestInstanceLifecycle_StopStart(t *testing.T) { // 2. Stop the instance t.Log("Stopping instance...") - stopResp, err := svc.StopInstance(ctx(), oapi.StopInstanceRequestObject{Id: instanceID}) + stopResp, err := svc.StopInstance(ctxWithInstance(svc, instanceID), oapi.StopInstanceRequestObject{Id: instanceID}) require.NoError(t, err) stopped, ok := stopResp.(oapi.StopInstance200JSONResponse) @@ -184,7 +180,7 @@ func TestInstanceLifecycle_StopStart(t *testing.T) { // 3. Start the instance t.Log("Starting instance...") - startResp, err := svc.StartInstance(ctx(), oapi.StartInstanceRequestObject{Id: instanceID}) + startResp, err := svc.StartInstance(ctxWithInstance(svc, instanceID), oapi.StartInstanceRequestObject{Id: instanceID}) require.NoError(t, err) started, ok := startResp.(oapi.StartInstance200JSONResponse) @@ -196,7 +192,7 @@ func TestInstanceLifecycle_StopStart(t *testing.T) { // 4. 
Cleanup - delete the instance t.Log("Deleting instance...") - deleteResp, err := svc.DeleteInstance(ctx(), oapi.DeleteInstanceRequestObject{Id: instanceID}) + deleteResp, err := svc.DeleteInstance(ctxWithInstance(svc, instanceID), oapi.DeleteInstanceRequestObject{Id: instanceID}) require.NoError(t, err) _, ok = deleteResp.(oapi.DeleteInstance204Response) require.True(t, ok, "expected 204 response for delete") @@ -208,16 +204,15 @@ func waitForState(t *testing.T, svc *ApiService, instanceID string, expectedStat t.Helper() deadline := time.Now().Add(timeout) for time.Now().Before(deadline) { - resp, err := svc.GetInstance(ctx(), oapi.GetInstanceRequestObject{Id: instanceID}) + // Use manager directly to poll state (middleware not needed for polling) + inst, err := svc.InstanceManager.GetInstance(ctx(), instanceID) require.NoError(t, err) - if inst, ok := resp.(oapi.GetInstance200JSONResponse); ok { if string(inst.State) == expectedState { t.Logf("Instance reached %s state", expectedState) return } t.Logf("Instance state: %s (waiting for %s)", inst.State, expectedState) - } time.Sleep(100 * time.Millisecond) } t.Fatalf("Timeout waiting for instance to reach %s state", expectedState) diff --git a/cmd/api/api/registry_test.go b/cmd/api/api/registry_test.go index 45d20af6..1e9e2554 100644 --- a/cmd/api/api/registry_test.go +++ b/cmd/api/api/registry_test.go @@ -150,11 +150,11 @@ func TestRegistryPushAndCreateInstance(t *testing.T) { assert.Equal(t, "test-pushed-image", instance.Name) t.Logf("Instance created: %s (state: %s)", instance.Id, instance.State) - // Verify instance reaches Running state + // Verify instance reaches Running state (use manager directly for polling) deadline := time.Now().Add(30 * time.Second) for time.Now().Before(deadline) { - resp, _ := svc.GetInstance(ctx(), oapi.GetInstanceRequestObject{Id: instance.Id}) - if inst, ok := resp.(oapi.GetInstance200JSONResponse); ok { + inst, err := svc.InstanceManager.GetInstance(ctx(), instance.Id) + if err == nil { if inst.State == "Running" { t.Log("Instance is running!") return // Success! @@ -559,42 +559,41 @@ func (t *loggingTransport) RoundTrip(req *http.Request) (*http.Response, error) return t.transport.RoundTrip(req) } -// waitForImageReady polls GetImage until the image reaches Ready status. +// waitForImageReady polls ImageManager until the image reaches Ready status. // Returns the image response on success, fails the test on error or timeout. 
func waitForImageReady(t *testing.T, svc *ApiService, imageName string, timeout time.Duration) oapi.GetImage200JSONResponse { t.Helper() t.Logf("Waiting for image %s to be ready...", imageName) deadline := time.Now().Add(timeout) - var lastStatus oapi.ImageStatus + var lastStatus string var lastError string for time.Now().Before(deadline) { - resp, err := svc.GetImage(ctx(), oapi.GetImageRequestObject{Name: imageName}) + img, err := svc.ImageManager.GetImage(ctx(), imageName) if err != nil { time.Sleep(1 * time.Second) continue } - imgResp, ok := resp.(oapi.GetImage200JSONResponse) - if !ok { - time.Sleep(1 * time.Second) - continue + lastStatus = string(img.Status) + if img.Error != nil { + lastError = *img.Error } - lastStatus = imgResp.Status - if imgResp.Error != nil { - lastError = *imgResp.Error - } - - switch imgResp.Status { - case oapi.Ready: - t.Logf("Image ready: %s (digest=%s)", imgResp.Name, imgResp.Digest) - return imgResp - case oapi.Failed: + switch img.Status { + case "ready": + t.Logf("Image ready: %s (digest=%s)", img.Name, img.Digest) + return oapi.GetImage200JSONResponse{ + Name: img.Name, + Digest: img.Digest, + Status: oapi.ImageStatus(img.Status), + SizeBytes: img.SizeBytes, + } + case "failed": t.Fatalf("Image conversion failed: %s", lastError) default: - t.Logf("Image status: %s", imgResp.Status) + t.Logf("Image status: %s", img.Status) } time.Sleep(2 * time.Second) } diff --git a/cmd/api/api/resolvers.go b/cmd/api/api/resolvers.go new file mode 100644 index 00000000..30480c4d --- /dev/null +++ b/cmd/api/api/resolvers.go @@ -0,0 +1,104 @@ +package api + +import ( + "context" + "errors" + "net/http" + + "github.com/onkernel/hypeman/lib/images" + "github.com/onkernel/hypeman/lib/ingress" + "github.com/onkernel/hypeman/lib/instances" + "github.com/onkernel/hypeman/lib/middleware" + "github.com/onkernel/hypeman/lib/volumes" +) + +// InstanceResolver adapts instances.Manager to middleware.ResourceResolver. +type InstanceResolver struct { + Manager instances.Manager +} + +func (r InstanceResolver) Resolve(ctx context.Context, idOrName string) (string, any, error) { + inst, err := r.Manager.GetInstance(ctx, idOrName) + if err != nil { + return "", nil, err + } + return inst.Id, inst, nil +} + +// VolumeResolver adapts volumes.Manager to middleware.ResourceResolver. +type VolumeResolver struct { + Manager volumes.Manager +} + +func (r VolumeResolver) Resolve(ctx context.Context, idOrName string) (string, any, error) { + // Try by ID first, then by name + vol, err := r.Manager.GetVolume(ctx, idOrName) + if errors.Is(err, volumes.ErrNotFound) { + vol, err = r.Manager.GetVolumeByName(ctx, idOrName) + } + if err != nil { + return "", nil, err + } + return vol.Id, vol, nil +} + +// IngressResolver adapts ingress.Manager to middleware.ResourceResolver. +type IngressResolver struct { + Manager ingress.Manager +} + +func (r IngressResolver) Resolve(ctx context.Context, idOrName string) (string, any, error) { + ing, err := r.Manager.Get(ctx, idOrName) + if err != nil { + return "", nil, err + } + return ing.ID, ing, nil +} + +// ImageResolver adapts images.Manager to middleware.ResourceResolver. +// Note: Images are looked up by name (OCI reference), not ID. 
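+// The resolved "ID" returned below is therefore simply the image name.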
+type ImageResolver struct { + Manager images.Manager +} + +func (r ImageResolver) Resolve(ctx context.Context, name string) (string, any, error) { + img, err := r.Manager.GetImage(ctx, name) + if err != nil { + return "", nil, err + } + return img.Name, img, nil +} + +// NewResolvers creates Resolvers from the ApiService managers. +func (s *ApiService) NewResolvers() middleware.Resolvers { + return middleware.Resolvers{ + Instance: InstanceResolver{Manager: s.InstanceManager}, + Volume: VolumeResolver{Manager: s.VolumeManager}, + Ingress: IngressResolver{Manager: s.IngressManager}, + Image: ImageResolver{Manager: s.ImageManager}, + } +} + +// ResolverErrorResponder handles resolver errors by writing appropriate HTTP responses. +func ResolverErrorResponder(w http.ResponseWriter, err error, lookup string) { + w.Header().Set("Content-Type", "application/json") + + switch { + case errors.Is(err, instances.ErrNotFound), + errors.Is(err, volumes.ErrNotFound), + errors.Is(err, ingress.ErrNotFound), + errors.Is(err, images.ErrNotFound): + w.WriteHeader(http.StatusNotFound) + w.Write([]byte(`{"code":"not_found","message":"resource not found"}`)) + + case errors.Is(err, instances.ErrAmbiguousName), + errors.Is(err, volumes.ErrAmbiguousName), + errors.Is(err, ingress.ErrAmbiguousName): + w.WriteHeader(http.StatusNotFound) + w.Write([]byte(`{"code":"ambiguous","message":"multiple resources match, use full ID"}`)) + + default: + w.WriteHeader(http.StatusInternalServerError) + w.Write([]byte(`{"code":"internal_error","message":"failed to resolve resource"}`)) + } +} diff --git a/cmd/api/api/volumes.go b/cmd/api/api/volumes.go index a66d743b..e98eef07 100644 --- a/cmd/api/api/volumes.go +++ b/cmd/api/api/volumes.go @@ -8,6 +8,7 @@ import ( "strconv" "github.com/onkernel/hypeman/lib/logger" + mw "github.com/onkernel/hypeman/lib/middleware" "github.com/onkernel/hypeman/lib/oapi" "github.com/onkernel/hypeman/lib/volumes" ) @@ -199,77 +200,29 @@ func (s *ApiService) createVolumeFromMultipart(ctx context.Context, multipartRea // GetVolume gets volume details // The id parameter can be either a volume ID or name +// Note: Resolution is handled by ResolveResource middleware func (s *ApiService) GetVolume(ctx context.Context, request oapi.GetVolumeRequestObject) (oapi.GetVolumeResponseObject, error) { - log := logger.FromContext(ctx) - - // Try lookup by ID first - vol, err := s.VolumeManager.GetVolume(ctx, request.Id) - if errors.Is(err, volumes.ErrNotFound) { - // Try lookup by name - vol, err = s.VolumeManager.GetVolumeByName(ctx, request.Id) - } - - if err != nil { - switch { - case errors.Is(err, volumes.ErrNotFound): - return oapi.GetVolume404JSONResponse{ - Code: "not_found", - Message: "volume not found", - }, nil - case errors.Is(err, volumes.ErrAmbiguousName): - return oapi.GetVolume404JSONResponse{ - Code: "ambiguous_name", - Message: "multiple volumes have this name, use volume ID instead", - }, nil - default: - log.ErrorContext(ctx, "failed to get volume", "error", err, "id", request.Id) - return oapi.GetVolume500JSONResponse{ - Code: "internal_error", - Message: "failed to get volume", - }, nil - } - } + vol := mw.GetResolvedVolume[volumes.Volume](ctx) return oapi.GetVolume200JSONResponse(volumeToOAPI(*vol)), nil } // DeleteVolume deletes a volume // The id parameter can be either a volume ID or name +// Note: Resolution is handled by ResolveResource middleware func (s *ApiService) DeleteVolume(ctx context.Context, request oapi.DeleteVolumeRequestObject) (oapi.DeleteVolumeResponseObject, error) { 
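+	// ResolveResource has already rejected unresolvable identifiers with 404
+	// (or the ambiguous-name error), so the resolved volume is present here.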
+ vol := mw.GetResolvedVolume[volumes.Volume](ctx) log := logger.FromContext(ctx) - // Resolve ID - try direct ID first, then name lookup - volumeID := request.Id - _, err := s.VolumeManager.GetVolume(ctx, request.Id) - if errors.Is(err, volumes.ErrNotFound) { - // Try lookup by name - vol, nameErr := s.VolumeManager.GetVolumeByName(ctx, request.Id) - if nameErr == nil { - volumeID = vol.Id - } else if errors.Is(nameErr, volumes.ErrAmbiguousName) { - return oapi.DeleteVolume404JSONResponse{ - Code: "ambiguous_name", - Message: "multiple volumes have this name, use volume ID instead", - }, nil - } - // If name lookup also fails with ErrNotFound, we'll proceed with original ID - // and let DeleteVolume return the proper 404 - } - - err = s.VolumeManager.DeleteVolume(ctx, volumeID) + err := s.VolumeManager.DeleteVolume(ctx, vol.Id) if err != nil { switch { - case errors.Is(err, volumes.ErrNotFound): - return oapi.DeleteVolume404JSONResponse{ - Code: "not_found", - Message: "volume not found", - }, nil case errors.Is(err, volumes.ErrInUse): return oapi.DeleteVolume409JSONResponse{ Code: "conflict", Message: "volume is in use by an instance", }, nil default: - log.ErrorContext(ctx, "failed to delete volume", "error", err, "id", request.Id) + log.ErrorContext(ctx, "failed to delete volume", "error", err) return oapi.DeleteVolume500JSONResponse{ Code: "internal_error", Message: "failed to delete volume", diff --git a/cmd/api/api/volumes_test.go b/cmd/api/api/volumes_test.go index 5b26d1d8..179d5ede 100644 --- a/cmd/api/api/volumes_test.go +++ b/cmd/api/api/volumes_test.go @@ -22,14 +22,10 @@ func TestListVolumes_Empty(t *testing.T) { func TestGetVolume_NotFound(t *testing.T) { svc := newTestService(t) - resp, err := svc.GetVolume(ctx(), oapi.GetVolumeRequestObject{ - Id: "non-existent", - }) - require.NoError(t, err) - - notFound, ok := resp.(oapi.GetVolume404JSONResponse) - require.True(t, ok, "expected 404 response") - assert.Equal(t, "not_found", notFound.Code) + // With middleware, not-found would be handled before reaching handler. + // For this test, we call the manager directly to verify the error. 
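+	// VolumeResolver in resolvers.go relies on this same ErrNotFound to
+	// decide when to fall back to a lookup by name.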
+ _, err := svc.VolumeManager.GetVolume(ctx(), "non-existent") + require.Error(t, err) } func TestGetVolume_ByName(t *testing.T) { @@ -45,8 +41,8 @@ func TestGetVolume_ByName(t *testing.T) { require.NoError(t, err) created := createResp.(oapi.CreateVolume201JSONResponse) - // Get by name (not ID) - resp, err := svc.GetVolume(ctx(), oapi.GetVolumeRequestObject{ + // Get by name (not ID) - use ctxWithVolume to simulate middleware + resp, err := svc.GetVolume(ctxWithVolume(svc, "my-data"), oapi.GetVolumeRequestObject{ Id: "my-data", // using name instead of ID }) require.NoError(t, err) @@ -69,12 +65,11 @@ func TestDeleteVolume_ByName(t *testing.T) { }) require.NoError(t, err) - // Delete by name - resp, err := svc.DeleteVolume(ctx(), oapi.DeleteVolumeRequestObject{ + // Delete by name - use ctxWithVolume to simulate middleware + resp, err := svc.DeleteVolume(ctxWithVolume(svc, "to-delete"), oapi.DeleteVolumeRequestObject{ Id: "to-delete", }) require.NoError(t, err) _, ok := resp.(oapi.DeleteVolume204Response) assert.True(t, ok, "expected 204 response") } - diff --git a/cmd/api/main.go b/cmd/api/main.go index 1b502b89..e2cb704d 100644 --- a/cmd/api/main.go +++ b/cmd/api/main.go @@ -219,6 +219,7 @@ func run() error { mw.InjectLogger(logger), mw.AccessLogger(accessLogger), mw.JwtAuth(app.Config.JwtSecret), + mw.ResolveResource(app.ApiService.NewResolvers(), api.ResolverErrorResponder), ).Get("/instances/{id}/exec", app.ApiService.ExecHandler) // OCI Distribution registry endpoints for image push (outside OpenAPI spec) @@ -273,6 +274,10 @@ func run() error { } r.Use(nethttpmiddleware.OapiRequestValidatorWithOptions(spec, validatorOptions)) + // Resource resolver middleware - resolves IDs/names/prefixes before handlers + // Enriches context with resolved resource and logger with resolved ID + r.Use(mw.ResolveResource(app.ApiService.NewResolvers(), api.ResolverErrorResponder)) + // Setup strict handler strictHandler := oapi.NewStrictHandler(app.ApiService, nil) diff --git a/lib/instances/manager.go b/lib/instances/manager.go index bce2d04e..3b95a1cb 100644 --- a/lib/instances/manager.go +++ b/lib/instances/manager.go @@ -221,7 +221,7 @@ func (m *manager) RotateLogs(ctx context.Context, maxBytes int64, maxFiles int) m.paths.InstanceHypemanLog(inst.Id), } for _, logPath := range logPaths { - if err := rotateLogIfNeeded(logPath, maxBytes, maxFiles); err != nil { + if err := rotateLogIfNeeded(logPath, maxBytes, maxFiles); err != nil { lastErr = err // Continue with other logs, but track error } } diff --git a/lib/instances/query.go b/lib/instances/query.go index 28acc32c..e7f02bf6 100644 --- a/lib/instances/query.go +++ b/lib/instances/query.go @@ -155,15 +155,15 @@ func (m *manager) listInstances(ctx context.Context) ([]Instance, error) { // getInstance returns a single instance by ID func (m *manager) getInstance(ctx context.Context, id string) (*Instance, error) { log := logger.FromContext(ctx) - log.DebugContext(ctx, "getting instance", "id", id) + log.DebugContext(ctx, "getting instance", "lookup", id) meta, err := m.loadMetadata(id) if err != nil { - log.ErrorContext(ctx, "failed to load instance metadata", "id", id, "error", err) + log.DebugContext(ctx, "failed to load instance metadata", "lookup", id, "error", err) return nil, err } inst := m.toInstance(ctx, meta) - log.DebugContext(ctx, "retrieved instance", "id", id, "state", inst.State) + log.DebugContext(ctx, "retrieved instance", "id", inst.Id, "state", inst.State) return &inst, nil } diff --git a/lib/logger/instance_handler.go 
b/lib/logger/instance_handler.go index 677946dd..c4c4fd9f 100644 --- a/lib/logger/instance_handler.go +++ b/lib/logger/instance_handler.go @@ -75,6 +75,14 @@ func (h *InstanceLogHandler) writeToInstanceLog(instanceID string, r slog.Record return } + // Check if the instance directory exists - if not, this "id" isn't an instance ID + // (could be an ingress ID, volume ID, etc.). Skip to avoid creating orphan directories. + dir := filepath.Dir(logPath) + instanceDir := filepath.Dir(dir) // logs dir -> instance dir + if _, err := os.Stat(instanceDir); os.IsNotExist(err) { + return // not a valid instance, skip silently + } + // Format log line: timestamp LEVEL message key=value key=value... timestamp := r.Time.Format(time.RFC3339) level := r.Level.String() @@ -102,8 +110,7 @@ func (h *InstanceLogHandler) writeToInstanceLog(instanceID string, r slog.Record } line += "\n" - // Ensure directory exists - dir := filepath.Dir(logPath) + // Ensure logs directory exists (dir was already computed above) if err := os.MkdirAll(dir, 0755); err != nil { // Use package-level slog (not our handler) to avoid recursion. // No "id" attr means this won't trigger writeToInstanceLog. diff --git a/lib/middleware/README.md b/lib/middleware/README.md new file mode 100644 index 00000000..5a1e5610 --- /dev/null +++ b/lib/middleware/README.md @@ -0,0 +1,21 @@ +# middleware + +HTTP middleware for the hypeman API. + +## Authentication + +JWT bearer token validation for protected endpoints. Extracts user identity and adds it to the request context. + +## Resource Resolution + +Automatically resolves user-provided identifiers (IDs, names, or prefixes) to full resource objects before handlers run. This enables: + +- **Flexible lookups**: Users can reference resources by full ID, name, or ID prefix +- **Consistent error handling**: Returns 404 for not-found, handles ambiguous matches +- **Automatic logging enrichment**: The resolved resource ID is added to the request logger + +Handlers can trust that if they're called, the resource exists and is available via `mw.GetResolvedInstance[T](ctx)` etc. + +## Observability + +OpenTelemetry instrumentation for HTTP requests, including request counts, latencies, and status codes. diff --git a/lib/middleware/resolve.go b/lib/middleware/resolve.go new file mode 100644 index 00000000..ab2e594b --- /dev/null +++ b/lib/middleware/resolve.go @@ -0,0 +1,192 @@ +// Package middleware provides HTTP middleware for the hypeman API. +package middleware + +import ( + "context" + "net/http" + "strings" + + "github.com/go-chi/chi/v5" + "github.com/onkernel/hypeman/lib/logger" +) + +// ResourceResolver is implemented by managers that support lookup by ID, name, or prefix. +type ResourceResolver interface { + // Resolve looks up a resource by ID, name, or ID prefix. + // Returns the resolved ID, the resource, and any error. + // Should return ErrNotFound if not found, ErrAmbiguousName if prefix matches multiple. + Resolve(ctx context.Context, idOrName string) (id string, resource any, err error) +} + +// resolvedResourceKey is the context key for storing the resolved resource. +type resolvedResourceKey struct{ resourceType string } + +// ResolvedResource holds the resolved resource ID and value. +type ResolvedResource struct { + ID string + Resource any +} + +// Resolvers holds resolvers for different resource types. 
+type Resolvers struct { + Instance ResourceResolver + Volume ResourceResolver + Ingress ResourceResolver + Image ResourceResolver +} + +// ErrorResponder handles resolver errors by writing HTTP responses. +type ErrorResponder func(w http.ResponseWriter, err error, lookup string) + +// ResolveResource creates middleware that resolves resource IDs before handlers run. +// It detects the resource type from the URL path and uses the appropriate resolver. +// The resolved resource is stored in context and the logger is enriched with the ID. +// +// Supported paths: +// - /instances/{id}/* -> uses Instance resolver +// - /volumes/{id}/* -> uses Volume resolver +// - /ingresses/{id}/* -> uses Ingress resolver +// - /images/{name}/* -> uses Image resolver (by name, not ID) +func ResolveResource(resolvers Resolvers, errResponder ErrorResponder) func(http.Handler) http.Handler { + return func(next http.Handler) http.Handler { + return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + ctx := r.Context() + path := r.URL.Path + + // Determine resource type and resolver based on path + var resolver ResourceResolver + var resourceType string + var paramName string + + switch { + case strings.HasPrefix(path, "/instances/"): + resolver = resolvers.Instance + resourceType = "instance" + paramName = "id" + case strings.HasPrefix(path, "/volumes/"): + resolver = resolvers.Volume + resourceType = "volume" + paramName = "id" + case strings.HasPrefix(path, "/ingresses/"): + resolver = resolvers.Ingress + resourceType = "ingress" + paramName = "id" + case strings.HasPrefix(path, "/images/"): + resolver = resolvers.Image + resourceType = "image" + paramName = "name" + default: + // No resource to resolve (e.g., list endpoints, health) + next.ServeHTTP(w, r) + return + } + + // Skip if no resolver configured for this resource type + if resolver == nil { + next.ServeHTTP(w, r) + return + } + + // Get the ID parameter from the URL + idOrName := chi.URLParam(r, paramName) + if idOrName == "" { + // No ID in path (e.g., list or create endpoint) + next.ServeHTTP(w, r) + return + } + + // Resolve the resource + resolvedID, resource, err := resolver.Resolve(ctx, idOrName) + if err != nil { + errResponder(w, err, idOrName) + return + } + + // Store resolved resource in context + ctx = context.WithValue(ctx, resolvedResourceKey{resourceType}, ResolvedResource{ + ID: resolvedID, + Resource: resource, + }) + + // Enrich logger with resolved ID + log := logger.FromContext(ctx).With("id", resolvedID) + ctx = logger.AddToContext(ctx, log) + + next.ServeHTTP(w, r.WithContext(ctx)) + }) + } +} + +// GetResolvedInstance retrieves the resolved instance from context. +// Returns nil if not found or wrong type. +func GetResolvedInstance[T any](ctx context.Context) *T { + return getResolved[T](ctx, "instance") +} + +// GetResolvedVolume retrieves the resolved volume from context. +// Returns nil if not found or wrong type. +func GetResolvedVolume[T any](ctx context.Context) *T { + return getResolved[T](ctx, "volume") +} + +// GetResolvedIngress retrieves the resolved ingress from context. +// Returns nil if not found or wrong type. +func GetResolvedIngress[T any](ctx context.Context) *T { + return getResolved[T](ctx, "ingress") +} + +// GetResolvedImage retrieves the resolved image from context. +// Returns nil if not found or wrong type. +func GetResolvedImage[T any](ctx context.Context) *T { + return getResolved[T](ctx, "image") +} + +// GetResolvedID retrieves just the resolved ID for a resource type. 
+func GetResolvedID(ctx context.Context, resourceType string) string { + if resolved, ok := ctx.Value(resolvedResourceKey{resourceType}).(ResolvedResource); ok { + return resolved.ID + } + return "" +} + +// getResolved is a generic helper to extract typed resources from context. +func getResolved[T any](ctx context.Context, resourceType string) *T { + resolved, ok := ctx.Value(resolvedResourceKey{resourceType}).(ResolvedResource) + if !ok { + return nil + } + + // Handle pointer types + if typed, ok := resolved.Resource.(*T); ok { + return typed + } + + // Handle value types + if typed, ok := resolved.Resource.(T); ok { + return &typed + } + + return nil +} + +// Test helpers for setting resolved resources in context (used by tests) + +// WithResolvedInstance returns a context with the given instance set as resolved. +func WithResolvedInstance(ctx context.Context, id string, inst any) context.Context { + return context.WithValue(ctx, resolvedResourceKey{"instance"}, ResolvedResource{ID: id, Resource: inst}) +} + +// WithResolvedVolume returns a context with the given volume set as resolved. +func WithResolvedVolume(ctx context.Context, id string, vol any) context.Context { + return context.WithValue(ctx, resolvedResourceKey{"volume"}, ResolvedResource{ID: id, Resource: vol}) +} + +// WithResolvedIngress returns a context with the given ingress set as resolved. +func WithResolvedIngress(ctx context.Context, id string, ing any) context.Context { + return context.WithValue(ctx, resolvedResourceKey{"ingress"}, ResolvedResource{ID: id, Resource: ing}) +} + +// WithResolvedImage returns a context with the given image set as resolved. +func WithResolvedImage(ctx context.Context, id string, img any) context.Context { + return context.WithValue(ctx, resolvedResourceKey{"image"}, ResolvedResource{ID: id, Resource: img}) +} From dfddd92f24e85a5f848835657237d3ef3b0ffdf6 Mon Sep 17 00:00:00 2001 From: Steven Miller Date: Wed, 10 Dec 2025 17:17:25 -0500 Subject: [PATCH 12/14] Fix response code --- cmd/api/api/resolvers.go | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/cmd/api/api/resolvers.go b/cmd/api/api/resolvers.go index 30480c4d..3815f3b5 100644 --- a/cmd/api/api/resolvers.go +++ b/cmd/api/api/resolvers.go @@ -97,6 +97,10 @@ func ResolverErrorResponder(w http.ResponseWriter, err error, lookup string) { w.WriteHeader(http.StatusNotFound) w.Write([]byte(`{"code":"ambiguous","message":"multiple resources match, use full ID"}`)) + case errors.Is(err, images.ErrInvalidName): + w.WriteHeader(http.StatusBadRequest) + w.Write([]byte(`{"code":"invalid_name","message":"invalid image reference"}`)) + default: w.WriteHeader(http.StatusInternalServerError) w.Write([]byte(`{"code":"internal_error","message":"failed to resolve resource"}`)) From 0382d44eb5e3a4192871f71914b4a9f8c5d73a54 Mon Sep 17 00:00:00 2001 From: Steven Miller Date: Wed, 10 Dec 2025 17:29:06 -0500 Subject: [PATCH 13/14] Extra careful checks 1. Fixed `ErrAmbiguousName` to return 409 Conflict (instead of 404) Changed the HTTP status code from `http.StatusNotFound` to `http.StatusConflict` for ambiguous name errors, restoring the previous ingress behavior. 2. Added nil checks with 500 error responses for all `GetResolved*` calls Added defensive nil checks to all 14 handlers that use the resolved resource from middleware. 
If the middleware didn't set the resource (which shouldn't happen in production but could in tests), the handler now returns a 500 error with `"resource not resolved"` message instead of panicking with a nil pointer dereference. --- cmd/api/api/exec.go | 4 ++++ cmd/api/api/images.go | 12 ++++++++++++ cmd/api/api/ingress.go | 12 ++++++++++++ cmd/api/api/instances.go | 42 ++++++++++++++++++++++++++++++++++++++++ cmd/api/api/resolvers.go | 2 +- cmd/api/api/volumes.go | 12 ++++++++++++ 6 files changed, 83 insertions(+), 1 deletion(-) diff --git a/cmd/api/api/exec.go b/cmd/api/api/exec.go index cc6dc29f..b67259cc 100644 --- a/cmd/api/api/exec.go +++ b/cmd/api/api/exec.go @@ -44,6 +44,10 @@ func (s *ApiService) ExecHandler(w http.ResponseWriter, r *http.Request) { // Get instance resolved by middleware inst := mw.GetResolvedInstance[instances.Instance](ctx) + if inst == nil { + http.Error(w, `{"code":"internal_error","message":"resource not resolved"}`, http.StatusInternalServerError) + return + } if inst.State != instances.StateRunning { http.Error(w, fmt.Sprintf(`{"code":"invalid_state","message":"instance must be running (current state: %s)"}`, inst.State), http.StatusConflict) diff --git a/cmd/api/api/images.go b/cmd/api/api/images.go index 0c2842a2..a97fa59b 100644 --- a/cmd/api/api/images.go +++ b/cmd/api/api/images.go @@ -65,6 +65,12 @@ func (s *ApiService) CreateImage(ctx context.Context, request oapi.CreateImageRe // Note: Resolution is handled by ResolveResource middleware func (s *ApiService) GetImage(ctx context.Context, request oapi.GetImageRequestObject) (oapi.GetImageResponseObject, error) { img := mw.GetResolvedImage[images.Image](ctx) + if img == nil { + return oapi.GetImage500JSONResponse{ + Code: "internal_error", + Message: "resource not resolved", + }, nil + } return oapi.GetImage200JSONResponse(imageToOAPI(*img)), nil } @@ -72,6 +78,12 @@ func (s *ApiService) GetImage(ctx context.Context, request oapi.GetImageRequestO // Note: Resolution is handled by ResolveResource middleware func (s *ApiService) DeleteImage(ctx context.Context, request oapi.DeleteImageRequestObject) (oapi.DeleteImageResponseObject, error) { img := mw.GetResolvedImage[images.Image](ctx) + if img == nil { + return oapi.DeleteImage500JSONResponse{ + Code: "internal_error", + Message: "resource not resolved", + }, nil + } log := logger.FromContext(ctx) err := s.ImageManager.DeleteImage(ctx, img.Name) diff --git a/cmd/api/api/ingress.go b/cmd/api/api/ingress.go index ea35cb10..7b9ba881 100644 --- a/cmd/api/api/ingress.go +++ b/cmd/api/api/ingress.go @@ -123,6 +123,12 @@ func (s *ApiService) CreateIngress(ctx context.Context, request oapi.CreateIngre // Note: Resolution is handled by ResolveResource middleware func (s *ApiService) GetIngress(ctx context.Context, request oapi.GetIngressRequestObject) (oapi.GetIngressResponseObject, error) { ing := mw.GetResolvedIngress[ingress.Ingress](ctx) + if ing == nil { + return oapi.GetIngress500JSONResponse{ + Code: "internal_error", + Message: "resource not resolved", + }, nil + } return oapi.GetIngress200JSONResponse(ingressToOAPI(*ing)), nil } @@ -130,6 +136,12 @@ func (s *ApiService) GetIngress(ctx context.Context, request oapi.GetIngressRequ // Note: Resolution is handled by ResolveResource middleware func (s *ApiService) DeleteIngress(ctx context.Context, request oapi.DeleteIngressRequestObject) (oapi.DeleteIngressResponseObject, error) { ing := mw.GetResolvedIngress[ingress.Ingress](ctx) + if ing == nil { + return oapi.DeleteIngress500JSONResponse{ + Code: 
"internal_error", + Message: "resource not resolved", + }, nil + } log := logger.FromContext(ctx) err := s.IngressManager.Delete(ctx, ing.ID) diff --git a/cmd/api/api/instances.go b/cmd/api/api/instances.go index 96abc5a1..8adb8c8d 100644 --- a/cmd/api/api/instances.go +++ b/cmd/api/api/instances.go @@ -176,6 +176,12 @@ func (s *ApiService) CreateInstance(ctx context.Context, request oapi.CreateInst // Note: Resolution is handled by ResolveResource middleware func (s *ApiService) GetInstance(ctx context.Context, request oapi.GetInstanceRequestObject) (oapi.GetInstanceResponseObject, error) { inst := mw.GetResolvedInstance[instances.Instance](ctx) + if inst == nil { + return oapi.GetInstance500JSONResponse{ + Code: "internal_error", + Message: "resource not resolved", + }, nil + } return oapi.GetInstance200JSONResponse(instanceToOAPI(*inst)), nil } @@ -184,6 +190,12 @@ func (s *ApiService) GetInstance(ctx context.Context, request oapi.GetInstanceRe // Note: Resolution is handled by ResolveResource middleware func (s *ApiService) DeleteInstance(ctx context.Context, request oapi.DeleteInstanceRequestObject) (oapi.DeleteInstanceResponseObject, error) { inst := mw.GetResolvedInstance[instances.Instance](ctx) + if inst == nil { + return oapi.DeleteInstance500JSONResponse{ + Code: "internal_error", + Message: "resource not resolved", + }, nil + } log := logger.FromContext(ctx) err := s.InstanceManager.DeleteInstance(ctx, inst.Id) @@ -202,6 +214,12 @@ func (s *ApiService) DeleteInstance(ctx context.Context, request oapi.DeleteInst // Note: Resolution is handled by ResolveResource middleware func (s *ApiService) StandbyInstance(ctx context.Context, request oapi.StandbyInstanceRequestObject) (oapi.StandbyInstanceResponseObject, error) { inst := mw.GetResolvedInstance[instances.Instance](ctx) + if inst == nil { + return oapi.StandbyInstance500JSONResponse{ + Code: "internal_error", + Message: "resource not resolved", + }, nil + } log := logger.FromContext(ctx) result, err := s.InstanceManager.StandbyInstance(ctx, inst.Id) @@ -228,6 +246,12 @@ func (s *ApiService) StandbyInstance(ctx context.Context, request oapi.StandbyIn // Note: Resolution is handled by ResolveResource middleware func (s *ApiService) RestoreInstance(ctx context.Context, request oapi.RestoreInstanceRequestObject) (oapi.RestoreInstanceResponseObject, error) { inst := mw.GetResolvedInstance[instances.Instance](ctx) + if inst == nil { + return oapi.RestoreInstance500JSONResponse{ + Code: "internal_error", + Message: "resource not resolved", + }, nil + } log := logger.FromContext(ctx) result, err := s.InstanceManager.RestoreInstance(ctx, inst.Id) @@ -254,6 +278,12 @@ func (s *ApiService) RestoreInstance(ctx context.Context, request oapi.RestoreIn // Note: Resolution is handled by ResolveResource middleware func (s *ApiService) StopInstance(ctx context.Context, request oapi.StopInstanceRequestObject) (oapi.StopInstanceResponseObject, error) { inst := mw.GetResolvedInstance[instances.Instance](ctx) + if inst == nil { + return oapi.StopInstance500JSONResponse{ + Code: "internal_error", + Message: "resource not resolved", + }, nil + } log := logger.FromContext(ctx) result, err := s.InstanceManager.StopInstance(ctx, inst.Id) @@ -280,6 +310,12 @@ func (s *ApiService) StopInstance(ctx context.Context, request oapi.StopInstance // Note: Resolution is handled by ResolveResource middleware func (s *ApiService) StartInstance(ctx context.Context, request oapi.StartInstanceRequestObject) (oapi.StartInstanceResponseObject, error) { inst := 
mw.GetResolvedInstance[instances.Instance](ctx) + if inst == nil { + return oapi.StartInstance500JSONResponse{ + Code: "internal_error", + Message: "resource not resolved", + }, nil + } log := logger.FromContext(ctx) result, err := s.InstanceManager.StartInstance(ctx, inst.Id) @@ -333,6 +369,12 @@ func (r logsStreamResponse) VisitGetInstanceLogsResponse(w http.ResponseWriter) // Note: Resolution is handled by ResolveResource middleware func (s *ApiService) GetInstanceLogs(ctx context.Context, request oapi.GetInstanceLogsRequestObject) (oapi.GetInstanceLogsResponseObject, error) { inst := mw.GetResolvedInstance[instances.Instance](ctx) + if inst == nil { + return oapi.GetInstanceLogs500JSONResponse{ + Code: "internal_error", + Message: "resource not resolved", + }, nil + } tail := 100 if request.Params.Tail != nil { diff --git a/cmd/api/api/resolvers.go b/cmd/api/api/resolvers.go index 3815f3b5..bde8d8e8 100644 --- a/cmd/api/api/resolvers.go +++ b/cmd/api/api/resolvers.go @@ -94,7 +94,7 @@ func ResolverErrorResponder(w http.ResponseWriter, err error, lookup string) { case errors.Is(err, instances.ErrAmbiguousName), errors.Is(err, volumes.ErrAmbiguousName), errors.Is(err, ingress.ErrAmbiguousName): - w.WriteHeader(http.StatusNotFound) + w.WriteHeader(http.StatusConflict) w.Write([]byte(`{"code":"ambiguous","message":"multiple resources match, use full ID"}`)) case errors.Is(err, images.ErrInvalidName): diff --git a/cmd/api/api/volumes.go b/cmd/api/api/volumes.go index e98eef07..3354c002 100644 --- a/cmd/api/api/volumes.go +++ b/cmd/api/api/volumes.go @@ -203,6 +203,12 @@ func (s *ApiService) createVolumeFromMultipart(ctx context.Context, multipartRea // Note: Resolution is handled by ResolveResource middleware func (s *ApiService) GetVolume(ctx context.Context, request oapi.GetVolumeRequestObject) (oapi.GetVolumeResponseObject, error) { vol := mw.GetResolvedVolume[volumes.Volume](ctx) + if vol == nil { + return oapi.GetVolume500JSONResponse{ + Code: "internal_error", + Message: "resource not resolved", + }, nil + } return oapi.GetVolume200JSONResponse(volumeToOAPI(*vol)), nil } @@ -211,6 +217,12 @@ func (s *ApiService) GetVolume(ctx context.Context, request oapi.GetVolumeReques // Note: Resolution is handled by ResolveResource middleware func (s *ApiService) DeleteVolume(ctx context.Context, request oapi.DeleteVolumeRequestObject) (oapi.DeleteVolumeResponseObject, error) { vol := mw.GetResolvedVolume[volumes.Volume](ctx) + if vol == nil { + return oapi.DeleteVolume500JSONResponse{ + Code: "internal_error", + Message: "resource not resolved", + }, nil + } log := logger.FromContext(ctx) err := s.VolumeManager.DeleteVolume(ctx, vol.Id) From 228f6b14c19d28c53c6264ed22cb353994084d68 Mon Sep 17 00:00:00 2001 From: Steven Miller Date: Wed, 10 Dec 2025 18:30:57 -0500 Subject: [PATCH 14/14] resource_id instead of id, fix target in ingress to use instance name $ hypeman ingress create q --hostname 'nginx-test' --port 80 --host-port 8081 Creating ingress nginx-test-wmbi... 
e5hbxzwc6cq0tnjchw861exg

$ hypeman ingress list
ID            NAME             HOSTNAME    TARGET         TLS  CREATED
e5hbxzwc6cq0  nginx-test-wmbi  nginx-test  nginx-gi7w:80  no   4 seconds ago

Fix: the TARGET column now shows "nginx-gi7w:80" instead of "g:80".
---
 cmd/api/api/exec.go               |  6 +--
 lib/ingress/manager.go            | 69 +++++++++++++++++++++++++++----
 lib/ingress/manager_test.go       | 31 +++++++++++---
 lib/instances/create.go           | 60 +++++++++++++--------------
 lib/instances/delete.go           | 38 ++++++++---------
 lib/instances/ingress_resolver.go |  9 ++++
 lib/instances/logs.go             |  6 +--
 lib/instances/manager_test.go     |  8 ++++
 lib/instances/query.go            | 12 +++---
 lib/instances/restore.go          | 42 +++++++++----------
 lib/instances/standby.go          | 46 ++++++++++-----------
 lib/instances/start.go            | 26 ++++++------
 lib/instances/stop.go             | 24 +++++------
 lib/logger/README.md              | 14 ++++---
 lib/logger/instance_handler.go    | 10 ++---
 lib/middleware/resolve.go         |  9 +++-
 lib/network/allocate.go           |  6 +--
 lib/network/derive.go             |  2 +-
 18 files changed, 260 insertions(+), 158 deletions(-)

diff --git a/cmd/api/api/exec.go b/cmd/api/api/exec.go
index b67259cc..9d529fc7 100644
--- a/cmd/api/api/exec.go
+++ b/cmd/api/api/exec.go
@@ -99,7 +99,7 @@ func (s *ApiService) ExecHandler(w http.ResponseWriter, r *http.Request) {

 	// Audit log: exec session started
 	log.InfoContext(ctx, "exec session started",
-		"id", inst.Id,
+		"instance_id", inst.Id,
 		"subject", subject,
 		"command", execReq.Command,
 		"tty", execReq.TTY,
@@ -127,7 +127,7 @@ func (s *ApiService) ExecHandler(w http.ResponseWriter, r *http.Request) {
 	if err != nil {
 		log.ErrorContext(ctx, "exec failed",
 			"error", err,
-			"id", inst.Id,
+			"instance_id", inst.Id,
 			"subject", subject,
 			"duration_ms", duration.Milliseconds(),
 		)
@@ -142,7 +142,7 @@ func (s *ApiService) ExecHandler(w http.ResponseWriter, r *http.Request) {

 	// Audit log: exec session ended
 	log.InfoContext(ctx, "exec session ended",
-		"id", inst.Id,
+		"instance_id", inst.Id,
 		"subject", subject,
 		"exit_code", exit.Code,
 		"duration_ms", duration.Milliseconds(),
diff --git a/lib/ingress/manager.go b/lib/ingress/manager.go
index 8622e5a6..579af21e 100644
--- a/lib/ingress/manager.go
+++ b/lib/ingress/manager.go
@@ -26,6 +26,10 @@ type InstanceResolver interface {

 	// InstanceExists checks if an instance with the given name or ID exists.
 	InstanceExists(ctx context.Context, nameOrID string) (bool, error)
+
+	// ResolveInstance resolves an instance name, ID, or ID prefix to its canonical name and ID.
+	// Returns (name, id, nil) if found, or an error if the instance doesn't exist.
+	ResolveInstance(ctx context.Context, nameOrID string) (name string, id string, err error)
 }

 // Manager is the interface for managing ingress resources.
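For illustration, a minimal in-memory implementation of the extended
InstanceResolver interface might look like the sketch below (the mapResolver
type and its fixture values are hypothetical and not part of this patch; it
mirrors the shape of the mockInstanceResolver updated in manager_test.go
further down):

	package main

	import (
		"context"
		"errors"
		"fmt"
	)

	var errInstanceNotFound = errors.New("instance not found")

	// instanceRecord pairs an instance's canonical name, ID, and IP.
	type instanceRecord struct {
		name, id, ip string
	}

	// mapResolver indexes records under both name and ID, so either
	// identifier resolves to the same record.
	type mapResolver struct {
		byKey map[string]instanceRecord
	}

	func (r *mapResolver) ResolveInstanceIP(ctx context.Context, nameOrID string) (string, error) {
		rec, ok := r.byKey[nameOrID]
		if !ok {
			return "", errInstanceNotFound
		}
		return rec.ip, nil
	}

	func (r *mapResolver) InstanceExists(ctx context.Context, nameOrID string) (bool, error) {
		_, ok := r.byKey[nameOrID]
		return ok, nil
	}

	// ResolveInstance returns the canonical (name, id) pair regardless of
	// which identifier the caller supplied.
	func (r *mapResolver) ResolveInstance(ctx context.Context, nameOrID string) (string, string, error) {
		rec, ok := r.byKey[nameOrID]
		if !ok {
			return "", "", fmt.Errorf("%w: %s", errInstanceNotFound, nameOrID)
		}
		return rec.name, rec.id, nil
	}

	func main() {
		rec := instanceRecord{name: "nginx-gi7w", id: "inst-0001", ip: "10.0.0.7"}
		r := &mapResolver{byKey: map[string]instanceRecord{rec.name: rec, rec.id: rec}}
		name, id, _ := r.ResolveInstance(context.Background(), "inst-0001")
		fmt.Println(name, id) // prints: nginx-gi7w inst-0001
	}

This is why the Create path in the next hunk can rewrite rule.Target.Instance
to the resolved name while tracking the resolved ID separately for hypeman.log
routing.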
@@ -288,18 +292,20 @@ func (m *manager) Create(ctx context.Context, req CreateIngressRequest) (*Ingres } } - // Validate that all target instances exist (only for literal hostnames) + // Validate that all target instances exist and resolve their names (only for literal hostnames) // Pattern hostnames have dynamic target instances that can't be validated at creation time - for _, rule := range req.Rules { + var resolvedInstanceIDs []string // Track IDs for logging (used for hypeman.log routing) + for i, rule := range req.Rules { if !rule.Match.IsPattern() { - // Literal hostname - validate instance exists - exists, err := m.instanceResolver.InstanceExists(ctx, rule.Target.Instance) + // Literal hostname - validate instance exists and resolve to canonical name + ID + resolvedName, resolvedID, err := m.instanceResolver.ResolveInstance(ctx, rule.Target.Instance) if err != nil { - return nil, fmt.Errorf("check instance %q: %w", rule.Target.Instance, err) - } - if !exists { return nil, fmt.Errorf("%w: instance %q not found", ErrInstanceNotFound, rule.Target.Instance) } + // Update the rule with the resolved instance name (human-readable for config) + req.Rules[i].Target.Instance = resolvedName + // Track ID for logging (instance directories are by ID) + resolvedInstanceIDs = append(resolvedInstanceIDs, resolvedID) } // For pattern hostnames, instance validation happens at request time via the upstream resolver } @@ -370,6 +376,23 @@ func (m *manager) Create(ctx context.Context, req CreateIngressRequest) (*Ingres return nil, fmt.Errorf("write config: %w", err) } + // Log creation with ingress_id and instance_id(s) for audit trail + // Each resolved instance gets the log in their hypeman.log (routed by instance_id) + for _, instanceID := range resolvedInstanceIDs { + log.InfoContext(ctx, "ingress created", + "ingress_id", ingress.ID, + "ingress_name", ingress.Name, + "instance_id", instanceID, + ) + } + // If no literal hostnames (all patterns), still log the creation + if len(resolvedInstanceIDs) == 0 { + log.InfoContext(ctx, "ingress created", + "ingress_id", ingress.ID, + "ingress_name", ingress.Name, + ) + } + return &ingress, nil } @@ -481,6 +504,38 @@ func (m *manager) Delete(ctx context.Context, idOrName string) error { log.ErrorContext(ctx, "failed to write config after delete", "error", err) } + // Log deletion with instance_id(s) for audit trail + // Resolve instance names to IDs for hypeman.log routing + hasLiteralHostname := false + for _, rule := range ingress.Rules { + if !rule.Match.IsPattern() { + hasLiteralHostname = true + // Resolve instance name to ID for logging (instance may have been deleted, so ignore errors) + _, instanceID, err := m.instanceResolver.ResolveInstance(ctx, rule.Target.Instance) + if err == nil { + log.InfoContext(ctx, "ingress deleted", + "ingress_id", ingress.ID, + "ingress_name", ingress.Name, + "instance_id", instanceID, + ) + } else { + // Instance doesn't exist anymore, log without instance_id + log.InfoContext(ctx, "ingress deleted", + "ingress_id", ingress.ID, + "ingress_name", ingress.Name, + "instance_name", rule.Target.Instance, + ) + } + } + } + // If no literal hostnames (all patterns), still log the deletion + if !hasLiteralHostname { + log.InfoContext(ctx, "ingress deleted", + "ingress_id", ingress.ID, + "ingress_name", ingress.Name, + ) + } + return nil } diff --git a/lib/ingress/manager_test.go b/lib/ingress/manager_test.go index dd4c723b..ceda6ff2 100644 --- a/lib/ingress/manager_test.go +++ b/lib/ingress/manager_test.go @@ -13,25 +13,38 @@ 
import ( // mockInstanceResolver implements InstanceResolver for testing type mockInstanceResolver struct { - instances map[string]string // instance name/ID -> IP + instances map[string]mockInstance // instance name/ID -> mock data +} + +type mockInstance struct { + name string + id string + ip string } func newMockResolver() *mockInstanceResolver { return &mockInstanceResolver{ - instances: make(map[string]string), + instances: make(map[string]mockInstance), } } func (m *mockInstanceResolver) AddInstance(nameOrID, ip string) { - m.instances[nameOrID] = ip + // For backwards compatibility, use the nameOrID as both name and id + m.instances[nameOrID] = mockInstance{name: nameOrID, id: nameOrID, ip: ip} +} + +func (m *mockInstanceResolver) AddInstanceFull(name, id, ip string) { + // Add with explicit name and id + m.instances[name] = mockInstance{name: name, id: id, ip: ip} + m.instances[id] = mockInstance{name: name, id: id, ip: ip} } func (m *mockInstanceResolver) ResolveInstanceIP(ctx context.Context, nameOrID string) (string, error) { - ip, ok := m.instances[nameOrID] + inst, ok := m.instances[nameOrID] if !ok { return "", ErrInstanceNotFound } - return ip, nil + return inst.ip, nil } func (m *mockInstanceResolver) InstanceExists(ctx context.Context, nameOrID string) (bool, error) { @@ -39,6 +52,14 @@ func (m *mockInstanceResolver) InstanceExists(ctx context.Context, nameOrID stri return ok, nil } +func (m *mockInstanceResolver) ResolveInstance(ctx context.Context, nameOrID string) (string, string, error) { + inst, ok := m.instances[nameOrID] + if !ok { + return "", "", ErrInstanceNotFound + } + return inst.name, inst.id, nil +} + func setupTestManager(t *testing.T) (Manager, *mockInstanceResolver, *paths.Paths, func()) { t.Helper() diff --git a/lib/instances/create.go b/lib/instances/create.go index 546e163c..0e023073 100644 --- a/lib/instances/create.go +++ b/lib/instances/create.go @@ -129,12 +129,12 @@ func (m *manager) createInstance( // 3. Generate instance ID (CUID2 for secure, collision-resistant IDs) id := cuid2.Generate() - log.DebugContext(ctx, "generated instance ID", "id", id) + log.DebugContext(ctx, "generated instance ID", "instance_id", id) // 4. Generate vsock configuration vsockCID := generateVsockCID(id) vsockSocket := m.paths.InstanceVsockSocket(id) - log.DebugContext(ctx, "generated vsock config", "id", id, "cid", vsockCID) + log.DebugContext(ctx, "generated vsock config", "instance_id", id, "cid", vsockCID) // 5. Check instance doesn't already exist if _, err := m.loadMetadata(id); err == nil { @@ -224,35 +224,35 @@ func (m *manager) createInstance( // Setup cleanup stack for automatic rollback on errors cu := cleanup.Make(func() { - log.DebugContext(ctx, "cleaning up instance on error", "id", id) + log.DebugContext(ctx, "cleaning up instance on error", "instance_id", id) m.deleteInstanceData(id) }) defer cu.Clean() // 8. Ensure directories - log.DebugContext(ctx, "creating instance directories", "id", id) + log.DebugContext(ctx, "creating instance directories", "instance_id", id) if err := m.ensureDirectories(id); err != nil { - log.ErrorContext(ctx, "failed to create directories", "id", id, "error", err) + log.ErrorContext(ctx, "failed to create directories", "instance_id", id, "error", err) return nil, fmt.Errorf("ensure directories: %w", err) } // 9. 
Create overlay disk with specified size - log.DebugContext(ctx, "creating overlay disk", "id", id, "size_bytes", stored.OverlaySize) + log.DebugContext(ctx, "creating overlay disk", "instance_id", id, "size_bytes", stored.OverlaySize) if err := m.createOverlayDisk(id, stored.OverlaySize); err != nil { - log.ErrorContext(ctx, "failed to create overlay disk", "id", id, "error", err) + log.ErrorContext(ctx, "failed to create overlay disk", "instance_id", id, "error", err) return nil, fmt.Errorf("create overlay disk: %w", err) } // 10. Allocate network (if network enabled) var netConfig *network.NetworkConfig if networkName != "" { - log.DebugContext(ctx, "allocating network", "id", id, "network", networkName) + log.DebugContext(ctx, "allocating network", "instance_id", id, "network", networkName) netConfig, err = m.networkManager.CreateAllocation(ctx, network.AllocateRequest{ InstanceID: id, InstanceName: req.Name, }) if err != nil { - log.ErrorContext(ctx, "failed to allocate network", "id", id, "network", networkName, "error", err) + log.ErrorContext(ctx, "failed to allocate network", "instance_id", id, "network", networkName, "error", err) return nil, fmt.Errorf("allocate network: %w", err) } // Store IP/MAC in metadata (persisted with instance) @@ -270,12 +270,12 @@ func (m *manager) createInstance( // 10.5. Validate and attach volumes if len(req.Volumes) > 0 { - log.DebugContext(ctx, "validating volumes", "id", id, "count", len(req.Volumes)) + log.DebugContext(ctx, "validating volumes", "instance_id", id, "count", len(req.Volumes)) for _, volAttach := range req.Volumes { // Check volume exists _, err := m.volumeManager.GetVolume(ctx, volAttach.VolumeID) if err != nil { - log.ErrorContext(ctx, "volume not found", "id", id, "volume_id", volAttach.VolumeID, "error", err) + log.ErrorContext(ctx, "volume not found", "instance_id", id, "volume_id", volAttach.VolumeID, "error", err) return nil, fmt.Errorf("volume %s: %w", volAttach.VolumeID, err) } @@ -285,7 +285,7 @@ func (m *manager) createInstance( MountPath: volAttach.MountPath, Readonly: volAttach.Readonly, }); err != nil { - log.ErrorContext(ctx, "failed to attach volume", "id", id, "volume_id", volAttach.VolumeID, "error", err) + log.ErrorContext(ctx, "failed to attach volume", "instance_id", id, "volume_id", volAttach.VolumeID, "error", err) return nil, fmt.Errorf("attach volume %s: %w", volAttach.VolumeID, err) } @@ -297,9 +297,9 @@ func (m *manager) createInstance( // Create overlay disk for volumes with overlay enabled if volAttach.Overlay { - log.DebugContext(ctx, "creating volume overlay disk", "id", id, "volume_id", volAttach.VolumeID, "size", volAttach.OverlaySize) + log.DebugContext(ctx, "creating volume overlay disk", "instance_id", id, "volume_id", volAttach.VolumeID, "size", volAttach.OverlaySize) if err := m.createVolumeOverlayDisk(id, volAttach.VolumeID, volAttach.OverlaySize); err != nil { - log.ErrorContext(ctx, "failed to create volume overlay disk", "id", id, "volume_id", volAttach.VolumeID, "error", err) + log.ErrorContext(ctx, "failed to create volume overlay disk", "instance_id", id, "volume_id", volAttach.VolumeID, "error", err) return nil, fmt.Errorf("create volume overlay disk %s: %w", volAttach.VolumeID, err) } } @@ -310,24 +310,24 @@ func (m *manager) createInstance( // 11. 
Create config disk (needs Instance for buildVMConfig) inst := &Instance{StoredMetadata: *stored} - log.DebugContext(ctx, "creating config disk", "id", id) + log.DebugContext(ctx, "creating config disk", "instance_id", id) if err := m.createConfigDisk(inst, imageInfo, netConfig); err != nil { - log.ErrorContext(ctx, "failed to create config disk", "id", id, "error", err) + log.ErrorContext(ctx, "failed to create config disk", "instance_id", id, "error", err) return nil, fmt.Errorf("create config disk: %w", err) } // 12. Save metadata - log.DebugContext(ctx, "saving instance metadata", "id", id) + log.DebugContext(ctx, "saving instance metadata", "instance_id", id) meta := &metadata{StoredMetadata: *stored} if err := m.saveMetadata(meta); err != nil { - log.ErrorContext(ctx, "failed to save metadata", "id", id, "error", err) + log.ErrorContext(ctx, "failed to save metadata", "instance_id", id, "error", err) return nil, fmt.Errorf("save metadata: %w", err) } // 13. Start VMM and boot VM - log.InfoContext(ctx, "starting VMM and booting VM", "id", id) + log.InfoContext(ctx, "starting VMM and booting VM", "instance_id", id) if err := m.startAndBootVM(ctx, stored, imageInfo, netConfig); err != nil { - log.ErrorContext(ctx, "failed to start and boot VM", "id", id, "error", err) + log.ErrorContext(ctx, "failed to start and boot VM", "instance_id", id, "error", err) return nil, err } @@ -339,7 +339,7 @@ func (m *manager) createInstance( if err := m.saveMetadata(meta); err != nil { // VM is running but metadata failed - log but don't fail // Instance is recoverable, state will be derived - log.WarnContext(ctx, "failed to update metadata after VM start", "id", id, "error", err) + log.WarnContext(ctx, "failed to update metadata after VM start", "instance_id", id, "error", err) } // Success - release cleanup stack (prevent cleanup) @@ -353,7 +353,7 @@ func (m *manager) createInstance( // Return instance with derived state finalInst := m.toInstance(ctx, meta) - log.InfoContext(ctx, "instance created successfully", "id", id, "name", req.Name, "state", finalInst.State) + log.InfoContext(ctx, "instance created successfully", "instance_id", id, "name", req.Name, "state", finalInst.State) return &finalInst, nil } @@ -469,7 +469,7 @@ func (m *manager) startAndBootVM( log := logger.FromContext(ctx) // Start VMM process and capture PID - log.DebugContext(ctx, "starting VMM process", "id", stored.Id, "version", stored.CHVersion) + log.DebugContext(ctx, "starting VMM process", "instance_id", stored.Id, "version", stored.CHVersion) pid, err := vmm.StartProcess(ctx, m.paths, stored.CHVersion, stored.SocketPath) if err != nil { return fmt.Errorf("start vmm: %w", err) @@ -477,7 +477,7 @@ func (m *manager) startAndBootVM( // Store the PID for later cleanup stored.CHPID = &pid - log.DebugContext(ctx, "VMM process started", "id", stored.Id, "pid", pid) + log.DebugContext(ctx, "VMM process started", "instance_id", stored.Id, "pid", pid) // Create VMM client client, err := vmm.NewVMM(stored.SocketPath) @@ -493,7 +493,7 @@ func (m *manager) startAndBootVM( } // Create VM in VMM - log.DebugContext(ctx, "creating VM in VMM", "id", stored.Id) + log.DebugContext(ctx, "creating VM in VMM", "instance_id", stored.Id) createResp, err := client.CreateVMWithResponse(ctx, vmConfig) if err != nil { return fmt.Errorf("create vm: %w", err) @@ -501,12 +501,12 @@ func (m *manager) startAndBootVM( if createResp.StatusCode() != 204 { // Include response body for debugging body := string(createResp.Body) - log.ErrorContext(ctx, "create VM 
failed", "id", stored.Id, "status", createResp.StatusCode(), "body", body) + log.ErrorContext(ctx, "create VM failed", "instance_id", stored.Id, "status", createResp.StatusCode(), "body", body) return fmt.Errorf("create vm failed with status %d: %s", createResp.StatusCode(), body) } // Transition: Created → Running (boot VM) - log.DebugContext(ctx, "booting VM", "id", stored.Id) + log.DebugContext(ctx, "booting VM", "instance_id", stored.Id) bootResp, err := client.BootVMWithResponse(ctx) if err != nil { // Try to cleanup @@ -518,18 +518,18 @@ func (m *manager) startAndBootVM( client.DeleteVMWithResponse(ctx) client.ShutdownVMMWithResponse(ctx) body := string(bootResp.Body) - log.ErrorContext(ctx, "boot VM failed", "id", stored.Id, "status", bootResp.StatusCode(), "body", body) + log.ErrorContext(ctx, "boot VM failed", "instance_id", stored.Id, "status", bootResp.StatusCode(), "body", body) return fmt.Errorf("boot vm failed with status %d: %s", bootResp.StatusCode(), body) } // Optional: Expand memory to max if hotplug configured if inst.HotplugSize > 0 { totalBytes := inst.Size + inst.HotplugSize - log.DebugContext(ctx, "expanding VM memory", "id", stored.Id, "total_bytes", totalBytes) + log.DebugContext(ctx, "expanding VM memory", "instance_id", stored.Id, "total_bytes", totalBytes) resizeConfig := vmm.VmResize{DesiredRam: &totalBytes} // Best effort, ignore errors if resp, err := client.PutVmResizeWithResponse(ctx, resizeConfig); err != nil || resp.StatusCode() != 204 { - log.WarnContext(ctx, "failed to expand VM memory", "id", stored.Id, "error", err) + log.WarnContext(ctx, "failed to expand VM memory", "instance_id", stored.Id, "error", err) } } diff --git a/lib/instances/delete.go b/lib/instances/delete.go index f6be1fae..24d8ddbd 100644 --- a/lib/instances/delete.go +++ b/lib/instances/delete.go @@ -17,67 +17,67 @@ func (m *manager) deleteInstance( id string, ) error { log := logger.FromContext(ctx) - log.InfoContext(ctx, "deleting instance", "id", id) + log.InfoContext(ctx, "deleting instance", "instance_id", id) // 1. Load instance meta, err := m.loadMetadata(id) if err != nil { - log.ErrorContext(ctx, "failed to load instance metadata", "id", id, "error", err) + log.ErrorContext(ctx, "failed to load instance metadata", "instance_id", id, "error", err) return err } inst := m.toInstance(ctx, meta) - log.DebugContext(ctx, "loaded instance", "id", id, "state", inst.State) + log.DebugContext(ctx, "loaded instance", "instance_id", id, "state", inst.State) // 2. Get network allocation BEFORE killing VMM (while we can still query it) var networkAlloc *network.Allocation if inst.NetworkEnabled { - log.DebugContext(ctx, "getting network allocation", "id", id) + log.DebugContext(ctx, "getting network allocation", "instance_id", id) networkAlloc, err = m.networkManager.GetAllocation(ctx, id) if err != nil { - log.WarnContext(ctx, "failed to get network allocation, will still attempt cleanup", "id", id, "error", err) + log.WarnContext(ctx, "failed to get network allocation, will still attempt cleanup", "instance_id", id, "error", err) } } // 3. 
If VMM might be running, force kill it // Also attempt kill for StateUnknown since we can't be sure if VMM is running if inst.State.RequiresVMM() || inst.State == StateUnknown { - log.DebugContext(ctx, "stopping VMM", "id", id, "state", inst.State) + log.DebugContext(ctx, "stopping VMM", "instance_id", id, "state", inst.State) if err := m.killVMM(ctx, &inst); err != nil { // Log error but continue with cleanup // Best effort to clean up even if VMM is unresponsive - log.WarnContext(ctx, "failed to kill VMM, continuing with cleanup", "id", id, "error", err) + log.WarnContext(ctx, "failed to kill VMM, continuing with cleanup", "instance_id", id, "error", err) } } // 4. Release network allocation if inst.NetworkEnabled { - log.DebugContext(ctx, "releasing network", "id", id, "network", "default") + log.DebugContext(ctx, "releasing network", "instance_id", id, "network", "default") if err := m.networkManager.ReleaseAllocation(ctx, networkAlloc); err != nil { // Log error but continue with cleanup - log.WarnContext(ctx, "failed to release network, continuing with cleanup", "id", id, "error", err) + log.WarnContext(ctx, "failed to release network, continuing with cleanup", "instance_id", id, "error", err) } } // 5. Detach volumes if len(inst.Volumes) > 0 { - log.DebugContext(ctx, "detaching volumes", "id", id, "count", len(inst.Volumes)) + log.DebugContext(ctx, "detaching volumes", "instance_id", id, "count", len(inst.Volumes)) for _, volAttach := range inst.Volumes { if err := m.volumeManager.DetachVolume(ctx, volAttach.VolumeID, id); err != nil { // Log error but continue with cleanup - log.WarnContext(ctx, "failed to detach volume, continuing with cleanup", "id", id, "volume_id", volAttach.VolumeID, "error", err) + log.WarnContext(ctx, "failed to detach volume, continuing with cleanup", "instance_id", id, "volume_id", volAttach.VolumeID, "error", err) } } } // 6. 
Delete all instance data - log.DebugContext(ctx, "deleting instance data", "id", id) + log.DebugContext(ctx, "deleting instance data", "instance_id", id) if err := m.deleteInstanceData(id); err != nil { - log.ErrorContext(ctx, "failed to delete instance data", "id", id, "error", err) + log.ErrorContext(ctx, "failed to delete instance data", "instance_id", id, "error", err) return fmt.Errorf("delete instance data: %w", err) } - log.InfoContext(ctx, "instance deleted successfully", "id", id) + log.InfoContext(ctx, "instance deleted successfully", "instance_id", id) return nil } @@ -95,9 +95,9 @@ func (m *manager) killVMM(ctx context.Context, inst *Instance) error { if err := syscall.Kill(pid, 0); err == nil { // Process exists - kill it immediately with SIGKILL // No graceful shutdown needed since we're deleting all data - log.DebugContext(ctx, "killing VMM process", "id", inst.Id, "pid", pid) + log.DebugContext(ctx, "killing VMM process", "instance_id", inst.Id, "pid", pid) if err := syscall.Kill(pid, syscall.SIGKILL); err != nil { - log.WarnContext(ctx, "failed to kill VMM process", "id", inst.Id, "pid", pid, "error", err) + log.WarnContext(ctx, "failed to kill VMM process", "instance_id", inst.Id, "pid", pid, "error", err) } // Wait for process to die and reap it to prevent zombies @@ -107,16 +107,16 @@ func (m *manager) killVMM(ctx context.Context, inst *Instance) error { wpid, err := syscall.Wait4(pid, &wstatus, syscall.WNOHANG, nil) if err != nil || wpid == pid { // Process reaped successfully or error (likely ECHILD if already reaped) - log.DebugContext(ctx, "VMM process killed and reaped", "id", inst.Id, "pid", pid) + log.DebugContext(ctx, "VMM process killed and reaped", "instance_id", inst.Id, "pid", pid) break } if i == 49 { - log.WarnContext(ctx, "VMM process did not exit in time", "id", inst.Id, "pid", pid) + log.WarnContext(ctx, "VMM process did not exit in time", "instance_id", inst.Id, "pid", pid) } time.Sleep(100 * time.Millisecond) } } else { - log.DebugContext(ctx, "VMM process not running", "id", inst.Id, "pid", pid) + log.DebugContext(ctx, "VMM process not running", "instance_id", inst.Id, "pid", pid) } } diff --git a/lib/instances/ingress_resolver.go b/lib/instances/ingress_resolver.go index 6220967e..47d9200a 100644 --- a/lib/instances/ingress_resolver.go +++ b/lib/instances/ingress_resolver.go @@ -42,3 +42,12 @@ func (r *IngressResolver) InstanceExists(ctx context.Context, nameOrID string) ( _, err := r.manager.GetInstance(ctx, nameOrID) return err == nil, nil } + +// ResolveInstance resolves an instance name, ID, or ID prefix to its canonical name and ID. 
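+// The resolved name feeds the human-readable ingress target config, while the
+// ID is used for log routing (instance directories are keyed by ID).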
+func (r *IngressResolver) ResolveInstance(ctx context.Context, nameOrID string) (string, string, error) { + inst, err := r.manager.GetInstance(ctx, nameOrID) + if err != nil { + return "", "", fmt.Errorf("instance not found: %s", nameOrID) + } + return inst.Name, inst.Id, nil +} diff --git a/lib/instances/logs.go b/lib/instances/logs.go index a022081a..a483ec26 100644 --- a/lib/instances/logs.go +++ b/lib/instances/logs.go @@ -34,7 +34,7 @@ var ErrLogNotFound = fmt.Errorf("log file not found") // Returns last N lines, then continues following if follow=true func (m *manager) streamInstanceLogs(ctx context.Context, id string, tail int, follow bool, source LogSource) (<-chan string, error) { log := logger.FromContext(ctx) - log.DebugContext(ctx, "starting log stream", "id", id, "tail", tail, "follow", follow, "source", source) + log.DebugContext(ctx, "starting log stream", "instance_id", id, "tail", tail, "follow", follow, "source", source) // Verify tail command is available if _, err := exec.LookPath("tail"); err != nil { @@ -92,14 +92,14 @@ func (m *manager) streamInstanceLogs(ctx context.Context, id string, tail int, f for scanner.Scan() { select { case <-ctx.Done(): - log.DebugContext(ctx, "log stream cancelled", "id", id) + log.DebugContext(ctx, "log stream cancelled", "instance_id", id) return case out <- scanner.Text(): } } if err := scanner.Err(); err != nil { - log.ErrorContext(ctx, "scanner error", "id", id, "error", err) + log.ErrorContext(ctx, "scanner error", "instance_id", id, "error", err) } // Wait for tail to exit (important for non-follow mode) diff --git a/lib/instances/manager_test.go b/lib/instances/manager_test.go index 68dc37c2..2ee0a7f4 100644 --- a/lib/instances/manager_test.go +++ b/lib/instances/manager_test.go @@ -983,3 +983,11 @@ func (r *testInstanceResolver) ResolveInstanceIP(ctx context.Context, nameOrID s func (r *testInstanceResolver) InstanceExists(ctx context.Context, nameOrID string) (bool, error) { return r.exists, nil } + +func (r *testInstanceResolver) ResolveInstance(ctx context.Context, nameOrID string) (string, string, error) { + if !r.exists { + return "", "", fmt.Errorf("instance not found: %s", nameOrID) + } + // For tests, just return nameOrID as both name and id + return nameOrID, nameOrID, nil +} diff --git a/lib/instances/query.go b/lib/instances/query.go index e7f02bf6..819ba1a2 100644 --- a/lib/instances/query.go +++ b/lib/instances/query.go @@ -36,7 +36,7 @@ func (m *manager) deriveState(ctx context.Context, stored *StoredMetadata) state // Failed to create client - this is unexpected if socket exists errMsg := fmt.Sprintf("failed to create VMM client: %v", err) log.WarnContext(ctx, "failed to determine instance state", - "id", stored.Id, + "instance_id", stored.Id, "socket", stored.SocketPath, "error", err, ) @@ -48,7 +48,7 @@ func (m *manager) deriveState(ctx context.Context, stored *StoredMetadata) state // Socket exists but VMM is unreachable - this is unexpected errMsg := fmt.Sprintf("failed to query VMM: %v", err) log.WarnContext(ctx, "failed to query VMM state", - "id", stored.Id, + "instance_id", stored.Id, "socket", stored.SocketPath, "error", err, ) @@ -60,7 +60,7 @@ func (m *manager) deriveState(ctx context.Context, stored *StoredMetadata) state body := string(resp.Body) errMsg := fmt.Sprintf("VMM returned error (status %d): %s", resp.StatusCode(), body) log.WarnContext(ctx, "VMM returned error response", - "id", stored.Id, + "instance_id", stored.Id, "socket", stored.SocketPath, "status_code", resp.StatusCode(), "body", body, 
@@ -82,7 +82,7 @@ func (m *manager) deriveState(ctx context.Context, stored *StoredMetadata) state // Unknown CH state - log and return Unknown errMsg := fmt.Sprintf("unexpected VMM state: %s", resp.JSON200.State) log.WarnContext(ctx, "VMM returned unexpected state", - "id", stored.Id, + "instance_id", stored.Id, "vmm_state", resp.JSON200.State, ) return stateResult{State: StateUnknown, Error: &errMsg} @@ -140,7 +140,7 @@ func (m *manager) listInstances(ctx context.Context) ([]Instance, error) { meta, err := m.loadMetadata(id) if err != nil { // Skip instances with invalid metadata - log.WarnContext(ctx, "skipping instance with invalid metadata", "id", id, "error", err) + log.WarnContext(ctx, "skipping instance with invalid metadata", "instance_id", id, "error", err) continue } @@ -164,6 +164,6 @@ func (m *manager) getInstance(ctx context.Context, id string) (*Instance, error) } inst := m.toInstance(ctx, meta) - log.DebugContext(ctx, "retrieved instance", "id", inst.Id, "state", inst.State) + log.DebugContext(ctx, "retrieved instance", "instance_id", inst.Id, "state", inst.State) return &inst, nil } diff --git a/lib/instances/restore.go b/lib/instances/restore.go index 53b7b3e9..9ec95bed 100644 --- a/lib/instances/restore.go +++ b/lib/instances/restore.go @@ -20,7 +20,7 @@ func (m *manager) restoreInstance( ) (*Instance, error) { start := time.Now() log := logger.FromContext(ctx) - log.InfoContext(ctx, "restoring instance from standby", "id", id) + log.InfoContext(ctx, "restoring instance from standby", "instance_id", id) // Start tracing span if tracer is available if m.metrics != nil && m.metrics.tracer != nil { @@ -32,22 +32,22 @@ func (m *manager) restoreInstance( // 1. Load instance meta, err := m.loadMetadata(id) if err != nil { - log.ErrorContext(ctx, "failed to load instance metadata", "id", id, "error", err) + log.ErrorContext(ctx, "failed to load instance metadata", "instance_id", id, "error", err) return nil, err } inst := m.toInstance(ctx, meta) stored := &meta.StoredMetadata - log.DebugContext(ctx, "loaded instance", "id", id, "state", inst.State, "has_snapshot", inst.HasSnapshot) + log.DebugContext(ctx, "loaded instance", "instance_id", id, "state", inst.State, "has_snapshot", inst.HasSnapshot) // 2. Validate state if inst.State != StateStandby { - log.ErrorContext(ctx, "invalid state for restore", "id", id, "state", inst.State) + log.ErrorContext(ctx, "invalid state for restore", "instance_id", id, "state", inst.State) return nil, fmt.Errorf("%w: cannot restore from state %s", ErrInvalidState, inst.State) } if !inst.HasSnapshot { - log.ErrorContext(ctx, "no snapshot available", "id", id) + log.ErrorContext(ctx, "no snapshot available", "instance_id", id) return nil, fmt.Errorf("no snapshot available for instance %s", id) } @@ -56,17 +56,17 @@ func (m *manager) restoreInstance( // 4. Recreate TAP device if network enabled if stored.NetworkEnabled { - log.DebugContext(ctx, "recreating network for restore", "id", id, "network", "default") + log.DebugContext(ctx, "recreating network for restore", "instance_id", id, "network", "default") if err := m.networkManager.RecreateAllocation(ctx, id); err != nil { - log.ErrorContext(ctx, "failed to recreate network", "id", id, "error", err) + log.ErrorContext(ctx, "failed to recreate network", "instance_id", id, "error", err) return nil, fmt.Errorf("recreate network: %w", err) } } // 5. 
Transition: Standby → Paused (start VMM + restore) - log.DebugContext(ctx, "restoring from snapshot", "id", id, "snapshot_dir", snapshotDir) + log.DebugContext(ctx, "restoring from snapshot", "instance_id", id, "snapshot_dir", snapshotDir) if err := m.restoreFromSnapshot(ctx, stored, snapshotDir); err != nil { - log.ErrorContext(ctx, "failed to restore from snapshot", "id", id, "error", err) + log.ErrorContext(ctx, "failed to restore from snapshot", "instance_id", id, "error", err) // Cleanup network on failure // Note: Network cleanup is explicitly called on failure paths to ensure TAP devices // are removed. In production, stale TAP devices from unexpected failures (e.g., @@ -81,7 +81,7 @@ func (m *manager) restoreInstance( // 6. Create client for resumed VM client, err := vmm.NewVMM(stored.SocketPath) if err != nil { - log.ErrorContext(ctx, "failed to create VMM client", "id", id, "error", err) + log.ErrorContext(ctx, "failed to create VMM client", "instance_id", id, "error", err) // Cleanup network on failure if stored.NetworkEnabled { netAlloc, _ := m.networkManager.GetAllocation(ctx, id) @@ -91,10 +91,10 @@ func (m *manager) restoreInstance( } // 7. Transition: Paused → Running (resume) - log.DebugContext(ctx, "resuming VM", "id", id) + log.DebugContext(ctx, "resuming VM", "instance_id", id) resumeResp, err := client.ResumeVMWithResponse(ctx) if err != nil || resumeResp.StatusCode() != 204 { - log.ErrorContext(ctx, "failed to resume VM", "id", id, "error", err) + log.ErrorContext(ctx, "failed to resume VM", "instance_id", id, "error", err) // Cleanup network on failure if stored.NetworkEnabled { netAlloc, _ := m.networkManager.GetAllocation(ctx, id) @@ -104,7 +104,7 @@ func (m *manager) restoreInstance( } // 8. Delete snapshot after successful restore - log.DebugContext(ctx, "deleting snapshot after successful restore", "id", id) + log.DebugContext(ctx, "deleting snapshot after successful restore", "instance_id", id) os.RemoveAll(snapshotDir) // Best effort, ignore errors // 9. 
Update timestamp @@ -114,7 +114,7 @@ func (m *manager) restoreInstance( meta = &metadata{StoredMetadata: *stored} if err := m.saveMetadata(meta); err != nil { // VM is running but metadata failed - log.WarnContext(ctx, "failed to update metadata after restore", "id", id, "error", err) + log.WarnContext(ctx, "failed to update metadata after restore", "instance_id", id, "error", err) } // Record metrics @@ -125,7 +125,7 @@ func (m *manager) restoreInstance( // Return instance with derived state (should be Running now) finalInst := m.toInstance(ctx, meta) - log.InfoContext(ctx, "instance restored successfully", "id", id, "state", finalInst.State) + log.InfoContext(ctx, "instance restored successfully", "instance_id", id, "state", finalInst.State) return &finalInst, nil } @@ -138,7 +138,7 @@ func (m *manager) restoreFromSnapshot( log := logger.FromContext(ctx) // Start VMM process and capture PID - log.DebugContext(ctx, "starting VMM process for restore", "id", stored.Id, "version", stored.CHVersion) + log.DebugContext(ctx, "starting VMM process for restore", "instance_id", stored.Id, "version", stored.CHVersion) pid, err := vmm.StartProcess(ctx, m.paths, stored.CHVersion, stored.SocketPath) if err != nil { return fmt.Errorf("start vmm: %w", err) @@ -146,7 +146,7 @@ func (m *manager) restoreFromSnapshot( // Store the PID for later cleanup stored.CHPID = &pid - log.DebugContext(ctx, "VMM process started", "id", stored.Id, "pid", pid) + log.DebugContext(ctx, "VMM process started", "instance_id", stored.Id, "pid", pid) // Create client client, err := vmm.NewVMM(stored.SocketPath) @@ -161,19 +161,19 @@ func (m *manager) restoreFromSnapshot( Prefault: ptr(false), // Don't prefault pages for faster restore } - log.DebugContext(ctx, "invoking VMM restore API", "id", stored.Id, "source_url", sourceURL) + log.DebugContext(ctx, "invoking VMM restore API", "instance_id", stored.Id, "source_url", sourceURL) resp, err := client.PutVmRestoreWithResponse(ctx, restoreConfig) if err != nil { - log.ErrorContext(ctx, "restore API call failed", "id", stored.Id, "error", err) + log.ErrorContext(ctx, "restore API call failed", "instance_id", stored.Id, "error", err) client.ShutdownVMMWithResponse(ctx) // Cleanup return fmt.Errorf("restore api call: %w", err) } if resp.StatusCode() != 204 { - log.ErrorContext(ctx, "restore API returned error", "id", stored.Id, "status", resp.StatusCode()) + log.ErrorContext(ctx, "restore API returned error", "instance_id", stored.Id, "status", resp.StatusCode()) client.ShutdownVMMWithResponse(ctx) // Cleanup return fmt.Errorf("restore failed with status %d", resp.StatusCode()) } - log.DebugContext(ctx, "VM restored from snapshot successfully", "id", stored.Id) + log.DebugContext(ctx, "VM restored from snapshot successfully", "instance_id", stored.Id) return nil } diff --git a/lib/instances/standby.go b/lib/instances/standby.go index 68e67661..8c2a2620 100644 --- a/lib/instances/standby.go +++ b/lib/instances/standby.go @@ -21,7 +21,7 @@ func (m *manager) standbyInstance( ) (*Instance, error) { start := time.Now() log := logger.FromContext(ctx) - log.InfoContext(ctx, "putting instance in standby", "id", id) + log.InfoContext(ctx, "putting instance in standby", "instance_id", id) // Start tracing span if tracer is available if m.metrics != nil && m.metrics.tracer != nil { @@ -33,17 +33,17 @@ func (m *manager) standbyInstance( // 1. 
Load instance meta, err := m.loadMetadata(id) if err != nil { - log.ErrorContext(ctx, "failed to load instance metadata", "id", id, "error", err) + log.ErrorContext(ctx, "failed to load instance metadata", "instance_id", id, "error", err) return nil, err } inst := m.toInstance(ctx, meta) stored := &meta.StoredMetadata - log.DebugContext(ctx, "loaded instance", "id", id, "state", inst.State) + log.DebugContext(ctx, "loaded instance", "instance_id", id, "state", inst.State) // 2. Validate state transition (must be Running to start standby flow) if inst.State != StateRunning { - log.ErrorContext(ctx, "invalid state for standby", "id", id, "state", inst.State) + log.ErrorContext(ctx, "invalid state for standby", "instance_id", id, "state", inst.State) return nil, fmt.Errorf("%w: cannot standby from state %s", ErrInvalidState, inst.State) } @@ -51,60 +51,60 @@ func (m *manager) standbyInstance( // This is needed to delete the TAP device after VMM shuts down var networkAlloc *network.Allocation if inst.NetworkEnabled { - log.DebugContext(ctx, "getting network allocation", "id", id) + log.DebugContext(ctx, "getting network allocation", "instance_id", id) networkAlloc, err = m.networkManager.GetAllocation(ctx, id) if err != nil { - log.WarnContext(ctx, "failed to get network allocation, will still attempt cleanup", "id", id, "error", err) + log.WarnContext(ctx, "failed to get network allocation, will still attempt cleanup", "instance_id", id, "error", err) } } // 4. Create VMM client client, err := vmm.NewVMM(inst.SocketPath) if err != nil { - log.ErrorContext(ctx, "failed to create VMM client", "id", id, "error", err) + log.ErrorContext(ctx, "failed to create VMM client", "instance_id", id, "error", err) return nil, fmt.Errorf("create vmm client: %w", err) } // 5. Reduce memory to base size (virtio-mem hotplug) - log.DebugContext(ctx, "reducing VM memory before snapshot", "id", id, "base_size", inst.Size) + log.DebugContext(ctx, "reducing VM memory before snapshot", "instance_id", id, "base_size", inst.Size) if err := reduceMemory(ctx, client, inst.Size); err != nil { // Log warning but continue - snapshot will just be larger - log.WarnContext(ctx, "failed to reduce memory, snapshot will be larger", "id", id, "error", err) + log.WarnContext(ctx, "failed to reduce memory, snapshot will be larger", "instance_id", id, "error", err) } // 6. Transition: Running → Paused - log.DebugContext(ctx, "pausing VM", "id", id) + log.DebugContext(ctx, "pausing VM", "instance_id", id) pauseResp, err := client.PauseVMWithResponse(ctx) if err != nil || pauseResp.StatusCode() != 204 { - log.ErrorContext(ctx, "failed to pause VM", "id", id, "error", err) + log.ErrorContext(ctx, "failed to pause VM", "instance_id", id, "error", err) return nil, fmt.Errorf("pause vm failed: %w", err) } // 7. Create snapshot snapshotDir := m.paths.InstanceSnapshotLatest(id) - log.DebugContext(ctx, "creating snapshot", "id", id, "snapshot_dir", snapshotDir) + log.DebugContext(ctx, "creating snapshot", "instance_id", id, "snapshot_dir", snapshotDir) if err := createSnapshot(ctx, client, snapshotDir); err != nil { // Snapshot failed - try to resume VM - log.ErrorContext(ctx, "snapshot failed, attempting to resume VM", "id", id, "error", err) + log.ErrorContext(ctx, "snapshot failed, attempting to resume VM", "instance_id", id, "error", err) client.ResumeVMWithResponse(ctx) return nil, fmt.Errorf("create snapshot: %w", err) } // 8. 
Stop VMM gracefully (snapshot is complete) - log.DebugContext(ctx, "shutting down VMM", "id", id) + log.DebugContext(ctx, "shutting down VMM", "instance_id", id) if err := m.shutdownVMM(ctx, &inst); err != nil { // Log but continue - snapshot was created successfully - log.WarnContext(ctx, "failed to shutdown VMM gracefully, snapshot still valid", "id", id, "error", err) + log.WarnContext(ctx, "failed to shutdown VMM gracefully, snapshot still valid", "instance_id", id, "error", err) } // 9. Release network allocation (delete TAP device) // TAP devices with explicit Owner/Group fields do NOT auto-delete when VMM exits // They must be explicitly deleted if inst.NetworkEnabled { - log.DebugContext(ctx, "releasing network", "id", id, "network", "default") + log.DebugContext(ctx, "releasing network", "instance_id", id, "network", "default") if err := m.networkManager.ReleaseAllocation(ctx, networkAlloc); err != nil { // Log error but continue - snapshot was created successfully - log.WarnContext(ctx, "failed to release network, continuing with standby", "id", id, "error", err) + log.WarnContext(ctx, "failed to release network, continuing with standby", "instance_id", id, "error", err) } } @@ -115,7 +115,7 @@ func (m *manager) standbyInstance( meta = &metadata{StoredMetadata: *stored} if err := m.saveMetadata(meta); err != nil { - log.ErrorContext(ctx, "failed to save metadata", "id", id, "error", err) + log.ErrorContext(ctx, "failed to save metadata", "instance_id", id, "error", err) return nil, fmt.Errorf("save metadata: %w", err) } @@ -127,7 +127,7 @@ func (m *manager) standbyInstance( // Return instance with derived state (should be Standby now) finalInst := m.toInstance(ctx, meta) - log.InfoContext(ctx, "instance put in standby successfully", "id", id, "state", finalInst.State) + log.InfoContext(ctx, "instance put in standby successfully", "instance_id", id, "state", finalInst.State) return &finalInst, nil } @@ -247,20 +247,20 @@ func (m *manager) shutdownVMM(ctx context.Context, inst *Instance) error { client, err := vmm.NewVMM(inst.SocketPath) if err != nil { // Can't connect - VMM might already be stopped - log.DebugContext(ctx, "could not connect to VMM, may already be stopped", "id", inst.Id) + log.DebugContext(ctx, "could not connect to VMM, may already be stopped", "instance_id", inst.Id) return nil } // Try graceful shutdown - log.DebugContext(ctx, "sending shutdown command to VMM", "id", inst.Id) + log.DebugContext(ctx, "sending shutdown command to VMM", "instance_id", inst.Id) client.ShutdownVMMWithResponse(ctx) // Wait for process to exit if inst.CHPID != nil { if !WaitForProcessExit(*inst.CHPID, 2*time.Second) { - log.WarnContext(ctx, "VMM did not exit gracefully in time", "id", inst.Id, "pid", *inst.CHPID) + log.WarnContext(ctx, "VMM did not exit gracefully in time", "instance_id", inst.Id, "pid", *inst.CHPID) } else { - log.DebugContext(ctx, "VMM shutdown gracefully", "id", inst.Id, "pid", *inst.CHPID) + log.DebugContext(ctx, "VMM shutdown gracefully", "instance_id", inst.Id, "pid", *inst.CHPID) } } diff --git a/lib/instances/start.go b/lib/instances/start.go index 5f044050..149c1995 100644 --- a/lib/instances/start.go +++ b/lib/instances/start.go @@ -18,7 +18,7 @@ func (m *manager) startInstance( ) (*Instance, error) { start := time.Now() log := logger.FromContext(ctx) - log.InfoContext(ctx, "starting instance", "id", id) + log.InfoContext(ctx, "starting instance", "instance_id", id) // Start tracing span if tracer is available if m.metrics != nil && m.metrics.tracer != nil 
{ @@ -30,40 +30,40 @@ func (m *manager) startInstance( // 1. Load instance meta, err := m.loadMetadata(id) if err != nil { - log.ErrorContext(ctx, "failed to load instance metadata", "id", id, "error", err) + log.ErrorContext(ctx, "failed to load instance metadata", "instance_id", id, "error", err) return nil, err } inst := m.toInstance(ctx, meta) stored := &meta.StoredMetadata - log.DebugContext(ctx, "loaded instance", "id", id, "state", inst.State) + log.DebugContext(ctx, "loaded instance", "instance_id", id, "state", inst.State) // 2. Validate state (must be Stopped to start) if inst.State != StateStopped { - log.ErrorContext(ctx, "invalid state for start", "id", id, "state", inst.State) + log.ErrorContext(ctx, "invalid state for start", "instance_id", id, "state", inst.State) return nil, fmt.Errorf("%w: cannot start from state %s, must be Stopped", ErrInvalidState, inst.State) } // 3. Get image info (needed for buildVMConfig) - log.DebugContext(ctx, "getting image info", "id", id, "image", stored.Image) + log.DebugContext(ctx, "getting image info", "instance_id", id, "image", stored.Image) imageInfo, err := m.imageManager.GetImage(ctx, stored.Image) if err != nil { - log.ErrorContext(ctx, "failed to get image", "id", id, "image", stored.Image, "error", err) + log.ErrorContext(ctx, "failed to get image", "instance_id", id, "image", stored.Image, "error", err) return nil, fmt.Errorf("get image: %w", err) } // 4. Recreate network allocation if network enabled var netConfig *network.NetworkConfig if stored.NetworkEnabled { - log.DebugContext(ctx, "recreating network for start", "id", id, "network", "default") + log.DebugContext(ctx, "recreating network for start", "instance_id", id, "network", "default") if err := m.networkManager.RecreateAllocation(ctx, id); err != nil { - log.ErrorContext(ctx, "failed to recreate network", "id", id, "error", err) + log.ErrorContext(ctx, "failed to recreate network", "instance_id", id, "error", err) return nil, fmt.Errorf("recreate network: %w", err) } // Get the network config for VM configuration netAlloc, err := m.networkManager.GetAllocation(ctx, id) if err != nil { - log.ErrorContext(ctx, "failed to get network allocation", "id", id, "error", err) + log.ErrorContext(ctx, "failed to get network allocation", "instance_id", id, "error", err) // Cleanup network on failure if netAlloc != nil { m.networkManager.ReleaseAllocation(ctx, netAlloc) @@ -79,9 +79,9 @@ func (m *manager) startInstance( } // 5. 
Start VMM and boot VM (reuses logic from create) - log.InfoContext(ctx, "starting VMM and booting VM", "id", id) + log.InfoContext(ctx, "starting VMM and booting VM", "instance_id", id) if err := m.startAndBootVM(ctx, stored, imageInfo, netConfig); err != nil { - log.ErrorContext(ctx, "failed to start and boot VM", "id", id, "error", err) + log.ErrorContext(ctx, "failed to start and boot VM", "instance_id", id, "error", err) // Cleanup network on failure if stored.NetworkEnabled { if netAlloc, err := m.networkManager.GetAllocation(ctx, id); err == nil { @@ -98,7 +98,7 @@ func (m *manager) startInstance( meta = &metadata{StoredMetadata: *stored} if err := m.saveMetadata(meta); err != nil { // VM is running but metadata failed - log but don't fail - log.WarnContext(ctx, "failed to update metadata after VM start", "id", id, "error", err) + log.WarnContext(ctx, "failed to update metadata after VM start", "instance_id", id, "error", err) } // Record metrics @@ -109,6 +109,6 @@ func (m *manager) startInstance( // Return instance with derived state (should be Running now) finalInst := m.toInstance(ctx, meta) - log.InfoContext(ctx, "instance started successfully", "id", id, "state", finalInst.State) + log.InfoContext(ctx, "instance started successfully", "instance_id", id, "state", finalInst.State) return &finalInst, nil } diff --git a/lib/instances/stop.go b/lib/instances/stop.go index 5a37c809..47336caf 100644 --- a/lib/instances/stop.go +++ b/lib/instances/stop.go @@ -18,7 +18,7 @@ func (m *manager) stopInstance( ) (*Instance, error) { start := time.Now() log := logger.FromContext(ctx) - log.InfoContext(ctx, "stopping instance", "id", id) + log.InfoContext(ctx, "stopping instance", "instance_id", id) // Start tracing span if tracer is available if m.metrics != nil && m.metrics.tracer != nil { @@ -30,44 +30,44 @@ func (m *manager) stopInstance( // 1. Load instance meta, err := m.loadMetadata(id) if err != nil { - log.ErrorContext(ctx, "failed to load instance metadata", "id", id, "error", err) + log.ErrorContext(ctx, "failed to load instance metadata", "instance_id", id, "error", err) return nil, err } inst := m.toInstance(ctx, meta) stored := &meta.StoredMetadata - log.DebugContext(ctx, "loaded instance", "id", id, "state", inst.State) + log.DebugContext(ctx, "loaded instance", "instance_id", id, "state", inst.State) // 2. Validate state transition (must be Running to stop) if inst.State != StateRunning { - log.ErrorContext(ctx, "invalid state for stop", "id", id, "state", inst.State) + log.ErrorContext(ctx, "invalid state for stop", "instance_id", id, "state", inst.State) return nil, fmt.Errorf("%w: cannot stop from state %s, must be Running", ErrInvalidState, inst.State) } // 3. Get network allocation BEFORE killing VMM (while we can still query it) var networkAlloc *network.Allocation if inst.NetworkEnabled { - log.DebugContext(ctx, "getting network allocation", "id", id) + log.DebugContext(ctx, "getting network allocation", "instance_id", id) networkAlloc, err = m.networkManager.GetAllocation(ctx, id) if err != nil { - log.WarnContext(ctx, "failed to get network allocation, will still attempt cleanup", "id", id, "error", err) + log.WarnContext(ctx, "failed to get network allocation, will still attempt cleanup", "instance_id", id, "error", err) } } // 4. 
Shutdown VMM process // TODO: Add graceful shutdown via vsock signal to allow app to clean up - log.DebugContext(ctx, "shutting down VMM", "id", id) + log.DebugContext(ctx, "shutting down VMM", "instance_id", id) if err := m.shutdownVMM(ctx, &inst); err != nil { // Log but continue - try to clean up anyway - log.WarnContext(ctx, "failed to shutdown VMM gracefully", "id", id, "error", err) + log.WarnContext(ctx, "failed to shutdown VMM gracefully", "instance_id", id, "error", err) } // 5. Release network allocation (delete TAP device) if inst.NetworkEnabled && networkAlloc != nil { - log.DebugContext(ctx, "releasing network", "id", id, "network", "default") + log.DebugContext(ctx, "releasing network", "instance_id", id, "network", "default") if err := m.networkManager.ReleaseAllocation(ctx, networkAlloc); err != nil { // Log error but continue - log.WarnContext(ctx, "failed to release network, continuing", "id", id, "error", err) + log.WarnContext(ctx, "failed to release network, continuing", "instance_id", id, "error", err) } } @@ -78,7 +78,7 @@ func (m *manager) stopInstance( meta = &metadata{StoredMetadata: *stored} if err := m.saveMetadata(meta); err != nil { - log.ErrorContext(ctx, "failed to save metadata", "id", id, "error", err) + log.ErrorContext(ctx, "failed to save metadata", "instance_id", id, "error", err) return nil, fmt.Errorf("save metadata: %w", err) } @@ -90,6 +90,6 @@ func (m *manager) stopInstance( // Return instance with derived state (should be Stopped now) finalInst := m.toInstance(ctx, meta) - log.InfoContext(ctx, "instance stopped successfully", "id", id, "state", finalInst.State) + log.InfoContext(ctx, "instance stopped successfully", "instance_id", id, "state", finalInst.State) return &finalInst, nil } diff --git a/lib/logger/README.md b/lib/logger/README.md index 9ac813d1..7e2db7a4 100644 --- a/lib/logger/README.md +++ b/lib/logger/README.md @@ -36,12 +36,12 @@ ctx = logger.AddToContext(ctx, log) // Retrieve from context log = logger.FromContext(ctx) -log.InfoContext(ctx, "instance created", "id", instanceID) +log.InfoContext(ctx, "instance created", "instance_id", instanceID) ``` ## Per-Instance Logging -The `InstanceLogHandler` automatically writes logs with an `"id"` attribute to per-instance `hypeman.log` files. This provides an operations audit trail for each VM. +The `InstanceLogHandler` automatically writes logs with an `"instance_id"` attribute to per-instance `hypeman.log` files. This provides an operations audit trail for each VM. 
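Attribute-routed handlers like this are easy to get subtly wrong around `WithAttrs`; here is a minimal self-contained sketch of the pattern (simplified: it only inspects per-record attrs, and every name in it is illustrative rather than this patch's API):

```go
package main

import (
	"context"
	"fmt"
	"log/slog"
	"os"
	"path/filepath"
)

// routingHandler tees records that carry an "instance_id" attribute into a
// per-instance file, then always delegates to the wrapped handler.
type routingHandler struct {
	next    slog.Handler
	pathFor func(id string) string
}

func (h *routingHandler) Enabled(ctx context.Context, l slog.Level) bool {
	return h.next.Enabled(ctx, l)
}

func (h *routingHandler) Handle(ctx context.Context, r slog.Record) error {
	var id string
	r.Attrs(func(a slog.Attr) bool {
		if a.Key == "instance_id" {
			id = a.Value.String()
			return false // stop iterating
		}
		return true
	})
	if id != "" {
		if f, err := os.OpenFile(h.pathFor(id), os.O_CREATE|os.O_APPEND|os.O_WRONLY, 0o644); err == nil {
			fmt.Fprintf(f, "%s %s %s\n", r.Time.UTC().Format("2006-01-02T15:04:05Z"), r.Level, r.Message)
			f.Close()
		}
	}
	return h.next.Handle(ctx, r)
}

// WithAttrs and WithGroup delegate; a production handler (like the one in
// this patch) would also keep the bound attrs so routing survives logger.With.
func (h *routingHandler) WithAttrs(attrs []slog.Attr) slog.Handler {
	return &routingHandler{next: h.next.WithAttrs(attrs), pathFor: h.pathFor}
}

func (h *routingHandler) WithGroup(name string) slog.Handler {
	return &routingHandler{next: h.next.WithGroup(name), pathFor: h.pathFor}
}

func main() {
	log := slog.New(&routingHandler{
		next:    slog.NewJSONHandler(os.Stderr, nil),
		pathFor: func(id string) string { return filepath.Join(os.TempDir(), id+".log") },
	})
	log.Info("starting VM", "instance_id", "instance-123") // lands in stderr and <tmp>/instance-123.log
}
```

The real `InstanceLogHandler` in this patch also remembers attrs bound via `WithAttrs`, so a logger enriched once (for example by middleware) keeps routing every subsequent record.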
```go // Wrap any handler with instance logging @@ -49,8 +49,12 @@ handler := logger.NewInstanceLogHandler(baseHandler, func(id string) string { return paths.InstanceHypemanLog(id) }) -// Logs with "id" attribute are automatically written to that instance's hypeman.log -log.InfoContext(ctx, "starting VM", "id", instanceID) +// Logs with "instance_id" attribute are automatically written to that instance's hypeman.log +log.InfoContext(ctx, "starting VM", "instance_id", instanceID) + +// Related operations (e.g., ingress creation) can also include instance_id +// to appear in the instance's audit log +log.InfoContext(ctx, "ingress created", "ingress_id", ingressID, "instance_id", targetInstance) ``` ## Output @@ -64,7 +68,7 @@ When OTel tracing is active, logs include trace context: "subsystem": "INSTANCES", "trace_id": "abc123...", "span_id": "def456...", - "id": "instance-123" + "instance_id": "instance-123" } ``` diff --git a/lib/logger/instance_handler.go b/lib/logger/instance_handler.go index c4c4fd9f..6a034177 100644 --- a/lib/logger/instance_handler.go +++ b/lib/logger/instance_handler.go @@ -44,7 +44,7 @@ func (h *InstanceLogHandler) Handle(ctx context.Context, r slog.Record) error { // Check for instance ID in pre-bound attrs first (from WithAttrs) var instanceID string for _, a := range h.preAttrs { - if a.Key == "id" { + if a.Key == "instance_id" { instanceID = a.Value.String() break } @@ -52,7 +52,7 @@ func (h *InstanceLogHandler) Handle(ctx context.Context, r slog.Record) error { // Then check record attrs (overrides pre-bound if present) r.Attrs(func(a slog.Attr) bool { - if a.Key == "id" { + if a.Key == "instance_id" { instanceID = a.Value.String() return false // stop iteration } @@ -88,16 +88,16 @@ func (h *InstanceLogHandler) writeToInstanceLog(instanceID string, r slog.Record level := r.Level.String() msg := r.Message - // Collect attributes (excluding "id" since it's implicit) + // Collect attributes (excluding "instance_id" since it's implicit) // Include both pre-bound attrs and record attrs var attrs []string for _, a := range h.preAttrs { - if a.Key != "id" { + if a.Key != "instance_id" { attrs = append(attrs, fmt.Sprintf("%s=%v", a.Key, a.Value)) } } r.Attrs(func(a slog.Attr) bool { - if a.Key != "id" { + if a.Key != "instance_id" { attrs = append(attrs, fmt.Sprintf("%s=%v", a.Key, a.Value)) } return true diff --git a/lib/middleware/resolve.go b/lib/middleware/resolve.go index ab2e594b..75593370 100644 --- a/lib/middleware/resolve.go +++ b/lib/middleware/resolve.go @@ -108,8 +108,13 @@ func ResolveResource(resolvers Resolvers, errResponder ErrorResponder) func(http Resource: resource, }) - // Enrich logger with resolved ID - log := logger.FromContext(ctx).With("id", resolvedID) + // Enrich logger with resource-specific key + // Use "image_name" for images (keyed by OCI reference), "_id" for others + logKey := resourceType + "_id" + if resourceType == "image" { + logKey = "image_name" + } + log := logger.FromContext(ctx).With(logKey, resolvedID) ctx = logger.AddToContext(ctx, log) next.ServeHTTP(w, r.WithContext(ctx)) diff --git a/lib/network/allocate.go b/lib/network/allocate.go index ddd95f6b..b3f35900 100644 --- a/lib/network/allocate.go +++ b/lib/network/allocate.go @@ -62,7 +62,7 @@ func (m *manager) CreateAllocation(ctx context.Context, req AllocateRequest) (*N m.recordTAPOperation(ctx, "create") log.InfoContext(ctx, "allocated network", - "id", req.InstanceID, + "instance_id", req.InstanceID, "instance_name", req.InstanceName, "network", "default", "ip", ip, @@ 
-115,7 +115,7 @@ func (m *manager) RecreateAllocation(ctx context.Context, instanceID string) err m.recordTAPOperation(ctx, "create") log.InfoContext(ctx, "recreated network for restore", - "id", instanceID, + "instance_id", instanceID, "network", "default", "tap", alloc.TAPDevice) @@ -145,7 +145,7 @@ func (m *manager) ReleaseAllocation(ctx context.Context, alloc *Allocation) erro } log.InfoContext(ctx, "released network", - "id", alloc.InstanceID, + "instance_id", alloc.InstanceID, "network", alloc.Network, "ip", alloc.IP) diff --git a/lib/network/derive.go b/lib/network/derive.go index 19af42a5..86c3bb22 100644 --- a/lib/network/derive.go +++ b/lib/network/derive.go @@ -25,7 +25,7 @@ func (m *manager) deriveAllocation(ctx context.Context, instanceID string) (*All // 1. Load instance metadata to get instance name and network status meta, err := m.loadInstanceMetadata(instanceID) if err != nil { - log.DebugContext(ctx, "failed to load instance metadata", "id", instanceID, "error", err) + log.DebugContext(ctx, "failed to load instance metadata", "instance_id", instanceID, "error", err) return nil, err }
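Stepping back to the standby path above: its error handling follows a resume-on-failure shape that is easy to state in isolation. A condensed sketch, with a hypothetical `snapshotter` interface standing in for the generated Cloud Hypervisor client calls the patch actually uses (`PauseVMWithResponse`, `ResumeVMWithResponse`, and the snapshot call wrapped by `createSnapshot`):

```go
package standbysketch

import (
	"context"
	"fmt"
)

// snapshotter is a hypothetical stand-in for the VMM client; it is not an
// interface defined by this patch.
type snapshotter interface {
	Pause(ctx context.Context) error
	Snapshot(ctx context.Context, dir string) error
	Resume(ctx context.Context) error
}

// pauseAndSnapshot captures the order-sensitive core of the standby flow:
// pause first, snapshot second, and if the snapshot fails, attempt to resume
// rather than leaving the guest paused.
func pauseAndSnapshot(ctx context.Context, vm snapshotter, dir string) error {
	if err := vm.Pause(ctx); err != nil {
		return fmt.Errorf("pause vm failed: %w", err)
	}
	if err := vm.Snapshot(ctx, dir); err != nil {
		_ = vm.Resume(ctx) // best effort; the snapshot error is what matters
		return fmt.Errorf("create snapshot: %w", err)
	}
	return nil
}
```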
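The resolver middleware's key choice is likewise small enough to read on its own; `logKeyFor` is an illustrative name, not a function this patch adds:

```go
// logKeyFor mirrors the middleware's derivation: images are logged under
// "image_name" because they are keyed by OCI reference; every other
// resource gets "<resourceType>_id".
func logKeyFor(resourceType string) string {
	if resourceType == "image" {
		return "image_name"
	}
	return resourceType + "_id" // e.g. "instance_id", "ingress_id"
}
```

Keeping the literal key `instance_id` for instances is what lets the `InstanceLogHandler` route middleware-enriched logs into the right per-instance `hypeman.log`.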