Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
42 changes: 42 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,12 @@ Hypeman can be configured using the following environment variables:
| `DNS_SERVER` | DNS server IP address for VMs | `1.1.1.1` |
| `MAX_CONCURRENT_BUILDS` | Maximum number of concurrent image builds | `1` |
| `MAX_OVERLAY_SIZE` | Maximum size for overlay filesystem | `100GB` |
| `ENV` | Deployment environment (filters telemetry, e.g. your name for dev) | `unset` |
| `OTEL_ENABLED` | Enable OpenTelemetry traces/metrics | `false` |
| `OTEL_ENDPOINT` | OTLP gRPC endpoint | `127.0.0.1:4317` |
| `OTEL_SERVICE_INSTANCE_ID` | Instance ID for telemetry (differentiates multiple servers) | hostname |
| `LOG_LEVEL` | Default log level (debug, info, warn, error) | `info` |
| `LOG_LEVEL_<SUBSYSTEM>` | Per-subsystem log level (API, IMAGES, INSTANCES, NETWORK, VOLUMES, VMM, SYSTEM, EXEC) | inherits default |

**Important: Subnet Configuration**

Expand Down Expand Up @@ -159,6 +165,42 @@ make dev
```
The server will start on port 8080 (configurable via `PORT` environment variable).

#### Local OpenTelemetry (optional)

To collect traces and metrics locally, run the Grafana LGTM stack (Loki, Grafana, Tempo, Mimir):

```bash
# Start Grafana LGTM (UI at http://localhost:3000, login: admin/admin)
# Note: if you are developing on a shared server, you can share one LGTM stack with your peers.
# Each developer can then filter their own metrics, traces, and logs by the ENV value set in .env below.
docker run -d --name lgtm \
-p 127.0.0.1:3000:3000 \
-p 127.0.0.1:4317:4317 \
-p 127.0.0.1:4318:4318 \
-p 127.0.0.1:9090:9090 \
-p 127.0.0.1:4040:4040 \
grafana/otel-lgtm:latest

# If developing on a remote server, forward the port to your local machine:
# ssh -L 3001:localhost:3000 your-server (then open http://localhost:3001)

# Enable OTel in .env (set ENV to your name to filter your telemetry)
echo "OTEL_ENABLED=true" >> .env
echo "ENV=yourname" >> .env

# Restart dev server
make dev
```

Open http://localhost:3000 to view traces (Tempo), metrics (Mimir), and logs (Loki) in Grafana.

**Import the Hypeman dashboard:**
1. Go to Dashboards → New → Import
2. Upload `dashboards/hypeman.json` or paste its contents
3. Select the Prometheus datasource and click Import

Use the Environment/Instance dropdowns to filter by `deployment.environment` or `service.instance.id`.

### Testing

Network tests require elevated permissions to create bridges and TAP devices.
Expand Down
8 changes: 4 additions & 4 deletions cmd/api/api/api_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,18 +26,18 @@ func newTestService(t *testing.T) *ApiService {
}

p := paths.New(cfg.DataDir)
imageMgr, err := images.NewManager(p, 1)
imageMgr, err := images.NewManager(p, 1, nil)
if err != nil {
t.Fatalf("failed to create image manager: %v", err)
}

systemMgr := system.NewManager(p)
networkMgr := network.NewManager(p, cfg)
volumeMgr := volumes.NewManager(p, 0) // 0 = unlimited storage
networkMgr := network.NewManager(p, cfg, nil)
volumeMgr := volumes.NewManager(p, 0, nil) // 0 = unlimited storage
limits := instances.ResourceLimits{
MaxOverlaySize: 100 * 1024 * 1024 * 1024, // 100GB
}
instanceMgr := instances.NewManager(p, imageMgr, systemMgr, networkMgr, volumeMgr, limits)
instanceMgr := instances.NewManager(p, imageMgr, systemMgr, networkMgr, volumeMgr, limits, nil, nil)

// Register cleanup for orphaned Cloud Hypervisor processes
t.Cleanup(func() {
Expand Down
8 changes: 4 additions & 4 deletions cmd/api/api/images.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ func (s *ApiService) ListImages(ctx context.Context, request oapi.ListImagesRequ

domainImages, err := s.ImageManager.ListImages(ctx)
if err != nil {
log.Error("failed to list images", "error", err)
log.ErrorContext(ctx, "failed to list images", "error", err)
return oapi.ListImages500JSONResponse{
Code: "internal_error",
Message: "failed to list images",
Expand Down Expand Up @@ -50,7 +50,7 @@ func (s *ApiService) CreateImage(ctx context.Context, request oapi.CreateImageRe
Message: "image not found",
}, nil
default:
log.Error("failed to create image", "error", err, "name", request.Body.Name)
log.ErrorContext(ctx, "failed to create image", "error", err, "name", request.Body.Name)
return oapi.CreateImage500JSONResponse{
Code: "internal_error",
Message: "failed to create image",
Expand All @@ -72,7 +72,7 @@ func (s *ApiService) GetImage(ctx context.Context, request oapi.GetImageRequestO
Message: "image not found",
}, nil
default:
log.Error("failed to get image", "error", err, "name", request.Name)
log.ErrorContext(ctx, "failed to get image", "error", err, "name", request.Name)
return oapi.GetImage500JSONResponse{
Code: "internal_error",
Message: "failed to get image",
Expand All @@ -94,7 +94,7 @@ func (s *ApiService) DeleteImage(ctx context.Context, request oapi.DeleteImageRe
Message: "image not found",
}, nil
default:
log.Error("failed to delete image", "error", err, "name", request.Name)
log.ErrorContext(ctx, "failed to delete image", "error", err, "name", request.Name)
return oapi.DeleteImage500JSONResponse{
Code: "internal_error",
Message: "failed to delete image",
Expand Down
24 changes: 12 additions & 12 deletions cmd/api/api/instances.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,18 +21,18 @@ func (s *ApiService) ListInstances(ctx context.Context, request oapi.ListInstanc

domainInsts, err := s.InstanceManager.ListInstances(ctx)
if err != nil {
log.Error("failed to list instances", "error", err)
log.ErrorContext(ctx, "failed to list instances", "error", err)
return oapi.ListInstances500JSONResponse{
Code: "internal_error",
Message: "failed to list instances",
}, nil
}

oapiInsts := make([]oapi.Instance, len(domainInsts))
for i, inst := range domainInsts {
oapiInsts[i] = instanceToOAPI(inst)
}

return oapi.ListInstances200JSONResponse(oapiInsts), nil
}

Expand Down Expand Up @@ -160,7 +160,7 @@ func (s *ApiService) CreateInstance(ctx context.Context, request oapi.CreateInst
Message: err.Error(),
}, nil
default:
log.Error("failed to create instance", "error", err, "image", request.Body.Image)
log.ErrorContext(ctx, "failed to create instance", "error", err, "image", request.Body.Image)
return oapi.CreateInstance500JSONResponse{
Code: "internal_error",
Message: "failed to create instance",
Expand Down Expand Up @@ -195,7 +195,7 @@ func (s *ApiService) GetInstance(ctx context.Context, request oapi.GetInstanceRe
Message: "multiple instances have this name, use instance ID instead",
}, nil
default:
log.Error("failed to get instance", "error", err, "id", request.Id)
log.ErrorContext(ctx, "failed to get instance", "error", err, "id", request.Id)
return oapi.GetInstance500JSONResponse{
Code: "internal_error",
Message: "failed to get instance",
Expand Down Expand Up @@ -235,7 +235,7 @@ func (s *ApiService) DeleteInstance(ctx context.Context, request oapi.DeleteInst
Message: "instance not found",
}, nil
default:
log.Error("failed to delete instance", "error", err, "id", request.Id)
log.ErrorContext(ctx, "failed to delete instance", "error", err, "id", request.Id)
return oapi.DeleteInstance500JSONResponse{
Code: "internal_error",
Message: "failed to delete instance",
Expand Down Expand Up @@ -280,7 +280,7 @@ func (s *ApiService) StandbyInstance(ctx context.Context, request oapi.StandbyIn
Message: err.Error(),
}, nil
default:
log.Error("failed to standby instance", "error", err, "id", request.Id)
log.ErrorContext(ctx, "failed to standby instance", "error", err, "id", request.Id)
return oapi.StandbyInstance500JSONResponse{
Code: "internal_error",
Message: "failed to standby instance",
Expand Down Expand Up @@ -325,7 +325,7 @@ func (s *ApiService) RestoreInstance(ctx context.Context, request oapi.RestoreIn
Message: err.Error(),
}, nil
default:
log.Error("failed to restore instance", "error", err, "id", request.Id)
log.ErrorContext(ctx, "failed to restore instance", "error", err, "id", request.Id)
return oapi.RestoreInstance500JSONResponse{
Code: "internal_error",
Message: "failed to restore instance",
Expand Down Expand Up @@ -417,18 +417,18 @@ func (s *ApiService) GetInstanceLogs(ctx context.Context, request oapi.GetInstan

// AttachVolume attaches a volume to an instance (not yet implemented)
func (s *ApiService) AttachVolume(ctx context.Context, request oapi.AttachVolumeRequestObject) (oapi.AttachVolumeResponseObject, error) {
return oapi.AttachVolume500JSONResponse{
return oapi.AttachVolume500JSONResponse{
Code: "not_implemented",
Message: "volume attachment not yet implemented",
}, nil
}, nil
}

// DetachVolume detaches a volume from an instance (not yet implemented)
func (s *ApiService) DetachVolume(ctx context.Context, request oapi.DetachVolumeRequestObject) (oapi.DetachVolumeResponseObject, error) {
return oapi.DetachVolume500JSONResponse{
return oapi.DetachVolume500JSONResponse{
Code: "not_implemented",
Message: "volume detachment not yet implemented",
}, nil
}, nil
}

// instanceToOAPI converts domain Instance to OAPI Instance
Expand Down
16 changes: 7 additions & 9 deletions cmd/api/api/volumes.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,18 +18,18 @@ func (s *ApiService) ListVolumes(ctx context.Context, request oapi.ListVolumesRe

domainVols, err := s.VolumeManager.ListVolumes(ctx)
if err != nil {
log.Error("failed to list volumes", "error", err)
log.ErrorContext(ctx, "failed to list volumes", "error", err)
return oapi.ListVolumes500JSONResponse{
Code: "internal_error",
Message: "failed to list volumes",
}, nil
}

oapiVols := make([]oapi.Volume, len(domainVols))
for i, vol := range domainVols {
oapiVols[i] = volumeToOAPI(vol)
}

return oapi.ListVolumes200JSONResponse(oapiVols), nil
}

Expand All @@ -56,7 +56,7 @@ func (s *ApiService) CreateVolume(ctx context.Context, request oapi.CreateVolume
Message: "volume with this ID already exists",
}, nil
}
log.Error("failed to create volume", "error", err, "name", request.JSONBody.Name)
log.ErrorContext(ctx, "failed to create volume", "error", err, "name", request.JSONBody.Name)
return oapi.CreateVolume500JSONResponse{
Code: "internal_error",
Message: "failed to create volume",
Expand Down Expand Up @@ -171,7 +171,7 @@ func (s *ApiService) createVolumeFromMultipart(ctx context.Context, multipartRea
Message: "volume with this ID already exists",
}, nil
}
log.Error("failed to create volume from archive", "error", err, "name", name)
log.ErrorContext(ctx, "failed to create volume from archive", "error", err, "name", name)
return oapi.CreateVolume500JSONResponse{
Code: "internal_error",
Message: "failed to create volume",
Expand Down Expand Up @@ -222,7 +222,7 @@ func (s *ApiService) GetVolume(ctx context.Context, request oapi.GetVolumeReques
Message: "multiple volumes have this name, use volume ID instead",
}, nil
default:
log.Error("failed to get volume", "error", err, "id", request.Id)
log.ErrorContext(ctx, "failed to get volume", "error", err, "id", request.Id)
return oapi.GetVolume500JSONResponse{
Code: "internal_error",
Message: "failed to get volume",
Expand Down Expand Up @@ -269,7 +269,7 @@ func (s *ApiService) DeleteVolume(ctx context.Context, request oapi.DeleteVolume
Message: "volume is in use by an instance",
}, nil
default:
log.Error("failed to delete volume", "error", err, "id", request.Id)
log.ErrorContext(ctx, "failed to delete volume", "error", err, "id", request.Id)
return oapi.DeleteVolume500JSONResponse{
Code: "internal_error",
Message: "failed to delete volume",
Expand Down Expand Up @@ -302,5 +302,3 @@ func volumeToOAPI(vol volumes.Volume) oapi.Volume {

return oapiVol
}


79 changes: 76 additions & 3 deletions cmd/api/config/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,52 @@ package config

import (
"os"
"runtime/debug"
"strconv"

"github.com/joho/godotenv"
)

// getHostname returns the host name reported by the operating system, or
// "unknown" when the lookup returns an error.
func getHostname() string {
	name, err := os.Hostname()
	if err != nil {
		return "unknown"
	}
	return name
}

// getBuildVersion reports the VCS revision recorded in the build settings that
// Go embeds in the binary.
//
// The revision is truncated to at most 8 characters and gets a "-dirty" suffix
// when the "vcs.modified" setting is "true". It returns "unknown" when build
// info cannot be read or contains no "vcs.revision" setting.
func getBuildVersion() string {
	const (
		unknown  = "unknown"
		shortLen = 8 // length of the abbreviated revision
	)

	buildInfo, ok := debug.ReadBuildInfo()
	if !ok {
		return unknown
	}

	var (
		rev      string
		modified bool
	)
	// Scan every setting; if a key appears more than once the last one wins.
	for _, setting := range buildInfo.Settings {
		if setting.Key == "vcs.revision" {
			rev = setting.Value
		} else if setting.Key == "vcs.modified" {
			modified = setting.Value == "true"
		}
	}

	if rev == "" {
		return unknown
	}
	if len(rev) > shortLen {
		rev = rev[:shortLen]
	}
	if !modified {
		return rev
	}
	return rev + "-dirty"
}

type Config struct {
Port string
DataDir string
Expand All @@ -23,13 +64,25 @@ type Config struct {
LogRotateInterval string

// Resource limits - per instance
MaxVcpusPerInstance int // Max vCPUs for a single VM (0 = unlimited)
MaxMemoryPerInstance string // Max memory for a single VM (0 = unlimited)
MaxVcpusPerInstance int // Max vCPUs for a single VM (0 = unlimited)
MaxMemoryPerInstance string // Max memory for a single VM (0 = unlimited)

// Resource limits - aggregate
MaxTotalVcpus int // Aggregate vCPU limit across all instances (0 = unlimited)
MaxTotalMemory string // Aggregate memory limit across all instances (0 = unlimited)
MaxTotalVolumeStorage string // Total volume storage limit (0 = unlimited)

// OpenTelemetry configuration
OtelEnabled bool // Enable OpenTelemetry
OtelEndpoint string // OTLP endpoint (gRPC)
OtelServiceName string // Service name for tracing
OtelServiceInstanceID string // Service instance ID (default: hostname)
OtelInsecure bool // Disable TLS for OTLP
Version string // Application version for telemetry
Env string // Deployment environment (e.g., dev, staging, prod)

// Logging configuration
LogLevel string // Default log level (debug, info, warn, error)
}

// Load loads configuration from environment variables
Expand All @@ -43,7 +96,7 @@ func Load() *Config {
DataDir: getEnv("DATA_DIR", "/var/lib/hypeman"),
BridgeName: getEnv("BRIDGE_NAME", "vmbr0"),
SubnetCIDR: getEnv("SUBNET_CIDR", "10.100.0.0/16"),
SubnetGateway: getEnv("SUBNET_GATEWAY", ""), // empty = derived as first IP from subnet
SubnetGateway: getEnv("SUBNET_GATEWAY", ""), // empty = derived as first IP from subnet
UplinkInterface: getEnv("UPLINK_INTERFACE", ""), // empty = auto-detect from default route
JwtSecret: getEnv("JWT_SECRET", ""),
DNSServer: getEnv("DNS_SERVER", "1.1.1.1"),
Expand All @@ -61,6 +114,18 @@ func Load() *Config {
MaxTotalVcpus: getEnvInt("MAX_TOTAL_VCPUS", 0),
MaxTotalMemory: getEnv("MAX_TOTAL_MEMORY", ""),
MaxTotalVolumeStorage: getEnv("MAX_TOTAL_VOLUME_STORAGE", ""),

// OpenTelemetry configuration
OtelEnabled: getEnvBool("OTEL_ENABLED", false),
OtelEndpoint: getEnv("OTEL_ENDPOINT", "127.0.0.1:4317"),
OtelServiceName: getEnv("OTEL_SERVICE_NAME", "hypeman"),
OtelServiceInstanceID: getEnv("OTEL_SERVICE_INSTANCE_ID", getHostname()),
OtelInsecure: getEnvBool("OTEL_INSECURE", true),
Version: getEnv("VERSION", getBuildVersion()),
Env: getEnv("ENV", "unset"),

// Logging configuration
LogLevel: getEnv("LOG_LEVEL", "info"),
}

return cfg
Expand All @@ -82,3 +147,11 @@ func getEnvInt(key string, defaultValue int) int {
return defaultValue
}

// getEnvBool reads the environment variable key and parses it with
// strconv.ParseBool. It returns defaultValue when the variable is unset,
// empty, or cannot be parsed as a boolean.
func getEnvBool(key string, defaultValue bool) bool {
	raw := os.Getenv(key)
	if raw == "" {
		return defaultValue
	}
	parsed, err := strconv.ParseBool(raw)
	if err != nil {
		// Unparseable values deliberately fall back to the default.
		return defaultValue
	}
	return parsed
}
Loading
Loading