diff --git a/.air.toml b/.air.toml index 05d74ce2..6010fd22 100644 --- a/.air.toml +++ b/.air.toml @@ -5,7 +5,7 @@ tmp_dir = "tmp" [build] args_bin = [] bin = "./tmp/main" - cmd = "go build -tags containers_image_openpgp -o ./tmp/main ./cmd/api" + cmd = "go build -tags containers_image_openpgp -o ./tmp/main ./cmd/api && sudo setcap cap_net_admin,cap_net_bind_service=+eip ./tmp/main" delay = 1000 exclude_dir = ["assets", "tmp", "vendor", "testdata", "bin", "scripts", "data", "kernel"] exclude_file = [] @@ -20,6 +20,7 @@ tmp_dir = "tmp" log = "build-errors.log" poll = false poll_interval = 0 + post_cmd = [] rerun = false rerun_delay = 500 send_interrupt = false diff --git a/.github/workflows/stainless-sdks.yml b/.github/workflows/stainless-sdks.yml new file mode 100644 index 00000000..b0173fc4 --- /dev/null +++ b/.github/workflows/stainless-sdks.yml @@ -0,0 +1,65 @@ +name: Stainless SDK preview on PRs + +on: + pull_request: + types: + - opened + - synchronize + - reopened + - closed + +concurrency: + group: ${{ github.workflow }}-${{ github.event.pull_request.number }} + cancel-in-progress: true + +env: + STAINLESS_ORG: ${{ vars.STAINLESS_ORG }} + STAINLESS_PROJECT: ${{ vars.STAINLESS_PROJECT }} + OAS_PATH: openapi.yaml + CONFIG_PATH: stainless.yaml + +jobs: + preview: + if: github.event.action != 'closed' + runs-on: ubuntu-latest + permissions: + contents: read + pull-requests: write + steps: + - name: Checkout repository + uses: actions/checkout@v4 + with: + fetch-depth: 2 + + - name: Run preview builds + uses: stainless-api/upload-openapi-spec-action/preview@v1 + with: + stainless_api_key: ${{ secrets.STAINLESS_API_KEY }} + org: ${{ env.STAINLESS_ORG }} + project: ${{ env.STAINLESS_PROJECT }} + oas_path: ${{ env.OAS_PATH }} + config_path: ${{ env.CONFIG_PATH }} + make_comment: true + github_token: ${{ secrets.GITHUB_TOKEN }} + + merge: + if: github.event.action == 'closed' && github.event.pull_request.merged == true + runs-on: ubuntu-latest + permissions: + 
+      contents: read
+      pull-requests: write
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@v4
+        with:
+          fetch-depth: 2
+
+      - name: Run merge build
+        uses: stainless-api/upload-openapi-spec-action/merge@v1
+        with:
+          stainless_api_key: ${{ secrets.STAINLESS_API_KEY }}
+          org: ${{ env.STAINLESS_ORG }}
+          project: ${{ env.STAINLESS_PROJECT }}
+          oas_path: ${{ env.OAS_PATH }}
+          make_comment: true
+          github_token: ${{ secrets.GITHUB_TOKEN }}
diff --git a/Makefile b/Makefile
index 62832f7c..92f1b512 100644
--- a/Makefile
+++ b/Makefile
@@ -115,8 +115,42 @@ dev: $(AIR)
 	$(AIR) -c .air.toml
 
 # Run tests
+# Compile test binaries and grant network capabilities (runs as user, not root)
+# Usage: make test                                    - runs all tests
+#        make test TEST=TestCreateInstanceWithNetwork - runs specific test
 test: ensure-ch-binaries lib/system/exec_agent/exec-agent
-	go test -tags containers_image_openpgp -v -timeout 30s ./...
+	@echo "Building test binaries..."
+	@mkdir -p $(BIN_DIR)/tests
+	@for pkg in $$(go list -tags containers_image_openpgp ./...); do \
+		pkg_name=$$(basename $$pkg); \
+		go test -c -tags containers_image_openpgp -o $(BIN_DIR)/tests/$$pkg_name.test $$pkg || echo "WARNING: failed to build tests for $$pkg"; \
+	done
+	@echo "Granting capabilities to test binaries..."
+	@for test in $(BIN_DIR)/tests/*.test; do \
+		if [ -f "$$test" ]; then \
+			sudo setcap 'cap_net_admin,cap_net_bind_service=+eip' $$test 2>/dev/null || true; \
+		fi; \
+	done
+	@echo "Running tests as current user with capabilities..."
+	@if [ -n "$(TEST)" ]; then \
+		echo "Running specific test: $(TEST)"; \
+		for test in $(BIN_DIR)/tests/*.test; do \
+			if [ -f "$$test" ]; then \
+				echo ""; \
+				echo "Running $$(basename $$test) for $(TEST)..."; \
+				$$test -test.run=$(TEST) -test.v \
+					-test.timeout=60s || exit 1; \
+			fi; \
+		done; \
+	else \
+		for test in $(BIN_DIR)/tests/*.test; do \
+			if [ -f "$$test" ]; then \
+				echo ""; \
+				echo "Running $$(basename $$test)..."; \
+				$$test -test.v -test.parallel=10 -test.timeout=60s || exit 1; \
+			fi; \
+		done; \
+	fi
 
 # Generate JWT token for testing
 # Usage: make gen-jwt [USER_ID=test-user]
@@ -131,4 +165,3 @@ clean:
 	rm -f lib/exec/exec.pb.go
 	rm -f lib/exec/exec_grpc.pb.go
 	rm -f lib/system/exec_agent/exec-agent
-
diff --git a/README.md b/README.md
index 07fce66d..dc7a3577 100644
--- a/README.md
+++ b/README.md
@@ -8,10 +8,17 @@ Run containerized workloads in VMs, powered by [Cloud Hypervisor](https://github
 
 ### Prerequisites
 
-**Go 1.25.4+**, **KVM**, **erofs-utils**
+**Go 1.25.4+**, **KVM**, **erofs-utils**, **dnsmasq**
 
 ```bash
+# Verify prerequisites
 mkfs.erofs --version
+dnsmasq --version
+```
+
+**Install on Debian/Ubuntu:**
+```bash
+sudo apt-get install erofs-utils dnsmasq
 ```
 
 **KVM Access:** User must be in `kvm` group for VM access:
@@ -20,13 +27,100 @@ sudo usermod -aG kvm $USER
 # Log out and back in, or use: newgrp kvm
 ```
 
+**Network Capabilities:**
+
+Before running or testing Hypeman, ensure IPv4 forwarding is enabled:
+
+```bash
+# Enable IPv4 forwarding (temporary - until reboot)
+sudo sysctl -w net.ipv4.ip_forward=1
+
+# Enable IPv4 forwarding (persistent across reboots)
+echo 'net.ipv4.ip_forward=1' | sudo tee -a /etc/sysctl.conf
+sudo sysctl -p
+```
+
+**Why:** Required for routing traffic between VM network and external network.
+ +The hypeman binary needs network administration capabilities to create bridges and TAP devices: +```bash +# After building, grant network capabilities +sudo setcap 'cap_net_admin,cap_net_bind_service=+eip' /path/to/hypeman + +# For development builds +sudo setcap 'cap_net_admin,cap_net_bind_service=+eip' ./bin/hypeman + +# Verify capabilities +getcap ./bin/hypeman +``` + +**Note:** The `i` (inheritable) flag allows child processes spawned by hypeman (like `ip` and `iptables` commands) to inherit capabilities via the ambient capability set. + +**Note:** These capabilities must be reapplied after each rebuild. For production deployments, set capabilities on the installed binary. For local testing, this is handled automatically in `make test`. + ### Configuration #### Environment variables +Hypeman can be configured using the following environment variables: + +| Variable | Description | Default | +|----------|-------------|---------| +| `PORT` | HTTP server port | `8080` | +| `DATA_DIR` | Directory for storing VM images, volumes, and other data | `/var/lib/hypeman` | +| `BRIDGE_NAME` | Name of the network bridge for VM networking | `vmbr0` | +| `SUBNET_CIDR` | CIDR notation for the VM network subnet (gateway derived automatically) | `10.100.0.0/16` | +| `UPLINK_INTERFACE` | Host network interface to use for VM internet access | _(auto-detect)_ | +| `JWT_SECRET` | Secret key for JWT authentication (required for production) | _(empty)_ | +| `DNS_SERVER` | DNS server IP address for VMs | `1.1.1.1` | +| `MAX_CONCURRENT_BUILDS` | Maximum number of concurrent image builds | `1` | +| `MAX_OVERLAY_SIZE` | Maximum size for overlay filesystem | `100GB` | + +**Important: Subnet Configuration** + +The default subnet `10.100.0.0/16` is chosen to avoid common conflicts. Hypeman will detect conflicts with existing routes on startup and fail with guidance. + +If you need a different subnet, set `SUBNET_CIDR` in your environment. 
The gateway is automatically derived as the first IP in the subnet (e.g., `10.100.0.0/16` → `10.100.0.1`). + +**Alternative subnets if needed:** +- `172.30.0.0/16` - Private range between common Docker (172.17.x.x) and AWS (172.31.x.x) ranges +- `10.200.0.0/16` - Another private range option + +**Example:** +```bash +# In your .env file +SUBNET_CIDR=172.30.0.0/16 +``` + +**Finding the uplink interface (`UPLINK_INTERFACE`)** + +`UPLINK_INTERFACE` tells Hypeman which host interface to use for routing VM traffic to the outside world (for iptables MASQUERADE rules). On many hosts this is `eth0`, but laptops and more complex setups often use Wi‑Fi or other names. + +**Quick way to discover it:** +```bash +# Ask the kernel which interface is used to reach the internet +ip route get 1.1.1.1 +``` +Look for the `dev` field in the output, for example: +```text +1.1.1.1 via 192.168.12.1 dev wlp2s0 src 192.168.12.98 +``` +In this case, `wlp2s0` is the uplink interface, so you would set: +```bash +UPLINK_INTERFACE=wlp2s0 +``` + +You can also inspect all routes: +```bash +ip route show +``` +Pick the interface used by the default route (usually the line starting with `default`). Avoid using local bridges like `docker0`, `br-...`, `virbr0`, or `vmbr0` as the uplink; those are typically internal virtual networks, not your actual internet-facing interface. + +**Setup:** + ```bash cp .env.example .env -# Edit .env and set JWT_SECRET +# Edit .env and set JWT_SECRET and other configuration values ``` #### Data directory @@ -54,18 +148,12 @@ make build ``` ### Running the Server -1. Copy the example environment file and modify the values: -```bash -cp .env.example .env -# Edit .env and set JWT_SECRET and other configuration values -``` - -2. Generate a JWT token for testing (optional): +1. Generate a JWT token for testing (optional): ```bash make gen-jwt ``` -3. Start the server with hot-reload for development: +2. 
Start the server with hot-reload for development: ```bash make dev ``` @@ -73,10 +161,14 @@ The server will start on port 8080 (configurable via `PORT` environment variable ### Testing +Network tests require elevated permissions to create bridges and TAP devices. + ```bash make test ``` +The test command compiles test binaries, grants capabilities via `sudo setcap`, then runs tests as the current user (not root). You may be prompted for your sudo password during the capability grant step. + ### Code Generation After modifying `openapi.yaml`, regenerate the Go code: diff --git a/cmd/api/api/api.go b/cmd/api/api/api.go index c2e8df58..beb6acd9 100644 --- a/cmd/api/api/api.go +++ b/cmd/api/api/api.go @@ -4,6 +4,7 @@ import ( "github.com/onkernel/hypeman/cmd/api/config" "github.com/onkernel/hypeman/lib/images" "github.com/onkernel/hypeman/lib/instances" + "github.com/onkernel/hypeman/lib/network" "github.com/onkernel/hypeman/lib/oapi" "github.com/onkernel/hypeman/lib/volumes" ) @@ -14,6 +15,7 @@ type ApiService struct { ImageManager images.Manager InstanceManager instances.Manager VolumeManager volumes.Manager + NetworkManager network.Manager } var _ oapi.StrictServerInterface = (*ApiService)(nil) @@ -24,12 +26,14 @@ func New( imageManager images.Manager, instanceManager instances.Manager, volumeManager volumes.Manager, + networkManager network.Manager, ) *ApiService { return &ApiService{ Config: config, ImageManager: imageManager, InstanceManager: instanceManager, VolumeManager: volumeManager, + NetworkManager: networkManager, } } diff --git a/cmd/api/api/api_test.go b/cmd/api/api/api_test.go index e8d26b3b..09c6f2bf 100644 --- a/cmd/api/api/api_test.go +++ b/cmd/api/api/api_test.go @@ -10,6 +10,7 @@ import ( "github.com/onkernel/hypeman/cmd/api/config" "github.com/onkernel/hypeman/lib/images" "github.com/onkernel/hypeman/lib/instances" + "github.com/onkernel/hypeman/lib/network" "github.com/onkernel/hypeman/lib/paths" "github.com/onkernel/hypeman/lib/system" 
"github.com/onkernel/hypeman/lib/volumes" @@ -28,8 +29,9 @@ func newTestService(t *testing.T) *ApiService { } systemMgr := system.NewManager(p) + networkMgr := network.NewManager(p, cfg) maxOverlaySize := int64(100 * 1024 * 1024 * 1024) // 100GB for tests - instanceMgr := instances.NewManager(p, imageMgr, systemMgr, maxOverlaySize) + instanceMgr := instances.NewManager(p, imageMgr, systemMgr, networkMgr, maxOverlaySize) volumeMgr := volumes.NewManager(p) // Register cleanup for orphaned Cloud Hypervisor processes diff --git a/cmd/api/api/exec_test.go b/cmd/api/api/exec_test.go index 77c3e12a..ec00a6e8 100644 --- a/cmd/api/api/exec_test.go +++ b/cmd/api/api/exec_test.go @@ -76,10 +76,17 @@ func TestExecInstanceNonTTY(t *testing.T) { // Create instance t.Log("Creating instance...") + networkDisabled := false instResp, err := svc.CreateInstance(ctx(), oapi.CreateInstanceRequestObject{ Body: &oapi.CreateInstanceRequest{ Name: "exec-test", Image: "docker.io/library/nginx:alpine", + Network: &struct { + Enabled *bool `json:"enabled,omitempty"` + Name *string `json:"name,omitempty"` + }{ + Enabled: &networkDisabled, + }, }, }) require.NoError(t, err) diff --git a/cmd/api/api/instances.go b/cmd/api/api/instances.go index 5834c206..c4008979 100644 --- a/cmd/api/api/instances.go +++ b/cmd/api/api/instances.go @@ -9,7 +9,9 @@ import ( "github.com/c2h5oh/datasize" "github.com/onkernel/hypeman/lib/instances" "github.com/onkernel/hypeman/lib/logger" + "github.com/onkernel/hypeman/lib/network" "github.com/onkernel/hypeman/lib/oapi" + "github.com/samber/lo" ) // ListInstances lists all instances @@ -86,14 +88,21 @@ func (s *ApiService) CreateInstance(ctx context.Context, request oapi.CreateInst env = *request.Body.Env } + // Parse network enabled (default: true) + networkEnabled := true + if request.Body.Network != nil && request.Body.Network.Enabled != nil { + networkEnabled = *request.Body.Network.Enabled + } + domainReq := instances.CreateInstanceRequest{ - Name: 
request.Body.Name, - Image: request.Body.Image, - Size: size, - HotplugSize: hotplugSize, - OverlaySize: overlaySize, - Vcpus: vcpus, - Env: env, + Name: request.Body.Name, + Image: request.Body.Image, + Size: size, + HotplugSize: hotplugSize, + OverlaySize: overlaySize, + Vcpus: vcpus, + Env: env, + NetworkEnabled: networkEnabled, } inst, err := s.InstanceManager.CreateInstance(ctx, domainReq) @@ -109,12 +118,17 @@ func (s *ApiService) CreateInstance(ctx context.Context, request oapi.CreateInst Code: "already_exists", Message: "instance already exists", }, nil + case errors.Is(err, network.ErrNameExists): + return oapi.CreateInstance400JSONResponse{ + Code: "name_conflict", + Message: err.Error(), + }, nil default: - log.Error("failed to create instance", "error", err, "image", request.Body.Image) - return oapi.CreateInstance500JSONResponse{ - Code: "internal_error", - Message: "failed to create instance", - }, nil + log.Error("failed to create instance", "error", err, "image", request.Body.Image) + return oapi.CreateInstance500JSONResponse{ + Code: "internal_error", + Message: "failed to create instance", + }, nil } } return oapi.CreateInstance201JSONResponse(instanceToOAPI(*inst)), nil @@ -293,19 +307,35 @@ func instanceToOAPI(inst instances.Instance) oapi.Instance { hotplugSizeStr := datasize.ByteSize(inst.HotplugSize).HR() overlaySizeStr := datasize.ByteSize(inst.OverlaySize).HR() + // Build network object with ip/mac nested inside + netObj := &struct { + Enabled *bool `json:"enabled,omitempty"` + Ip *string `json:"ip"` + Mac *string `json:"mac"` + Name *string `json:"name,omitempty"` + }{ + Enabled: lo.ToPtr(inst.NetworkEnabled), + } + if inst.NetworkEnabled { + netObj.Name = lo.ToPtr("default") + netObj.Ip = lo.ToPtr(inst.IP) + netObj.Mac = lo.ToPtr(inst.MAC) + } + oapiInst := oapi.Instance{ Id: inst.Id, Name: inst.Name, Image: inst.Image, State: oapi.InstanceState(inst.State), - Size: &sizeStr, - HotplugSize: &hotplugSizeStr, - OverlaySize: &overlaySizeStr, 
- Vcpus: &inst.Vcpus, + Size: lo.ToPtr(sizeStr), + HotplugSize: lo.ToPtr(hotplugSizeStr), + OverlaySize: lo.ToPtr(overlaySizeStr), + Vcpus: lo.ToPtr(inst.Vcpus), + Network: netObj, CreatedAt: inst.CreatedAt, StartedAt: inst.StartedAt, StoppedAt: inst.StoppedAt, - HasSnapshot: &inst.HasSnapshot, + HasSnapshot: lo.ToPtr(inst.HasSnapshot), } if len(inst.Env) > 0 { diff --git a/cmd/api/api/instances_test.go b/cmd/api/api/instances_test.go index 1d2d98b7..0c1a32c4 100644 --- a/cmd/api/api/instances_test.go +++ b/cmd/api/api/instances_test.go @@ -93,6 +93,7 @@ func TestCreateInstance_ParsesHumanReadableSizes(t *testing.T) { overlaySize := "5GB" t.Log("Creating instance with human-readable sizes...") + networkEnabled := false resp, err := svc.CreateInstance(ctx(), oapi.CreateInstanceRequestObject{ Body: &oapi.CreateInstanceRequest{ Name: "test-sizes", @@ -100,6 +101,11 @@ func TestCreateInstance_ParsesHumanReadableSizes(t *testing.T) { Size: &size, HotplugSize: &hotplugSize, OverlaySize: &overlaySize, + Network: &struct { + Enabled *bool `json:"enabled,omitempty"` + }{ + Enabled: &networkEnabled, + }, }, }) require.NoError(t, err) @@ -132,12 +138,18 @@ func TestCreateInstance_InvalidSizeFormat(t *testing.T) { // Test with invalid size format invalidSize := "not-a-size" + networkDisabled := false resp, err := svc.CreateInstance(ctx(), oapi.CreateInstanceRequestObject{ Body: &oapi.CreateInstanceRequest{ Name: "test-invalid", Image: "docker.io/library/alpine:latest", Size: &invalidSize, + Network: &struct { + Enabled *bool `json:"enabled,omitempty"` + }{ + Enabled: &networkDisabled, + }, }, }) require.NoError(t, err) diff --git a/cmd/api/config/config.go b/cmd/api/config/config.go index 59d915de..e454f83e 100644 --- a/cmd/api/config/config.go +++ b/cmd/api/config/config.go @@ -13,6 +13,7 @@ type Config struct { BridgeName string SubnetCIDR string SubnetGateway string + UplinkInterface string JwtSecret string DNSServer string MaxConcurrentBuilds int @@ -29,8 +30,9 @@ func 
Load() *Config { Port: getEnv("PORT", "8080"), DataDir: getEnv("DATA_DIR", "/var/lib/hypeman"), BridgeName: getEnv("BRIDGE_NAME", "vmbr0"), - SubnetCIDR: getEnv("SUBNET_CIDR", "192.168.100.0/24"), - SubnetGateway: getEnv("SUBNET_GATEWAY", "192.168.100.1"), + SubnetCIDR: getEnv("SUBNET_CIDR", "10.100.0.0/16"), + SubnetGateway: getEnv("SUBNET_GATEWAY", ""), // empty = derived as first IP from subnet + UplinkInterface: getEnv("UPLINK_INTERFACE", ""), // empty = auto-detect from default route JwtSecret: getEnv("JWT_SECRET", ""), DNSServer: getEnv("DNS_SERVER", "1.1.1.1"), MaxConcurrentBuilds: getEnvInt("MAX_CONCURRENT_BUILDS", 1), diff --git a/cmd/api/main.go b/cmd/api/main.go index 4b5b7367..85c13a64 100644 --- a/cmd/api/main.go +++ b/cmd/api/main.go @@ -19,6 +19,7 @@ import ( nethttpmiddleware "github.com/oapi-codegen/nethttp-middleware" "github.com/onkernel/hypeman" "github.com/onkernel/hypeman/cmd/api/api" + "github.com/onkernel/hypeman/lib/instances" mw "github.com/onkernel/hypeman/lib/middleware" "github.com/onkernel/hypeman/lib/oapi" "golang.org/x/sync/errgroup" @@ -59,6 +60,16 @@ func run() error { logger.Info("System files ready", "kernel", kernelVer) + // Initialize network manager (creates default network if needed) + // Get running instance IDs for TAP cleanup + runningIDs := getRunningInstanceIDs(app) + logger.Info("Initializing network manager...") + if err := app.NetworkManager.Initialize(app.Ctx, runningIDs); err != nil { + logger.Error("failed to initialize network manager", "error", err) + return fmt.Errorf("initialize network manager: %w", err) + } + logger.Info("Network manager initialized") + // Create router r := chi.NewRouter() @@ -169,3 +180,18 @@ func run() error { return grp.Wait() } +// getRunningInstanceIDs returns IDs of instances currently in Running state +func getRunningInstanceIDs(app *application) []string { + allInstances, err := app.InstanceManager.ListInstances(app.Ctx) + if err != nil { + return nil + } + var running []string + for 
_, inst := range allInstances { + if inst.State == instances.StateRunning { + running = append(running, inst.Id) + } + } + return running +} + diff --git a/cmd/api/wire.go b/cmd/api/wire.go index 18070460..ed881ee9 100644 --- a/cmd/api/wire.go +++ b/cmd/api/wire.go @@ -11,6 +11,7 @@ import ( "github.com/onkernel/hypeman/cmd/api/config" "github.com/onkernel/hypeman/lib/images" "github.com/onkernel/hypeman/lib/instances" + "github.com/onkernel/hypeman/lib/network" "github.com/onkernel/hypeman/lib/providers" "github.com/onkernel/hypeman/lib/system" "github.com/onkernel/hypeman/lib/volumes" @@ -23,6 +24,7 @@ type application struct { Config *config.Config ImageManager images.Manager SystemManager system.Manager + NetworkManager network.Manager InstanceManager instances.Manager VolumeManager volumes.Manager ApiService *api.ApiService @@ -37,6 +39,7 @@ func initializeApp() (*application, func(), error) { providers.ProvidePaths, providers.ProvideImageManager, providers.ProvideSystemManager, + providers.ProvideNetworkManager, providers.ProvideInstanceManager, providers.ProvideVolumeManager, api.New, diff --git a/cmd/api/wire_gen.go b/cmd/api/wire_gen.go index 09d97820..9e6dbd16 100644 --- a/cmd/api/wire_gen.go +++ b/cmd/api/wire_gen.go @@ -12,6 +12,7 @@ import ( "github.com/onkernel/hypeman/cmd/api/config" "github.com/onkernel/hypeman/lib/images" "github.com/onkernel/hypeman/lib/instances" + "github.com/onkernel/hypeman/lib/network" "github.com/onkernel/hypeman/lib/providers" "github.com/onkernel/hypeman/lib/system" "github.com/onkernel/hypeman/lib/volumes" @@ -35,18 +36,20 @@ func initializeApp() (*application, func(), error) { return nil, nil, err } systemManager := providers.ProvideSystemManager(paths) - instancesManager, err := providers.ProvideInstanceManager(paths, config, manager, systemManager) + networkManager := providers.ProvideNetworkManager(paths, config) + instancesManager, err := providers.ProvideInstanceManager(paths, config, manager, systemManager, 
networkManager) if err != nil { return nil, nil, err } volumesManager := providers.ProvideVolumeManager(paths) - apiService := api.New(config, manager, instancesManager, volumesManager) + apiService := api.New(config, manager, instancesManager, volumesManager, networkManager) mainApplication := &application{ Ctx: context, Logger: logger, Config: config, ImageManager: manager, SystemManager: systemManager, + NetworkManager: networkManager, InstanceManager: instancesManager, VolumeManager: volumesManager, ApiService: apiService, @@ -64,6 +67,7 @@ type application struct { Config *config.Config ImageManager images.Manager SystemManager system.Manager + NetworkManager network.Manager InstanceManager instances.Manager VolumeManager volumes.Manager ApiService *api.ApiService diff --git a/go.mod b/go.mod index ca72b23a..c46db705 100644 --- a/go.mod +++ b/go.mod @@ -21,11 +21,16 @@ require ( github.com/opencontainers/image-spec v1.1.1 github.com/opencontainers/runtime-spec v1.2.1 github.com/opencontainers/umoci v0.6.0 + github.com/samber/lo v1.52.0 github.com/stretchr/testify v1.11.1 github.com/u-root/u-root v0.15.0 + github.com/vishvananda/netlink v1.3.1 golang.org/x/sync v0.17.0 + golang.org/x/sys v0.38.0 + golang.org/x/term v0.37.0 google.golang.org/grpc v1.77.0 google.golang.org/protobuf v1.36.10 + gvisor.dev/gvisor v0.0.0-20251125014920-fc40e232ff54 ) require ( @@ -67,11 +72,10 @@ require ( github.com/u-root/uio v0.0.0-20240224005618-d2acac8f3701 // indirect github.com/vbatts/go-mtree v0.6.1-0.20250911112631-8307d76bc1b9 // indirect github.com/vbatts/tar-split v0.12.1 // indirect + github.com/vishvananda/netns v0.0.5 // indirect github.com/woodsbury/decimal128 v1.3.0 // indirect golang.org/x/crypto v0.43.0 // indirect golang.org/x/net v0.46.1-0.20251013234738-63d1a5100f82 // indirect - golang.org/x/sys v0.38.0 // indirect - golang.org/x/term v0.37.0 // indirect golang.org/x/text v0.30.0 // indirect google.golang.org/genproto/googleapis/rpc 
v0.0.0-20251022142026-3a174f9686a8 // indirect gopkg.in/yaml.v2 v2.4.0 // indirect diff --git a/go.sum b/go.sum index 95165d0d..b5a7229d 100644 --- a/go.sum +++ b/go.sum @@ -149,6 +149,8 @@ github.com/rogpeppe/go-internal v1.13.1 h1:KvO1DLK/DRN07sQ1LQKScxyZJuNnedQ5/wKSR github.com/rogpeppe/go-internal v1.13.1/go.mod h1:uMEvuHeurkdAXX61udpOXGD/AzZDWNMNyH2VO9fmH0o= github.com/rootless-containers/proto/go-proto v0.0.0-20230421021042-4cd87ebadd67 h1:58jvc5cZ+hGKidQ4Z37/+rj9eQxRRjOOsqNEwPSZXR4= github.com/rootless-containers/proto/go-proto v0.0.0-20230421021042-4cd87ebadd67/go.mod h1:LLjEAc6zmycfeN7/1fxIphWQPjHpTt7ElqT7eVf8e4A= +github.com/samber/lo v1.52.0 h1:Rvi+3BFHES3A8meP33VPAxiBZX/Aws5RxrschYGjomw= +github.com/samber/lo v1.52.0/go.mod h1:4+MXEGsJzbKGaUEQFKBq2xtfuznW9oz/WrgyzMzRoM0= github.com/sergi/go-diff v1.0.0/go.mod h1:0CfEIISq7TuYL3j771MWULgwwjU+GofnZX9QAmXWZgo= github.com/sirupsen/logrus v1.9.4-0.20230606125235-dd1b4c2e81af h1:Sp5TG9f7K39yfB+If0vjp97vuT74F72r8hfRpP8jLU0= github.com/sirupsen/logrus v1.9.4-0.20230606125235-dd1b4c2e81af/go.mod h1:naHLuLoDiP4jHNo9R0sCBMtWGeIprob74mVsIT4qYEQ= @@ -179,6 +181,10 @@ github.com/vbatts/go-mtree v0.6.1-0.20250911112631-8307d76bc1b9 h1:R6l9BtUe83abU github.com/vbatts/go-mtree v0.6.1-0.20250911112631-8307d76bc1b9/go.mod h1:W7bcG9PCn6lFY+ljGlZxx9DONkxL3v8a7HyN+PrSrjA= github.com/vbatts/tar-split v0.12.1 h1:CqKoORW7BUWBe7UL/iqTVvkTBOF8UvOMKOIZykxnnbo= github.com/vbatts/tar-split v0.12.1/go.mod h1:eF6B6i6ftWQcDqEn3/iGFRFRo8cBIMSJVOpnNdfTMFA= +github.com/vishvananda/netlink v1.3.1 h1:3AEMt62VKqz90r0tmNhog0r/PpWKmrEShJU0wJW6bV0= +github.com/vishvananda/netlink v1.3.1/go.mod h1:ARtKouGSTGchR8aMwmkzC0qiNPrrWO5JS/XMVl45+b4= +github.com/vishvananda/netns v0.0.5 h1:DfiHV+j8bA32MFM7bfEunvT8IAqQ/NzSJHtcmW5zdEY= +github.com/vishvananda/netns v0.0.5/go.mod h1:SpkAiCQRtJ6TvvxPnOSyH3BMl6unz3xZlaprSwhNNJM= github.com/woodsbury/decimal128 v1.3.0 h1:8pffMNWIlC0O5vbyHWFZAt5yWvWcrHA+3ovIIjVWss0= github.com/woodsbury/decimal128 v1.3.0/go.mod 
h1:C5UTmyTjW3JftjUFzOVhC20BEQa2a4ZKOB5I6Zjb+ds= go.opentelemetry.io/auto/sdk v1.2.1 h1:jXsnJ4Lmnqd11kwkBV2LgLoFMZKizbCi5fNZ/ipaZ64= @@ -210,6 +216,8 @@ golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5h golang.org/x/sys v0.0.0-20190222072716-a9d3bda3a223/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20220715151400-c0bba94af5f8/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.2.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.10.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.38.0 h1:3yZWxaJjBmCWXqhN1qh02AkOnCQ1poK6oF+a7xWL6Gc= golang.org/x/sys v0.38.0/go.mod h1:OgkHotnGiDImocRcuBABYBEXf8A9a87e/uXjp9XT3ks= golang.org/x/term v0.37.0 h1:8EGAD0qCmHYZg6J17DvsMy9/wJ7/D/4pV/wfnld5lTU= @@ -246,3 +254,5 @@ gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= gotest.tools/v3 v3.5.2 h1:7koQfIKdy+I8UTetycgUqXWSDwpgv193Ka+qRsmBY8Q= gotest.tools/v3 v3.5.2/go.mod h1:LtdLGcnqToBH83WByAAi/wiwSFCArdFIUV/xxN4pcjA= +gvisor.dev/gvisor v0.0.0-20251125014920-fc40e232ff54 h1:eYMn6Z3T40m4f9vVYRcsjvX4eEv7ng7FgrZTbadSyBs= +gvisor.dev/gvisor v0.0.0-20251125014920-fc40e232ff54/go.mod h1:W1ZgZ/Dh85TgSZWH67l2jKVpDE5bjIaut7rjwwOiHzQ= diff --git a/lib/instances/README.md b/lib/instances/README.md index fd8ebb74..a5de128d 100644 --- a/lib/instances/README.md +++ b/lib/instances/README.md @@ -46,7 +46,7 @@ Manages VM instance lifecycle using Cloud Hypervisor. 
console.log # Serial console (VM output) snapshots/ snapshot-latest/ # Snapshot directory - vm.json # VM configuration + config.json # VM configuration memory-ranges # Memory state ``` diff --git a/lib/instances/configdisk.go b/lib/instances/configdisk.go index 50d17dd4..99fa95ab 100644 --- a/lib/instances/configdisk.go +++ b/lib/instances/configdisk.go @@ -5,16 +5,18 @@ import ( "fmt" "os" "path/filepath" + "strconv" "strings" "github.com/onkernel/hypeman/lib/images" + "github.com/onkernel/hypeman/lib/network" ) // createConfigDisk generates an erofs disk with instance configuration // The disk contains: // - /config.sh - Shell script sourced by init // - /metadata.json - JSON metadata for programmatic access -func (m *manager) createConfigDisk(inst *Instance, imageInfo *images.Image) error { +func (m *manager) createConfigDisk(inst *Instance, imageInfo *images.Image, netConfig *network.NetworkConfig) error { // Create temporary directory for config files tmpDir, err := os.MkdirTemp("", "hypeman-config-*") if err != nil { @@ -23,7 +25,7 @@ func (m *manager) createConfigDisk(inst *Instance, imageInfo *images.Image) erro defer os.RemoveAll(tmpDir) // Generate config.sh - configScript := m.generateConfigScript(inst, imageInfo) + configScript := m.generateConfigScript(inst, imageInfo, netConfig) configPath := filepath.Join(tmpDir, "config.sh") if err := os.WriteFile(configPath, []byte(configScript), 0644); err != nil { return fmt.Errorf("write config.sh: %w", err) @@ -62,7 +64,7 @@ func (m *manager) createConfigDisk(inst *Instance, imageInfo *images.Image) erro } // generateConfigScript creates the shell script that will be sourced by init -func (m *manager) generateConfigScript(inst *Instance, imageInfo *images.Image) string { +func (m *manager) generateConfigScript(inst *Instance, imageInfo *images.Image, netConfig *network.NetworkConfig) string { // Prepare entrypoint value entrypoint := "" if len(imageInfo.Entrypoint) > 0 { @@ -88,6 +90,21 @@ func (m *manager) 
generateConfigScript(inst *Instance, imageInfo *images.Image) envLines.WriteString(fmt.Sprintf("export %s=%s\n", key, shellQuote(value))) } + // Build network configuration section + // Use netConfig directly instead of trying to derive it (VM hasn't started yet) + networkSection := "" + if inst.NetworkEnabled && netConfig != nil { + // Convert netmask to CIDR prefix length for ip command + cidr := netmaskToCIDR(netConfig.Netmask) + networkSection = fmt.Sprintf(` +# Network configuration +GUEST_IP="%s" +GUEST_CIDR="%d" +GUEST_GW="%s" +GUEST_DNS="%s" +`, netConfig.IP, cidr, netConfig.Gateway, netConfig.DNS) + } + // Generate script as a readable template block // ENTRYPOINT and CMD contain shell-quoted arrays that will be eval'd in init script := fmt.Sprintf(`#!/bin/sh @@ -99,12 +116,13 @@ CMD="%s" WORKDIR=%s # Environment variables -%s`, +%s%s`, inst.Id, entrypoint, cmd, workdir, envLines.String(), + networkSection, ) return script @@ -150,4 +168,20 @@ func shellQuoteArray(arr []string) string { return strings.Join(quoted, " ") } - +// netmaskToCIDR converts dotted decimal netmask to CIDR prefix length +// e.g., "255.255.255.0" -> 24, "255.255.0.0" -> 16 +func netmaskToCIDR(netmask string) int { + parts := strings.Split(netmask, ".") + if len(parts) != 4 { + return 24 // default to /24 + } + bits := 0 + for _, p := range parts { + n, _ := strconv.Atoi(p) + for n > 0 { + bits += n & 1 + n >>= 1 + } + } + return bits +} diff --git a/lib/instances/create.go b/lib/instances/create.go index 94db6cbe..5c4e94a8 100644 --- a/lib/instances/create.go +++ b/lib/instances/create.go @@ -9,8 +9,10 @@ import ( "github.com/nrednav/cuid2" "github.com/onkernel/hypeman/lib/images" "github.com/onkernel/hypeman/lib/logger" + "github.com/onkernel/hypeman/lib/network" "github.com/onkernel/hypeman/lib/system" "github.com/onkernel/hypeman/lib/vmm" + "gvisor.dev/gvisor/pkg/cleanup" ) // generateVsockCID converts first 8 chars of instance ID to a unique CID @@ -100,29 +102,43 @@ func (m 
*manager) createInstance( req.Env = make(map[string]string) } - // 6. Get default kernel version + // 6. Determine network based on NetworkEnabled flag + networkName := "" + if req.NetworkEnabled { + networkName = "default" + } + + // 7. Get default kernel version kernelVer := m.systemManager.GetDefaultKernelVersion() - // 7. Create instance metadata + // 8. Create instance metadata stored := &StoredMetadata{ - Id: id, - Name: req.Name, - Image: req.Image, - Size: size, - HotplugSize: hotplugSize, - OverlaySize: overlaySize, - Vcpus: vcpus, - Env: req.Env, - CreatedAt: time.Now(), - StartedAt: nil, - StoppedAt: nil, - KernelVersion: string(kernelVer), - CHVersion: vmm.V49_0, // Use latest - SocketPath: m.paths.InstanceSocket(id), - DataDir: m.paths.InstanceDir(id), - VsockCID: vsockCID, - VsockSocket: vsockSocket, - } + Id: id, + Name: req.Name, + Image: req.Image, + Size: size, + HotplugSize: hotplugSize, + OverlaySize: overlaySize, + Vcpus: vcpus, + Env: req.Env, + NetworkEnabled: req.NetworkEnabled, + CreatedAt: time.Now(), + StartedAt: nil, + StoppedAt: nil, + KernelVersion: string(kernelVer), + CHVersion: vmm.V49_0, // Use latest + SocketPath: m.paths.InstanceSocket(id), + DataDir: m.paths.InstanceDir(id), + VsockCID: vsockCID, + VsockSocket: vsockSocket, + } + + // Setup cleanup stack for automatic rollback on errors + cu := cleanup.Make(func() { + log.DebugContext(ctx, "cleaning up instance on error", "id", id) + m.deleteInstanceData(id) + }) + defer cu.Clean() // 8. Ensure directories log.DebugContext(ctx, "creating instance directories", "id", id) @@ -135,37 +151,58 @@ func (m *manager) createInstance( log.DebugContext(ctx, "creating overlay disk", "id", id, "size_bytes", stored.OverlaySize) if err := m.createOverlayDisk(id, stored.OverlaySize); err != nil { log.ErrorContext(ctx, "failed to create overlay disk", "id", id, "error", err) - m.deleteInstanceData(id) // Cleanup return nil, fmt.Errorf("create overlay disk: %w", err) } - // 10. 
Create config disk (needs Instance for buildVMConfig) + // 10. Allocate network (if network enabled) + var netConfig *network.NetworkConfig + if networkName != "" { + log.DebugContext(ctx, "allocating network", "id", id, "network", networkName) + netConfig, err = m.networkManager.CreateAllocation(ctx, network.AllocateRequest{ + InstanceID: id, + InstanceName: req.Name, + }) + if err != nil { + log.ErrorContext(ctx, "failed to allocate network", "id", id, "network", networkName, "error", err) + return nil, fmt.Errorf("allocate network: %w", err) + } + // Store IP/MAC in metadata (persisted with instance) + stored.IP = netConfig.IP + stored.MAC = netConfig.MAC + // Add network cleanup to stack + cu.Add(func() { + // Network cleanup: TAP devices are removed when ReleaseAllocation is called. + // In case of unexpected scenarios (like power loss), TAP devices persist until host reboot. + if netAlloc, err := m.networkManager.GetAllocation(ctx, id); err == nil { + m.networkManager.ReleaseAllocation(ctx, netAlloc) + } + }) + } + + // 11. Create config disk (needs Instance for buildVMConfig) inst := &Instance{StoredMetadata: *stored} log.DebugContext(ctx, "creating config disk", "id", id) - if err := m.createConfigDisk(inst, imageInfo); err != nil { + if err := m.createConfigDisk(inst, imageInfo, netConfig); err != nil { log.ErrorContext(ctx, "failed to create config disk", "id", id, "error", err) - m.deleteInstanceData(id) // Cleanup return nil, fmt.Errorf("create config disk: %w", err) } - // 11. Save metadata + // 12. Save metadata log.DebugContext(ctx, "saving instance metadata", "id", id) meta := &metadata{StoredMetadata: *stored} if err := m.saveMetadata(meta); err != nil { log.ErrorContext(ctx, "failed to save metadata", "id", id, "error", err) - m.deleteInstanceData(id) // Cleanup return nil, fmt.Errorf("save metadata: %w", err) } - // 12. Start VMM and boot VM + // 13. 
Start VMM and boot VM log.InfoContext(ctx, "starting VMM and booting VM", "id", id) - if err := m.startAndBootVM(ctx, stored, imageInfo); err != nil { + if err := m.startAndBootVM(ctx, stored, imageInfo, netConfig); err != nil { log.ErrorContext(ctx, "failed to start and boot VM", "id", id, "error", err) - m.deleteInstanceData(id) // Cleanup return nil, err } - // 13. Update timestamp after VM is running + // 14. Update timestamp after VM is running now := time.Now() stored.StartedAt = &now @@ -176,6 +213,9 @@ func (m *manager) createInstance( log.WarnContext(ctx, "failed to update metadata after VM start", "id", id, "error", err) } + // Success - release cleanup stack (prevent cleanup) + cu.Release() + // Return instance with derived state finalInst := m.toInstance(ctx, meta) log.InfoContext(ctx, "instance created successfully", "id", id, "name", req.Name, "state", finalInst.State) @@ -219,6 +259,7 @@ func (m *manager) startAndBootVM( ctx context.Context, stored *StoredMetadata, imageInfo *images.Image, + netConfig *network.NetworkConfig, ) error { log := logger.FromContext(ctx) @@ -241,7 +282,7 @@ func (m *manager) startAndBootVM( // Build VM configuration matching Cloud Hypervisor VmConfig inst := &Instance{StoredMetadata: *stored} - vmConfig, err := m.buildVMConfig(inst, imageInfo) + vmConfig, err := m.buildVMConfig(inst, imageInfo, netConfig) if err != nil { return fmt.Errorf("build vm config: %w", err) } @@ -291,7 +332,7 @@ func (m *manager) startAndBootVM( } // buildVMConfig creates the Cloud Hypervisor VmConfig -func (m *manager) buildVMConfig(inst *Instance, imageInfo *images.Image) (vmm.VmConfig, error) { +func (m *manager) buildVMConfig(inst *Instance, imageInfo *images.Image, netConfig *network.NetworkConfig) (vmm.VmConfig, error) { // Get system file paths kernelPath, _ := m.systemManager.GetKernelPath(system.KernelVersion(inst.KernelVersion)) initrdPath, _ := m.systemManager.GetInitrdPath() @@ -358,6 +399,17 @@ func (m *manager) buildVMConfig(inst 
*Instance, imageInfo *images.Image) (vmm.Vm Mode: vmm.ConsoleConfigMode("Off"), } + // Network configuration (optional, use passed config) + var nets *[]vmm.NetConfig + if netConfig != nil { + nets = &[]vmm.NetConfig{{ + Tap: &netConfig.TAPDevice, + Ip: &netConfig.IP, + Mac: &netConfig.MAC, + Mask: &netConfig.Netmask, + }} + } + // vsock configuration for remote exec vsock := vmm.VsockConfig{ Cid: inst.VsockCID, @@ -371,6 +423,7 @@ func (m *manager) buildVMConfig(inst *Instance, imageInfo *images.Image) (vmm.Vm Disks: &disks, Serial: &serial, Console: &console, + Net: nets, Vsock: &vsock, }, nil } diff --git a/lib/instances/delete.go b/lib/instances/delete.go index 1cc65820..8bfd0c9f 100644 --- a/lib/instances/delete.go +++ b/lib/instances/delete.go @@ -8,6 +8,7 @@ import ( "time" "github.com/onkernel/hypeman/lib/logger" + "github.com/onkernel/hypeman/lib/network" ) // deleteInstance stops and deletes an instance @@ -28,7 +29,17 @@ func (m *manager) deleteInstance( inst := m.toInstance(ctx, meta) log.DebugContext(ctx, "loaded instance", "id", id, "state", inst.State) - // 2. If VMM might be running, force kill it + // 2. Get network allocation BEFORE killing VMM (while we can still query it) + var networkAlloc *network.Allocation + if inst.NetworkEnabled { + log.DebugContext(ctx, "getting network allocation", "id", id) + networkAlloc, err = m.networkManager.GetAllocation(ctx, id) + if err != nil { + log.WarnContext(ctx, "failed to get network allocation, will still attempt cleanup", "id", id, "error", err) + } + } + + // 3. If VMM might be running, force kill it if inst.State.RequiresVMM() { log.DebugContext(ctx, "stopping VMM", "id", id, "state", inst.State) if err := m.killVMM(ctx, &inst); err != nil { @@ -38,7 +49,16 @@ func (m *manager) deleteInstance( } } - // 3. Delete all instance data + // 4. 
Release network allocation + if inst.NetworkEnabled { + log.DebugContext(ctx, "releasing network", "id", id, "network", "default") + if err := m.networkManager.ReleaseAllocation(ctx, networkAlloc); err != nil { + // Log error but continue with cleanup + log.WarnContext(ctx, "failed to release network, continuing with cleanup", "id", id, "error", err) + } + } + + // 5. Delete all instance data log.DebugContext(ctx, "deleting instance data", "id", id) if err := m.deleteInstanceData(id); err != nil { log.ErrorContext(ctx, "failed to delete instance data", "id", id, "error", err) @@ -64,13 +84,24 @@ func (m *manager) killVMM(ctx context.Context, inst *Instance) error { // Process exists - kill it immediately with SIGKILL // No graceful shutdown needed since we're deleting all data log.DebugContext(ctx, "killing VMM process", "id", inst.Id, "pid", pid) - syscall.Kill(pid, syscall.SIGKILL) + if err := syscall.Kill(pid, syscall.SIGKILL); err != nil { + log.WarnContext(ctx, "failed to kill VMM process", "id", inst.Id, "pid", pid, "error", err) + } - // Wait for process to die (SIGKILL is guaranteed, usually instant) - if !WaitForProcessExit(pid, 1*time.Second) { - log.WarnContext(ctx, "VMM process did not exit in time", "id", inst.Id, "pid", pid) - } else { - log.DebugContext(ctx, "VMM process killed successfully", "id", inst.Id, "pid", pid) + // Wait for process to die and reap it to prevent zombies + // SIGKILL should be instant, but give it a moment + for i := 0; i < 50; i++ { // 50 * 100ms = 5 seconds + var wstatus syscall.WaitStatus + wpid, err := syscall.Wait4(pid, &wstatus, syscall.WNOHANG, nil) + if err != nil || wpid == pid { + // Process reaped successfully or error (likely ECHILD if already reaped) + log.DebugContext(ctx, "VMM process killed and reaped", "id", inst.Id, "pid", pid) + break + } + if i == 49 { + log.WarnContext(ctx, "VMM process did not exit in time", "id", inst.Id, "pid", pid) + } + time.Sleep(100 * time.Millisecond) } } else { 
log.DebugContext(ctx, "VMM process not running", "id", inst.Id, "pid", pid) diff --git a/lib/instances/manager.go b/lib/instances/manager.go index 52f3b76e..f246a41b 100644 --- a/lib/instances/manager.go +++ b/lib/instances/manager.go @@ -6,6 +6,7 @@ import ( "sync" "github.com/onkernel/hypeman/lib/images" + "github.com/onkernel/hypeman/lib/network" "github.com/onkernel/hypeman/lib/paths" "github.com/onkernel/hypeman/lib/system" ) @@ -26,17 +27,19 @@ type manager struct { paths *paths.Paths imageManager images.Manager systemManager system.Manager - maxOverlaySize int64 // Maximum overlay disk size in bytes - instanceLocks sync.Map // map[string]*sync.RWMutex - per-instance locks - hostTopology *HostTopology // Cached host CPU topology + networkManager network.Manager + maxOverlaySize int64 // Maximum overlay disk size in bytes + instanceLocks sync.Map // map[string]*sync.RWMutex - per-instance locks + hostTopology *HostTopology // Cached host CPU topology } // NewManager creates a new instances manager -func NewManager(p *paths.Paths, imageManager images.Manager, systemManager system.Manager, maxOverlaySize int64) Manager { +func NewManager(p *paths.Paths, imageManager images.Manager, systemManager system.Manager, networkManager network.Manager, maxOverlaySize int64) Manager { return &manager{ paths: p, imageManager: imageManager, systemManager: systemManager, + networkManager: networkManager, maxOverlaySize: maxOverlaySize, instanceLocks: sync.Map{}, hostTopology: detectHostTopology(), // Detect and cache host topology diff --git a/lib/instances/manager_test.go b/lib/instances/manager_test.go index 56682573..7d2d83d5 100644 --- a/lib/instances/manager_test.go +++ b/lib/instances/manager_test.go @@ -10,7 +10,9 @@ import ( "testing" "time" + "github.com/onkernel/hypeman/cmd/api/config" "github.com/onkernel/hypeman/lib/images" + "github.com/onkernel/hypeman/lib/network" "github.com/onkernel/hypeman/lib/paths" "github.com/onkernel/hypeman/lib/system" 
"github.com/onkernel/hypeman/lib/vmm" @@ -22,12 +24,21 @@ import ( func setupTestManager(t *testing.T) (*manager, string) { tmpDir := t.TempDir() - imageManager, err := images.NewManager(paths.New(tmpDir), 1) + cfg := &config.Config{ + DataDir: tmpDir, + BridgeName: "vmbr0", + SubnetCIDR: "10.100.0.0/16", + DNSServer: "1.1.1.1", + } + + p := paths.New(tmpDir) + imageManager, err := images.NewManager(p, 1) require.NoError(t, err) - systemManager := system.NewManager(paths.New(tmpDir)) + systemManager := system.NewManager(p) + networkManager := network.NewManager(p, cfg) maxOverlaySize := int64(100 * 1024 * 1024 * 1024) - mgr := NewManager(paths.New(tmpDir), imageManager, systemManager, maxOverlaySize).(*manager) + mgr := NewManager(p, imageManager, systemManager, networkManager, maxOverlaySize).(*manager) // Register cleanup to kill any orphaned Cloud Hypervisor processes t.Cleanup(func() { @@ -73,6 +84,27 @@ func waitForVMReady(ctx context.Context, socketPath string, timeout time.Duratio return fmt.Errorf("VM did not reach running state within %v", timeout) } +// waitForLogMessage polls instance logs until the message appears or times out +func waitForLogMessage(ctx context.Context, mgr *manager, instanceID, message string, timeout time.Duration) error { + deadline := time.Now().Add(timeout) + + for time.Now().Before(deadline) { + logs, err := mgr.GetInstanceLogs(ctx, instanceID, false, 200) + if err != nil { + time.Sleep(100 * time.Millisecond) + continue + } + + if strings.Contains(logs, message) { + return nil + } + + time.Sleep(100 * time.Millisecond) + } + + return fmt.Errorf("message %q not found in logs within %v", message, timeout) +} + // cleanupOrphanedProcesses kills any Cloud Hypervisor processes from metadata func cleanupOrphanedProcesses(t *testing.T, mgr *manager) { // Find all metadata files @@ -159,6 +191,7 @@ func TestCreateAndDeleteInstance(t *testing.T) { HotplugSize: 512 * 1024 * 1024, // 512MB OverlaySize: 10 * 1024 * 1024 * 1024, // 10GB 
Vcpus: 1, + NetworkEnabled: false, // No network for tests Env: map[string]string{ "TEST_VAR": "test_value", }, @@ -239,10 +272,19 @@ func TestStorageOperations(t *testing.T) { // Test storage layer without starting VMs tmpDir := t.TempDir() - imageManager, _ := images.NewManager(paths.New(tmpDir), 1) - systemManager := system.NewManager(paths.New(tmpDir)) + cfg := &config.Config{ + DataDir: tmpDir, + BridgeName: "vmbr0", + SubnetCIDR: "10.100.0.0/16", + DNSServer: "1.1.1.1", + } + + p := paths.New(tmpDir) + imageManager, _ := images.NewManager(p, 1) + systemManager := system.NewManager(p) + networkManager := network.NewManager(p, cfg) maxOverlaySize := int64(100 * 1024 * 1024 * 1024) // 100GB - manager := NewManager(paths.New(tmpDir), imageManager, systemManager, maxOverlaySize).(*manager) + manager := NewManager(p, imageManager, systemManager, networkManager, maxOverlaySize).(*manager) // Test metadata doesn't exist initially _, err := manager.loadMetadata("nonexistent") @@ -343,6 +385,7 @@ func TestStandbyAndRestore(t *testing.T) { HotplugSize: 512 * 1024 * 1024, OverlaySize: 10 * 1024 * 1024 * 1024, Vcpus: 1, + NetworkEnabled: false, // No network for tests Env: map[string]string{}, } @@ -369,6 +412,22 @@ func TestStandbyAndRestore(t *testing.T) { assert.DirExists(t, snapshotDir) assert.FileExists(t, filepath.Join(snapshotDir, "memory-ranges")) // Cloud Hypervisor creates various snapshot files, just verify directory exists + + // DEBUG: Check snapshot files (for comparison with networking test) + t.Log("DEBUG: Snapshot files for non-network instance:") + entries, _ := os.ReadDir(snapshotDir) + for _, entry := range entries { + info, _ := entry.Info() + t.Logf(" - %s (size: %d bytes)", entry.Name(), info.Size()) + } + + // DEBUG: Check console.log file size before restore + consoleLogPath := filepath.Join(tmpDir, "guests", inst.Id, "logs", "console.log") + var consoleLogSizeBefore int64 + if info, err := os.Stat(consoleLogPath); err == nil { + 
consoleLogSizeBefore = info.Size() + t.Logf("DEBUG: console.log size before restore: %d bytes", consoleLogSizeBefore) + } // Restore instance t.Log("Restoring instance...") @@ -376,6 +435,16 @@ func TestStandbyAndRestore(t *testing.T) { require.NoError(t, err) assert.Equal(t, StateRunning, inst.State) t.Log("Instance restored and running") + + // DEBUG: Check console.log file size after restore + if info, err := os.Stat(consoleLogPath); err == nil { + consoleLogSizeAfter := info.Size() + t.Logf("DEBUG: console.log size after restore: %d bytes", consoleLogSizeAfter) + t.Logf("DEBUG: File size diff: %d bytes", consoleLogSizeAfter-consoleLogSizeBefore) + if consoleLogSizeAfter < consoleLogSizeBefore { + t.Logf("DEBUG: WARNING! console.log was TRUNCATED (lost %d bytes)", consoleLogSizeBefore-consoleLogSizeAfter) + } + } // Cleanup (no sleep needed - DeleteInstance handles process cleanup) t.Log("Cleaning up...") diff --git a/lib/instances/network_test.go b/lib/instances/network_test.go new file mode 100644 index 00000000..579b4b8c --- /dev/null +++ b/lib/instances/network_test.go @@ -0,0 +1,253 @@ +package instances + +import ( + "bytes" + "context" + "os" + "strings" + "testing" + "time" + + "github.com/onkernel/hypeman/lib/exec" + "github.com/onkernel/hypeman/lib/images" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + "github.com/vishvananda/netlink" +) + +// TestCreateInstanceWithNetwork tests instance creation with network allocation +// and verifies network connectivity persists after standby/restore +func TestCreateInstanceWithNetwork(t *testing.T) { + // Require KVM access + requireKVMAccess(t) + + manager, _ := setupTestManager(t) + ctx := context.Background() + + // Pull nginx:alpine image (long-running workload) + t.Log("Pulling nginx:alpine image...") + nginxImage, err := manager.imageManager.CreateImage(ctx, images.CreateImageRequest{ + Name: "docker.io/library/nginx:alpine", + }) + require.NoError(t, err) + + // Wait for 
image to be ready + t.Log("Waiting for image build to complete...") + imageName := nginxImage.Name + for i := 0; i < 60; i++ { + img, err := manager.imageManager.GetImage(ctx, imageName) + if err == nil && img.Status == images.StatusReady { + nginxImage = img + break + } + time.Sleep(1 * time.Second) + } + require.Equal(t, images.StatusReady, nginxImage.Status) + t.Log("Nginx image ready") + + // Ensure system files + t.Log("Ensuring system files...") + systemManager := manager.systemManager + err = systemManager.EnsureSystemFiles(ctx) + require.NoError(t, err) + t.Log("System files ready") + + // Initialize network (creates bridge if needed) + t.Log("Initializing network...") + err = manager.networkManager.Initialize(ctx, nil) + require.NoError(t, err) + t.Log("Network initialized") + + // Create instance with nginx:alpine and default network + t.Log("Creating instance with default network...") + inst, err := manager.CreateInstance(ctx, CreateInstanceRequest{ + Name: "test-net-instance", + Image: "docker.io/library/nginx:alpine", + Size: 512 * 1024 * 1024, + HotplugSize: 512 * 1024 * 1024, + OverlaySize: 5 * 1024 * 1024 * 1024, + Vcpus: 1, + NetworkEnabled: true, + }) + require.NoError(t, err) + require.NotNil(t, inst) + t.Logf("Instance created: %s", inst.Id) + + // Wait for VM to be fully ready + err = waitForVMReady(ctx, inst.SocketPath, 5*time.Second) + require.NoError(t, err) + t.Log("VM is ready") + + // Verify network allocation + t.Log("Verifying network allocation...") + alloc, err := manager.networkManager.GetAllocation(ctx, inst.Id) + require.NoError(t, err) + require.NotNil(t, alloc, "Allocation should exist") + assert.NotEmpty(t, alloc.IP, "IP should be allocated") + assert.NotEmpty(t, alloc.MAC, "MAC should be allocated") + assert.NotEmpty(t, alloc.TAPDevice, "TAP device should be allocated") + t.Logf("Network allocated: IP=%s, MAC=%s, TAP=%s", alloc.IP, alloc.MAC, alloc.TAPDevice) + + // Verify TAP device exists + t.Log("Verifying TAP device 
exists...") + tap, err := netlink.LinkByName(alloc.TAPDevice) + require.NoError(t, err) + assert.True(t, strings.HasPrefix(tap.Attrs().Name, "hype-")) + assert.Equal(t, uint8(netlink.OperUp), uint8(tap.Attrs().OperState)) + t.Logf("TAP device verified: %s", alloc.TAPDevice) + + // Verify TAP attached to bridge + bridge, err := netlink.LinkByName("vmbr0") + require.NoError(t, err) + assert.Equal(t, bridge.Attrs().Index, tap.Attrs().MasterIndex, "TAP should be attached to bridge") + + // Wait for nginx to start + t.Log("Waiting for nginx to start...") + err = waitForLogMessage(ctx, manager, inst.Id, "start worker processes", 15*time.Second) + require.NoError(t, err, "Nginx should start") + t.Log("Nginx is running") + + // Wait for exec agent to be ready + t.Log("Waiting for exec agent...") + err = waitForLogMessage(ctx, manager, inst.Id, "[exec-agent] listening", 10*time.Second) + require.NoError(t, err, "Exec agent should be listening") + t.Log("Exec agent is ready") + + // Test initial internet connectivity via exec + t.Log("Testing initial internet connectivity via exec...") + output, exitCode, err := execCommand(ctx, inst.VsockSocket, "curl", "-s", "--connect-timeout", "10", "https://public-ping-bucket-kernel.s3.us-east-1.amazonaws.com/index.html") + if err != nil || exitCode != 0 { + t.Logf("curl failed: exitCode=%d err=%v output=%s", exitCode, err, output) + } + require.NoError(t, err, "Exec should succeed") + require.Equal(t, 0, exitCode, "curl should succeed") + require.Contains(t, output, "Connection successful", "Should get successful response") + t.Log("Initial internet connectivity verified!") + + // Standby instance + t.Log("Standing by instance...") + inst, err = manager.StandbyInstance(ctx, inst.Id) + require.NoError(t, err) + assert.Equal(t, StateStandby, inst.State) + assert.True(t, inst.HasSnapshot) + t.Log("Instance in standby") + + // Verify TAP device is cleaned up during standby + t.Log("Verifying TAP device cleaned up during standby...") + _, 
err = netlink.LinkByName(alloc.TAPDevice) + require.Error(t, err, "TAP device should be deleted during standby") + t.Log("TAP device cleaned up as expected") + + // Verify network allocation still returns correct IP/MAC during standby (from snapshot) + t.Log("Verifying network allocation during standby...") + allocStandby, err := manager.networkManager.GetAllocation(ctx, inst.Id) + require.NoError(t, err) + require.NotNil(t, allocStandby, "Allocation should exist during standby") + assert.Equal(t, alloc.IP, allocStandby.IP, "IP should be preserved during standby") + assert.Equal(t, alloc.MAC, allocStandby.MAC, "MAC should be preserved during standby") + t.Logf("Network allocation during standby: IP=%s, MAC=%s", allocStandby.IP, allocStandby.MAC) + + // Restore instance + t.Log("Restoring instance from standby...") + inst, err = manager.RestoreInstance(ctx, inst.Id) + require.NoError(t, err) + assert.Equal(t, StateRunning, inst.State) + t.Log("Instance restored and running") + + // Wait for VM to be ready again + err = waitForVMReady(ctx, inst.SocketPath, 5*time.Second) + require.NoError(t, err) + t.Log("VM is ready after restore") + + // Verify network allocation is restored + t.Log("Verifying network allocation restored...") + allocRestored, err := manager.networkManager.GetAllocation(ctx, inst.Id) + require.NoError(t, err) + require.NotNil(t, allocRestored, "Allocation should exist after restore") + assert.Equal(t, alloc.IP, allocRestored.IP, "IP should be preserved") + assert.Equal(t, alloc.MAC, allocRestored.MAC, "MAC should be preserved") + assert.Equal(t, alloc.TAPDevice, allocRestored.TAPDevice, "TAP name should be preserved") + t.Logf("Network allocation restored: IP=%s, MAC=%s, TAP=%s", allocRestored.IP, allocRestored.MAC, allocRestored.TAPDevice) + + // Verify TAP device exists again + t.Log("Verifying TAP device recreated...") + tapRestored, err := netlink.LinkByName(allocRestored.TAPDevice) + require.NoError(t, err) + assert.Equal(t, 
uint8(netlink.OperUp), uint8(tapRestored.Attrs().OperState)) + t.Log("TAP device recreated successfully") + + // Test internet connectivity after restore via exec + // Retry a few times as exec agent may need a moment after restore + t.Log("Testing internet connectivity after restore via exec...") + var restoreOutput string + var restoreExitCode int + for i := 0; i < 10; i++ { + restoreOutput, restoreExitCode, err = execCommand(ctx, inst.VsockSocket, "curl", "-s", "https://public-ping-bucket-kernel.s3.us-east-1.amazonaws.com/index.html") + if err == nil && restoreExitCode == 0 { + break + } + t.Logf("Exec attempt %d/10: err=%v exitCode=%d output=%s", i+1, err, restoreExitCode, restoreOutput) + time.Sleep(500 * time.Millisecond) + } + require.NoError(t, err, "Exec should succeed after restore") + require.Equal(t, 0, restoreExitCode, "curl should succeed after restore") + require.Contains(t, restoreOutput, "Connection successful", "Should get successful response after restore") + t.Log("Internet connectivity verified after restore!") + + // Verify the original nginx process is still running (proves restore worked, not reboot) + t.Log("Verifying nginx master process is still running...") + psOutput, psExitCode, err := execCommand(ctx, inst.VsockSocket, "ps", "aux") + require.NoError(t, err) + require.Equal(t, 0, psExitCode) + require.Contains(t, psOutput, "nginx: master process", "nginx master should still be running") + t.Log("Nginx process confirmed running - restore was successful!") + + // Cleanup + t.Log("Cleaning up instance...") + err = manager.DeleteInstance(ctx, inst.Id) + require.NoError(t, err) + + // Verify TAP deleted after instance cleanup + t.Log("Verifying TAP deleted after cleanup...") + _, err = netlink.LinkByName(alloc.TAPDevice) + require.Error(t, err, "TAP device should be deleted") + t.Log("TAP device cleaned up after delete") + + // Verify network allocation released after delete + t.Log("Verifying network allocation released after delete...") + 
_, err = manager.networkManager.GetAllocation(ctx, inst.Id) + require.Error(t, err, "Network allocation should not exist after delete") + t.Log("Network allocation released after delete") + + t.Log("Network integration test complete!") +} + +// execCommand runs a command in the instance via vsock and returns stdout+stderr, exit code, and error +func execCommand(ctx context.Context, vsockSocket string, command ...string) (string, int, error) { + var stdout, stderr bytes.Buffer + + exit, err := exec.ExecIntoInstance(ctx, vsockSocket, exec.ExecOptions{ + Command: command, + Stdin: nil, + Stdout: &stdout, + Stderr: &stderr, + TTY: false, + }) + if err != nil { + return stderr.String(), -1, err + } + + // Return combined output + output := stdout.String() + if stderr.Len() > 0 { + output += "\nSTDERR: " + stderr.String() + } + return output, exit.Code, nil +} + +// requireKVMAccess checks for KVM availability +func requireKVMAccess(t *testing.T) { + if _, err := os.Stat("/dev/kvm"); os.IsNotExist(err) { + t.Fatal("/dev/kvm not available - ensure KVM is enabled and user is in 'kvm' group") + } +} diff --git a/lib/instances/restore.go b/lib/instances/restore.go index 434f447b..ffce3d36 100644 --- a/lib/instances/restore.go +++ b/lib/instances/restore.go @@ -45,33 +45,60 @@ func (m *manager) restoreInstance( // 3. Get snapshot directory snapshotDir := m.paths.InstanceSnapshotLatest(id) - // 4. Transition: Standby → Paused (start VMM + restore) + // 4. Recreate TAP device if network enabled + if stored.NetworkEnabled { + log.DebugContext(ctx, "recreating network for restore", "id", id, "network", "default") + if err := m.networkManager.RecreateAllocation(ctx, id); err != nil { + log.ErrorContext(ctx, "failed to recreate network", "id", id, "error", err) + return nil, fmt.Errorf("recreate network: %w", err) + } + } + + // 5. 
Transition: Standby → Paused (start VMM + restore) log.DebugContext(ctx, "restoring from snapshot", "id", id, "snapshot_dir", snapshotDir) if err := m.restoreFromSnapshot(ctx, stored, snapshotDir); err != nil { log.ErrorContext(ctx, "failed to restore from snapshot", "id", id, "error", err) + // Cleanup network on failure + // Note: Network cleanup is explicitly called on failure paths to ensure TAP devices + // are removed. In production, stale TAP devices from unexpected failures (e.g., + // power loss) would require manual cleanup or host reboot. + if stored.NetworkEnabled { + netAlloc, _ := m.networkManager.GetAllocation(ctx, id) + m.networkManager.ReleaseAllocation(ctx, netAlloc) + } return nil, err } - // 5. Create client for resumed VM + // 6. Create client for resumed VM client, err := vmm.NewVMM(stored.SocketPath) if err != nil { log.ErrorContext(ctx, "failed to create VMM client", "id", id, "error", err) + // Cleanup network on failure + if stored.NetworkEnabled { + netAlloc, _ := m.networkManager.GetAllocation(ctx, id) + m.networkManager.ReleaseAllocation(ctx, netAlloc) + } return nil, fmt.Errorf("create vmm client: %w", err) } - // 6. Transition: Paused → Running (resume) + // 7. Transition: Paused → Running (resume) log.DebugContext(ctx, "resuming VM", "id", id) resumeResp, err := client.ResumeVMWithResponse(ctx) if err != nil || resumeResp.StatusCode() != 204 { log.ErrorContext(ctx, "failed to resume VM", "id", id, "error", err) + // Cleanup network on failure + if stored.NetworkEnabled { + netAlloc, _ := m.networkManager.GetAllocation(ctx, id) + m.networkManager.ReleaseAllocation(ctx, netAlloc) + } return nil, fmt.Errorf("resume vm failed: %w", err) } - // 7. Delete snapshot after successful restore + // 8. Delete snapshot after successful restore log.DebugContext(ctx, "deleting snapshot after successful restore", "id", id) os.RemoveAll(snapshotDir) // Best effort, ignore errors - // 8. Update timestamp + // 9. 
Update timestamp now := time.Now() stored.StartedAt = &now diff --git a/lib/instances/standby.go b/lib/instances/standby.go index 4ae9cf60..ad745164 100644 --- a/lib/instances/standby.go +++ b/lib/instances/standby.go @@ -7,6 +7,7 @@ import ( "time" "github.com/onkernel/hypeman/lib/logger" + "github.com/onkernel/hypeman/lib/network" "github.com/onkernel/hypeman/lib/vmm" ) @@ -37,21 +38,32 @@ func (m *manager) standbyInstance( return nil, fmt.Errorf("%w: cannot standby from state %s", ErrInvalidState, inst.State) } - // 3. Create VMM client + // 3. Get network allocation BEFORE killing VMM (while we can still query it) + // This is needed to delete the TAP device after VMM shuts down + var networkAlloc *network.Allocation + if inst.NetworkEnabled { + log.DebugContext(ctx, "getting network allocation", "id", id) + networkAlloc, err = m.networkManager.GetAllocation(ctx, id) + if err != nil { + log.WarnContext(ctx, "failed to get network allocation, will still attempt cleanup", "id", id, "error", err) + } + } + + // 4. Create VMM client client, err := vmm.NewVMM(inst.SocketPath) if err != nil { log.ErrorContext(ctx, "failed to create VMM client", "id", id, "error", err) return nil, fmt.Errorf("create vmm client: %w", err) } - // 4. Reduce memory to base size (virtio-mem hotplug) + // 5. Reduce memory to base size (virtio-mem hotplug) log.DebugContext(ctx, "reducing VM memory before snapshot", "id", id, "base_size", inst.Size) if err := reduceMemory(ctx, client, inst.Size); err != nil { // Log warning but continue - snapshot will just be larger log.WarnContext(ctx, "failed to reduce memory, snapshot will be larger", "id", id, "error", err) } - // 5. Transition: Running → Paused + // 6. Transition: Running → Paused log.DebugContext(ctx, "pausing VM", "id", id) pauseResp, err := client.PauseVMWithResponse(ctx) if err != nil || pauseResp.StatusCode() != 204 { @@ -59,7 +71,7 @@ func (m *manager) standbyInstance( return nil, fmt.Errorf("pause vm failed: %w", err) } - // 6. 
Create snapshot + // 7. Create snapshot snapshotDir := m.paths.InstanceSnapshotLatest(id) log.DebugContext(ctx, "creating snapshot", "id", id, "snapshot_dir", snapshotDir) if err := createSnapshot(ctx, client, snapshotDir); err != nil { @@ -69,14 +81,25 @@ func (m *manager) standbyInstance( return nil, fmt.Errorf("create snapshot: %w", err) } - // 7. Stop VMM gracefully (snapshot is complete) + // 8. Stop VMM gracefully (snapshot is complete) log.DebugContext(ctx, "shutting down VMM", "id", id) if err := m.shutdownVMM(ctx, &inst); err != nil { // Log but continue - snapshot was created successfully log.WarnContext(ctx, "failed to shutdown VMM gracefully, snapshot still valid", "id", id, "error", err) } - // 8. Update timestamp and clear PID (VMM no longer running) + // 9. Release network allocation (delete TAP device) + // TAP devices with explicit Owner/Group fields do NOT auto-delete when VMM exits + // They must be explicitly deleted + if inst.NetworkEnabled { + log.DebugContext(ctx, "releasing network", "id", id, "network", "default") + if err := m.networkManager.ReleaseAllocation(ctx, networkAlloc); err != nil { + // Log error but continue - snapshot was created successfully + log.WarnContext(ctx, "failed to release network, continuing with standby", "id", id, "error", err) + } + } + + // 10. 
Update timestamp and clear PID (VMM no longer running) now := time.Now() stored.StoppedAt = &now stored.CHPID = nil diff --git a/lib/instances/storage.go b/lib/instances/storage.go index 4e91bb50..cc5e598a 100644 --- a/lib/instances/storage.go +++ b/lib/instances/storage.go @@ -19,7 +19,7 @@ import ( // console.log # Serial console output // snapshots/ // snapshot-latest/ # Snapshot directory -// vm.json +// config.json // memory-ranges // metadata wraps StoredMetadata for JSON serialization diff --git a/lib/instances/types.go b/lib/instances/types.go index 2bf40b75..3721fb93 100644 --- a/lib/instances/types.go +++ b/lib/instances/types.go @@ -32,7 +32,10 @@ type StoredMetadata struct { Vcpus int // Configuration - Env map[string]string + Env map[string]string + NetworkEnabled bool // Whether instance has networking enabled (uses default network) + IP string // Assigned IP address (empty if NetworkEnabled=false) + MAC string // Assigned MAC address (empty if NetworkEnabled=false) // Timestamps (stored for historical tracking) CreatedAt time.Time @@ -64,13 +67,14 @@ type Instance struct { // CreateInstanceRequest is the domain request for creating an instance type CreateInstanceRequest struct { - Name string // Required - Image string // Required: OCI reference - Size int64 // Base memory in bytes (default: 1GB) - HotplugSize int64 // Hotplug memory in bytes (default: 3GB) - OverlaySize int64 // Overlay disk size in bytes (default: 10GB) - Vcpus int // Default 2 - Env map[string]string // Optional environment variables + Name string // Required + Image string // Required: OCI reference + Size int64 // Base memory in bytes (default: 1GB) + HotplugSize int64 // Hotplug memory in bytes (default: 3GB) + OverlaySize int64 // Overlay disk size in bytes (default: 10GB) + Vcpus int // Default 2 + Env map[string]string // Optional environment variables + NetworkEnabled bool // Whether to enable networking (uses default network) } // AttachVolumeRequest is the domain request 
for attaching a volume diff --git a/lib/network/README.md b/lib/network/README.md new file mode 100644 index 00000000..c1bdd899 --- /dev/null +++ b/lib/network/README.md @@ -0,0 +1,276 @@ +# Network Manager + +Manages the default virtual network for instances using a Linux bridge and TAP devices. + +## How Linux VM Networking Works + +``` +┌──────────────────────────────────────────────────────────────────────┐ +│ HOST │ +│ │ +│ ┌───────────┐ ┌───────────┐ │ +│ │ VM 1 │ │ VM 2 │ │ +│ │ (no net) │ │ 10.100. │ │ +│ │ │ │ 5.42 │ │ +│ └───────────┘ └─────┬─────┘ │ +│ │ │ +│ ┌────┴────┐ │ +│ │ TAP │ │ +│ │ hype-x │ │ +│ └────┬────┘ │ +│ ┌───────────────────────────────────────────────────────────────┐ │ +│ │ LINUX KERNEL │ │ +│ │ ┌─────────────┐ ┌───────────────┐ │ │ +│ │ │ Bridge │ routing + iptables │ eth0 │ │ │ +│ │ │ (vmbr0) │ ─────────────────────────>│ (uplink) │ │ │ +│ │ │ 10.100.0.1 │ NAT/masquerade │ public IP │ │ │ +│ │ └─────────────┘ └───────┬───────┘ │ │ +│ └────────────────────────────────────────────────────┼──────────┘ │ +│ │ │ +└───────────────────────────────────────────────────────┼──────────────┘ + │ + To Internet +``` + +**Key concepts:** + +- **TAP device**: A virtual network interface. Each VM gets one (unless networking is disabled). It's like a virtual ethernet cable connecting the VM to the host. + +- **Bridge**: A virtual network switch inside the kernel. All TAP devices connect to it. The bridge has an IP (the gateway) that VMs use as their default route. + +- **Linux kernel as router**: The kernel routes packets between the bridge (VM network) and the uplink (physical network). iptables NAT rules translate VM private IPs to the host's public IP for outbound traffic. + +**What Hypeman creates:** +1. One bridge (`vmbr0`) with the gateway IP (e.g., `10.100.0.1`) +2. One TAP device per networked VM (e.g., `hype-abc123`) +3. 
iptables rules for NAT and forwarding + +This setup allows for VMs with an attached network to communicate to the internet and for programs on the host to connect to the VMs via their private IP addresses. + +## Overview + +Hypeman provides a single default network that all instances can optionally connect to. There is no support for multiple custom networks - instances either have networking enabled (connected to the default network) or disabled (no network connectivity). + +## Design Decisions + +### State Derivation (No Central Allocations File) + +**What:** Network allocations are derived from Cloud Hypervisor and snapshots, not stored in a central file. + +**Why:** +- Single source of truth (CH and snapshots are authoritative) +- Self-contained guest directories (delete directory = automatic cleanup) +- No state drift between allocation file and reality +- Follows instance manager's pattern + +**Sources of truth:** +- **Running VMs**: Query `GetVmInfo()` from Cloud Hypervisor - returns IP/MAC/TAP +- **Standby VMs**: Read `guests/{id}/snapshots/snapshot-latest/config.json` from snapshot +- **Stopped VMs**: No network allocation + +**Metadata storage:** +``` +/var/lib/hypeman/guests/{instance-id}/ + metadata.json # Contains: network_enabled field (bool) + snapshots/ + snapshot-latest/ + config.json # Cloud Hypervisor's config with IP/MAC/TAP +``` + +### Hybrid Network Model + +**Standby → Restore: Network Fixed** +- TAP device deleted on standby (VMM shutdown) +- Snapshot `config.json` preserves IP/MAC/TAP names +- Restore recreates TAP with same name +- DNS entries unchanged +- Fast resume path + +**Shutdown → Boot: Network Reconfigurable** +- TAP device deleted, DNS unregistered +- Can boot with different network settings (enabled/disabled) +- Allows upgrades, migrations, reconfiguration +- Full recreate path + +### Default Network + +- Auto-created on first `Initialize()` call +- Configured from environment variables (BRIDGE_NAME, SUBNET_CIDR, SUBNET_GATEWAY) 
+- Named "default" (only network in the system) +- Always uses bridge_slave isolated mode for VM-to-VM isolation + +### Name Uniqueness + +Instance names must be globally unique: +- Enforced at allocation time by checking all running/standby instances +- Simpler than per-network scoping + +### DNS Configuration + +Guests are configured to use external DNS servers directly (no internal DNS server needed): +- Configurable via `DNS_SERVER` environment variable (default: 1.1.1.1) +- Set in guest's `/etc/resolv.conf` during boot + +### Dependencies + +**Go libraries:** +- `github.com/vishvananda/netlink` - Bridge/TAP operations (standard, used by Docker/K8s) + +**Shell commands:** +- `iptables` - Complex rule manipulation not well-supported in netlink +- `ip link set X type bridge_slave isolated on` - Netlink library doesn't expose this flag + +### Prerequisites + +Before running Hypeman, ensure IPv4 forwarding is enabled: + +```bash +# Enable IPv4 forwarding (temporary - until reboot) +sudo sysctl -w net.ipv4.ip_forward=1 + +# Enable IPv4 forwarding (persistent across reboots) +echo 'net.ipv4.ip_forward=1' | sudo tee -a /etc/sysctl.conf +sudo sysctl -p +``` + +**Why:** Required for routing traffic between VM network and external network. Hypeman will check this at startup and fail with an informative error if not enabled. + +### Permissions + +Network operations require `CAP_NET_ADMIN` and `CAP_NET_BIND_SERVICE` capabilities. 
+ +**Installation requirement:** +```bash +sudo setcap 'cap_net_admin,cap_net_bind_service=+eip' /path/to/hypeman +``` + +**Capability flags explained:** +- `e` = effective (capabilities are active) +- `i` = inheritable (can be passed to child processes) +- `p` = permitted (capabilities are available) + +**Why:** +- Narrowly scoped permissions (not full root), standard practice for network services +- The `i` flag allows child processes (like `ip` and `iptables` commands) to inherit `CAP_NET_ADMIN` via ambient capabilities, avoiding the need to grant system-wide capabilities to `/usr/bin/ip` or `/usr/sbin/iptables` + +## Filesystem Layout + +``` +/var/lib/hypeman/ + network/ # Network state directory (reserved for future use) + guests/ + {instance-id}/ + metadata.json # Contains: network_enabled field (bool) + snapshots/ + snapshot-latest/ + config.json # Contains: IP/MAC/TAP (source of truth) +``` + +## Network Operations + +### Initialize +- Create default network bridge (vmbr0 or configured name) +- Assign gateway IP +- Setup iptables NAT and forwarding + +### CreateAllocation +1. Get default network details +2. Check name uniqueness globally +3. Allocate next available IP (starting from .2, after gateway at .1) +4. Generate MAC (02:00:00:... format - locally administered) +5. Generate TAP name (tap-{first8chars-of-instance-id}) +6. Create TAP device and attach to bridge + +### RecreateAllocation (for restore from standby) +1. Derive allocation from snapshot config.json +2. Recreate TAP device with same name +3. Attach to bridge with isolation mode + +### ReleaseAllocation (for shutdown/delete) +1. Derive current allocation +2. Delete TAP device + +Note: In case of unexpected scenarios like power loss, straggler TAP devices may persist until manual cleanup or host reboot. 
+ +## IP Allocation Strategy + +- Gateway at .1 (first IP in subnet) +- Instance IPs start from .2 +- **Random allocation** with up to 5 retry attempts + - Picks random IP in usable range + - Checks for conflicts + - Retries if conflict found + - Falls back to sequential scan if all random attempts fail +- Helps distribute IPs across large subnets (especially /16) +- Reduces conflicts when moving standby VMs across hosts +- Skip network address, gateway, and broadcast address +- RNG seeded with timestamp for uniqueness across runs + +## Concurrency & Locking + +The network manager uses a single mutex to protect allocation operations: + +### Locked Operations +- **CreateAllocation**: Prevents concurrent IP allocation + +### Unlocked Operations +- **RecreateAllocation**: Safe without lock - protected by instance-level locking, doesn't allocate IPs +- **ReleaseAllocation**: Safe without lock - only deletes TAP device +- **Read operations** (GetAllocation, ListAllocations, NameExists): No lock needed - eventual consistency is acceptable + +### Why This Works +- Write operations are serialized to prevent race conditions +- Read operations can run concurrently for better performance +- Internal calls (e.g., CreateAllocation → ListAllocations) work because reads don't lock +- Instance manager already provides per-instance locking for state transitions + +## Security + +**Bridge_slave isolated mode:** +- Prevents layer-2 VM-to-VM communication +- VMs can only communicate with gateway (for internet access) +- Instance proxy could route traffic between VMs if needed in the future + +**iptables rules:** +- NAT for outbound connections +- Stateful firewall (only allow ESTABLISHED,RELATED inbound) +- Default DENY for forwarding +- Rules added on Initialize, per-subnet basis + +## Testing + +Network manager tests create real network devices (bridges, TAPs) and require elevated permissions. 
+ +### Running Tests + +```bash +make test +``` + +The Makefile compiles test binaries and grants capabilities via `sudo setcap`, then runs tests as your user (not root). + +### Test Isolation + +Network integration tests use per-test unique configuration for safe parallel execution: + +- Each test gets a unique bridge and /29 subnet in 172.16.0.0/12 range +- Bridge names: `t{3hex}` (e.g., `t5a3`, `tff2`) +- 131,072 possible test networks (supports massive parallelism) +- Tests run safely in parallel with `t.Parallel()` +- Hash includes test name + PID + timestamp + random = cross-run safe + +**Subnet allocation:** +- /29 subnets = 6 usable IPs per test (sufficient for test cases) +- Each test creates independent bridge on unique IP + +### Cleanup + +Cleanup happens automatically via `t.Cleanup()`, which runs even on test failure or panic. + +### Unit Tests vs Integration Tests + +- **Unit tests** (TestGenerateMAC, etc.): Run without permissions, test logic only +- **Integration tests** (TestInitializeIntegration, TestCreateAllocationIntegration, etc.): Require permissions, create real devices + +All tests run via `make test` - no separate commands needed. + diff --git a/lib/network/allocate.go b/lib/network/allocate.go new file mode 100644 index 00000000..5396627d --- /dev/null +++ b/lib/network/allocate.go @@ -0,0 +1,273 @@ +package network + +import ( + "context" + "crypto/rand" + "fmt" + mathrand "math/rand" + "net" + "strings" + + "github.com/onkernel/hypeman/lib/logger" +) + +// CreateAllocation allocates IP/MAC/TAP for instance on the default network +func (m *manager) CreateAllocation(ctx context.Context, req AllocateRequest) (*NetworkConfig, error) { + // Acquire lock to prevent concurrent allocations from: + // 1. Picking the same IP address + // 2. Creating duplicate instance names + m.mu.Lock() + defer m.mu.Unlock() + + log := logger.FromContext(ctx) + + // 1. 
Get default network + network, err := m.getDefaultNetwork(ctx) + if err != nil { + return nil, fmt.Errorf("get default network: %w", err) + } + + // 2. Check name uniqueness + exists, err := m.NameExists(ctx, req.InstanceName) + if err != nil { + return nil, fmt.Errorf("check name exists: %w", err) + } + if exists { + return nil, fmt.Errorf("%w: instance name '%s' already exists, can't assign into same network: %s", + ErrNameExists, req.InstanceName, network.Name) + } + + // 3. Allocate random available IP + // Random selection reduces predictability and helps distribute IPs across the subnet. + // This is especially useful for large /16 networks and reduces conflicts when + // moving standby VMs across hosts. + ip, err := m.allocateNextIP(ctx, network.Subnet) + if err != nil { + return nil, fmt.Errorf("allocate IP: %w", err) + } + + // 4. Generate MAC (02:00:00:... format - locally administered) + mac, err := generateMAC() + if err != nil { + return nil, fmt.Errorf("generate MAC: %w", err) + } + + // 5. Generate TAP name (tap-{first8chars-of-id}) + tap := generateTAPName(req.InstanceID) + + // 6. Create TAP device + if err := m.createTAPDevice(tap, network.Bridge, network.Isolated); err != nil { + return nil, fmt.Errorf("create TAP device: %w", err) + } + + log.InfoContext(ctx, "allocated network", + "instance_id", req.InstanceID, + "instance_name", req.InstanceName, + "network", "default", + "ip", ip, + "mac", mac, + "tap", tap) + + // 7. Calculate netmask from subnet + _, ipNet, _ := net.ParseCIDR(network.Subnet) + netmask := fmt.Sprintf("%d.%d.%d.%d", ipNet.Mask[0], ipNet.Mask[1], ipNet.Mask[2], ipNet.Mask[3]) + + // 8. Return config (will be used in CH VmConfig) + return &NetworkConfig{ + IP: ip, + MAC: mac, + Gateway: network.Gateway, + Netmask: netmask, + DNS: m.config.DNSServer, + TAPDevice: tap, + }, nil +} + +// RecreateAllocation recreates TAP for restore from standby +// Note: No lock needed - this operation: +// 1. 
Doesn't allocate new IPs (reuses existing from snapshot) +// 2. Is already protected by instance-level locking +// 3. Uses deterministic TAP names that can't conflict +func (m *manager) RecreateAllocation(ctx context.Context, instanceID string) error { + log := logger.FromContext(ctx) + + // 1. Derive allocation from snapshot + alloc, err := m.deriveAllocation(ctx, instanceID) + if err != nil { + return fmt.Errorf("derive allocation: %w", err) + } + if alloc == nil { + // No network configured for this instance + return nil + } + + // 2. Get default network details + network, err := m.getDefaultNetwork(ctx) + if err != nil { + return fmt.Errorf("get default network: %w", err) + } + + // 3. Recreate TAP device with same name + if err := m.createTAPDevice(alloc.TAPDevice, network.Bridge, network.Isolated); err != nil { + return fmt.Errorf("create TAP device: %w", err) + } + + log.InfoContext(ctx, "recreated network for restore", + "instance_id", instanceID, + "network", "default", + "tap", alloc.TAPDevice) + + return nil +} + +// ReleaseAllocation cleans up network allocation (shutdown/delete) +// Takes the allocation directly since it should be retrieved before the VMM is killed. +// If alloc is nil, this is a no-op (network not allocated or already released). +// Note: TAP devices created with explicit Owner/Group fields do NOT auto-delete when +// the process closes the file descriptor. They persist in the kernel and must be +// explicitly deleted via this function. In case of unexpected scenarios like host +// power loss, straggler TAP devices may remain until the host is rebooted or manually cleaned up. +func (m *manager) ReleaseAllocation(ctx context.Context, alloc *Allocation) error { + log := logger.FromContext(ctx) + + // If no allocation provided, nothing to clean up + if alloc == nil { + return nil + } + + // 1. 
Delete TAP device (best effort) + if err := m.deleteTAPDevice(alloc.TAPDevice); err != nil { + log.WarnContext(ctx, "failed to delete TAP device", "tap", alloc.TAPDevice, "error", err) + } + + log.InfoContext(ctx, "released network", + "instance_id", alloc.InstanceID, + "network", alloc.Network, + "ip", alloc.IP) + + return nil +} + +// allocateNextIP picks a random available IP in the subnet +// Retries up to 5 times if conflicts occur +func (m *manager) allocateNextIP(ctx context.Context, subnet string) (string, error) { + // Parse subnet + _, ipNet, err := net.ParseCIDR(subnet) + if err != nil { + return "", fmt.Errorf("parse subnet: %w", err) + } + + // Get all currently allocated IPs + allocations, err := m.ListAllocations(ctx) + if err != nil { + return "", fmt.Errorf("list allocations: %w", err) + } + + // Build set of used IPs + usedIPs := make(map[string]bool) + for _, alloc := range allocations { + usedIPs[alloc.IP] = true + } + + // Reserve network address and gateway + usedIPs[ipNet.IP.String()] = true // Network address + usedIPs[incrementIP(ipNet.IP, 1).String()] = true // Gateway (network + 1) + + // Calculate broadcast address + broadcast := make(net.IP, 4) + for i := 0; i < 4; i++ { + broadcast[i] = ipNet.IP[i] | ^ipNet.Mask[i] + } + usedIPs[broadcast.String()] = true // Broadcast address + + // Calculate subnet size (number of possible IPs) + ones, bits := ipNet.Mask.Size() + subnetSize := 1 << (bits - ones) // 2^(32-prefix_length) + + // Try up to 5 times to find a random available IP + maxRetries := 5 + for attempt := 0; attempt < maxRetries; attempt++ { + // Generate random offset from network address (skip network and gateway) + // Start from offset 2 to avoid network address (0) and gateway (1) + randomOffset := mathrand.Intn(subnetSize-3) + 2 + + // Calculate the random IP + randomIP := incrementIP(ipNet.IP, randomOffset) + + // Check if IP is valid and available + if ipNet.Contains(randomIP) { + ipStr := randomIP.String() + if 
!usedIPs[ipStr] { + return ipStr, nil + } + } + } + + // If random allocation failed after 5 attempts, fall back to sequential search + // This handles the case where the subnet is nearly full + for testIP := incrementIP(ipNet.IP, 2); ipNet.Contains(testIP); testIP = incrementIP(testIP, 1) { + ipStr := testIP.String() + if !usedIPs[ipStr] { + return ipStr, nil + } + } + + return "", fmt.Errorf("no available IPs in subnet %s after %d random attempts and full scan", subnet, maxRetries) +} + +// incrementIP increments IP address by n +func incrementIP(ip net.IP, n int) net.IP { + // Ensure we're working with IPv4 (4 bytes) + ip4 := ip.To4() + if ip4 == nil { + // Should not happen with our subnet parsing, but handle it + return ip + } + + result := make(net.IP, 4) + copy(result, ip4) + + // Convert to 32-bit integer, increment, convert back + val := uint32(result[0])<<24 | uint32(result[1])<<16 | uint32(result[2])<<8 | uint32(result[3]) + val += uint32(n) + result[0] = byte(val >> 24) + result[1] = byte(val >> 16) + result[2] = byte(val >> 8) + result[3] = byte(val) + + return result +} + +// generateMAC generates a random MAC address with local administration bit set +func generateMAC() (string, error) { + // Generate 6 random bytes + buf := make([]byte, 6) + if _, err := rand.Read(buf); err != nil { + return "", err + } + + // Set local administration bit (bit 1 of first byte) + // Use 02:00:00:... 
format (locally administered, unicast) + buf[0] = 0x02 + buf[1] = 0x00 + buf[2] = 0x00 + + // Format as MAC address + return fmt.Sprintf("%02x:%02x:%02x:%02x:%02x:%02x", + buf[0], buf[1], buf[2], buf[3], buf[4], buf[5]), nil +} + +// TAPPrefix is the prefix used for hypeman TAP devices +const TAPPrefix = "hype-" + +// generateTAPName generates TAP device name from instance ID +func generateTAPName(instanceID string) string { + // Use first 8 chars of instance ID + // hype-{8chars} fits within 15-char Linux interface name limit + shortID := instanceID + if len(shortID) > 8 { + shortID = shortID[:8] + } + return TAPPrefix + strings.ToLower(shortID) +} + diff --git a/lib/network/bridge.go b/lib/network/bridge.go new file mode 100644 index 00000000..45db556e --- /dev/null +++ b/lib/network/bridge.go @@ -0,0 +1,583 @@ +package network + +import ( + "context" + "fmt" + "net" + "os" + "os/exec" + "strings" + "syscall" + + "github.com/onkernel/hypeman/lib/logger" + "github.com/vishvananda/netlink" + "golang.org/x/sys/unix" +) + +// DeriveGateway returns the first usable IP in a subnet (used as gateway). +// e.g., 10.100.0.0/16 -> 10.100.0.1 +func DeriveGateway(cidr string) (string, error) { + _, ipNet, err := net.ParseCIDR(cidr) + if err != nil { + return "", fmt.Errorf("parse CIDR: %w", err) + } + + // Gateway is network address + 1 + gateway := make(net.IP, len(ipNet.IP)) + copy(gateway, ipNet.IP) + gateway[len(gateway)-1]++ // Increment last octet + + return gateway.String(), nil +} + +// checkSubnetConflicts checks if the configured subnet conflicts with existing routes. +// Returns an error if a conflict is detected, with guidance on how to resolve it. 
+func (m *manager) checkSubnetConflicts(ctx context.Context, subnet string) error { + log := logger.FromContext(ctx) + + _, configuredNet, err := net.ParseCIDR(subnet) + if err != nil { + return fmt.Errorf("parse subnet: %w", err) + } + + routes, err := netlink.RouteList(nil, netlink.FAMILY_V4) + if err != nil { + return fmt.Errorf("list routes: %w", err) + } + + for _, route := range routes { + if route.Dst == nil { + continue // Skip default route (nil Dst) + } + + // Skip default route (0.0.0.0/0) - it matches everything but isn't a real conflict + if route.Dst.IP.IsUnspecified() { + continue + } + + // Check if our subnet overlaps with this route's destination + // Overlap occurs if either network contains the other's start address + if configuredNet.Contains(route.Dst.IP) || route.Dst.Contains(configuredNet.IP) { + // Get interface name for better error message + ifaceName := "unknown" + if link, err := netlink.LinkByIndex(route.LinkIndex); err == nil { + ifaceName = link.Attrs().Name + } + + // Skip if this is our own bridge (already configured from previous run) + if ifaceName == m.config.BridgeName { + continue + } + + log.ErrorContext(ctx, "subnet conflict detected", + "configured_subnet", subnet, + "conflicting_route", route.Dst.String(), + "interface", ifaceName) + + return fmt.Errorf("SUBNET CONFLICT: configured subnet %s overlaps with existing route %s (interface: %s)\n\n"+ + "This will cause network connectivity issues. 
Please update your configuration:\n"+ + " - Set SUBNET_CIDR to a non-conflicting range (e.g., 10.200.0.0/16, 172.30.0.0/16)\n"+ + " - Set SUBNET_GATEWAY to match (e.g., 10.200.0.1, 172.30.0.1)\n\n"+ + "To see existing routes: ip route show", + subnet, route.Dst.String(), ifaceName) + } + } + + log.DebugContext(ctx, "no subnet conflicts detected", "subnet", subnet) + return nil +} + +// createBridge creates or verifies a bridge interface using netlink +func (m *manager) createBridge(ctx context.Context, name, gateway, subnet string) error { + log := logger.FromContext(ctx) + + // 1. Parse subnet to get network and prefix length + _, ipNet, err := net.ParseCIDR(subnet) + if err != nil { + return fmt.Errorf("parse subnet: %w", err) + } + + // 2. Check if bridge already exists + existing, err := netlink.LinkByName(name) + if err == nil { + // Bridge exists - verify it has the expected gateway IP + addrs, err := netlink.AddrList(existing, netlink.FAMILY_V4) + if err != nil { + return fmt.Errorf("list bridge addresses: %w", err) + } + + expectedGW := net.ParseIP(gateway) + hasExpectedIP := false + var actualIPs []string + for _, addr := range addrs { + actualIPs = append(actualIPs, addr.IPNet.String()) + if addr.IP.Equal(expectedGW) { + hasExpectedIP = true + } + } + + if !hasExpectedIP { + ones, _ := ipNet.Mask.Size() + return fmt.Errorf("bridge %s exists with IPs %v but expected gateway %s/%d. 
"+ + "Options: (1) update SUBNET_CIDR and SUBNET_GATEWAY to match the existing bridge, "+ + "(2) use a different BRIDGE_NAME, "+ + "or (3) delete the bridge with: sudo ip link delete %s", + name, actualIPs, gateway, ones, name) + } + + // Bridge exists with correct IP, verify it's up + if err := netlink.LinkSetUp(existing); err != nil { + return fmt.Errorf("set bridge up: %w", err) + } + log.InfoContext(ctx, "bridge ready", "bridge", name, "gateway", gateway, "status", "existing") + + // Still need to ensure iptables rules are configured + if err := m.setupIPTablesRules(ctx, subnet, name); err != nil { + return fmt.Errorf("setup iptables: %w", err) + } + return nil + } + + // 3. Create bridge + bridge := &netlink.Bridge{ + LinkAttrs: netlink.LinkAttrs{ + Name: name, + }, + } + + if err := netlink.LinkAdd(bridge); err != nil { + return fmt.Errorf("create bridge: %w", err) + } + + // 4. Set bridge up + if err := netlink.LinkSetUp(bridge); err != nil { + return fmt.Errorf("set bridge up: %w", err) + } + + // 5. Add gateway IP to bridge + gatewayIP := net.ParseIP(gateway) + if gatewayIP == nil { + return fmt.Errorf("invalid gateway IP: %s", gateway) + } + + addr := &netlink.Addr{ + IPNet: &net.IPNet{ + IP: gatewayIP, + Mask: ipNet.Mask, + }, + } + + if err := netlink.AddrAdd(bridge, addr); err != nil { + return fmt.Errorf("add gateway IP to bridge: %w", err) + } + + log.InfoContext(ctx, "bridge ready", "bridge", name, "gateway", gateway, "status", "created") + + // 6. Setup iptables rules + if err := m.setupIPTablesRules(ctx, subnet, name); err != nil { + return fmt.Errorf("setup iptables: %w", err) + } + + return nil +} + +// Rule comments for identifying hypeman iptables rules +const ( + commentNAT = "hypeman-nat" + commentFwdOut = "hypeman-fwd-out" + commentFwdIn = "hypeman-fwd-in" +) + +// getUplinkInterface returns the uplink interface for NAT/forwarding. +// Uses explicit config if set, otherwise auto-detects from default route. 
+func (m *manager) getUplinkInterface() (string, error) { + // Explicit config takes precedence + if m.config.UplinkInterface != "" { + return m.config.UplinkInterface, nil + } + + // Auto-detect from default route + routes, err := netlink.RouteList(nil, netlink.FAMILY_V4) + if err != nil { + return "", fmt.Errorf("list routes: %w", err) + } + + for _, route := range routes { + // Default route has Dst 0.0.0.0/0 (IP.IsUnspecified() == true) + if route.Dst != nil && route.Dst.IP.IsUnspecified() { + link, err := netlink.LinkByIndex(route.LinkIndex) + if err != nil { + return "", fmt.Errorf("get link by index %d: %w", route.LinkIndex, err) + } + return link.Attrs().Name, nil + } + } + + return "", fmt.Errorf("no default route found - cannot determine uplink interface") +} + +// setupIPTablesRules sets up NAT and forwarding rules +func (m *manager) setupIPTablesRules(ctx context.Context, subnet, bridgeName string) error { + log := logger.FromContext(ctx) + + // Check if IP forwarding is enabled (prerequisite) + forwardData, err := os.ReadFile("/proc/sys/net/ipv4/ip_forward") + if err != nil { + return fmt.Errorf("check ip forwarding: %w", err) + } + if strings.TrimSpace(string(forwardData)) != "1" { + return fmt.Errorf("IPv4 forwarding is not enabled. 
Please enable it by running: sudo sysctl -w net.ipv4.ip_forward=1 (or add 'net.ipv4.ip_forward=1' to /etc/sysctl.conf for persistence)") + } + log.InfoContext(ctx, "ip forwarding enabled") + + // Get uplink interface (explicit config or auto-detect from default route) + uplink, err := m.getUplinkInterface() + if err != nil { + return fmt.Errorf("get uplink interface: %w", err) + } + log.InfoContext(ctx, "uplink interface", "interface", uplink) + + // Add MASQUERADE rule if not exists (position doesn't matter in POSTROUTING) + masqStatus, err := m.ensureNATRule(subnet, uplink) + if err != nil { + return err + } + log.InfoContext(ctx, "iptables NAT ready", "subnet", subnet, "uplink", uplink, "status", masqStatus) + + // FORWARD rules must be at top of chain (before Docker's DOCKER-USER/DOCKER-FORWARD) + // We insert at position 1 and 2 to ensure they're evaluated first + fwdOutStatus, err := m.ensureForwardRule(bridgeName, uplink, "NEW,ESTABLISHED,RELATED", commentFwdOut, 1) + if err != nil { + return fmt.Errorf("setup forward outbound: %w", err) + } + + fwdInStatus, err := m.ensureForwardRule(uplink, bridgeName, "ESTABLISHED,RELATED", commentFwdIn, 2) + if err != nil { + return fmt.Errorf("setup forward inbound: %w", err) + } + + log.InfoContext(ctx, "iptables FORWARD ready", "outbound", fwdOutStatus, "inbound", fwdInStatus) + + return nil +} + +// ensureNATRule ensures the MASQUERADE rule exists with correct uplink +func (m *manager) ensureNATRule(subnet, uplink string) (string, error) { + // Check if rule exists with correct subnet and uplink + checkCmd := exec.Command("iptables", "-t", "nat", "-C", "POSTROUTING", + "-s", subnet, "-o", uplink, + "-m", "comment", "--comment", commentNAT, + "-j", "MASQUERADE") + checkCmd.SysProcAttr = &syscall.SysProcAttr{ + AmbientCaps: []uintptr{unix.CAP_NET_ADMIN}, + } + if checkCmd.Run() == nil { + return "existing", nil + } + + // Delete any existing rule with our comment (handles uplink changes) + 
m.deleteNATRuleByComment(commentNAT) + + // Add rule with comment + addCmd := exec.Command("iptables", "-t", "nat", "-A", "POSTROUTING", + "-s", subnet, "-o", uplink, + "-m", "comment", "--comment", commentNAT, + "-j", "MASQUERADE") + addCmd.SysProcAttr = &syscall.SysProcAttr{ + AmbientCaps: []uintptr{unix.CAP_NET_ADMIN}, + } + if err := addCmd.Run(); err != nil { + return "", fmt.Errorf("add masquerade rule: %w", err) + } + return "added", nil +} + +// deleteNATRuleByComment deletes any NAT POSTROUTING rule containing our comment +func (m *manager) deleteNATRuleByComment(comment string) { + // List NAT POSTROUTING rules + cmd := exec.Command("iptables", "-t", "nat", "-L", "POSTROUTING", "--line-numbers", "-n") + cmd.SysProcAttr = &syscall.SysProcAttr{ + AmbientCaps: []uintptr{unix.CAP_NET_ADMIN}, + } + output, err := cmd.Output() + if err != nil { + return + } + + // Find rule numbers with our comment (process in reverse to avoid renumbering issues) + var ruleNums []string + lines := strings.Split(string(output), "\n") + for _, line := range lines { + if strings.Contains(line, comment) { + fields := strings.Fields(line) + if len(fields) > 0 { + ruleNums = append(ruleNums, fields[0]) + } + } + } + + // Delete in reverse order + for i := len(ruleNums) - 1; i >= 0; i-- { + delCmd := exec.Command("iptables", "-t", "nat", "-D", "POSTROUTING", ruleNums[i]) + delCmd.SysProcAttr = &syscall.SysProcAttr{ + AmbientCaps: []uintptr{unix.CAP_NET_ADMIN}, + } + delCmd.Run() // ignore error + } +} + +// ensureForwardRule ensures a FORWARD rule exists at the correct position with correct interfaces +func (m *manager) ensureForwardRule(inIface, outIface, ctstate, comment string, position int) (string, error) { + // Check if rule exists at correct position with correct interfaces + if m.isForwardRuleCorrect(inIface, outIface, comment, position) { + return "existing", nil + } + + // Delete any existing rule with our comment (handles interface/position changes) + 
m.deleteForwardRuleByComment(comment) + + // Insert at specified position with comment + addCmd := exec.Command("iptables", "-I", "FORWARD", fmt.Sprintf("%d", position), + "-i", inIface, "-o", outIface, + "-m", "conntrack", "--ctstate", ctstate, + "-m", "comment", "--comment", comment, + "-j", "ACCEPT") + addCmd.SysProcAttr = &syscall.SysProcAttr{ + AmbientCaps: []uintptr{unix.CAP_NET_ADMIN}, + } + if err := addCmd.Run(); err != nil { + return "", fmt.Errorf("insert forward rule: %w", err) + } + return "added", nil +} + +// isForwardRuleCorrect checks if our rule exists at the expected position with correct interfaces +func (m *manager) isForwardRuleCorrect(inIface, outIface, comment string, position int) bool { + // List FORWARD chain with line numbers + cmd := exec.Command("iptables", "-L", "FORWARD", "--line-numbers", "-n", "-v") + cmd.SysProcAttr = &syscall.SysProcAttr{ + AmbientCaps: []uintptr{unix.CAP_NET_ADMIN}, + } + output, err := cmd.Output() + if err != nil { + return false + } + + // Look for our comment at the expected position with correct interfaces + // Line format: "1 0 0 ACCEPT 0 -- vmbr0 eth0 0.0.0.0/0 0.0.0.0/0 ... 
/* hypeman-fwd-out */" + lines := strings.Split(string(output), "\n") + for _, line := range lines { + if !strings.Contains(line, comment) { + continue + } + fields := strings.Fields(line) + // Check position (field 0), in interface (field 6), out interface (field 7) + if len(fields) >= 8 && + fields[0] == fmt.Sprintf("%d", position) && + fields[6] == inIface && + fields[7] == outIface { + return true + } + } + return false +} + +// deleteForwardRuleByComment deletes any FORWARD rule containing our comment +func (m *manager) deleteForwardRuleByComment(comment string) { + // List FORWARD rules + cmd := exec.Command("iptables", "-L", "FORWARD", "--line-numbers", "-n") + cmd.SysProcAttr = &syscall.SysProcAttr{ + AmbientCaps: []uintptr{unix.CAP_NET_ADMIN}, + } + output, err := cmd.Output() + if err != nil { + return + } + + // Find rule numbers with our comment (process in reverse to avoid renumbering issues) + var ruleNums []string + lines := strings.Split(string(output), "\n") + for _, line := range lines { + if strings.Contains(line, comment) { + fields := strings.Fields(line) + if len(fields) > 0 { + ruleNums = append(ruleNums, fields[0]) + } + } + } + + // Delete in reverse order + for i := len(ruleNums) - 1; i >= 0; i-- { + delCmd := exec.Command("iptables", "-D", "FORWARD", ruleNums[i]) + delCmd.SysProcAttr = &syscall.SysProcAttr{ + AmbientCaps: []uintptr{unix.CAP_NET_ADMIN}, + } + delCmd.Run() // ignore error + } +} + +// createTAPDevice creates TAP device and attaches to bridge +func (m *manager) createTAPDevice(tapName, bridgeName string, isolated bool) error { + // 1. Check if TAP already exists + if _, err := netlink.LinkByName(tapName); err == nil { + // TAP already exists, delete it first + if err := m.deleteTAPDevice(tapName); err != nil { + return fmt.Errorf("delete existing TAP: %w", err) + } + } + + // 2. 
Create TAP device with current user as owner + // This allows Cloud Hypervisor (running as current user) to access the TAP + uid := os.Getuid() + gid := os.Getgid() + + tap := &netlink.Tuntap{ + LinkAttrs: netlink.LinkAttrs{ + Name: tapName, + }, + Mode: netlink.TUNTAP_MODE_TAP, + Owner: uint32(uid), + Group: uint32(gid), + } + + if err := netlink.LinkAdd(tap); err != nil { + return fmt.Errorf("create TAP device: %w", err) + } + + // 3. Set TAP up + tapLink, err := netlink.LinkByName(tapName) + if err != nil { + return fmt.Errorf("get TAP link: %w", err) + } + + if err := netlink.LinkSetUp(tapLink); err != nil { + return fmt.Errorf("set TAP up: %w", err) + } + + // 4. Attach TAP to bridge + bridge, err := netlink.LinkByName(bridgeName) + if err != nil { + return fmt.Errorf("get bridge: %w", err) + } + + if err := netlink.LinkSetMaster(tapLink, bridge); err != nil { + return fmt.Errorf("attach TAP to bridge: %w", err) + } + + // 5. Enable port isolation so isolated TAPs can't directly talk to each other (requires kernel support and capabilities) + if isolated { + // Use shell command for bridge_slave isolated flag + // netlink library doesn't expose this flag yet + cmd := exec.Command("ip", "link", "set", tapName, "type", "bridge_slave", "isolated", "on") + // Enable ambient capabilities so child process inherits CAP_NET_ADMIN + cmd.SysProcAttr = &syscall.SysProcAttr{ + AmbientCaps: []uintptr{unix.CAP_NET_ADMIN}, + } + output, err := cmd.CombinedOutput() + if err != nil { + return fmt.Errorf("set isolation mode: %w (output: %s)", err, string(output)) + } + } + + return nil +} + +// deleteTAPDevice removes TAP device +func (m *manager) deleteTAPDevice(tapName string) error { + link, err := netlink.LinkByName(tapName) + if err != nil { + // TAP doesn't exist, nothing to do + return nil + } + + if err := netlink.LinkDel(link); err != nil { + return fmt.Errorf("delete TAP device: %w", err) + } + + return nil +} + +// queryNetworkState queries kernel for bridge state 
+func (m *manager) queryNetworkState(bridgeName string) (*Network, error) { + link, err := netlink.LinkByName(bridgeName) + if err != nil { + return nil, ErrNotFound + } + + // Verify it's actually a bridge + if link.Type() != "bridge" { + return nil, fmt.Errorf("link %s is not a bridge", bridgeName) + } + + // Get IP addresses + addrs, err := netlink.AddrList(link, netlink.FAMILY_V4) + if err != nil { + return nil, fmt.Errorf("list addresses: %w", err) + } + + if len(addrs) == 0 { + return nil, fmt.Errorf("bridge has no IP addresses") + } + + // Use first IP as gateway + gateway := addrs[0].IP.String() + subnet := addrs[0].IPNet.String() + + // Bridge exists and has IP - that's sufficient + // OperState can be OperUp, OperUnknown, etc. - all are functional for our purposes + + return &Network{ + Bridge: bridgeName, + Gateway: gateway, + Subnet: subnet, + }, nil +} + +// CleanupOrphanedTAPs removes TAP devices that aren't used by any running instance. +// runningInstanceIDs is a list of instance IDs that currently have a running VMM. +// Returns the number of TAPs deleted. 
+func (m *manager) CleanupOrphanedTAPs(ctx context.Context, runningInstanceIDs []string) int { + log := logger.FromContext(ctx) + + // Build set of expected TAP names for running instances + expectedTAPs := make(map[string]bool) + for _, id := range runningInstanceIDs { + tapName := generateTAPName(id) + expectedTAPs[tapName] = true + } + + // List all network interfaces + links, err := netlink.LinkList() + if err != nil { + log.WarnContext(ctx, "failed to list network links for TAP cleanup", "error", err) + return 0 + } + + deleted := 0 + for _, link := range links { + name := link.Attrs().Name + + // Only consider TAP devices with our naming prefix + if !strings.HasPrefix(name, TAPPrefix) { + continue + } + + // Check if this TAP is expected (belongs to a running instance) + if expectedTAPs[name] { + continue + } + + // Orphaned TAP - delete it + if err := m.deleteTAPDevice(name); err != nil { + log.WarnContext(ctx, "failed to delete orphaned TAP", "tap", name, "error", err) + continue + } + log.InfoContext(ctx, "deleted orphaned TAP device", "tap", name) + deleted++ + } + + return deleted +} + + + diff --git a/lib/network/derive.go b/lib/network/derive.go new file mode 100644 index 00000000..7d40900b --- /dev/null +++ b/lib/network/derive.go @@ -0,0 +1,186 @@ +package network + +import ( + "context" + "encoding/json" + "fmt" + "net" + "os" + + "github.com/onkernel/hypeman/lib/logger" + "github.com/onkernel/hypeman/lib/vmm" +) + +// instanceMetadata is the minimal metadata we need to derive allocations +// Field names match StoredMetadata in lib/instances/types.go +type instanceMetadata struct { + Name string + NetworkEnabled bool +} + +// deriveAllocation derives network allocation from CH or snapshot +func (m *manager) deriveAllocation(ctx context.Context, instanceID string) (*Allocation, error) { + log := logger.FromContext(ctx) + + // 1. 
Load instance metadata to get instance name and network status + meta, err := m.loadInstanceMetadata(instanceID) + if err != nil { + log.DebugContext(ctx, "failed to load instance metadata", "instance_id", instanceID, "error", err) + return nil, err + } + + // 2. If network not enabled, return nil + if !meta.NetworkEnabled { + return nil, nil + } + + // 3. Get default network configuration for Gateway and Netmask + defaultNet, err := m.getDefaultNetwork(ctx) + if err != nil { + return nil, fmt.Errorf("get default network: %w", err) + } + + // Calculate netmask from subnet + _, ipNet, err := net.ParseCIDR(defaultNet.Subnet) + if err != nil { + return nil, fmt.Errorf("parse subnet CIDR: %w", err) + } + netmask := fmt.Sprintf("%d.%d.%d.%d", ipNet.Mask[0], ipNet.Mask[1], ipNet.Mask[2], ipNet.Mask[3]) + + // 4. Try to derive from running VM first + socketPath := m.paths.InstanceSocket(instanceID) + if fileExists(socketPath) { + client, err := vmm.NewVMM(socketPath) + if err == nil { + resp, err := client.GetVmInfoWithResponse(ctx) + if err == nil && resp.JSON200 != nil && resp.JSON200.Config.Net != nil && len(*resp.JSON200.Config.Net) > 0 { + nets := *resp.JSON200.Config.Net + net := nets[0] + if net.Ip != nil && net.Mac != nil && net.Tap != nil { + log.DebugContext(ctx, "derived allocation from running VM", "instance_id", instanceID) + return &Allocation{ + InstanceID: instanceID, + InstanceName: meta.Name, + Network: "default", + IP: *net.Ip, + MAC: *net.Mac, + TAPDevice: *net.Tap, + Gateway: defaultNet.Gateway, + Netmask: netmask, + State: "running", + }, nil + } + } + } + } + + // 5. 
Try to derive from snapshot + // Cloud Hypervisor creates config.json in the snapshot directory + snapshotConfigJson := m.paths.InstanceSnapshotConfig(instanceID) + if fileExists(snapshotConfigJson) { + vmConfig, err := m.parseVmJson(snapshotConfigJson) + if err == nil && vmConfig.Net != nil && len(*vmConfig.Net) > 0 { + nets := *vmConfig.Net + if nets[0].Ip != nil && nets[0].Mac != nil && nets[0].Tap != nil { + log.DebugContext(ctx, "derived allocation from snapshot", "instance_id", instanceID) + return &Allocation{ + InstanceID: instanceID, + InstanceName: meta.Name, + Network: "default", + IP: *nets[0].Ip, + MAC: *nets[0].Mac, + TAPDevice: *nets[0].Tap, + Gateway: defaultNet.Gateway, + Netmask: netmask, + State: "standby", + }, nil + } + } + } + + // 6. No allocation (stopped or network not yet configured) + return nil, nil +} + +// GetAllocation gets the allocation for a specific instance +func (m *manager) GetAllocation(ctx context.Context, instanceID string) (*Allocation, error) { + return m.deriveAllocation(ctx, instanceID) +} + +// ListAllocations scans all guest directories and derives allocations +func (m *manager) ListAllocations(ctx context.Context) ([]Allocation, error) { + guests, err := os.ReadDir(m.paths.GuestsDir()) + if err != nil { + if os.IsNotExist(err) { + return []Allocation{}, nil + } + return nil, fmt.Errorf("read guests dir: %w", err) + } + + var allocations []Allocation + for _, guest := range guests { + if !guest.IsDir() { + continue + } + alloc, err := m.deriveAllocation(ctx, guest.Name()) + if err == nil && alloc != nil { + allocations = append(allocations, *alloc) + } + } + return allocations, nil +} + +// NameExists checks if instance name is already used in the default network +func (m *manager) NameExists(ctx context.Context, name string) (bool, error) { + allocations, err := m.ListAllocations(ctx) + if err != nil { + return false, err + } + + for _, alloc := range allocations { + if alloc.InstanceName == name { + return true, nil 
+ } + } + return false, nil +} + +// loadInstanceMetadata loads minimal instance metadata +func (m *manager) loadInstanceMetadata(instanceID string) (*instanceMetadata, error) { + metaPath := m.paths.InstanceMetadata(instanceID) + + data, err := os.ReadFile(metaPath) + if err != nil { + return nil, fmt.Errorf("read metadata: %w", err) + } + + var meta instanceMetadata + if err := json.Unmarshal(data, &meta); err != nil { + return nil, fmt.Errorf("unmarshal metadata: %w", err) + } + + return &meta, nil +} + +// parseVmJson parses Cloud Hypervisor's config.json from snapshot +// Note: Despite the function name, this parses config.json (what CH actually creates) +func (m *manager) parseVmJson(path string) (*vmm.VmConfig, error) { + data, err := os.ReadFile(path) + if err != nil { + return nil, fmt.Errorf("read config.json: %w", err) + } + + var vmConfig vmm.VmConfig + if err := json.Unmarshal(data, &vmConfig); err != nil { + return nil, fmt.Errorf("unmarshal config.json: %w", err) + } + + return &vmConfig, nil +} + +// fileExists checks if a file exists +func fileExists(path string) bool { + _, err := os.Stat(path) + return err == nil +} + diff --git a/lib/network/errors.go b/lib/network/errors.go new file mode 100644 index 00000000..94c0fefc --- /dev/null +++ b/lib/network/errors.go @@ -0,0 +1,12 @@ +package network + +import "errors" + +var ( + // ErrNotFound is returned when the default network is not found + ErrNotFound = errors.New("network not found") + + // ErrNameExists is returned when an instance name already exists + ErrNameExists = errors.New("instance name already exists") +) + diff --git a/lib/network/manager.go b/lib/network/manager.go new file mode 100644 index 00000000..cb43af07 --- /dev/null +++ b/lib/network/manager.go @@ -0,0 +1,103 @@ +package network + +import ( + "context" + "fmt" + "sync" + "time" + + "github.com/onkernel/hypeman/cmd/api/config" + "github.com/onkernel/hypeman/lib/logger" + "github.com/onkernel/hypeman/lib/paths" +) + +// 
Manager defines the interface for network management +type Manager interface { + // Lifecycle + Initialize(ctx context.Context, runningInstanceIDs []string) error + + // Instance allocation operations (called by instance manager) + CreateAllocation(ctx context.Context, req AllocateRequest) (*NetworkConfig, error) + RecreateAllocation(ctx context.Context, instanceID string) error + ReleaseAllocation(ctx context.Context, alloc *Allocation) error + + // Queries (derive from CH/snapshots) + GetAllocation(ctx context.Context, instanceID string) (*Allocation, error) + ListAllocations(ctx context.Context) ([]Allocation, error) + NameExists(ctx context.Context, name string) (bool, error) +} + +// manager implements the Manager interface +type manager struct { + paths *paths.Paths + config *config.Config + mu sync.Mutex // Protects network allocation operations (IP allocation) +} + +// NewManager creates a new network manager +func NewManager(p *paths.Paths, cfg *config.Config) Manager { + return &manager{ + paths: p, + config: cfg, + } +} + +// Initialize initializes the network manager and creates default network. +// runningInstanceIDs should contain IDs of instances currently running (have active VMM). 
+func (m *manager) Initialize(ctx context.Context, runningInstanceIDs []string) error { + log := logger.FromContext(ctx) + + // Derive gateway from subnet if not explicitly configured + gateway := m.config.SubnetGateway + if gateway == "" { + var err error + gateway, err = DeriveGateway(m.config.SubnetCIDR) + if err != nil { + return fmt.Errorf("derive gateway from subnet: %w", err) + } + } + + log.InfoContext(ctx, "initializing network manager", + "bridge", m.config.BridgeName, + "subnet", m.config.SubnetCIDR, + "gateway", gateway) + + // Check for subnet conflicts with existing host routes before creating bridge + if err := m.checkSubnetConflicts(ctx, m.config.SubnetCIDR); err != nil { + return err + } + + // Ensure default network bridge exists and iptables rules are configured + // createBridge is idempotent - handles both new and existing bridges + if err := m.createBridge(ctx, m.config.BridgeName, gateway, m.config.SubnetCIDR); err != nil { + return fmt.Errorf("setup default network: %w", err) + } + + // Cleanup orphaned TAP devices from previous runs (crashes, power loss, etc.) 
+ if deleted := m.CleanupOrphanedTAPs(ctx, runningInstanceIDs); deleted > 0 { + log.InfoContext(ctx, "cleaned up orphaned TAP devices", "count", deleted) + } + + log.InfoContext(ctx, "network manager initialized") + return nil +} + +// getDefaultNetwork gets the default network details from kernel state +func (m *manager) getDefaultNetwork(ctx context.Context) (*Network, error) { + // Query from kernel + state, err := m.queryNetworkState(m.config.BridgeName) + if err != nil { + return nil, ErrNotFound + } + + return &Network{ + Name: "default", + Subnet: state.Subnet, + Gateway: state.Gateway, + Bridge: m.config.BridgeName, + Isolated: true, + Default: true, + CreatedAt: time.Time{}, // Unknown for default + }, nil +} + diff --git a/lib/network/manager_test.go b/lib/network/manager_test.go new file mode 100644 index 00000000..1fb77016 --- /dev/null +++ b/lib/network/manager_test.go @@ -0,0 +1,156 @@ +package network + +import ( + "net" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestGenerateMAC(t *testing.T) { + // Generate 100 MACs to test uniqueness and format + seen := make(map[string]bool) + + for i := 0; i < 100; i++ { + mac, err := generateMAC() + require.NoError(t, err) + + // Check format (XX:XX:XX:XX:XX:XX) + require.Len(t, mac, 17, "MAC should be 17 chars") + + // Check starts with 02:00:00 (locally administered) + require.True(t, mac[:8] == "02:00:00", "MAC should start with 02:00:00") + + // Check uniqueness + require.False(t, seen[mac], "MAC should be unique") + seen[mac] = true + } +} + +func TestGenerateTAPName(t *testing.T) { + tests := []struct { + name string + instanceID string + want string + }{ + { + name: "8 char ID", + instanceID: "abcd1234", + want: "hype-abcd1234", + }, + { + name: "longer ID truncates", + instanceID: "abcd1234efgh5678", + want: "hype-abcd1234", + }, + { + name: "uppercase converted to lowercase", + instanceID: "ABCD1234", + want: "hype-abcd1234", + }, + } + + for 
_, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got := generateTAPName(tt.instanceID) + assert.Equal(t, tt.want, got) + // Verify within Linux interface name limit (15 chars) + assert.LessOrEqual(t, len(got), 15) + }) + } +} + +func TestIncrementIP(t *testing.T) { + tests := []struct { + name string + ip string + n int + want string + }{ + { + name: "increment by 1", + ip: "192.168.1.10", + n: 1, + want: "192.168.1.11", + }, + { + name: "increment by 10", + ip: "192.168.1.10", + n: 10, + want: "192.168.1.20", + }, + { + name: "overflow to next subnet", + ip: "192.168.1.255", + n: 1, + want: "192.168.2.0", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + ip := parseIP(tt.ip) + got := incrementIP(ip, tt.n) + assert.Equal(t, tt.want, got.String()) + }) + } +} + +func TestDeriveGateway(t *testing.T) { + tests := []struct { + name string + cidr string + want string + wantErr bool + }{ + { + name: "/16 subnet", + cidr: "10.100.0.0/16", + want: "10.100.0.1", + }, + { + name: "/24 subnet", + cidr: "192.168.1.0/24", + want: "192.168.1.1", + }, + { + name: "/8 subnet", + cidr: "10.0.0.0/8", + want: "10.0.0.1", + }, + { + name: "different starting point", + cidr: "172.30.0.0/16", + want: "172.30.0.1", + }, + { + name: "invalid CIDR", + cidr: "not-a-cidr", + wantErr: true, + }, + { + name: "missing prefix", + cidr: "10.100.0.0", + wantErr: true, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got, err := DeriveGateway(tt.cidr) + if tt.wantErr { + require.Error(t, err) + return + } + require.NoError(t, err) + assert.Equal(t, tt.want, got) + }) + } +} + +// Helper to parse IP +func parseIP(s string) net.IP { + return net.ParseIP(s).To4() +} + diff --git a/lib/network/types.go b/lib/network/types.go new file mode 100644 index 00000000..e63d31b4 --- /dev/null +++ b/lib/network/types.go @@ -0,0 +1,45 @@ +package network + +import "time" + +// Network represents a virtual network for instances +type 
Network struct { + Name string // "default", "internal" + Subnet string // "192.168.0.0/16" + Gateway string // "192.168.0.1" + Bridge string // "vmbr0" (derived from kernel) + Isolated bool // Bridge_slave isolation mode + Default bool // True for default network + CreatedAt time.Time +} + +// Allocation represents a network allocation for an instance +type Allocation struct { + InstanceID string + InstanceName string + Network string + IP string + MAC string + TAPDevice string + Gateway string // Gateway IP for this network + Netmask string // Netmask in dotted decimal notation + State string // "running", "standby" (derived from CH or snapshot) +} + +// NetworkConfig is the configuration returned after allocation +type NetworkConfig struct { + IP string + MAC string + Gateway string + Netmask string + DNS string + TAPDevice string +} + +// AllocateRequest is the request to allocate network for an instance +// Always allocates from the default network +type AllocateRequest struct { + InstanceID string + InstanceName string +} + diff --git a/lib/oapi/oapi.go b/lib/oapi/oapi.go index f7d27bb7..2c78b6bb 100644 --- a/lib/oapi/oapi.go +++ b/lib/oapi/oapi.go @@ -80,6 +80,12 @@ type CreateInstanceRequest struct { // Name Human-readable name (lowercase letters, digits, and dashes only; cannot start or end with a dash) Name string `json:"name"` + // Network Network configuration for the instance + Network *struct { + // Enabled Whether to attach instance to the default network + Enabled *bool `json:"enabled,omitempty"` + } `json:"network,omitempty"` + // OverlaySize Writable overlay disk size (human-readable format like "10GB", "50G") OverlaySize *string `json:"overlay_size,omitempty"` @@ -194,6 +200,21 @@ type Instance struct { // Name Human-readable name Name string `json:"name"` + // Network Network configuration of the instance + Network *struct { + // Enabled Whether instance is attached to the default network + Enabled *bool `json:"enabled,omitempty"` + + // Ip 
Assigned IP address (null if no network) + Ip *string `json:"ip"` + + // Mac Assigned MAC address (null if no network) + Mac *string `json:"mac"` + + // Name Network name (always "default" when enabled) + Name *string `json:"name,omitempty"` + } `json:"network,omitempty"` + // OverlaySize Writable overlay disk size (human-readable) OverlaySize *string `json:"overlay_size,omitempty"` @@ -5251,60 +5272,63 @@ func (sh *strictHandler) GetVolume(w http.ResponseWriter, r *http.Request, id st // Base64 encoded, gzipped, json marshaled Swagger object var swaggerSpec = []string{ - "H4sIAAAAAAAC/+xce28TOxb/Kpb3XimsJs2jLUtz/1iVUqAShYpC0S6wlTNzkvjWYw+2J21A/e4rP+aV", - "mTx6aXPJvUhIJDO2z+t3Hj52+g2HIk4EB64VHnzDKpxATOzHQ61JOLkQLI3hLXxJQWnzOJEiAakp2EGx", - "SLm+TIiemG8RqFDSRFPB8QCfET1B1xOQgKZ2FaQmImURGgKy8yDCAYYbEicM8AB3Yq47EdEEB1jPEvNI", - "aUn5GN8GWAKJBGczR2ZEUqbxYESYgmCO7KlZGhGFzJS2nZOvNxSCAeH41q74JaUSIjz4WBbjcz5YDH+H", - "UBviRxKIhpOYjBdrgpMY6jp4c3SCqJmHJIxAAg8BtWBnvBOgSIRXIHeo6DA6lETOOnxM+c2AEQ1KP6qo", - "ZvnYur7mxLO8LRGMK014uFg24FPzH4kiauQi7Kzyumasqg6O+ZRKwWPgGk2JpGTIQJXF+4Zfv3l2fHn8", - "+gIPDOUoDe3UAJ+9efsOD/But9s169b4nwidsHR8qehXqCAD7754iucZOcz5RzHEQs7QSEjk10CtSRoT", - "3jaoMRyadzHRiNErQJ/Mep9wgD7h3otPuGqcviVVU4I1+1qIWGFqwhLKYaGtgwXQe1kVxwxCLSauQYZE", - "AWKgNUgVoIiOqVYBIjxCEVETUMg4zW8oJJwLjZQmUiMhEfAIXVM9QcSOqyohnrWvhbxigkTtHg5wTG5e", - "AR+buPB4N8AJMdQMW//7SNpfu+2Dzy3/of35n9mjR//+pUk+MQXJyKzByr1ug5k/SKqtyH4eiqi6Qmby", - "Chub1ZyR97t1K3ebzdzAVANPT43KPejW4SRnpNc/9R/76wJvGiapqrDUn2fndRoPQSIxQlMqdUoYOjp7", - "X/HJfr4w5RrGIJuDSgbzxcFlRQKhUYOLJN5Lw1RpESMaAdd0REGiFkm1aI+BgyQaIkRHyGA0kWJKI4iq", - "+pkK1jb5xAJyTa9x7CIvXAXfdimXyRYB4XI8rC95buxNORrTMRnOdDX29brrKjpbv0nVx1IKWVduKKIG", - "EQ+ThNGQmG9tlUBIRzREYFZAZgJqxSScUA45OKtaHZLoUnpzBk2xXxPKVAPZIvo6Yn4kapkoHKdM04SB", - "e6cMSaohtuv8ImGEB/gfnaJW6fhCpWMlf2ZXKmUIIiWZ2SDMOchLyNRzh5ViUKoxgM/F1UyWfIhNKhEM", - "0/HYqKSsulOqFOVjlFkXjSiwaODywcosbq1ZMLYQB16GNdHwymSENoMpsDIInEcZZmMhAeU4cUarSEX5", - 
"lDAaXVKepI2QWKjK56nUE8iQgMhQpBrpiVdqhYgtvqyvj0TKo0Zl1dTxEghzlWlVE0oTnfrKJo2NbsWV", - "0WdBTlytNIdfpMkMJ1nqnzNA3BDsjk6foZEUMQoF14RykCgGTXwdnHP0EduKDwe4bTAVEYgFR2I0+s1w", - "kLtKPcqljBmc4oGWKdQdJLRBOrokuoE1884gWtMYlCZxglpvnx/t7u4ezOfG/n6722v39t/1uoOu+fdf", - "HGCX00xNQzS0zSKNAYOOfWaoUn8LSrApRCgmnI5AaeRHlimrCenvPx6QYdjr70Yw2tt/vLOz00QGuJaz", - "RFDeQOo4f7eeKTquSGsXa+6oyffZ4QFK7HVk+YbPDt+9NJuvVMkOEyFhHTWkfFD6nn8tXtgP7uuQ8sbS", - "PI+5c5zaEOMjgknfzo0QVWhEKJvbEiYpY/75wEjCIcwBKWywWaDXVWn+tYEmo18hQo1bNE3GpuR1iPu+", - "vViAv6SQwmUiFHXUaxtl/8YUCcOUsgjZGahlhMtKHPuoWuD0F4qflxK+bHBlR43ws7wwNpTNGE8z5Zoy", - "u4GeVSju7z5+8q/uQa9fcm7K9eM9vBYredidK46tzP5tkMfkBHjkMqiBgfsUCj41XmG/WP5MnHHAqQTw", - "7F3NGGaXQvn4MqIN6PzgXqKISgi13SGu9iHcIUmyGorNVV0e03LxSxG5Mbf4zXpDern/UL57t1D+MH2C", - "+q6fqEvFSaImokHUDxOwRQVB2RgEN1RpZQszPaEK0UyHJcl9L2m+UdTUY6hUg757sGRvt163oKE0OKzu", - "dVJOv6RQ2Q0dvT951vd7xyoZ/XWPHDy5uSH64DG9Vgdf46Ec/75LtqRTsbS3sEaD4I/1A6oq7K3c8d9l", - "h78WCmyvZYELn9s+zN39d3+h/67MnSYmwap9UxaQzu1gO0skyUIhRHInGforYtBKGUrdkPvugFBTrFTa", - "IJnK1o7i55mGq9xlr21ShMEn3kaumxIN0MXpKfKro2GqUd6jgwi1jphII/RyloCcUiUk4kTTKTwyK7xN", - "Oad8bFYw5RYJzRs2Q9I9Xz75jKTKUTdzE/tt+YzzSaojcc3tHDVJNTLfLMtGBB+Tly/hkDRAr4Wd4zkN", - "EBfzwd0NJzwazurD5xNBKyQcDU1cU1pIiB594qW6w2saB9hrDAfYiY8DnEllPjru7CdLuGTpAn+uoVTP", - "1sQer0B0qcUS+588M/VfNnZu261022181vGD+64Ougd33eg1pbj38zntDg28ZWdP7hDIvFuov/Jx0x/c", - "RfyAzcJyTMqIrIhGhh0IU0n17NzEdIfPIRAJ8jB12rXB3hZG9nEh0kTrBN/e2lbbqAHJL0z5QkN0eHZi", - "66+YcDI20ebiFDE6gnAWMkCpbYvVQoE9bXhzdNIeEhNusiLAFoVUW1Wb0THhZn0c4ClI5eh2d3o79sxI", - "JMBJQvEA79pH9ixiYkXsTPL+0BisWxgHtZ5wElnete8gGR2rRHDldNPvdl1DjWtw/QRS9FQ7vyu3yXMJ", - "clX69BSsCuegYNQQgom3jtGZs1Uax0TOjOz2KQonEF7ZVx2bhdRCgV5RpU/ckO+UaK3erGuD1bqydUkN", - "XyYVe/ZvA7zX7d2bhl1zvIHse05SPRGSfoXIEN2/R7MuJHrCNUhOGFIgpyB9q7PshHjwsep+Hz/ffi7b", - "3aqr0FUiVIOtS2fW2IUIUPqpiGb3JmLDqfhtNRyZYHpbQ1r/3jjwAGtQst22DLO+gquNiJrx8JFD1wYM", - "/ZREKDsn+bMQvdfd2wCi51rzW+RJZylj9uDb95WKZmA5nna+mYR665IbA1ewV73tmX2eeVtCJIlBg1SW", - 
"gzkbvX3VBh6KCCK/vc62EOatLWeyeiNL5FWPCkqKm+8xfa55215DdWmpOlF+wmQNmDjrZsAIFlYL32F/", - "dyuouBT0a/+576r82n/u+iq/7h4Wd4MeBizdTYXm7JT4J/hWgu8F+GRfKM2GJr9FXFHt5aM2UvBlfdW7", - "1Hw5hz/LvnXKvrK6llZ+RY/7AYu/uZuDa9V/92fiAm9NCvctFL/3/VvVfdsCaYciW4G5a4a0sGg5xnW+", - "0Wid+qvA/FwKbkiXtlNy35VVBrqNF1cZ4a1McfZUwN4/9YVWKY8srLU2auvuZmPWxsujrYaPrZBqqqsH", - "kA7cQGg78Y158/gGwodEVVBru4s4NqjXAhnOUg2o5e/sKvOwM6S84y5aW+pfUpCzgnzopuOgqUyrdZzn", - "6rEaM4eMidAGY5QoSCPRfvfuPwsoaz2rUM0vGle658WPLeb9qedy4ly785rqcEL52Ij+AYbnZitkr/Zq", - "EQr2J/vCXvdgw6T96dU2OeKxR7FHJqK88MspJWiqRHiFWrlxHzV6KRNjtawnn2nplRm3CT99LhgT18jw", - "hVpKSyCxO5o4Pz9e5JwjO6fZSxZcNakTLg6pGeVgY4IEnUruriWBdf5GB3V3dBto97pNZ0irE56GG92B", - "KXDddhqoIq7hMq6ZkDBC+fKR9Y2hGCNP4mf6W696sojMPc3h1GKzyb38offiPPjWDfhLF1jZyf/fIasc", - "CT5iNNSoXWDEcEG52XTxaDhDQpavVGwT+D1YC8lsZPRyNeI/e7cQ//42x18a/4Xt/+YeEAopIdTuotV2", - "HV2VNj0lV27Zu1nFnacg21RfnJ42JwR3S0V1vrkPJ6s6LcXPwh+o+mpYJGNtK7zMXwuKwN862riHCZn9", - "4n5Lj9uM4jIRbEAvd4Sao3b5zxVsAy7vvyXf9Acb1mrIb9Qr8rt4P4pXbDoDeR4Isz9RqehjWxzUIS2T", - "RIu5tr1PKEsPJi/8mE0cS/qgcIdDyUyCn+c3axxJlpS17EAyD80Pdxz5B2Lf/Rk3Q9nCyPfzIPKHP4ic", - "ZjYsotiaR48PV3isdfCYl5ybPXa8+HHyKVVbmUr91bJpnqIWdb03CbDu5oLipk86L7Z4X/QCsmRbOuW0", - "C5gVm+4avhIhYSiCKTCR2B/WurE4wKlk/ucbg477of9EKD140n3Sxbefb/8fAAD//7jaqHYZTgAA", + "H4sIAAAAAAAC/+xc+28Tu5f/Vyzv90plNWkebVma7w+rUgpUolBRKNqlbOWMTxLfeuzB9qQNqP/7yo+Z", + "zGQmj0KbS+9FQiKZsX1en/PwsdPvOJZJKgUIo3H/O9bxGBLiPh4YQ+LxueRZAu/hawba2Mepkikow8AN", + "SmQmzGVKzNh+o6BjxVLDpMB9fErMGF2PQQGauFWQHsuMUzQA5OYBxRGGG5KkHHAftxNh2pQYgiNspql9", + "pI1iYoRvI6yAUCn41JMZkowb3B8SriGaI3til0ZEIzul5eYU6w2k5EAEvnUrfs2YAor7n8tifCkGy8Gf", + "EBtL/FABMXCckNFiTQiSQF0H7w6PEbPzkIIhKBAxoC3YHm1HiMr4CtQ2k23OBoqoaVuMmLjpc2JAmycV", + "1SwfW9fXnHiOtyWCCW2IiBfLBmJi/yOUMisX4aeV1zVjVXVwJCZMSZGAMGhCFCMDDros3nf89t2Lo8uj", + "t+e4bynTLHZTI3z67v0H3Mc7nU7HrlvjfyxNyrPRpWbfoIIMvPPqOZ5n5KDgHyWQSDVFQ6lQWANtjbOE", + "iJZFjeXQvkuIQZxdAbqw613gCF3g7qsLXDVOz5GqKcGZfS1ErDA14SkTsNDW0QLova6KYwehLS6vQcVE", + 
"A+JgDCgdIcpGzOgIEUERJXoMGlmn+TeKiRDSIG2IMkgqBIKia2bGiLhxVSUk09a1VFdcEtrq4ggn5OYN", + "iJGNC093IpwSS82y9X+fSetbp7X/ZSt8aH35z/zRk//+V6N8YOzadRHf+hcolmLIRpki9rkzqhkDYgHW", + "OKrB2WqEVgBjVFaLJJ/GYMagkJGIuGBYLGkfWRJhOso5LGnEL9gQd2oglhNQnEwbQNztNKD4k2LGWTTM", + "Q5TpK2Qnr4CwXc1jeK9TB3GnGcUNTDXw9NwiKvjUOpwUjHR7J+Fjb12/msRppiss9ebZeZslA1BIDtGE", + "KZMRjg5PP1ZCTq9YmAkDI1DNMTP34sWxc0V+ZLQhAqQhCMWZNjJBjIIwbMhAoS2SGdkagQBFDFDEhsi6", + "YKrkhFGgVf1MJG/ZdOn8bc2g4NlFQbiK+7qlfKJeBITL0aC+5Jm1NxNoxEZkMDXV0N7trKvofP0mVR8p", + "JVVdubGkDSIepClnsQsFLZ1CzIYsRmBXQHYC2kpIPGYCCnBWtTog9FIFc0ZNqc0QxnUD2Vly8cTCSLRl", + "41GSccNSDv6dtiSZgcSt8y8FQ9zH/9GelWLtUIe1neQv3EqlBEiUIlOXY4QAdQm5eu6wUgJaN+anubSR", + "y1IMceGVwiAbjaxKyqo7YVozMUK5ddGQAad9n+5WFinOmjPGFuIgyLAmGt7YhNfiMAFeBoH3KMtsIhWg", + "AifeaBWpmJgQzuglE2nWCImFqnyZKZc//KKIDGRmXNrwBisTcbWl8/WhzARtVFZNHa+BcF94VzWhDTFZ", + "yHRZYnUrr6w+Z+Tk1UpzhEWazHCcVzZzBkgagt3hyQs0VDKxOdoQJkChBAwJZX7B0WfsCloc4ZbFFCWQ", + "SIHkcPhvy0HhKvUol3FucTqXbwsHiV2QppfENLBm31lEG5aANiRJ0db7l4c7Ozv787mxt9fqdFvdvQ/d", + "Tr9j//0vjrDPabZkIwZadpHGgMFGITNUqb8HLfkEKEqIYEPQBoWRZcp6THp7T/tkEHd7OxSGu3tPt7e3", + "m8iAMGqaSiYaSB0V79YzRdvXoK3Zmtt6/HN2eIAdxDqyfMenBx9e271lplWby5jwth4w0S99L77OXrgP", + "/uuAicadRxFz5zh1ISZEBJu+vRshptGQMD63400zzsPzvpVEQFwAUrpgs0Cvq9L8WwtNzr4BRY07UENG", + "tqL3iPu5rWaEv2aQwWUqNfPUa32A8MYWCYOMcYrcDLRlhctLHPeoWuD0FopflBKhbPBlR43wi6IwtpTt", + "mEAzE4Zx1x+YViju7Tx99l+d/W6v5NxMmKe7eC1WirA7Vxw7mcPbqIjJKQjqM6iFgf8USzGxXuG+OP5s", + "nPHAqQTw/F3NGHYbwsTokrIGdH7yLxFlCmLjNsCrfQi3SZquhmJzVVfEtEL8UkRuzC35pq2eXu4/lO/c", + "LZQ/TBuk3tQg+lILkuqxbBA135QSlI9BcMO00WHfy3R541tIHlpl8/vRphZKpRoMzZEle7v1miENpcFB", + "da+TCfY1g8pu6PDj8Yte2DtWyZhvu2T/2c0NMftP2bXe/5YM1OjPHfJIGjFLWyc/2/+Qwzu0P5qgVXQ5", + "mA6ND6A/3PGIMEsbbK81Gwmg6PgUEUoVaF3OB/nyVaN393vb3afPtrudzna3s052TEi8hPbJweH6xDs9", + "X/n1yaAf0z4MfyI7B7P5lhzh12Sq0UXez7jA6HoMAgUzzWXn0PNYa39Qbyz9WB9pzgorO0V36QytFT1c", + "C3JB6D9z7cm7x/29hXF/pVVtLoNV++08kZ25wW6WTNOFQsj0TjL0VuSulTKUumj33TljtsittM9yla2d", + 
"/c9yDVe5y1+7Ygr6F6KFfBeO9tH5yQkKq6NBZlDRugaKtg65zCh6PU1BTZiWCgli2ASe2BXeZ0IwMbIr", + "uIAX2zd8ipR/vnzyKcm0p27npu7b8hln48xQeS3cHD3ODLLfHMtWhJDLly/hkdRHb6WbEziNbOyaKwr8", + "cCLoYFofPl9AbMVEoIHNh9pIBfTJhSjVq0HTOMJBYzjCXnwc4Vwq+9Fz5z45wiVLz/DnG5H1Ki/PN5dG", + "LrH/8QsbqvOxc+0abVp+w7yOH9x3VdnZv2uDoKk0+jhfC92h8bvsSNafjdp3C/VXPoX9wfz2CzaZyzEp", + "J7IiGll2IM4UM9MzG9M9PgdAFKiDzGvXBXtX7rjHM5HGxqT49ta1aIcNSH5ly14Wo4PTY1e3J0SQkY02", + "5yeIsyHE05gDylw7tRYK3CHcu8Pj1oDYcJMXj24zwYxTtR2dEGHXxxGegNKebme7u+2OUmUKgqQM9/GO", + "e+SO6MZOxPa46CuOwLmFdVDnCcfU8W5C59HqWKdSaK+bXqfjG7HCgO9DkVkvvv2n9s0BnyBXpc9Awalw", + "DgpWDb449YxOva2yJCFqamV3T1E8hvjKvWq7LKQXCvSGaXPsh/ykRGv19H37tNbNr0tq+bKpOLB/G+Hd", + "TvfeNOwPVRrIfhQkM2Op2DeglujePZp1IdFjYUAJwpEGNQEVWuRlJ8T9z1X3+/zl9kvZ7k5dM12lUjfY", + "unSVA/sQAdo8l3R6byI2XBa5rYYjG0xva0jr3RsHAWANSnbb3UHej/K1EdFTET/x6NqAoZ8TivLztb8K", + "0bud3Q0geu5I5xF50qndCxNBUehHzprI5Xja/m4T6q1Pbhx8wV71thfuee5tKVEkAQNKOw7mbPT+TQtE", + "LCnQ0JbJtxD2rStn8nojT+RVj4pKipvfHH+pedtuQ3XpqHpRfsNkDZh46+bAiBZWCz9hf39ZbnZX7o/e", + "y9CN+6P30vfj/tg5mF2ZexiwdDYVmvPbBb/BtxJ8ryAk+5nSXGgKW8QV1V4xaiMFX96JvUvNV3D4u+xb", + "p+wrq2tp5Tfrij9g8Td3oXat+u/+TDzDW5PCQwsl7H3/UXXfY4G0R5GrwPztWzazaDnGtb8zuk79VToJ", + "qqbghnTpOiX3XVnloNt4cZUTfpQpzp0KuGvZodAq5ZGFtdZGbd3ZbMzaeHn0qOHjKqSa6uoBpA034I5J", + "m/Pm0Q3ED4mqqNZ2l0liUW8kspxlBtBWOPfU9mF7wETb//7AUf+agZrOyMd+Oo6ayrRax3muHqsxc8C5", + "jF0wRqmGjMrWhw//s4CyMdMK1bkfF9R+CzDvT12fE+fandfMxGMmRlb0TzA4s1shdyXcyFjyv9gXdjv7", + "GyYdTq8ekyMeBRQHZCImZn45YQRNtIyv0FZh3CeNXsrlSC/ryedaemPHbcJPX0rO5TWyfKEtbRSQxB9N", + "nJ0dLXLOoZvT7CULrijVCc8OqTkT4GKCApMp4a+zgXP+Rgf1d7sbaHc7TWdIqxOegRvThgkI0/IaqCKu", + "4ZKGnZBywsTykfWNoRyhQOJ3+luvenKILDzN49Rhs8m9wqH34jz43g/4WxdY+cn/PyGrHEox5Cw2qDXD", + "iOWCCbvpEnQwRVKVr1Q8JvAHsM4kc5ExyNWI//zdQvyH2xx/a/zPbP8P94BYKgWx8RetHtfRVWnTU3Ll", + "LXc3a3bnKco31ecnJ80Jwd9S0e3v/sPxqk7L7K8lPFD11bBIztqj8LJwLYhCuHW0cQ+TKv9DFI/0uM39", + "Bj2I4AJ6uSPUHLXLf8XjMeDy/lvyTX/HZK2G/Ea9oriL96t4xaYzUOCBcPfTpoo+HouDeqTlkhg517YP", + 
"CWXpweR5GLOJY8kQFO5wKJlL8Pv8Zo0jyZKylh1IFqH54Y4jfyD23Z9xc5QtjHy/DyJ/+YPISW7DWRRb", + "8+jx4QqPtQ4ei5Jzs8eO579OPi39hO8RXi2bFClqUdd7kwDrbC4obvqk8/wR74teQZ5sS6ecbgG7YtNd", + "wzcyJhxRmACXqftBth+LI5wpHn6+0W/7PxAxltr0n3WedfDtl9v/DwAA//+aatGNMFEAAA==", } // GetSwagger returns the content of the embedded swagger specification file diff --git a/lib/paths/paths.go b/lib/paths/paths.go index a2abcf3d..ffc7dee0 100644 --- a/lib/paths/paths.go +++ b/lib/paths/paths.go @@ -21,8 +21,11 @@ // overlay.raw // config.ext4 // ch.sock +// vsock.sock // logs/ // snapshots/ +// snapshot-latest/ +// config.json package paths import "path/filepath" @@ -163,8 +166,13 @@ func (p *Paths) InstanceSnapshotLatest(id string) string { return filepath.Join(p.InstanceSnapshots(id), "snapshot-latest") } +// InstanceSnapshotConfig returns the path to the snapshot config.json file. +// Cloud Hypervisor creates config.json in the snapshot directory. +func (p *Paths) InstanceSnapshotConfig(id string) string { + return filepath.Join(p.InstanceSnapshotLatest(id), "config.json") +} + // GuestsDir returns the root guests directory. 
func (p *Paths) GuestsDir() string { return filepath.Join(p.dataDir, "guests") } - diff --git a/lib/providers/providers.go b/lib/providers/providers.go index d5e3c2e2..d3de2cb2 100644 --- a/lib/providers/providers.go +++ b/lib/providers/providers.go @@ -11,6 +11,7 @@ import ( "github.com/onkernel/hypeman/lib/images" "github.com/onkernel/hypeman/lib/instances" "github.com/onkernel/hypeman/lib/logger" + "github.com/onkernel/hypeman/lib/network" "github.com/onkernel/hypeman/lib/paths" "github.com/onkernel/hypeman/lib/system" "github.com/onkernel/hypeman/lib/volumes" @@ -48,14 +49,19 @@ func ProvideSystemManager(p *paths.Paths) system.Manager { return system.NewManager(p) } +// ProvideNetworkManager provides the network manager +func ProvideNetworkManager(p *paths.Paths, cfg *config.Config) network.Manager { + return network.NewManager(p, cfg) +} + // ProvideInstanceManager provides the instance manager -func ProvideInstanceManager(p *paths.Paths, cfg *config.Config, imageManager images.Manager, systemManager system.Manager) (instances.Manager, error) { +func ProvideInstanceManager(p *paths.Paths, cfg *config.Config, imageManager images.Manager, systemManager system.Manager, networkManager network.Manager) (instances.Manager, error) { // Parse max overlay size from config var maxOverlaySize datasize.ByteSize if err := maxOverlaySize.UnmarshalText([]byte(cfg.MaxOverlaySize)); err != nil { return nil, fmt.Errorf("failed to parse MAX_OVERLAY_SIZE '%s': %w (expected format like '100GB', '50G', '10GiB')", cfg.MaxOverlaySize, err) } - return instances.NewManager(p, imageManager, systemManager, int64(maxOverlaySize)), nil + return instances.NewManager(p, imageManager, systemManager, networkManager, int64(maxOverlaySize)), nil } // ProvideVolumeManager provides the volume manager diff --git a/lib/system/init_script.go b/lib/system/init_script.go index c51e0d30..7df0a70a 100644 --- a/lib/system/init_script.go +++ b/lib/system/init_script.go @@ -96,11 +96,11 @@ chroot 
/overlay/newroot ln -sf /proc/self/fd/2 /dev/stderr 2>/dev/null || true if [ -n "${GUEST_IP:-}" ]; then echo "overlay-init: configuring network" chroot /overlay/newroot ip link set lo up - chroot /overlay/newroot ip addr add ${GUEST_IP}/${GUEST_MASK} dev eth0 + chroot /overlay/newroot ip addr add ${GUEST_IP}/${GUEST_CIDR} dev eth0 chroot /overlay/newroot ip link set eth0 up chroot /overlay/newroot ip route add default via ${GUEST_GW} echo "nameserver ${GUEST_DNS}" > /overlay/newroot/etc/resolv.conf - echo "overlay-init: network configured - IP: ${GUEST_IP}" + echo "overlay-init: network configured - IP: ${GUEST_IP}/${GUEST_CIDR}" fi # Set PATH for initrd tools diff --git a/openapi.yaml b/openapi.yaml index 1d6965eb..1cae731f 100644 --- a/openapi.yaml +++ b/openapi.yaml @@ -131,6 +131,15 @@ components: example: PORT: "3000" NODE_ENV: production + network: + type: object + description: Network configuration for the instance + properties: + enabled: + type: boolean + description: Whether to attach instance to the default network + default: true + example: true # Future: volumes, port_mappings, timeout_seconds Instance: @@ -172,6 +181,28 @@ components: additionalProperties: type: string description: Environment variables + network: + type: object + description: Network configuration of the instance + properties: + enabled: + type: boolean + description: Whether instance is attached to the default network + example: true + name: + type: string + description: Network name (always "default" when enabled) + example: "default" + ip: + type: string + description: Assigned IP address (null if no network) + example: "192.168.100.10" + nullable: true + mac: + type: string + description: Assigned MAC address (null if no network) + example: "02:00:00:ab:cd:ef" + nullable: true created_at: type: string format: date-time @@ -367,6 +398,7 @@ paths: schema: $ref: "#/components/schemas/Health" + /images: get: summary: List images diff --git a/stainless.yaml b/stainless.yaml new file 
mode 100644 index 00000000..393b7f26 --- /dev/null +++ b/stainless.yaml @@ -0,0 +1,138 @@ +# yaml-language-server: $schema=https://app.stainless.com/config.schema.json + +# The main edition for the config, see the [docs] for more information. +# +# [docs]: https://www.stainless.com/docs/reference/editions +edition: 2025-10-10 + +organization: + name: hypeman + # Link to your API documentation. + docs: '' + # Contact email for bug reports, questions, and support requests. + contact: '' + +# `targets` define the output targets and their customization options, such as +# whether to emit the Node SDK and what its package name should be. +targets: + go: + # The edition for this target, see the [docs] for more information. + # + # [docs]: https://www.stainless.com/docs/reference/editions + edition: go.2025-10-08 + package_name: hypeman + production_repo: onkernel/hypeman-go + options: + enable_v2: true + cli: + edition: cli.2025-10-08 + binary_name: hypeman + production_repo: onkernel/hypeman-cli + options: + go_sdk_package: github.com/onkernel/hypeman-go + publish: + homebrew: + tap_repo: onkernel/homebrew-tap + homepage: https://github.com/onkernel/hypeman + description: orchestrate cloud-hypervisor VMs + +# `environments` are a map of the name of the environment (e.g. "sandbox", +# "production") to the corresponding url to use. +environments: + production: http://localhost:8080 + +# `resources` define the structure and organization for your API, such as how +# methods and models are grouped together and accessed. See the [configuration +# guide] for more information. +# +# [configuration guide]: https://www.stainless.com/docs/guides/configure#resources +resources: + health: + # Configure the methods defined in this resource. Each key in the object is the + # name of the method and the value is either an endpoint (for example, `get /foo`) + # or an object with more detail. 
+ # + # [reference]: https://www.stainless.com/docs/reference/config#method + methods: + check: get /health + + images: + # Configure the models--named types--defined in the resource. Each key in the + # object is the name of the model and the value is either the name of a schema in + # `#/components/schemas` or an object with more detail. + # + # [reference]: https://www.stainless.com/docs/reference/config#model + models: + image: '#/components/schemas/Image' + methods: + list: get /images + create: post /images + retrieve: get /images/{name} + delete: delete /images/{name} + + instances: + models: + volume_attachment: '#/components/schemas/VolumeAttachment' + port_mapping: '#/components/schemas/PortMapping' + instance: '#/components/schemas/Instance' + methods: + list: get /instances + create: post /instances + retrieve: get /instances/{id} + delete: delete /instances/{id} + put_in_standby: post /instances/{id}/standby + restore_from_standby: post /instances/{id}/restore + stream_logs: get /instances/{id}/logs + # Subresources define resources that are nested within another for more powerful + # logical groupings, e.g. `cards.payments`. + subresources: + volumes: + methods: + attach: post /instances/{id}/volumes/{volumeId} + detach: delete /instances/{id}/volumes/{volumeId} + + volumes: + models: + volume: '#/components/schemas/Volume' + methods: + list: get /volumes + create: post /volumes + retrieve: get /volumes/{id} + delete: delete /volumes/{id} + +settings: + # All generated integration tests that hit the prism mock http server are marked + # as skipped. Removing this setting or setting it to false enables tests, but + # doing so may result in test failures due to bugs in the test server. 
+ # + # [prism mock http server]: https://stoplight.io/open-source/prism + disable_mock_tests: true + license: Apache-2.0 + +# `client_settings` define settings for the API client, such as extra constructor +# arguments (used for authentication), retry behavior, idempotency, etc. +client_settings: + opts: + api_key: + type: string + nullable: false + auth: + security_scheme: bearerAuth + read_env: HYPEMAN_API_KEY + +security: + - bearerAuth: [] + +# `readme` is used to configure the code snippets that will be rendered in the +# README.md of various SDKs. In particular, you can change the `headline` +# snippet's endpoint and the arguments to call it with. +readme: + example_requests: + default: + type: request + endpoint: get /health + params: {} + headline: + type: request + endpoint: get /health + params: {}