diff --git a/cmd/api/api/api_test.go b/cmd/api/api/api_test.go index fda6127b..c2cce0f0 100644 --- a/cmd/api/api/api_test.go +++ b/cmd/api/api/api_test.go @@ -40,7 +40,7 @@ func newTestService(t *testing.T) *ApiService { limits := instances.ResourceLimits{ MaxOverlaySize: 100 * 1024 * 1024 * 1024, // 100GB } - instanceMgr := instances.NewManager(p, imageMgr, systemMgr, networkMgr, deviceMgr, volumeMgr, limits, nil, nil) + instanceMgr := instances.NewManager(p, imageMgr, systemMgr, networkMgr, deviceMgr, volumeMgr, limits, "", nil, nil) // Register cleanup for orphaned Cloud Hypervisor processes t.Cleanup(func() { diff --git a/cmd/api/api/cp.go b/cmd/api/api/cp.go index b611f64e..89b40a8b 100644 --- a/cmd/api/api/cp.go +++ b/cmd/api/api/cp.go @@ -11,6 +11,7 @@ import ( "github.com/gorilla/websocket" "github.com/onkernel/hypeman/lib/guest" + "github.com/onkernel/hypeman/lib/hypervisor" "github.com/onkernel/hypeman/lib/instances" "github.com/onkernel/hypeman/lib/logger" mw "github.com/onkernel/hypeman/lib/middleware" @@ -218,7 +219,13 @@ func (s *ApiService) CpHandler(w http.ResponseWriter, r *http.Request) { // handleCopyTo handles copying files from client to guest // Returns the number of bytes transferred and any error. func (s *ApiService) handleCopyTo(ctx context.Context, ws *websocket.Conn, inst *instances.Instance, req CpRequest) (int64, error) { - grpcConn, err := guest.GetOrCreateConnPublic(ctx, inst.VsockSocket) + // Create vsock dialer for this hypervisor type + dialer, err := hypervisor.NewVsockDialer(inst.HypervisorType, inst.VsockSocket, inst.VsockCID) + if err != nil { + return 0, fmt.Errorf("create vsock dialer: %w", err) + } + + grpcConn, err := guest.GetOrCreateConn(ctx, dialer) if err != nil { return 0, fmt.Errorf("get grpc connection: %w", err) } @@ -322,7 +329,13 @@ func (s *ApiService) handleCopyTo(ctx context.Context, ws *websocket.Conn, inst // handleCopyFrom handles copying files from guest to client // Returns the number of bytes transferred and any error. func (s *ApiService) handleCopyFrom(ctx context.Context, ws *websocket.Conn, inst *instances.Instance, req CpRequest) (int64, error) { - grpcConn, err := guest.GetOrCreateConnPublic(ctx, inst.VsockSocket) + // Create vsock dialer for this hypervisor type + dialer, err := hypervisor.NewVsockDialer(inst.HypervisorType, inst.VsockSocket, inst.VsockCID) + if err != nil { + return 0, fmt.Errorf("create vsock dialer: %w", err) + } + + grpcConn, err := guest.GetOrCreateConn(ctx, dialer) if err != nil { return 0, fmt.Errorf("get grpc connection: %w", err) } @@ -406,4 +419,3 @@ func (s *ApiService) handleCopyFrom(ctx context.Context, ws *websocket.Conn, ins } return bytesReceived, nil } - diff --git a/cmd/api/api/cp_test.go b/cmd/api/api/cp_test.go index 3278852e..6b737dfc 100644 --- a/cmd/api/api/cp_test.go +++ b/cmd/api/api/cp_test.go @@ -8,6 +8,7 @@ import ( "time" "github.com/onkernel/hypeman/lib/guest" + "github.com/onkernel/hypeman/lib/hypervisor" "github.com/onkernel/hypeman/lib/oapi" "github.com/onkernel/hypeman/lib/paths" "github.com/onkernel/hypeman/lib/system" @@ -109,10 +110,14 @@ func TestCpToAndFromInstance(t *testing.T) { err = os.WriteFile(srcFile, []byte(testContent), 0644) require.NoError(t, err) + // Create vsock dialer + dialer, err := hypervisor.NewVsockDialer(actualInst.HypervisorType, actualInst.VsockSocket, actualInst.VsockCID) + require.NoError(t, err) + // Test 1: Copy file TO instance t.Log("Testing CopyToInstance...") dstPath := "/tmp/copied-file.txt" - err = guest.CopyToInstance(ctx(), actualInst.VsockSocket, guest.CopyToInstanceOptions{ + err = guest.CopyToInstance(ctx(), dialer, guest.CopyToInstanceOptions{ SrcPath: srcFile, DstPath: dstPath, }) @@ -121,7 +126,7 @@ func TestCpToAndFromInstance(t *testing.T) { // Verify the file was copied by reading it back via exec t.Log("Verifying file was copied via exec...") var stdout, stderr outputBuffer - exit, err := guest.ExecIntoInstance(ctx(), actualInst.VsockSocket, guest.ExecOptions{ + exit, err := guest.ExecIntoInstance(ctx(), dialer, guest.ExecOptions{ Command: []string{"cat", dstPath}, Stdout: &stdout, Stderr: &stderr, @@ -134,7 +139,7 @@ func TestCpToAndFromInstance(t *testing.T) { // Test 2: Copy file FROM instance t.Log("Testing CopyFromInstance...") localDstDir := t.TempDir() - err = guest.CopyFromInstance(ctx(), actualInst.VsockSocket, guest.CopyFromInstanceOptions{ + err = guest.CopyFromInstance(ctx(), dialer, guest.CopyFromInstanceOptions{ SrcPath: dstPath, DstPath: localDstDir, }) @@ -211,6 +216,10 @@ func TestCpDirectoryToInstance(t *testing.T) { actualInst, err := svc.InstanceManager.GetInstance(ctx(), inst.Id) require.NoError(t, err) + // Create vsock dialer + dialer, err := hypervisor.NewVsockDialer(actualInst.HypervisorType, actualInst.VsockSocket, actualInst.VsockCID) + require.NoError(t, err) + // Create a test directory structure srcDir := filepath.Join(t.TempDir(), "testdir") require.NoError(t, os.MkdirAll(filepath.Join(srcDir, "subdir"), 0755)) @@ -219,7 +228,7 @@ func TestCpDirectoryToInstance(t *testing.T) { // Copy directory to instance t.Log("Copying directory to instance...") - err = guest.CopyToInstance(ctx(), actualInst.VsockSocket, guest.CopyToInstanceOptions{ + err = guest.CopyToInstance(ctx(), dialer, guest.CopyToInstanceOptions{ SrcPath: srcDir, DstPath: "/tmp/testdir", }) @@ -227,7 +236,7 @@ func TestCpDirectoryToInstance(t *testing.T) { // Verify files exist via exec var stdout outputBuffer - exit, err := guest.ExecIntoInstance(ctx(), actualInst.VsockSocket, guest.ExecOptions{ + exit, err := guest.ExecIntoInstance(ctx(), dialer, guest.ExecOptions{ Command: []string{"cat", "/tmp/testdir/file1.txt"}, Stdout: &stdout, TTY: false, @@ -237,7 +246,7 @@ func TestCpDirectoryToInstance(t *testing.T) { assert.Equal(t, "file1 content", stdout.String()) stdout = outputBuffer{} - exit, err = guest.ExecIntoInstance(ctx(), actualInst.VsockSocket, guest.ExecOptions{ + exit, err = guest.ExecIntoInstance(ctx(), dialer, guest.ExecOptions{ Command: []string{"cat", "/tmp/testdir/subdir/file2.txt"}, Stdout: &stdout, TTY: false, @@ -249,7 +258,7 @@ func TestCpDirectoryToInstance(t *testing.T) { // Copy directory from instance t.Log("Copying directory from instance...") localDstDir := t.TempDir() - err = guest.CopyFromInstance(ctx(), actualInst.VsockSocket, guest.CopyFromInstanceOptions{ + err = guest.CopyFromInstance(ctx(), dialer, guest.CopyFromInstanceOptions{ SrcPath: "/tmp/testdir", DstPath: localDstDir, }) @@ -266,4 +275,3 @@ func TestCpDirectoryToInstance(t *testing.T) { t.Log("Directory cp tests passed!") } - diff --git a/cmd/api/api/exec.go b/cmd/api/api/exec.go index 9c41197f..c06b5a7a 100644 --- a/cmd/api/api/exec.go +++ b/cmd/api/api/exec.go @@ -12,6 +12,7 @@ import ( "github.com/gorilla/websocket" "github.com/onkernel/hypeman/lib/guest" + "github.com/onkernel/hypeman/lib/hypervisor" "github.com/onkernel/hypeman/lib/instances" "github.com/onkernel/hypeman/lib/logger" mw "github.com/onkernel/hypeman/lib/middleware" @@ -110,8 +111,17 @@ func (s *ApiService) ExecHandler(w http.ResponseWriter, r *http.Request) { // Create WebSocket read/writer wrapper wsConn := &wsReadWriter{ws: ws, ctx: ctx} + // Create vsock dialer for this hypervisor type + dialer, err := hypervisor.NewVsockDialer(hypervisor.Type(inst.HypervisorType), inst.VsockSocket, inst.VsockCID) + if err != nil { + log.ErrorContext(ctx, "failed to create vsock dialer", "error", err) + ws.WriteMessage(websocket.BinaryMessage, []byte(fmt.Sprintf("Error: %v\r\n", err))) + ws.WriteMessage(websocket.TextMessage, []byte(`{"exitCode":127}`)) + return + } + // Execute via vsock - exit, err := guest.ExecIntoInstance(ctx, inst.VsockSocket, guest.ExecOptions{ + exit, err := guest.ExecIntoInstance(ctx, dialer, guest.ExecOptions{ Command: execReq.Command, Stdin: wsConn, Stdout: wsConn, diff --git a/cmd/api/api/exec_test.go b/cmd/api/api/exec_test.go index 08ad1937..d4a88a61 100644 --- a/cmd/api/api/exec_test.go +++ b/cmd/api/api/exec_test.go @@ -8,6 +8,7 @@ import ( "time" "github.com/onkernel/hypeman/lib/guest" + "github.com/onkernel/hypeman/lib/hypervisor" "github.com/onkernel/hypeman/lib/instances" "github.com/onkernel/hypeman/lib/oapi" "github.com/onkernel/hypeman/lib/paths" @@ -119,13 +120,16 @@ func TestExecInstanceNonTTY(t *testing.T) { var stdout, stderr outputBuffer var execErr error + dialer, err := hypervisor.NewVsockDialer(actualInst.HypervisorType, actualInst.VsockSocket, actualInst.VsockCID) + require.NoError(t, err) + t.Log("Testing exec command: whoami") maxRetries := 10 for i := 0; i < maxRetries; i++ { stdout = outputBuffer{} stderr = outputBuffer{} - exit, execErr = guest.ExecIntoInstance(ctx(), actualInst.VsockSocket, guest.ExecOptions{ + exit, execErr = guest.ExecIntoInstance(ctx(), dialer, guest.ExecOptions{ Command: []string{"/bin/sh", "-c", "whoami"}, Stdin: nil, Stdout: &stdout, @@ -250,9 +254,12 @@ func TestExecWithDebianMinimal(t *testing.T) { assert.Contains(t, logs, "overlay-init: app exited with code", "App should have exited") // Test exec commands work even though the main app (bash) has exited + dialer2, err := hypervisor.NewVsockDialer(actualInst.HypervisorType, actualInst.VsockSocket, actualInst.VsockCID) + require.NoError(t, err) + t.Log("Testing exec command: echo") var stdout, stderr outputBuffer - exit, err := guest.ExecIntoInstance(ctx(), actualInst.VsockSocket, guest.ExecOptions{ + exit, err := guest.ExecIntoInstance(ctx(), dialer2, guest.ExecOptions{ Command: []string{"echo", "hello from debian"}, Stdout: &stdout, Stderr: &stderr, @@ -266,7 +273,7 @@ func TestExecWithDebianMinimal(t *testing.T) { // Verify we're actually in Debian t.Log("Verifying OS release...") stdout = outputBuffer{} - exit, err = guest.ExecIntoInstance(ctx(), actualInst.VsockSocket, guest.ExecOptions{ + exit, err = guest.ExecIntoInstance(ctx(), dialer2, guest.ExecOptions{ Command: []string{"cat", "/etc/os-release"}, Stdout: &stdout, TTY: false, diff --git a/cmd/api/api/instances.go b/cmd/api/api/instances.go index f01695db..69968b0c 100644 --- a/cmd/api/api/instances.go +++ b/cmd/api/api/instances.go @@ -9,6 +9,7 @@ import ( "github.com/c2h5oh/datasize" "github.com/onkernel/hypeman/lib/guest" + "github.com/onkernel/hypeman/lib/hypervisor" "github.com/onkernel/hypeman/lib/instances" "github.com/onkernel/hypeman/lib/logger" mw "github.com/onkernel/hypeman/lib/middleware" @@ -137,6 +138,12 @@ func (s *ApiService) CreateInstance(ctx context.Context, request oapi.CreateInst } } + // Convert hypervisor type from API enum to domain type + var hvType hypervisor.Type + if request.Body.Hypervisor != nil { + hvType = hypervisor.Type(*request.Body.Hypervisor) + } + domainReq := instances.CreateInstanceRequest{ Name: request.Body.Name, Image: request.Body.Image, @@ -148,6 +155,7 @@ func (s *ApiService) CreateInstance(ctx context.Context, request oapi.CreateInst NetworkEnabled: networkEnabled, Devices: deviceRefs, Volumes: volumes, + Hypervisor: hvType, } inst, err := s.InstanceManager.CreateInstance(ctx, domainReq) @@ -452,8 +460,17 @@ func (s *ApiService) StatInstancePath(ctx context.Context, request oapi.StatInst }, nil } - // Connect to guest agent - grpcConn, err := guest.GetOrCreateConnPublic(ctx, inst.VsockSocket) + // Create vsock dialer for this hypervisor type + dialer, err := hypervisor.NewVsockDialer(inst.HypervisorType, inst.VsockSocket, inst.VsockCID) + if err != nil { + log.ErrorContext(ctx, "failed to create vsock dialer", "error", err) + return oapi.StatInstancePath500JSONResponse{ + Code: "internal_error", + Message: "failed to create vsock dialer", + }, nil + } + + grpcConn, err := guest.GetOrCreateConn(ctx, dialer) if err != nil { log.ErrorContext(ctx, "failed to get grpc connection", "error", err) return oapi.StatInstancePath500JSONResponse{ @@ -537,6 +554,9 @@ func instanceToOAPI(inst instances.Instance) oapi.Instance { netObj.Mac = lo.ToPtr(inst.MAC) } + // Convert hypervisor type + hvType := oapi.InstanceHypervisor(inst.HypervisorType) + oapiInst := oapi.Instance{ Id: inst.Id, Name: inst.Name, @@ -552,6 +572,7 @@ func instanceToOAPI(inst instances.Instance) oapi.Instance { StartedAt: inst.StartedAt, StoppedAt: inst.StoppedAt, HasSnapshot: lo.ToPtr(inst.HasSnapshot), + Hypervisor: &hvType, } if len(inst.Env) > 0 { diff --git a/cmd/api/config/config.go b/cmd/api/config/config.go index a3e5a556..2d43b526 100644 --- a/cmd/api/config/config.go +++ b/cmd/api/config/config.go @@ -101,6 +101,9 @@ type Config struct { // Cloudflare configuration (if AcmeDnsProvider=cloudflare) CloudflareApiToken string // Cloudflare API token + + // Hypervisor configuration + DefaultHypervisor string // Default hypervisor type: "cloud-hypervisor" or "qemu" } // Load loads configuration from environment variables @@ -163,6 +166,9 @@ func Load() *Config { // Cloudflare configuration CloudflareApiToken: getEnv("CLOUDFLARE_API_TOKEN", ""), + + // Hypervisor configuration + DefaultHypervisor: getEnv("DEFAULT_HYPERVISOR", "cloud-hypervisor"), } return cfg diff --git a/cmd/api/main.go b/cmd/api/main.go index 5d003352..dfb556b1 100644 --- a/cmd/api/main.go +++ b/cmd/api/main.go @@ -23,6 +23,7 @@ import ( "github.com/onkernel/hypeman/cmd/api/api" "github.com/onkernel/hypeman/cmd/api/config" "github.com/onkernel/hypeman/lib/guest" + "github.com/onkernel/hypeman/lib/hypervisor/qemu" "github.com/onkernel/hypeman/lib/instances" mw "github.com/onkernel/hypeman/lib/middleware" "github.com/onkernel/hypeman/lib/oapi" @@ -125,6 +126,11 @@ func run() error { } logger.Info("KVM access verified") + // Check if QEMU is available (optional - only warn if not present) + if _, err := (&qemu.Starter{}).GetBinaryPath(nil, ""); err != nil { + logger.Warn("QEMU not available - QEMU hypervisor will not work", "error", err) + } + // Validate log rotation config var logMaxSize datasize.ByteSize if err := logMaxSize.UnmarshalText([]byte(app.Config.LogMaxSize)); err != nil { diff --git a/go.mod b/go.mod index 1affd894..830eb39e 100644 --- a/go.mod +++ b/go.mod @@ -6,6 +6,7 @@ require ( github.com/c2h5oh/datasize v0.0.0-20231215233829-aa82cc1e6500 github.com/creack/pty v1.1.24 github.com/cyphar/filepath-securejoin v0.6.1 + github.com/digitalocean/go-qemu v0.0.0-20250212194115-ee9b0668d242 github.com/distribution/reference v0.6.0 github.com/getkin/kin-openapi v0.133.0 github.com/ghodss/yaml v1.0.0 @@ -58,6 +59,7 @@ require ( github.com/containerd/errdefs/pkg v0.3.0 // indirect github.com/containerd/stargz-snapshotter/estargz v0.16.3 // indirect github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc // indirect + github.com/digitalocean/go-libvirt v0.0.0-20220804181439-8648fbde413e // indirect github.com/docker/cli v28.2.2+incompatible // indirect github.com/docker/distribution v2.8.3+incompatible // indirect github.com/docker/docker v28.2.2+incompatible // indirect diff --git a/go.sum b/go.sum index 6772c9ed..3edd3725 100644 --- a/go.sum +++ b/go.sum @@ -40,6 +40,10 @@ github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSs github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc h1:U9qPSI2PIWSS1VwoXQT9A3Wy9MM3WgvqSxFWenqJduM= github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/digitalocean/go-libvirt v0.0.0-20220804181439-8648fbde413e h1:SCnqm8SjSa0QqRxXbo5YY//S+OryeJioe17nK+iDZpg= +github.com/digitalocean/go-libvirt v0.0.0-20220804181439-8648fbde413e/go.mod h1:o129ljs6alsIQTc8d6eweihqpmmrbxZ2g1jhgjhPykI= +github.com/digitalocean/go-qemu v0.0.0-20250212194115-ee9b0668d242 h1:rh6rt8pF5U4iyQ86h6lRDenJoX4ht2wFnZXB9ogIrIM= +github.com/digitalocean/go-qemu v0.0.0-20250212194115-ee9b0668d242/go.mod h1:LGHUtlhsY4vRGM6AHejEQKVI5e3eHbSylMHwTSpQtVw= github.com/distribution/reference v0.6.0 h1:0IXCQ5g4/QMHHkarYzh5l+u8T3t73zM5QvfrDyIgxBk= github.com/distribution/reference v0.6.0/go.mod h1:BbU0aIcezP1/5jX/8MP0YiH4SdvB5Y4f/wlDRiLyi3E= github.com/docker/cli v28.2.2+incompatible h1:qzx5BNUDFqlvyq4AHzdNB7gSyVTmU4cgsyN9SdInc1A= @@ -233,6 +237,7 @@ github.com/woodsbury/decimal128 v1.3.0 h1:8pffMNWIlC0O5vbyHWFZAt5yWvWcrHA+3ovIIj github.com/woodsbury/decimal128 v1.3.0/go.mod h1:C5UTmyTjW3JftjUFzOVhC20BEQa2a4ZKOB5I6Zjb+ds= github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= +github.com/yuin/goldmark v1.3.5/go.mod h1:mwnBkeHKe2W/ZEtQ+71ViKU8L12m81fl3OWwC1Zlc8k= go.opentelemetry.io/auto/sdk v1.2.1 h1:jXsnJ4Lmnqd11kwkBV2LgLoFMZKizbCi5fNZ/ipaZ64= go.opentelemetry.io/auto/sdk v1.2.1/go.mod h1:KRTj+aOaElaLi+wW1kO/DZRXwkF4C5xPbEe3ZiIhN7Y= go.opentelemetry.io/contrib/bridges/otelslog v0.13.0 h1:bwnLpizECbPr1RrQ27waeY2SPIPeccCx/xLuoYADZ9s= @@ -279,6 +284,7 @@ golang.org/x/crypto v0.43.0 h1:dduJYIi3A3KOfdGOHX8AVZ/jGiyPa3IbBozJ5kNuE04= golang.org/x/crypto v0.43.0/go.mod h1:BFbav4mRNlXJL4wNeejLpWxB7wMbc79PdRGhWKncxR0= golang.org/x/mod v0.2.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= golang.org/x/mod v0.3.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= +golang.org/x/mod v0.4.2/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= golang.org/x/mod v0.28.0 h1:gQBtGhjxykdjY9YhZpSlZIsbnaE2+PgjfLWUQTnoZ1U= golang.org/x/mod v0.28.0/go.mod h1:yfB/L0NOf/kmEbXjzCPOx1iK1fRutOydrCMsqRhEBxI= golang.org/x/net v0.0.0-20180906233101-161cd47e91fd/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= @@ -286,12 +292,14 @@ golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/net v0.0.0-20200226121028-0de0cce0169b/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/net v0.0.0-20201021035429-f5854403a974/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU= +golang.org/x/net v0.0.0-20210405180319-a5a99cb37ef4/go.mod h1:p54w0d4576C0XHj96bSt6lcn1PtDYWL6XObtHCRCNQM= golang.org/x/net v0.46.1-0.20251013234738-63d1a5100f82 h1:6/3JGEh1C88g7m+qzzTbl3A0FtsLguXieqofVLU/JAo= golang.org/x/net v0.46.1-0.20251013234738-63d1a5100f82/go.mod h1:Q9BGdFy1y4nkUwiLvT5qtyhAnEHgnQ/zd8PfU6nc210= golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20190911185100-cd5d95a43a6e/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.0.0-20210220032951-036812b2e83c/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.17.0 h1:l60nONMj9l5drqw6jlhIELNv9I0A4OFgRsG9k2oT9Ug= golang.org/x/sync v0.17.0/go.mod h1:9KTHXmSnoGruLpwFjVSX0lNNA75CykiMECbovNTZqGI= golang.org/x/sys v0.0.0-20180909124046-d0be0721c37e/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= @@ -299,12 +307,16 @@ golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5h golang.org/x/sys v0.0.0-20190222072716-a9d3bda3a223/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20210330210617-4fbd30eecc44/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20210510120138-977fb7262007/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20210616094352-59db8d763f22/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220715151400-c0bba94af5f8/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.2.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.10.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.38.0 h1:3yZWxaJjBmCWXqhN1qh02AkOnCQ1poK6oF+a7xWL6Gc= golang.org/x/sys v0.38.0/go.mod h1:OgkHotnGiDImocRcuBABYBEXf8A9a87e/uXjp9XT3ks= +golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.3.2/go.mod h1:bEr9sfX3Q8Zfm5fL9x+3itogRgK3+ptLWKqgva+5dAk= golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= @@ -316,6 +328,7 @@ golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGm golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= golang.org/x/tools v0.0.0-20200619180055-7c47624df98f/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE= golang.org/x/tools v0.0.0-20210106214847-113979e3529a/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA= +golang.org/x/tools v0.1.1/go.mod h1:o0xws9oXOQQZyjljx8fwUC0k7L1pTE6eaCbjGeHmOkk= golang.org/x/tools v0.37.0 h1:DVSRzp7FwePZW356yEAChSdNcQo6Nsp+fex1SUW09lE= golang.org/x/tools v0.37.0/go.mod h1:MBN5QPQtLMHVdvsbtarmTNukZDdgwdwlO5qGacAzF0w= golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= diff --git a/lib/devices/gpu_e2e_test.go b/lib/devices/gpu_e2e_test.go index f742b9f9..e279ed51 100644 --- a/lib/devices/gpu_e2e_test.go +++ b/lib/devices/gpu_e2e_test.go @@ -12,6 +12,7 @@ import ( "github.com/onkernel/hypeman/cmd/api/config" "github.com/onkernel/hypeman/lib/devices" "github.com/onkernel/hypeman/lib/guest" + "github.com/onkernel/hypeman/lib/hypervisor" "github.com/onkernel/hypeman/lib/images" "github.com/onkernel/hypeman/lib/instances" "github.com/onkernel/hypeman/lib/network" @@ -72,7 +73,7 @@ func TestGPUPassthrough(t *testing.T) { limits := instances.ResourceLimits{ MaxOverlaySize: 100 * 1024 * 1024 * 1024, // 100GB } - instanceMgr := instances.NewManager(p, imageMgr, systemMgr, networkMgr, deviceMgr, volumeMgr, limits, nil, nil) + instanceMgr := instances.NewManager(p, imageMgr, systemMgr, networkMgr, deviceMgr, volumeMgr, limits, "", nil, nil) // Step 1: Discover available GPUs t.Log("Step 1: Discovering available GPUs...") @@ -218,6 +219,9 @@ func TestGPUPassthrough(t *testing.T) { actualInst, err := instanceMgr.GetInstance(ctx, inst.Id) require.NoError(t, err) + dialer, err := hypervisor.NewVsockDialer(actualInst.HypervisorType, actualInst.VsockSocket, actualInst.VsockCID) + require.NoError(t, err) + // Create a context with timeout for exec operations execCtx, cancel := context.WithTimeout(ctx, 30*time.Second) defer cancel() @@ -232,7 +236,7 @@ func TestGPUPassthrough(t *testing.T) { stdout = outputBuffer{} stderr = outputBuffer{} - _, execErr = guest.ExecIntoInstance(execCtx, actualInst.VsockSocket, guest.ExecOptions{ + _, execErr = guest.ExecIntoInstance(execCtx, dialer, guest.ExecOptions{ Command: []string{"/bin/sh", "-c", checkGPUCmd}, Stdin: nil, Stdout: &stdout, diff --git a/lib/devices/gpu_inference_test.go b/lib/devices/gpu_inference_test.go index 0992193c..15f1d80e 100644 --- a/lib/devices/gpu_inference_test.go +++ b/lib/devices/gpu_inference_test.go @@ -23,6 +23,7 @@ import ( "github.com/onkernel/hypeman/cmd/api/config" "github.com/onkernel/hypeman/lib/devices" "github.com/onkernel/hypeman/lib/guest" + "github.com/onkernel/hypeman/lib/hypervisor" "github.com/onkernel/hypeman/lib/images" "github.com/onkernel/hypeman/lib/instances" "github.com/onkernel/hypeman/lib/network" @@ -111,7 +112,7 @@ func TestGPUInference(t *testing.T) { limits := instances.ResourceLimits{ MaxOverlaySize: 100 * 1024 * 1024 * 1024, } - instanceMgr := instances.NewManager(p, imageMgr, systemMgr, networkMgr, deviceMgr, volumeMgr, limits, nil, nil) + instanceMgr := instances.NewManager(p, imageMgr, systemMgr, networkMgr, deviceMgr, volumeMgr, limits, "", nil, nil) // Step 1: Build custom CUDA+Ollama image t.Log("Step 1: Building custom CUDA+Ollama Docker image...") @@ -285,6 +286,9 @@ func TestGPUInference(t *testing.T) { actualInst, err := instanceMgr.GetInstance(ctx, inst.Id) require.NoError(t, err) + dialer, err := hypervisor.NewVsockDialer(actualInst.HypervisorType, actualInst.VsockSocket, actualInst.VsockCID) + require.NoError(t, err) + // Step 10: Wait for Ollama server t.Log("Step 10: Waiting for Ollama server to be ready...") ollamaReady := false @@ -292,7 +296,7 @@ func TestGPUInference(t *testing.T) { healthCtx, healthCancel := context.WithTimeout(ctx, 5*time.Second) var healthStdout, healthStderr inferenceOutputBuffer - _, err = guest.ExecIntoInstance(healthCtx, actualInst.VsockSocket, guest.ExecOptions{ + _, err = guest.ExecIntoInstance(healthCtx, dialer, guest.ExecOptions{ Command: []string{"/bin/sh", "-c", "ollama list 2>&1"}, Stdout: &healthStdout, Stderr: &healthStderr, @@ -319,7 +323,7 @@ func TestGPUInference(t *testing.T) { // Check nvidia-smi (should work now with CUDA image) var nvidiaSmiStdout, nvidiaSmiStderr inferenceOutputBuffer - _, _ = guest.ExecIntoInstance(gpuCheckCtx, actualInst.VsockSocket, guest.ExecOptions{ + _, _ = guest.ExecIntoInstance(gpuCheckCtx, dialer, guest.ExecOptions{ Command: []string{"/bin/sh", "-c", "nvidia-smi 2>&1 || echo 'nvidia-smi failed'"}, Stdout: &nvidiaSmiStdout, Stderr: &nvidiaSmiStderr, @@ -333,7 +337,7 @@ func TestGPUInference(t *testing.T) { // Check NVIDIA kernel modules var modulesStdout inferenceOutputBuffer - guest.ExecIntoInstance(gpuCheckCtx, actualInst.VsockSocket, guest.ExecOptions{ + guest.ExecIntoInstance(gpuCheckCtx, dialer, guest.ExecOptions{ Command: []string{"/bin/sh", "-c", "cat /proc/modules | grep nvidia"}, Stdout: &modulesStdout, }) @@ -343,7 +347,7 @@ func TestGPUInference(t *testing.T) { // Check device nodes var devStdout inferenceOutputBuffer - guest.ExecIntoInstance(gpuCheckCtx, actualInst.VsockSocket, guest.ExecOptions{ + guest.ExecIntoInstance(gpuCheckCtx, dialer, guest.ExecOptions{ Command: []string{"/bin/sh", "-c", "ls -la /dev/nvidia* 2>&1"}, Stdout: &devStdout, }) @@ -355,7 +359,7 @@ func TestGPUInference(t *testing.T) { t.Log("Step 12: Ensuring TinyLlama model is available...") var listStdout inferenceOutputBuffer - guest.ExecIntoInstance(gpuCheckCtx, actualInst.VsockSocket, guest.ExecOptions{ + guest.ExecIntoInstance(gpuCheckCtx, dialer, guest.ExecOptions{ Command: []string{"/bin/sh", "-c", "ollama list 2>&1"}, Stdout: &listStdout, }) @@ -366,7 +370,7 @@ func TestGPUInference(t *testing.T) { defer pullCancel() var pullStdout inferenceOutputBuffer - _, pullErr := guest.ExecIntoInstance(pullCtx, actualInst.VsockSocket, guest.ExecOptions{ + _, pullErr := guest.ExecIntoInstance(pullCtx, dialer, guest.ExecOptions{ Command: []string{"/bin/sh", "-c", "ollama pull tinyllama 2>&1"}, Stdout: &pullStdout, }) diff --git a/lib/devices/gpu_module_test.go b/lib/devices/gpu_module_test.go index 251b2549..193cc5fa 100644 --- a/lib/devices/gpu_module_test.go +++ b/lib/devices/gpu_module_test.go @@ -19,6 +19,7 @@ import ( "github.com/onkernel/hypeman/cmd/api/config" "github.com/onkernel/hypeman/lib/devices" "github.com/onkernel/hypeman/lib/guest" + "github.com/onkernel/hypeman/lib/hypervisor" "github.com/onkernel/hypeman/lib/images" "github.com/onkernel/hypeman/lib/instances" "github.com/onkernel/hypeman/lib/network" @@ -77,7 +78,7 @@ func TestNVIDIAModuleLoading(t *testing.T) { deviceMgr := devices.NewManager(p) volumeMgr := volumes.NewManager(p, 10*1024*1024*1024, nil) limits := instances.ResourceLimits{MaxOverlaySize: 10 * 1024 * 1024 * 1024} - instanceMgr := instances.NewManager(p, imageMgr, systemMgr, networkMgr, deviceMgr, volumeMgr, limits, nil, nil) + instanceMgr := instances.NewManager(p, imageMgr, systemMgr, networkMgr, deviceMgr, volumeMgr, limits, "", nil, nil) // Step 1: Find an NVIDIA GPU t.Log("Step 1: Discovering available GPUs...") @@ -194,6 +195,9 @@ func TestNVIDIAModuleLoading(t *testing.T) { actualInst, err := instanceMgr.GetInstance(ctx, inst.Id) require.NoError(t, err) + dialer, err := hypervisor.NewVsockDialer(actualInst.HypervisorType, actualInst.VsockSocket, actualInst.VsockCID) + require.NoError(t, err) + execCtx, cancel := context.WithTimeout(ctx, 30*time.Second) defer cancel() @@ -204,7 +208,7 @@ func TestNVIDIAModuleLoading(t *testing.T) { for i := 0; i < 10; i++ { stdout = outputBuffer{} stderr = outputBuffer{} - _, err = guest.ExecIntoInstance(execCtx, actualInst.VsockSocket, guest.ExecOptions{ + _, err = guest.ExecIntoInstance(execCtx, dialer, guest.ExecOptions{ Command: []string{"/bin/sh", "-c", dmesgCmd}, Stdin: nil, Stdout: &stdout, @@ -234,7 +238,7 @@ func TestNVIDIAModuleLoading(t *testing.T) { // Check lsmod for nvidia modules stdout = outputBuffer{} stderr = outputBuffer{} - _, err = guest.ExecIntoInstance(execCtx, actualInst.VsockSocket, guest.ExecOptions{ + _, err = guest.ExecIntoInstance(execCtx, dialer, guest.ExecOptions{ Command: []string{"/bin/sh", "-c", "cat /proc/modules | grep nvidia || echo 'No nvidia modules loaded'"}, Stdin: nil, Stdout: &stdout, @@ -254,7 +258,7 @@ func TestNVIDIAModuleLoading(t *testing.T) { // Check for /dev/nvidia* devices stdout = outputBuffer{} stderr = outputBuffer{} - _, err = guest.ExecIntoInstance(execCtx, actualInst.VsockSocket, guest.ExecOptions{ + _, err = guest.ExecIntoInstance(execCtx, dialer, guest.ExecOptions{ Command: []string{"/bin/sh", "-c", "ls -la /dev/nvidia* 2>&1 || echo 'No nvidia devices found'"}, Stdin: nil, Stdout: &stdout, @@ -318,7 +322,7 @@ func TestNVMLDetection(t *testing.T) { deviceMgr := devices.NewManager(p) volumeMgr := volumes.NewManager(p, 10*1024*1024*1024, nil) limits := instances.ResourceLimits{MaxOverlaySize: 10 * 1024 * 1024 * 1024} - instanceMgr := instances.NewManager(p, imageMgr, systemMgr, networkMgr, deviceMgr, volumeMgr, limits, nil, nil) + instanceMgr := instances.NewManager(p, imageMgr, systemMgr, networkMgr, deviceMgr, volumeMgr, limits, "", nil, nil) // Step 1: Check if ollama-cuda:test image exists in Docker t.Log("Step 1: Checking for ollama-cuda:test Docker image...") @@ -430,13 +434,16 @@ func TestNVMLDetection(t *testing.T) { actualInst, err := instanceMgr.GetInstance(ctx, inst.Id) require.NoError(t, err) + dialer2, err := hypervisor.NewVsockDialer(actualInst.HypervisorType, actualInst.VsockSocket, actualInst.VsockCID) + require.NoError(t, err) + // Step 5: Run NVML test t.Log("Step 5: Running NVML detection test...") execCtx, cancel := context.WithTimeout(ctx, 30*time.Second) defer cancel() var stdout, stderr outputBuffer - _, err = guest.ExecIntoInstance(execCtx, actualInst.VsockSocket, guest.ExecOptions{ + _, err = guest.ExecIntoInstance(execCtx, dialer2, guest.ExecOptions{ Command: []string{"/bin/sh", "-c", "python3 /usr/local/bin/test-nvml.py 2>&1"}, Stdin: nil, Stdout: &stdout, @@ -469,7 +476,7 @@ func TestNVMLDetection(t *testing.T) { t.Log("Step 6: Running CUDA driver test...") stdout = outputBuffer{} stderr = outputBuffer{} - _, err = guest.ExecIntoInstance(execCtx, actualInst.VsockSocket, guest.ExecOptions{ + _, err = guest.ExecIntoInstance(execCtx, dialer2, guest.ExecOptions{ Command: []string{"/bin/sh", "-c", "python3 /usr/local/bin/test-cuda.py 2>&1"}, Stdin: nil, Stdout: &stdout, diff --git a/lib/guest/client.go b/lib/guest/client.go index 8f06cf82..dc7a7fd3 100644 --- a/lib/guest/client.go +++ b/lib/guest/client.go @@ -1,7 +1,6 @@ package guest import ( - "bufio" "context" "fmt" "io" @@ -16,20 +15,17 @@ import ( "time" securejoin "github.com/cyphar/filepath-securejoin" + "github.com/onkernel/hypeman/lib/hypervisor" "google.golang.org/grpc" "google.golang.org/grpc/credentials/insecure" ) const ( - // vsockDialTimeout is the timeout for connecting to the vsock Unix socket - vsockDialTimeout = 5 * time.Second - // vsockHandshakeTimeout is the timeout for the Cloud Hypervisor vsock handshake - vsockHandshakeTimeout = 5 * time.Second // vsockGuestPort is the port the guest-agent listens on inside the guest vsockGuestPort = 2222 ) -// connPool manages reusable gRPC connections per vsock socket path +// connPool manages reusable gRPC connections per vsock dialer key // This avoids the overhead and potential issues of rapidly creating/closing connections var connPool = struct { sync.RWMutex @@ -38,16 +34,14 @@ var connPool = struct { conns: make(map[string]*grpc.ClientConn), } -// GetOrCreateConnPublic is a public wrapper for getOrCreateConn for use by the API layer -func GetOrCreateConnPublic(ctx context.Context, vsockSocketPath string) (*grpc.ClientConn, error) { - return getOrCreateConn(ctx, vsockSocketPath) -} +// GetOrCreateConn returns an existing connection or creates a new one using a VsockDialer. +// This supports multiple hypervisor types (Cloud Hypervisor, QEMU, etc.). +func GetOrCreateConn(ctx context.Context, dialer hypervisor.VsockDialer) (*grpc.ClientConn, error) { + key := dialer.Key() -// getOrCreateConn returns an existing connection or creates a new one -func getOrCreateConn(ctx context.Context, vsockSocketPath string) (*grpc.ClientConn, error) { // Try read lock first for existing connection connPool.RLock() - if conn, ok := connPool.conns[vsockSocketPath]; ok { + if conn, ok := connPool.conns[key]; ok { connPool.RUnlock() return conn, nil } @@ -58,14 +52,14 @@ func getOrCreateConn(ctx context.Context, vsockSocketPath string) (*grpc.ClientC defer connPool.Unlock() // Double-check after acquiring write lock - if conn, ok := connPool.conns[vsockSocketPath]; ok { + if conn, ok := connPool.conns[key]; ok { return conn, nil } - // Create new connection + // Create new connection using the VsockDialer conn, err := grpc.Dial("passthrough:///vsock", grpc.WithContextDialer(func(ctx context.Context, addr string) (net.Conn, error) { - return dialVsock(ctx, vsockSocketPath) + return dialer.DialVsock(ctx, vsockGuestPort) }), grpc.WithTransportCredentials(insecure.NewCredentials()), ) @@ -73,20 +67,21 @@ func getOrCreateConn(ctx context.Context, vsockSocketPath string) (*grpc.ClientC return nil, fmt.Errorf("create grpc connection: %w", err) } - connPool.conns[vsockSocketPath] = conn - slog.Debug("created new gRPC connection", "socket", vsockSocketPath) + connPool.conns[key] = conn + slog.Debug("created new gRPC connection", "key", key) return conn, nil } -// CloseConn closes and removes a connection from the pool (call when VM is deleted) -func CloseConn(vsockSocketPath string) { +// CloseConn removes a connection from the pool by key (call when VM is deleted). +// We only remove from pool, not explicitly close - the connection will fail +// naturally when the VM dies, and grpc will clean up. +func CloseConn(dialerKey string) { connPool.Lock() defer connPool.Unlock() - if conn, ok := connPool.conns[vsockSocketPath]; ok { - conn.Close() - delete(connPool.conns, vsockSocketPath) - slog.Debug("closed gRPC connection", "socket", vsockSocketPath) + if _, ok := connPool.conns[dialerKey]; ok { + delete(connPool.conns, dialerKey) + slog.Debug("removed gRPC connection from pool", "key", dialerKey) } } @@ -107,26 +102,14 @@ type ExecOptions struct { Timeout int32 // Execution timeout in seconds (0 = no timeout) } -// bufferedConn wraps a net.Conn with a bufio.Reader to ensure any buffered -// data from the handshake is properly drained before reading from the connection -type bufferedConn struct { - net.Conn - reader *bufio.Reader -} - -func (c *bufferedConn) Read(p []byte) (int, error) { - return c.reader.Read(p) -} - -// ExecIntoInstance executes command in instance via vsock using gRPC -// vsockSocketPath is the Unix socket created by Cloud Hypervisor (e.g., /var/lib/hypeman/guests/{id}/vsock.sock) -func ExecIntoInstance(ctx context.Context, vsockSocketPath string, opts ExecOptions) (*ExitStatus, error) { +// ExecIntoInstance executes command in instance via vsock using gRPC. +// The dialer is a hypervisor-specific VsockDialer that knows how to connect to the guest. +func ExecIntoInstance(ctx context.Context, dialer hypervisor.VsockDialer, opts ExecOptions) (*ExitStatus, error) { start := time.Now() var bytesSent int64 - // Get or create a reusable gRPC connection for this vsock socket - // Connection pooling avoids issues with rapid connect/disconnect cycles - grpcConn, err := getOrCreateConn(ctx, vsockSocketPath) + // Get or create a reusable gRPC connection for this vsock dialer + grpcConn, err := GetOrCreateConn(ctx, dialer) if err != nil { return nil, fmt.Errorf("get grpc connection: %w", err) } @@ -210,77 +193,17 @@ func ExecIntoInstance(ctx context.Context, vsockSocketPath string, opts ExecOpti } } -// dialVsock connects to Cloud Hypervisor's vsock Unix socket and performs the handshake -func dialVsock(ctx context.Context, vsockSocketPath string) (net.Conn, error) { - slog.DebugContext(ctx, "connecting to vsock", "socket", vsockSocketPath) - - // Use dial timeout, respecting context deadline if shorter - dialTimeout := vsockDialTimeout - if deadline, ok := ctx.Deadline(); ok { - if remaining := time.Until(deadline); remaining < dialTimeout { - dialTimeout = remaining - } - } - - // Connect to CH's Unix socket with timeout - dialer := net.Dialer{Timeout: dialTimeout} - conn, err := dialer.DialContext(ctx, "unix", vsockSocketPath) - if err != nil { - return nil, fmt.Errorf("dial vsock socket %s: %w", vsockSocketPath, err) - } - - slog.DebugContext(ctx, "connected to vsock socket, performing handshake", "port", vsockGuestPort) - - // Set deadline for handshake - if err := conn.SetDeadline(time.Now().Add(vsockHandshakeTimeout)); err != nil { - conn.Close() - return nil, fmt.Errorf("set handshake deadline: %w", err) - } - - // Perform Cloud Hypervisor vsock handshake - handshakeCmd := fmt.Sprintf("CONNECT %d\n", vsockGuestPort) - if _, err := conn.Write([]byte(handshakeCmd)); err != nil { - conn.Close() - return nil, fmt.Errorf("send vsock handshake: %w", err) - } - - // Read handshake response - reader := bufio.NewReader(conn) - response, err := reader.ReadString('\n') - if err != nil { - conn.Close() - return nil, fmt.Errorf("read vsock handshake response (is guest-agent running in guest?): %w", err) - } - - // Clear deadline after successful handshake - if err := conn.SetDeadline(time.Time{}); err != nil { - conn.Close() - return nil, fmt.Errorf("clear deadline: %w", err) - } - - response = strings.TrimSpace(response) - if !strings.HasPrefix(response, "OK ") { - conn.Close() - return nil, fmt.Errorf("vsock handshake failed: %s", response) - } - - slog.DebugContext(ctx, "vsock handshake successful", "response", response) - - // Return wrapped connection that uses the bufio.Reader - // This ensures any bytes buffered during handshake are not lost - return &bufferedConn{Conn: conn, reader: reader}, nil -} - // CopyToInstanceOptions configures a copy-to-instance operation type CopyToInstanceOptions struct { - SrcPath string // Local source path - DstPath string // Destination path in guest - Mode fs.FileMode // Optional: override file mode (0 = preserve source) + SrcPath string // Local source path + DstPath string // Destination path in guest + Mode fs.FileMode // Optional: override file mode (0 = preserve source) } -// CopyToInstance copies a file or directory to an instance via vsock -func CopyToInstance(ctx context.Context, vsockSocketPath string, opts CopyToInstanceOptions) error { - grpcConn, err := getOrCreateConn(ctx, vsockSocketPath) +// CopyToInstance copies a file or directory to an instance via vsock. +// The dialer is a hypervisor-specific VsockDialer that knows how to connect to the guest. +func CopyToInstance(ctx context.Context, dialer hypervisor.VsockDialer, opts CopyToInstanceOptions) error { + grpcConn, err := GetOrCreateConn(ctx, dialer) if err != nil { return fmt.Errorf("get grpc connection: %w", err) } @@ -310,12 +233,18 @@ func copyFileToInstance(ctx context.Context, client GuestServiceClient, srcPath, mode = srcInfo.Mode().Perm() } + f, err := os.Open(srcPath) + if err != nil { + return fmt.Errorf("open source: %w", err) + } + defer f.Close() + stream, err := client.CopyToGuest(ctx) if err != nil { return fmt.Errorf("start copy stream: %w", err) } - // Send start message + // Send start request if err := stream.Send(&CopyToGuestRequest{ Request: &CopyToGuestRequest_Start{ Start: &CopyToGuestStart{ @@ -330,16 +259,10 @@ func copyFileToInstance(ctx context.Context, client GuestServiceClient, srcPath, return fmt.Errorf("send start: %w", err) } - // Open and stream file content - file, err := os.Open(srcPath) - if err != nil { - return fmt.Errorf("open source: %w", err) - } - defer file.Close() - + // Stream file content buf := make([]byte, 32*1024) for { - n, err := file.Read(buf) + n, err := f.Read(buf) if n > 0 { if sendErr := stream.Send(&CopyToGuestRequest{ Request: &CopyToGuestRequest_Data{Data: buf[:n]}, @@ -355,16 +278,17 @@ func copyFileToInstance(ctx context.Context, client GuestServiceClient, srcPath, } } - // Send end message + // Send end marker if err := stream.Send(&CopyToGuestRequest{ Request: &CopyToGuestRequest_End{End: &CopyToGuestEnd{}}, }); err != nil { return fmt.Errorf("send end: %w", err) } + // Receive response resp, err := stream.CloseAndRecv() if err != nil { - return fmt.Errorf("close stream: %w", err) + return fmt.Errorf("receive response: %w", err) } if !resp.Success { @@ -376,31 +300,71 @@ func copyFileToInstance(ctx context.Context, client GuestServiceClient, srcPath, // copyDirToInstance copies a directory recursively to the instance func copyDirToInstance(ctx context.Context, client GuestServiceClient, srcPath, dstPath string) error { + srcPath = filepath.Clean(srcPath) + + // First create the destination directory + stream, err := client.CopyToGuest(ctx) + if err != nil { + return fmt.Errorf("start copy stream for dir: %w", err) + } + + srcInfo, err := os.Stat(srcPath) + if err != nil { + return fmt.Errorf("stat source dir: %w", err) + } + + if err := stream.Send(&CopyToGuestRequest{ + Request: &CopyToGuestRequest_Start{ + Start: &CopyToGuestStart{ + Path: dstPath, + Mode: uint32(srcInfo.Mode().Perm()), + IsDir: true, + Mtime: srcInfo.ModTime().Unix(), + }, + }, + }); err != nil { + return fmt.Errorf("send dir start: %w", err) + } + + if err := stream.Send(&CopyToGuestRequest{ + Request: &CopyToGuestRequest_End{End: &CopyToGuestEnd{}}, + }); err != nil { + return fmt.Errorf("send dir end: %w", err) + } + + resp, err := stream.CloseAndRecv() + if err != nil { + return fmt.Errorf("receive dir response: %w", err) + } + if !resp.Success { + return fmt.Errorf("create dir failed: %s", resp.Error) + } + + // Walk and copy contents return filepath.WalkDir(srcPath, func(path string, d fs.DirEntry, err error) error { if err != nil { return err } + if path == srcPath { + return nil // Skip root, already created + } relPath, err := filepath.Rel(srcPath, path) if err != nil { - return fmt.Errorf("relative path: %w", err) + return fmt.Errorf("get relative path: %w", err) } - targetPath := filepath.Join(dstPath, relPath) - if targetPath == dstPath && relPath == "." { - targetPath = dstPath - } - - info, err := d.Info() - if err != nil { - return fmt.Errorf("info: %w", err) - } if d.IsDir() { - // Create directory + // Create subdirectory stream, err := client.CopyToGuest(ctx) if err != nil { - return fmt.Errorf("start copy stream: %w", err) + return fmt.Errorf("start copy stream for subdir: %w", err) + } + + info, err := d.Info() + if err != nil { + return fmt.Errorf("get dir info: %w", err) } if err := stream.Send(&CopyToGuestRequest{ @@ -413,22 +377,21 @@ func copyDirToInstance(ctx context.Context, client GuestServiceClient, srcPath, }, }, }); err != nil { - return fmt.Errorf("send start: %w", err) + return fmt.Errorf("send subdir start: %w", err) } if err := stream.Send(&CopyToGuestRequest{ Request: &CopyToGuestRequest_End{End: &CopyToGuestEnd{}}, }); err != nil { - return fmt.Errorf("send end: %w", err) + return fmt.Errorf("send subdir end: %w", err) } resp, err := stream.CloseAndRecv() if err != nil { - return fmt.Errorf("close stream: %w", err) + return fmt.Errorf("receive subdir response: %w", err) } - if !resp.Success { - return fmt.Errorf("create directory failed: %s", resp.Error) + return fmt.Errorf("create subdir failed: %s", resp.Error) } return nil } @@ -448,9 +411,10 @@ type CopyFromInstanceOptions struct { // FileHandler is called for each file received from the instance type FileHandler func(header *CopyFromGuestHeader, data io.Reader) error -// CopyFromInstance copies a file or directory from an instance via vsock -func CopyFromInstance(ctx context.Context, vsockSocketPath string, opts CopyFromInstanceOptions) error { - grpcConn, err := getOrCreateConn(ctx, vsockSocketPath) +// CopyFromInstance copies a file or directory from an instance via vsock. +// The dialer is a hypervisor-specific VsockDialer that knows how to connect to the guest. +func CopyFromInstance(ctx context.Context, dialer hypervisor.VsockDialer, opts CopyFromInstanceOptions) error { + grpcConn, err := GetOrCreateConn(ctx, dialer) if err != nil { return fmt.Errorf("get grpc connection: %w", err) } @@ -504,38 +468,38 @@ func CopyFromInstance(ctx context.Context, vsockSocketPath string, opts CopyFrom if err := os.MkdirAll(targetPath, fs.FileMode(r.Header.Mode)); err != nil { return fmt.Errorf("create directory %s: %w", targetPath, err) } - } else if r.Header.IsSymlink { - // Validate symlink target to prevent path traversal attacks - // Reject absolute paths - if filepath.IsAbs(r.Header.LinkTarget) { - return fmt.Errorf("invalid symlink target (absolute path not allowed): %s", r.Header.LinkTarget) - } - // Reject targets that escape the destination directory - // Resolve the link target relative to the symlink's parent directory - linkDir := filepath.Dir(targetPath) - resolvedTarget := filepath.Clean(filepath.Join(linkDir, r.Header.LinkTarget)) - cleanDst := filepath.Clean(opts.DstPath) - // Check path containment - handle root destination specially - var contained bool - if cleanDst == "/" { - // For root destination, any absolute path that doesn't contain ".." after cleaning is valid - contained = !strings.Contains(resolvedTarget, "..") - } else { - contained = strings.HasPrefix(resolvedTarget, cleanDst+string(filepath.Separator)) || resolvedTarget == cleanDst - } - if !contained { - return fmt.Errorf("invalid symlink target (escapes destination): %s", r.Header.LinkTarget) - } + } else if r.Header.IsSymlink { + // Validate symlink target to prevent path traversal attacks + // Reject absolute paths + if filepath.IsAbs(r.Header.LinkTarget) { + return fmt.Errorf("invalid symlink target (absolute path not allowed): %s", r.Header.LinkTarget) + } + // Reject targets that escape the destination directory + // Resolve the link target relative to the symlink's parent directory + linkDir := filepath.Dir(targetPath) + resolvedTarget := filepath.Clean(filepath.Join(linkDir, r.Header.LinkTarget)) + cleanDst := filepath.Clean(opts.DstPath) + // Check path containment - handle root destination specially + var contained bool + if cleanDst == "/" { + // For root destination, any absolute path that doesn't contain ".." after cleaning is valid + contained = !strings.Contains(resolvedTarget, "..") + } else { + contained = strings.HasPrefix(resolvedTarget, cleanDst+string(filepath.Separator)) || resolvedTarget == cleanDst + } + if !contained { + return fmt.Errorf("invalid symlink target (escapes destination): %s", r.Header.LinkTarget) + } - // Create parent directory if needed - if err := os.MkdirAll(filepath.Dir(targetPath), 0755); err != nil { - return fmt.Errorf("create parent dir for symlink: %w", err) - } - // Create symlink - os.Remove(targetPath) // Remove existing if any - if err := os.Symlink(r.Header.LinkTarget, targetPath); err != nil { - return fmt.Errorf("create symlink %s: %w", targetPath, err) - } + // Create parent directory if needed + if err := os.MkdirAll(filepath.Dir(targetPath), 0755); err != nil { + return fmt.Errorf("create parent dir for symlink: %w", err) + } + // Create symlink + os.Remove(targetPath) // Remove existing if any + if err := os.Symlink(r.Header.LinkTarget, targetPath); err != nil { + return fmt.Errorf("create symlink %s: %w", targetPath, err) + } } else { // Create parent directory if err := os.MkdirAll(filepath.Dir(targetPath), 0755); err != nil { @@ -585,4 +549,3 @@ func CopyFromInstance(ctx context.Context, vsockSocketPath string, opts CopyFrom } return nil } - diff --git a/lib/hypervisor/cloudhypervisor/cloudhypervisor.go b/lib/hypervisor/cloudhypervisor/cloudhypervisor.go index effcc596..4410ff43 100644 --- a/lib/hypervisor/cloudhypervisor/cloudhypervisor.go +++ b/lib/hypervisor/cloudhypervisor/cloudhypervisor.go @@ -27,6 +27,9 @@ func New(socketPath string) (*CloudHypervisor, error) { }, nil } +// Verify CloudHypervisor implements the interface +var _ hypervisor.Hypervisor = (*CloudHypervisor)(nil) + // Capabilities returns the features supported by Cloud Hypervisor. func (c *CloudHypervisor) Capabilities() hypervisor.Capabilities { return hypervisor.Capabilities{ @@ -38,31 +41,6 @@ func (c *CloudHypervisor) Capabilities() hypervisor.Capabilities { } } -// CreateVM configures the VM in Cloud Hypervisor. -func (c *CloudHypervisor) CreateVM(ctx context.Context, config hypervisor.VMConfig) error { - vmConfig := ToVMConfig(config) - resp, err := c.client.CreateVMWithResponse(ctx, vmConfig) - if err != nil { - return fmt.Errorf("create vm: %w", err) - } - if resp.StatusCode() != 204 { - return fmt.Errorf("create vm failed with status %d: %s", resp.StatusCode(), string(resp.Body)) - } - return nil -} - -// BootVM starts the configured VM. -func (c *CloudHypervisor) BootVM(ctx context.Context) error { - resp, err := c.client.BootVMWithResponse(ctx) - if err != nil { - return fmt.Errorf("boot vm: %w", err) - } - if resp.StatusCode() != 204 { - return fmt.Errorf("boot vm failed with status %d: %s", resp.StatusCode(), string(resp.Body)) - } - return nil -} - // DeleteVM removes the VM configuration from Cloud Hypervisor. func (c *CloudHypervisor) DeleteVM(ctx context.Context) error { resp, err := c.client.DeleteVMWithResponse(ctx) @@ -157,23 +135,6 @@ func (c *CloudHypervisor) Snapshot(ctx context.Context, destPath string) error { return nil } -// Restore loads a VM from snapshot. -func (c *CloudHypervisor) Restore(ctx context.Context, sourcePath string) error { - sourceURL := "file://" + sourcePath - restoreConfig := vmm.RestoreConfig{ - SourceUrl: sourceURL, - Prefault: ptr(false), - } - resp, err := c.client.PutVmRestoreWithResponse(ctx, restoreConfig) - if err != nil { - return fmt.Errorf("restore: %w", err) - } - if resp.StatusCode() != 204 { - return fmt.Errorf("restore failed with status %d", resp.StatusCode()) - } - return nil -} - // ResizeMemory changes the VM's memory allocation. func (c *CloudHypervisor) ResizeMemory(ctx context.Context, bytes int64) error { resizeConfig := vmm.VmResize{DesiredRam: &bytes} @@ -239,7 +200,3 @@ func (c *CloudHypervisor) ResizeMemoryAndWait(ctx context.Context, bytes int64, // Timeout reached, but resize was requested successfully return nil } - -func ptr[T any](v T) *T { - return &v -} diff --git a/lib/hypervisor/cloudhypervisor/process.go b/lib/hypervisor/cloudhypervisor/process.go index 705c91c2..bd48718b 100644 --- a/lib/hypervisor/cloudhypervisor/process.go +++ b/lib/hypervisor/cloudhypervisor/process.go @@ -3,55 +3,146 @@ package cloudhypervisor import ( "context" "fmt" + "syscall" "github.com/onkernel/hypeman/lib/hypervisor" "github.com/onkernel/hypeman/lib/paths" "github.com/onkernel/hypeman/lib/vmm" + "gvisor.dev/gvisor/pkg/cleanup" ) func init() { hypervisor.RegisterSocketName(hypervisor.TypeCloudHypervisor, "ch.sock") } -// ProcessManager implements hypervisor.ProcessManager for Cloud Hypervisor. -type ProcessManager struct{} +// Starter implements hypervisor.VMStarter for Cloud Hypervisor. +type Starter struct{} -// NewProcessManager creates a new Cloud Hypervisor process manager. -func NewProcessManager() *ProcessManager { - return &ProcessManager{} +// NewStarter creates a new Cloud Hypervisor starter. +func NewStarter() *Starter { + return &Starter{} } -// Verify ProcessManager implements the interface -var _ hypervisor.ProcessManager = (*ProcessManager)(nil) +// Verify Starter implements the interface +var _ hypervisor.VMStarter = (*Starter)(nil) // SocketName returns the socket filename for Cloud Hypervisor. -func (p *ProcessManager) SocketName() string { +func (s *Starter) SocketName() string { return "ch.sock" } -// StartProcess launches a Cloud Hypervisor VMM process. -func (p *ProcessManager) StartProcess(ctx context.Context, paths *paths.Paths, version string, socketPath string) (int, error) { +// GetBinaryPath returns the path to the Cloud Hypervisor binary. +func (s *Starter) GetBinaryPath(p *paths.Paths, version string) (string, error) { chVersion := vmm.CHVersion(version) if !vmm.IsVersionSupported(chVersion) { - return 0, fmt.Errorf("unsupported cloud-hypervisor version: %s", version) + return "", fmt.Errorf("unsupported cloud-hypervisor version: %s", version) } - return vmm.StartProcess(ctx, paths, chVersion, socketPath) + return vmm.GetBinaryPath(p, chVersion) +} + +// GetVersion returns the latest supported Cloud Hypervisor version. +// Cloud Hypervisor binaries are embedded, so we return the latest known version. +func (s *Starter) GetVersion(p *paths.Paths) (string, error) { + return string(vmm.V49_0), nil } -// StartProcessWithArgs launches a Cloud Hypervisor VMM process with extra arguments. -func (p *ProcessManager) StartProcessWithArgs(ctx context.Context, paths *paths.Paths, version string, socketPath string, extraArgs []string) (int, error) { +// StartVM launches Cloud Hypervisor, configures the VM, and boots it. +// Returns the process ID and a Hypervisor client for subsequent operations. +func (s *Starter) StartVM(ctx context.Context, p *paths.Paths, version string, socketPath string, config hypervisor.VMConfig) (int, hypervisor.Hypervisor, error) { + // Validate version chVersion := vmm.CHVersion(version) if !vmm.IsVersionSupported(chVersion) { - return 0, fmt.Errorf("unsupported cloud-hypervisor version: %s", version) + return 0, nil, fmt.Errorf("unsupported cloud-hypervisor version: %s", version) + } + + // 1. Start the Cloud Hypervisor process + pid, err := vmm.StartProcess(ctx, p, chVersion, socketPath) + if err != nil { + return 0, nil, fmt.Errorf("start process: %w", err) + } + + // Setup cleanup to kill the process if subsequent steps fail + cu := cleanup.Make(func() { + syscall.Kill(pid, syscall.SIGKILL) + }) + defer cu.Clean() + + // 2. Create the HTTP client + hv, err := New(socketPath) + if err != nil { + return 0, nil, fmt.Errorf("create client: %w", err) + } + + // 3. Configure the VM via HTTP API + vmConfig := ToVMConfig(config) + resp, err := hv.client.CreateVMWithResponse(ctx, vmConfig) + if err != nil { + return 0, nil, fmt.Errorf("create vm: %w", err) + } + if resp.StatusCode() != 204 { + return 0, nil, fmt.Errorf("create vm failed with status %d: %s", resp.StatusCode(), string(resp.Body)) + } + + // 4. Boot the VM via HTTP API + bootResp, err := hv.client.BootVMWithResponse(ctx) + if err != nil { + return 0, nil, fmt.Errorf("boot vm: %w", err) + } + if bootResp.StatusCode() != 204 { + return 0, nil, fmt.Errorf("boot vm failed with status %d: %s", bootResp.StatusCode(), string(bootResp.Body)) } - return vmm.StartProcessWithArgs(ctx, paths, chVersion, socketPath, extraArgs) + + // Success - release cleanup to prevent killing the process + cu.Release() + return pid, hv, nil } -// GetBinaryPath returns the path to the Cloud Hypervisor binary. -func (p *ProcessManager) GetBinaryPath(paths *paths.Paths, version string) (string, error) { +// RestoreVM starts Cloud Hypervisor and restores VM state from a snapshot. +// The VM is in paused state after restore; caller should call Resume() to continue execution. +func (s *Starter) RestoreVM(ctx context.Context, p *paths.Paths, version string, socketPath string, snapshotPath string) (int, hypervisor.Hypervisor, error) { + // Validate version chVersion := vmm.CHVersion(version) if !vmm.IsVersionSupported(chVersion) { - return "", fmt.Errorf("unsupported cloud-hypervisor version: %s", version) + return 0, nil, fmt.Errorf("unsupported cloud-hypervisor version: %s", version) + } + + // 1. Start the Cloud Hypervisor process + pid, err := vmm.StartProcess(ctx, p, chVersion, socketPath) + if err != nil { + return 0, nil, fmt.Errorf("start process: %w", err) + } + + // Setup cleanup to kill the process if subsequent steps fail + cu := cleanup.Make(func() { + syscall.Kill(pid, syscall.SIGKILL) + }) + defer cu.Clean() + + // 2. Create the HTTP client + hv, err := New(socketPath) + if err != nil { + return 0, nil, fmt.Errorf("create client: %w", err) } - return vmm.GetBinaryPath(paths, chVersion) + + // 3. Restore from snapshot via HTTP API + sourceURL := "file://" + snapshotPath + restoreConfig := vmm.RestoreConfig{ + SourceUrl: sourceURL, + Prefault: ptr(false), + } + resp, err := hv.client.PutVmRestoreWithResponse(ctx, restoreConfig) + if err != nil { + return 0, nil, fmt.Errorf("restore: %w", err) + } + if resp.StatusCode() != 204 { + return 0, nil, fmt.Errorf("restore failed with status %d: %s", resp.StatusCode(), string(resp.Body)) + } + + // Success - release cleanup to prevent killing the process + cu.Release() + return pid, hv, nil +} + +func ptr[T any](v T) *T { + return &v } diff --git a/lib/hypervisor/cloudhypervisor/vsock.go b/lib/hypervisor/cloudhypervisor/vsock.go new file mode 100644 index 00000000..a29b04b2 --- /dev/null +++ b/lib/hypervisor/cloudhypervisor/vsock.go @@ -0,0 +1,118 @@ +package cloudhypervisor + +import ( + "bufio" + "context" + "fmt" + "log/slog" + "net" + "strings" + "time" + + "github.com/onkernel/hypeman/lib/hypervisor" +) + +const ( + // vsockDialTimeout is the timeout for connecting to the vsock Unix socket + vsockDialTimeout = 5 * time.Second + // vsockHandshakeTimeout is the timeout for the Cloud Hypervisor vsock handshake + vsockHandshakeTimeout = 5 * time.Second +) + +func init() { + hypervisor.RegisterVsockDialerFactory(hypervisor.TypeCloudHypervisor, NewVsockDialer) +} + +// VsockDialer implements hypervisor.VsockDialer for Cloud Hypervisor. +// Cloud Hypervisor exposes vsock through a Unix socket file with a text-based +// handshake protocol (CONNECT {port}\n / OK ...). +type VsockDialer struct { + socketPath string +} + +// NewVsockDialer creates a new VsockDialer for Cloud Hypervisor. +// The vsockSocket parameter is the path to the Unix socket file. +// The vsockCID parameter is unused for Cloud Hypervisor (it uses socket path instead). +func NewVsockDialer(vsockSocket string, vsockCID int64) hypervisor.VsockDialer { + return &VsockDialer{ + socketPath: vsockSocket, + } +} + +// Key returns a unique identifier for this dialer, used for connection pooling. +func (d *VsockDialer) Key() string { + return "ch:" + d.socketPath +} + +// DialVsock connects to the guest on the specified port. +// It connects to the Cloud Hypervisor Unix socket and performs the handshake protocol. +func (d *VsockDialer) DialVsock(ctx context.Context, port int) (net.Conn, error) { + slog.DebugContext(ctx, "connecting to vsock", "socket", d.socketPath, "port", port) + + // Use dial timeout, respecting context deadline if shorter + dialTimeout := vsockDialTimeout + if deadline, ok := ctx.Deadline(); ok { + if remaining := time.Until(deadline); remaining < dialTimeout { + dialTimeout = remaining + } + } + + // Connect to CH's Unix socket with timeout + dialer := net.Dialer{Timeout: dialTimeout} + conn, err := dialer.DialContext(ctx, "unix", d.socketPath) + if err != nil { + return nil, fmt.Errorf("dial vsock socket %s: %w", d.socketPath, err) + } + + slog.DebugContext(ctx, "connected to vsock socket, performing handshake", "port", port) + + // Set deadline for handshake + if err := conn.SetDeadline(time.Now().Add(vsockHandshakeTimeout)); err != nil { + conn.Close() + return nil, fmt.Errorf("set handshake deadline: %w", err) + } + + // Perform Cloud Hypervisor vsock handshake + handshakeCmd := fmt.Sprintf("CONNECT %d\n", port) + if _, err := conn.Write([]byte(handshakeCmd)); err != nil { + conn.Close() + return nil, fmt.Errorf("send vsock handshake: %w", err) + } + + // Read handshake response + reader := bufio.NewReader(conn) + response, err := reader.ReadString('\n') + if err != nil { + conn.Close() + return nil, fmt.Errorf("read vsock handshake response (is exec-agent running in guest?): %w", err) + } + + // Clear deadline after successful handshake + if err := conn.SetDeadline(time.Time{}); err != nil { + conn.Close() + return nil, fmt.Errorf("clear deadline: %w", err) + } + + response = strings.TrimSpace(response) + if !strings.HasPrefix(response, "OK ") { + conn.Close() + return nil, fmt.Errorf("vsock handshake failed: %s", response) + } + + slog.DebugContext(ctx, "vsock handshake successful", "response", response) + + // Return wrapped connection that uses the bufio.Reader + // This ensures any bytes buffered during handshake are not lost + return &bufferedConn{Conn: conn, reader: reader}, nil +} + +// bufferedConn wraps a net.Conn with a bufio.Reader to ensure any buffered +// data from the handshake is properly drained before reading from the connection +type bufferedConn struct { + net.Conn + reader *bufio.Reader +} + +func (c *bufferedConn) Read(p []byte) (int, error) { + return c.reader.Read(p) +} diff --git a/lib/hypervisor/hypervisor.go b/lib/hypervisor/hypervisor.go index 2b93b0bf..a92f832e 100644 --- a/lib/hypervisor/hypervisor.go +++ b/lib/hypervisor/hypervisor.go @@ -5,6 +5,8 @@ package hypervisor import ( "context" + "fmt" + "net" "time" "github.com/onkernel/hypeman/lib/paths" @@ -16,7 +18,8 @@ type Type string const ( // TypeCloudHypervisor is the Cloud Hypervisor VMM TypeCloudHypervisor Type = "cloud-hypervisor" - // Future: TypeQEMU Type = "qemu" + // TypeQEMU is the QEMU VMM + TypeQEMU Type = "qemu" ) // socketNames maps hypervisor types to their socket filenames. @@ -38,19 +41,39 @@ func SocketNameForType(t Type) string { return string(t) + ".sock" } -// Hypervisor defines the interface for VM management operations. -// All hypervisor implementations must implement this interface. -type Hypervisor interface { - // CreateVM configures the VM with the given configuration. - // The VM is not started yet after this call. - CreateVM(ctx context.Context, config VMConfig) error +// VMStarter handles the full VM startup sequence. +// Each hypervisor implements its own startup flow: +// - Cloud Hypervisor: starts process, configures via HTTP API, boots via HTTP API +// - QEMU: converts config to command-line args, starts process (VM runs immediately) +type VMStarter interface { + // SocketName returns the socket filename for this hypervisor. + // Uses short names to stay within Unix socket path length limits (SUN_LEN ~108 bytes). + SocketName() string - // BootVM starts the configured VM. - // Must be called after CreateVM. - BootVM(ctx context.Context) error + // GetBinaryPath returns the path to the hypervisor binary, extracting if needed. + GetBinaryPath(p *paths.Paths, version string) (string, error) - // DeleteVM removes the VM configuration. - // The VMM process may still be running after this call. + // GetVersion returns the version of the hypervisor binary. + // For embedded binaries (Cloud Hypervisor), returns the latest supported version. + // For system binaries (QEMU), queries the installed binary for its version. + GetVersion(p *paths.Paths) (string, error) + + // StartVM launches the hypervisor process and boots the VM. + // Returns the process ID and a Hypervisor client for subsequent operations. + StartVM(ctx context.Context, p *paths.Paths, version string, socketPath string, config VMConfig) (pid int, hv Hypervisor, err error) + + // RestoreVM starts the hypervisor and restores VM state from a snapshot. + // Each hypervisor implements its own restore flow: + // - Cloud Hypervisor: starts process, calls Restore API + // - QEMU: would start with -incoming or -loadvm flags (not yet implemented) + // Returns the process ID and a Hypervisor client. The VM is in paused state after restore. + RestoreVM(ctx context.Context, p *paths.Paths, version string, socketPath string, snapshotPath string) (pid int, hv Hypervisor, err error) +} + +// Hypervisor defines the interface for VM control operations. +// A Hypervisor client is returned by VMStarter.StartVM after the VM is running. +type Hypervisor interface { + // DeleteVM sends a graceful shutdown signal to the guest. DeleteVM(ctx context.Context) error // Shutdown stops the VMM process gracefully. @@ -71,10 +94,6 @@ type Hypervisor interface { // Check Capabilities().SupportsSnapshot before calling. Snapshot(ctx context.Context, destPath string) error - // Restore loads a VM from a snapshot at the given path. - // Check Capabilities().SupportsSnapshot before calling. - Restore(ctx context.Context, sourcePath string) error - // ResizeMemory changes the VM's memory allocation. // Check Capabilities().SupportsHotplugMemory before calling. ResizeMemory(ctx context.Context, bytes int64) error @@ -107,21 +126,38 @@ type Capabilities struct { SupportsGPUPassthrough bool } -// ProcessManager handles hypervisor process lifecycle. -// This is separate from the Hypervisor interface because process management -// happens before/after the VMM socket is available. -type ProcessManager interface { - // SocketName returns the socket filename for this hypervisor. - // Uses short names to stay within Unix socket path length limits (SUN_LEN ~108 bytes). - SocketName() string +// VsockDialer provides vsock connectivity to a guest VM. +// Each hypervisor implements its own connection method: +// - Cloud Hypervisor: Unix socket file + text handshake protocol +// - QEMU: Kernel AF_VSOCK with CID-based addressing +type VsockDialer interface { + // DialVsock connects to the guest on the specified port. + // Returns a net.Conn that can be used for bidirectional communication. + DialVsock(ctx context.Context, port int) (net.Conn, error) + + // Key returns a unique identifier for this dialer, used for connection pooling. + Key() string +} - // StartProcess launches the hypervisor process. - // Returns the process ID of the started hypervisor. - StartProcess(ctx context.Context, p *paths.Paths, version string, socketPath string) (pid int, err error) +// VsockDialerFactory creates VsockDialer instances for a hypervisor type. +type VsockDialerFactory func(vsockSocket string, vsockCID int64) VsockDialer - // StartProcessWithArgs launches the hypervisor process with extra arguments. - StartProcessWithArgs(ctx context.Context, p *paths.Paths, version string, socketPath string, extraArgs []string) (pid int, err error) +// vsockDialerFactories maps hypervisor types to their dialer factories. +// Registered by each hypervisor package's init() function. +var vsockDialerFactories = make(map[Type]VsockDialerFactory) - // GetBinaryPath returns the path to the hypervisor binary, extracting if needed. - GetBinaryPath(p *paths.Paths, version string) (string, error) +// RegisterVsockDialerFactory registers a VsockDialer factory for a hypervisor type. +// Called by each hypervisor implementation's init() function. +func RegisterVsockDialerFactory(t Type, factory VsockDialerFactory) { + vsockDialerFactories[t] = factory +} + +// NewVsockDialer creates a VsockDialer for the given hypervisor type. +// Returns an error if the hypervisor type doesn't have a registered factory. +func NewVsockDialer(hvType Type, vsockSocket string, vsockCID int64) (VsockDialer, error) { + factory, ok := vsockDialerFactories[hvType] + if !ok { + return nil, fmt.Errorf("no vsock dialer registered for hypervisor type: %s", hvType) + } + return factory(vsockSocket, vsockCID), nil } diff --git a/lib/hypervisor/qemu/config.go b/lib/hypervisor/qemu/config.go new file mode 100644 index 00000000..5f3b457f --- /dev/null +++ b/lib/hypervisor/qemu/config.go @@ -0,0 +1,91 @@ +package qemu + +import ( + "fmt" + "runtime" + "strconv" + + "github.com/onkernel/hypeman/lib/hypervisor" +) + +// BuildArgs converts hypervisor.VMConfig to QEMU command-line arguments. +func BuildArgs(cfg hypervisor.VMConfig) []string { + args := make([]string, 0, 64) + + // Machine type with KVM acceleration (arch-specific) + args = append(args, "-machine", machineType()) + + // CPU configuration + args = append(args, "-cpu", "host") + args = append(args, "-smp", strconv.Itoa(cfg.VCPUs)) + + // Memory configuration + memMB := cfg.MemoryBytes / (1024 * 1024) + args = append(args, "-m", fmt.Sprintf("%dM", memMB)) + + // Kernel and initrd + if cfg.KernelPath != "" { + args = append(args, "-kernel", cfg.KernelPath) + } + if cfg.InitrdPath != "" { + args = append(args, "-initrd", cfg.InitrdPath) + } + if cfg.KernelArgs != "" { + args = append(args, "-append", cfg.KernelArgs) + } + + // Disk configuration + for i, disk := range cfg.Disks { + driveOpts := fmt.Sprintf("file=%s,format=raw,if=none,id=drive%d", disk.Path, i) + if disk.Readonly { + driveOpts += ",readonly=on" + } + args = append(args, "-drive", driveOpts) + args = append(args, "-device", fmt.Sprintf("virtio-blk-pci,drive=drive%d", i)) + } + + // Network configuration + for i, net := range cfg.Networks { + netdevOpts := fmt.Sprintf("tap,id=net%d,ifname=%s,script=no,downscript=no", i, net.TAPDevice) + args = append(args, "-netdev", netdevOpts) + + deviceOpts := fmt.Sprintf("virtio-net-pci,netdev=net%d,mac=%s", i, net.MAC) + args = append(args, "-device", deviceOpts) + } + + // Vsock configuration + if cfg.VsockCID > 0 { + args = append(args, "-device", fmt.Sprintf("vhost-vsock-pci,guest-cid=%d", cfg.VsockCID)) + } + + // PCI device passthrough (GPU, etc.) + for _, pciAddr := range cfg.PCIDevices { + args = append(args, "-device", fmt.Sprintf("vfio-pci,host=%s", pciAddr)) + } + + // Serial console output to file + if cfg.SerialLogPath != "" { + args = append(args, "-serial", fmt.Sprintf("file:%s", cfg.SerialLogPath)) + } else { + args = append(args, "-serial", "stdio") + } + + // No graphics + args = append(args, "-nographic") + + // Disable default devices we don't need + args = append(args, "-nodefaults") + + return args +} + +// machineType returns the QEMU machine type for the host architecture. +func machineType() string { + switch runtime.GOARCH { + case "arm64": + return "virt,accel=kvm" + default: + // x86_64 and others use q35 + return "q35,accel=kvm" + } +} diff --git a/lib/hypervisor/qemu/config_test.go b/lib/hypervisor/qemu/config_test.go new file mode 100644 index 00000000..bc6e9be3 --- /dev/null +++ b/lib/hypervisor/qemu/config_test.go @@ -0,0 +1,161 @@ +package qemu + +import ( + "testing" + + "github.com/onkernel/hypeman/lib/hypervisor" + "github.com/stretchr/testify/assert" +) + +func TestBuildArgs_Basic(t *testing.T) { + cfg := hypervisor.VMConfig{ + VCPUs: 2, + MemoryBytes: 1024 * 1024 * 1024, // 1GB + KernelPath: "/path/to/vmlinux", + InitrdPath: "/path/to/initrd", + KernelArgs: "console=ttyS0", + } + + args := BuildArgs(cfg) + + // Check machine type (arch-dependent) + assert.Contains(t, args, "-machine") + assert.Contains(t, args, machineType()) + + // Check CPU + assert.Contains(t, args, "-cpu") + assert.Contains(t, args, "host") + assert.Contains(t, args, "-smp") + assert.Contains(t, args, "2") + + // Check memory + assert.Contains(t, args, "-m") + assert.Contains(t, args, "1024M") + + // Check kernel + assert.Contains(t, args, "-kernel") + assert.Contains(t, args, "/path/to/vmlinux") + + // Check initrd + assert.Contains(t, args, "-initrd") + assert.Contains(t, args, "/path/to/initrd") + + // Check kernel args + assert.Contains(t, args, "-append") + assert.Contains(t, args, "console=ttyS0") + + // Check nographic + assert.Contains(t, args, "-nographic") +} + +func TestBuildArgs_Disks(t *testing.T) { + cfg := hypervisor.VMConfig{ + VCPUs: 1, + MemoryBytes: 512 * 1024 * 1024, + Disks: []hypervisor.DiskConfig{ + {Path: "/path/to/rootfs.ext4", Readonly: false}, + {Path: "/path/to/data.ext4", Readonly: true}, + }, + } + + args := BuildArgs(cfg) + + // Check first disk (writable) + assert.Contains(t, args, "-drive") + foundDrive0 := false + foundDrive1 := false + for _, arg := range args { + if arg == "file=/path/to/rootfs.ext4,format=raw,if=none,id=drive0" { + foundDrive0 = true + } + if arg == "file=/path/to/data.ext4,format=raw,if=none,id=drive1,readonly=on" { + foundDrive1 = true + } + } + assert.True(t, foundDrive0, "Expected writable drive0") + assert.True(t, foundDrive1, "Expected readonly drive1") + + // Check virtio-blk devices + assert.Contains(t, args, "virtio-blk-pci,drive=drive0") + assert.Contains(t, args, "virtio-blk-pci,drive=drive1") +} + +func TestBuildArgs_Network(t *testing.T) { + cfg := hypervisor.VMConfig{ + VCPUs: 1, + MemoryBytes: 512 * 1024 * 1024, + Networks: []hypervisor.NetworkConfig{ + { + TAPDevice: "tap0", + MAC: "02:00:00:ab:cd:ef", + IP: "192.168.1.10", + Netmask: "255.255.255.0", + }, + }, + } + + args := BuildArgs(cfg) + + // Check netdev + foundNetdev := false + for _, arg := range args { + if arg == "tap,id=net0,ifname=tap0,script=no,downscript=no" { + foundNetdev = true + } + } + assert.True(t, foundNetdev, "Expected tap netdev") + + // Check virtio-net device with MAC + assert.Contains(t, args, "virtio-net-pci,netdev=net0,mac=02:00:00:ab:cd:ef") +} + +func TestBuildArgs_Vsock(t *testing.T) { + cfg := hypervisor.VMConfig{ + VCPUs: 1, + MemoryBytes: 512 * 1024 * 1024, + VsockCID: 123, + } + + args := BuildArgs(cfg) + + assert.Contains(t, args, "-device") + assert.Contains(t, args, "vhost-vsock-pci,guest-cid=123") +} + +func TestBuildArgs_PCIPassthrough(t *testing.T) { + cfg := hypervisor.VMConfig{ + VCPUs: 1, + MemoryBytes: 512 * 1024 * 1024, + PCIDevices: []string{"0000:01:00.0", "0000:02:00.0"}, + } + + args := BuildArgs(cfg) + + assert.Contains(t, args, "vfio-pci,host=0000:01:00.0") + assert.Contains(t, args, "vfio-pci,host=0000:02:00.0") +} + +func TestBuildArgs_SerialLog(t *testing.T) { + cfg := hypervisor.VMConfig{ + VCPUs: 1, + MemoryBytes: 512 * 1024 * 1024, + SerialLogPath: "/var/log/app.log", + } + + args := BuildArgs(cfg) + + assert.Contains(t, args, "-serial") + assert.Contains(t, args, "file:/var/log/app.log") +} + +func TestBuildArgs_NoSerialLog(t *testing.T) { + cfg := hypervisor.VMConfig{ + VCPUs: 1, + MemoryBytes: 512 * 1024 * 1024, + } + + args := BuildArgs(cfg) + + assert.Contains(t, args, "-serial") + assert.Contains(t, args, "stdio") +} diff --git a/lib/hypervisor/qemu/pool.go b/lib/hypervisor/qemu/pool.go new file mode 100644 index 00000000..398ce63e --- /dev/null +++ b/lib/hypervisor/qemu/pool.go @@ -0,0 +1,59 @@ +package qemu + +import ( + "sync" +) + +// clientPool manages singleton QMP connections per socket path. +// QEMU's QMP socket only allows one connection at a time, so we must +// reuse existing connections rather than creating new ones. +var clientPool = struct { + sync.RWMutex + clients map[string]*QEMU +}{ + clients: make(map[string]*QEMU), +} + +// GetOrCreate returns an existing QEMU client for the socket path, +// or creates a new one if none exists. +func GetOrCreate(socketPath string) (*QEMU, error) { + // Try read lock first for existing connection + clientPool.RLock() + if client, ok := clientPool.clients[socketPath]; ok { + clientPool.RUnlock() + return client, nil + } + clientPool.RUnlock() + + // Need to create new connection - acquire write lock + clientPool.Lock() + defer clientPool.Unlock() + + // Double-check after acquiring write lock + if client, ok := clientPool.clients[socketPath]; ok { + return client, nil + } + + // Create new client + client, err := newClient(socketPath) + if err != nil { + return nil, err + } + + clientPool.clients[socketPath] = client + return client, nil +} + +// Remove closes and removes a client from the pool. +// Called automatically on errors to allow fresh reconnection. +// Close is done asynchronously to avoid blocking if the connection is in a bad state. +func Remove(socketPath string) { + clientPool.Lock() + defer clientPool.Unlock() + + if client, ok := clientPool.clients[socketPath]; ok { + delete(clientPool.clients, socketPath) + // Close asynchronously to avoid blocking on stuck connections + go client.client.Close() + } +} diff --git a/lib/hypervisor/qemu/process.go b/lib/hypervisor/qemu/process.go new file mode 100644 index 00000000..a0ebfb90 --- /dev/null +++ b/lib/hypervisor/qemu/process.go @@ -0,0 +1,228 @@ +// Package qemu implements the hypervisor.Hypervisor interface for QEMU. +package qemu + +import ( + "context" + "fmt" + "net" + "os" + "os/exec" + "path/filepath" + "regexp" + "runtime" + "syscall" + "time" + + "github.com/onkernel/hypeman/lib/hypervisor" + "github.com/onkernel/hypeman/lib/paths" + "gvisor.dev/gvisor/pkg/cleanup" +) + +func init() { + hypervisor.RegisterSocketName(hypervisor.TypeQEMU, "qemu.sock") +} + +// Starter implements hypervisor.VMStarter for QEMU. +type Starter struct{} + +// NewStarter creates a new QEMU starter. +func NewStarter() *Starter { + return &Starter{} +} + +// Verify Starter implements the interface +var _ hypervisor.VMStarter = (*Starter)(nil) + +// SocketName returns the socket filename for QEMU. +func (s *Starter) SocketName() string { + return "qemu.sock" +} + +// GetBinaryPath returns the path to the QEMU binary. +// QEMU is expected to be installed on the system. +func (s *Starter) GetBinaryPath(p *paths.Paths, version string) (string, error) { + binaryName, err := qemuBinaryName() + if err != nil { + return "", err + } + + candidates := []string{ + "/usr/bin/" + binaryName, + "/usr/local/bin/" + binaryName, + } + + for _, path := range candidates { + if _, err := os.Stat(path); err == nil { + return path, nil + } + } + + if path, err := exec.LookPath(binaryName); err == nil { + return path, nil + } + + return "", fmt.Errorf("%s not found; install with: %s", binaryName, qemuInstallHint()) +} + +// GetVersion returns the version of the installed QEMU binary. +// Parses the output of "qemu-system-* --version" to extract the version string. +func (s *Starter) GetVersion(p *paths.Paths) (string, error) { + binaryPath, err := s.GetBinaryPath(p, "") + if err != nil { + return "", err + } + + cmd := exec.Command(binaryPath, "--version") + output, err := cmd.Output() + if err != nil { + return "", fmt.Errorf("get qemu version: %w", err) + } + + // Parse "QEMU emulator version 8.2.0 (Debian ...)" -> "8.2.0" + re := regexp.MustCompile(`version (\d+\.\d+(?:\.\d+)?)`) + matches := re.FindStringSubmatch(string(output)) + if len(matches) >= 2 { + return matches[1], nil + } + + return "", fmt.Errorf("could not parse QEMU version from: %s", string(output)) +} + +// StartVM launches QEMU with the VM configuration and returns a Hypervisor client. +// QEMU receives all configuration via command-line arguments at process start. +func (s *Starter) StartVM(ctx context.Context, p *paths.Paths, version string, socketPath string, config hypervisor.VMConfig) (int, hypervisor.Hypervisor, error) { + // Get binary path + binaryPath, err := s.GetBinaryPath(p, version) + if err != nil { + return 0, nil, fmt.Errorf("get binary: %w", err) + } + + // Check if socket is already in use + if isSocketInUse(socketPath) { + return 0, nil, fmt.Errorf("socket already in use, QEMU may be running at %s", socketPath) + } + + // Remove stale socket if exists + os.Remove(socketPath) + + // Build command arguments: QMP socket + VM configuration + args := []string{ + "-chardev", fmt.Sprintf("socket,id=qmp,path=%s,server=on,wait=off", socketPath), + "-mon", "chardev=qmp,mode=control", + } + // Append VM configuration as command-line arguments + args = append(args, BuildArgs(config)...) + + // Create command + cmd := exec.Command(binaryPath, args...) + + // Daemonize: detach from parent process group + cmd.SysProcAttr = &syscall.SysProcAttr{ + Setpgid: true, + } + + // Redirect stdout/stderr to VMM log file + instanceDir := filepath.Dir(socketPath) + logsDir := filepath.Join(instanceDir, "logs") + if err := os.MkdirAll(logsDir, 0755); err != nil { + return 0, nil, fmt.Errorf("create logs directory: %w", err) + } + + vmmLogFile, err := os.OpenFile( + filepath.Join(logsDir, "vmm.log"), + os.O_CREATE|os.O_WRONLY|os.O_APPEND, + 0644, + ) + if err != nil { + return 0, nil, fmt.Errorf("create vmm log: %w", err) + } + defer vmmLogFile.Close() + + cmd.Stdout = vmmLogFile + cmd.Stderr = vmmLogFile + + if err := cmd.Start(); err != nil { + return 0, nil, fmt.Errorf("start qemu: %w", err) + } + + pid := cmd.Process.Pid + + // Setup cleanup to kill the process if subsequent steps fail + cu := cleanup.Make(func() { + syscall.Kill(pid, syscall.SIGKILL) + }) + defer cu.Clean() + + // Wait for socket to be ready + if err := waitForSocket(socketPath, 10*time.Second); err != nil { + vmmLogPath := filepath.Join(logsDir, "vmm.log") + if logData, readErr := os.ReadFile(vmmLogPath); readErr == nil && len(logData) > 0 { + return 0, nil, fmt.Errorf("%w; vmm.log: %s", err, string(logData)) + } + return 0, nil, err + } + + // Create QMP client + hv, err := New(socketPath) + if err != nil { + return 0, nil, fmt.Errorf("create client: %w", err) + } + + // Success - release cleanup to prevent killing the process + cu.Release() + return pid, hv, nil +} + +// RestoreVM starts QEMU and restores VM state from a snapshot. +// Not yet implemented for QEMU. +func (s *Starter) RestoreVM(ctx context.Context, p *paths.Paths, version string, socketPath string, snapshotPath string) (int, hypervisor.Hypervisor, error) { + return 0, nil, fmt.Errorf("restore not supported by QEMU implementation") +} + +// qemuBinaryName returns the QEMU binary name for the host architecture. +func qemuBinaryName() (string, error) { + switch runtime.GOARCH { + case "amd64": + return "qemu-system-x86_64", nil + case "arm64": + return "qemu-system-aarch64", nil + default: + return "", fmt.Errorf("unsupported architecture: %s", runtime.GOARCH) + } +} + +// qemuInstallHint returns package installation hints for the current architecture. +func qemuInstallHint() string { + switch runtime.GOARCH { + case "amd64": + return "apt install qemu-system-x86 (Debian/Ubuntu) or dnf install qemu-system-x86-core (Fedora)" + case "arm64": + return "apt install qemu-system-arm (Debian/Ubuntu) or dnf install qemu-system-aarch64-core (Fedora)" + default: + return "install QEMU for your platform" + } +} + +// isSocketInUse checks if a Unix socket is actively being used +func isSocketInUse(socketPath string) bool { + conn, err := net.DialTimeout("unix", socketPath, 100*time.Millisecond) + if err != nil { + return false + } + conn.Close() + return true +} + +// waitForSocket waits for the QMP socket to become available +func waitForSocket(socketPath string, timeout time.Duration) error { + deadline := time.Now().Add(timeout) + for time.Now().Before(deadline) { + conn, err := net.DialTimeout("unix", socketPath, 100*time.Millisecond) + if err == nil { + conn.Close() + return nil + } + time.Sleep(50 * time.Millisecond) + } + return fmt.Errorf("timeout waiting for socket") +} diff --git a/lib/hypervisor/qemu/process_test.go b/lib/hypervisor/qemu/process_test.go new file mode 100644 index 00000000..ff8e93a3 --- /dev/null +++ b/lib/hypervisor/qemu/process_test.go @@ -0,0 +1,100 @@ +package qemu + +import ( + "os/exec" + "regexp" + "testing" + + "github.com/onkernel/hypeman/lib/paths" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +// TestGetVersion_Integration is an integration test that verifies GetVersion +// works correctly with the actual QEMU binary installed on the system. +func TestGetVersion_Integration(t *testing.T) { + // Skip if QEMU is not installed + binaryName, err := qemuBinaryName() + if err != nil { + t.Skipf("Skipping test: %v", err) + } + + _, err = exec.LookPath(binaryName) + if err != nil { + t.Skipf("Skipping test: QEMU binary %s not found in PATH", binaryName) + } + + // Create starter and get version + starter := NewStarter() + tmpDir := t.TempDir() + p := paths.New(tmpDir) + + version, err := starter.GetVersion(p) + require.NoError(t, err, "GetVersion should not return an error") + + // Verify version is not empty + assert.NotEmpty(t, version, "Version should not be empty") + + // Verify version matches expected format (e.g., "8.2.0", "9.0", "7.2.1") + versionPattern := regexp.MustCompile(`^\d+\.\d+(\.\d+)?$`) + assert.Regexp(t, versionPattern, version, "Version should match pattern X.Y or X.Y.Z") + + t.Logf("Detected QEMU version: %s", version) +} + +// TestGetVersion_ParsesVersionCorrectly tests the version parsing logic +// with various version string formats. +func TestGetVersion_ParsesVersionCorrectly(t *testing.T) { + tests := []struct { + name string + output string + expected string + wantErr bool + }{ + { + name: "debian format", + output: "QEMU emulator version 8.2.0 (Debian 1:8.2.0+dfsg-1)", + expected: "8.2.0", + }, + { + name: "simple format", + output: "QEMU emulator version 9.0.0", + expected: "9.0.0", + }, + { + name: "two part version", + output: "QEMU emulator version 9.0", + expected: "9.0", + }, + { + name: "with git info", + output: "QEMU emulator version 7.2.1 (qemu-7.2.1-1.fc38)", + expected: "7.2.1", + }, + { + name: "invalid format", + output: "Some random output", + wantErr: true, + }, + { + name: "empty output", + output: "", + wantErr: true, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + // Use the same regex as in GetVersion + re := regexp.MustCompile(`version (\d+\.\d+(?:\.\d+)?)`) + matches := re.FindStringSubmatch(tt.output) + + if tt.wantErr { + assert.Less(t, len(matches), 2, "Should not match for invalid input") + } else { + require.GreaterOrEqual(t, len(matches), 2, "Should find version match") + assert.Equal(t, tt.expected, matches[1], "Parsed version should match expected") + } + }) + } +} diff --git a/lib/hypervisor/qemu/qemu.go b/lib/hypervisor/qemu/qemu.go new file mode 100644 index 00000000..10ed70ee --- /dev/null +++ b/lib/hypervisor/qemu/qemu.go @@ -0,0 +1,138 @@ +package qemu + +import ( + "context" + "fmt" + "time" + + "github.com/digitalocean/go-qemu/qemu" + "github.com/onkernel/hypeman/lib/hypervisor" +) + +// QEMU implements hypervisor.Hypervisor for QEMU VMM. +type QEMU struct { + client *Client + socketPath string // for self-removal from pool on error +} + +// New returns a QEMU client for the given socket path. +// Uses a connection pool to ensure only one connection per socket exists. +func New(socketPath string) (*QEMU, error) { + return GetOrCreate(socketPath) +} + +// newClient creates a new QEMU client (internal, used by pool). +func newClient(socketPath string) (*QEMU, error) { + client, err := NewClient(socketPath) + if err != nil { + return nil, fmt.Errorf("create qemu client: %w", err) + } + return &QEMU{client: client, socketPath: socketPath}, nil +} + +// Verify QEMU implements the interface +var _ hypervisor.Hypervisor = (*QEMU)(nil) + +// Capabilities returns the features supported by QEMU. +func (q *QEMU) Capabilities() hypervisor.Capabilities { + return hypervisor.Capabilities{ + SupportsSnapshot: false, // Not implemented in first pass + SupportsHotplugMemory: false, // Not implemented in first pass + SupportsPause: true, + SupportsVsock: true, + SupportsGPUPassthrough: true, + } +} + +// DeleteVM removes the VM configuration from QEMU. +// This sends a graceful shutdown signal to the guest. +func (q *QEMU) DeleteVM(ctx context.Context) error { + if err := q.client.SystemPowerdown(); err != nil { + Remove(q.socketPath) + return err + } + return nil +} + +// Shutdown stops the QEMU process. +func (q *QEMU) Shutdown(ctx context.Context) error { + if err := q.client.Quit(); err != nil { + Remove(q.socketPath) + return err + } + // Connection is gone after quit, remove from pool + Remove(q.socketPath) + return nil +} + +// GetVMInfo returns current VM state. +func (q *QEMU) GetVMInfo(ctx context.Context) (*hypervisor.VMInfo, error) { + status, err := q.client.Status() + if err != nil { + Remove(q.socketPath) + return nil, fmt.Errorf("query status: %w", err) + } + + // Map qemu.Status to hypervisor.VMState using typed enum comparison + var state hypervisor.VMState + switch status { + case qemu.StatusRunning: + state = hypervisor.StateRunning + case qemu.StatusPaused: + state = hypervisor.StatePaused + case qemu.StatusShutdown: + state = hypervisor.StateShutdown + case qemu.StatusPreLaunch: + state = hypervisor.StateCreated + case qemu.StatusInMigrate, qemu.StatusPostMigrate, qemu.StatusFinishMigrate: + state = hypervisor.StatePaused + case qemu.StatusSuspended: + state = hypervisor.StatePaused + case qemu.StatusGuestPanicked, qemu.StatusIOError, qemu.StatusInternalError, qemu.StatusWatchdog: + // Error states - report as running so caller can investigate + state = hypervisor.StateRunning + default: + state = hypervisor.StateRunning + } + + return &hypervisor.VMInfo{ + State: state, + MemoryActualSize: nil, // Not implemented in first pass + }, nil +} + +// Pause suspends VM execution. +func (q *QEMU) Pause(ctx context.Context) error { + if err := q.client.Stop(); err != nil { + Remove(q.socketPath) + return err + } + return nil +} + +// Resume continues VM execution. +func (q *QEMU) Resume(ctx context.Context) error { + if err := q.client.Continue(); err != nil { + Remove(q.socketPath) + return err + } + return nil +} + +// Snapshot creates a VM snapshot. +// Not implemented in first pass. +func (q *QEMU) Snapshot(ctx context.Context, destPath string) error { + return fmt.Errorf("snapshot not supported by QEMU implementation") +} + +// ResizeMemory changes the VM's memory allocation. +// Not implemented in first pass. +func (q *QEMU) ResizeMemory(ctx context.Context, bytes int64) error { + return fmt.Errorf("memory resize not supported by QEMU implementation") +} + +// ResizeMemoryAndWait changes the VM's memory allocation and waits for it to stabilize. +// Not implemented in first pass. +func (q *QEMU) ResizeMemoryAndWait(ctx context.Context, bytes int64, timeout time.Duration) error { + return fmt.Errorf("memory resize not supported by QEMU implementation") +} diff --git a/lib/hypervisor/qemu/qmp.go b/lib/hypervisor/qemu/qmp.go new file mode 100644 index 00000000..155ef1f4 --- /dev/null +++ b/lib/hypervisor/qemu/qmp.go @@ -0,0 +1,96 @@ +package qemu + +import ( + "fmt" + "time" + + "github.com/digitalocean/go-qemu/qemu" + "github.com/digitalocean/go-qemu/qmp" + "github.com/digitalocean/go-qemu/qmp/raw" +) + +// Client wraps go-qemu's Domain and raw.Monitor with convenience methods. +type Client struct { + domain *qemu.Domain + raw *raw.Monitor + mon *qmp.SocketMonitor +} + +// NewClient creates a new QEMU client connected to the given socket. +func NewClient(socketPath string) (*Client, error) { + mon, err := qmp.NewSocketMonitor("unix", socketPath, 2*time.Second) + if err != nil { + return nil, fmt.Errorf("create socket monitor: %w", err) + } + + if err := mon.Connect(); err != nil { + return nil, fmt.Errorf("connect to qmp: %w", err) + } + + domain, err := qemu.NewDomain(mon, "vm") + if err != nil { + mon.Disconnect() + return nil, fmt.Errorf("create domain: %w", err) + } + + return &Client{ + domain: domain, + raw: raw.NewMonitor(mon), + mon: mon, + }, nil +} + +// Close disconnects from the QMP socket. +func (c *Client) Close() error { + return c.domain.Close() +} + +// Stop pauses VM execution (QMP 'stop' command). +func (c *Client) Stop() error { + return c.raw.Stop() +} + +// Continue resumes VM execution (QMP 'cont' command). +func (c *Client) Continue() error { + return c.raw.Cont() +} + +// Status returns the current VM status as a typed enum. +func (c *Client) Status() (qemu.Status, error) { + return c.domain.Status() +} + +// StatusInfo returns detailed status information from the raw monitor. +func (c *Client) StatusInfo() (raw.StatusInfo, error) { + return c.raw.QueryStatus() +} + +// Quit shuts down QEMU (QMP 'quit' command). +func (c *Client) Quit() error { + return c.raw.Quit() +} + +// SystemPowerdown sends ACPI power button event (graceful shutdown). +func (c *Client) SystemPowerdown() error { + return c.raw.SystemPowerdown() +} + +// SystemReset resets the VM (hard reset). +func (c *Client) SystemReset() error { + return c.raw.SystemReset() +} + +// Version returns the QEMU version string. +func (c *Client) Version() (string, error) { + return c.domain.Version() +} + +// Events returns a channel for receiving QEMU events. +func (c *Client) Events() (chan qmp.Event, chan struct{}, error) { + return c.domain.Events() +} + +// Run executes a raw QMP command (for commands not yet wrapped). +func (c *Client) Run(cmd qmp.Command) ([]byte, error) { + return c.domain.Run(cmd) +} diff --git a/lib/hypervisor/qemu/qmp_test.go b/lib/hypervisor/qemu/qmp_test.go new file mode 100644 index 00000000..16d8e2a2 --- /dev/null +++ b/lib/hypervisor/qemu/qmp_test.go @@ -0,0 +1,79 @@ +package qemu + +import ( + "testing" + + "github.com/digitalocean/go-qemu/qemu" + "github.com/digitalocean/go-qemu/qmp/raw" + "github.com/stretchr/testify/assert" +) + +func TestStatusMapping(t *testing.T) { + // Test that qemu.Status values are properly defined + tests := []struct { + name string + status qemu.Status + }{ + {"running", qemu.StatusRunning}, + {"paused", qemu.StatusPaused}, + {"shutdown", qemu.StatusShutdown}, + {"prelaunch", qemu.StatusPreLaunch}, + {"in-migrate", qemu.StatusInMigrate}, + {"post-migrate", qemu.StatusPostMigrate}, + {"finish-migrate", qemu.StatusFinishMigrate}, + {"suspended", qemu.StatusSuspended}, + {"guest-panicked", qemu.StatusGuestPanicked}, + {"io-error", qemu.StatusIOError}, + {"internal-error", qemu.StatusInternalError}, + {"watchdog", qemu.StatusWatchdog}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + // Verify the status is a valid enum value (not zero except for Debug) + // This ensures we're using the correct constants from go-qemu + assert.NotEqual(t, qemu.Status(-1), tt.status, "status should be valid") + }) + } +} + +func TestRunStateMapping(t *testing.T) { + // Test that raw.RunState values are properly defined + tests := []struct { + name string + state raw.RunState + }{ + {"running", raw.RunStateRunning}, + {"paused", raw.RunStatePaused}, + {"shutdown", raw.RunStateShutdown}, + {"prelaunch", raw.RunStatePrelaunch}, + {"inmigrate", raw.RunStateInmigrate}, + {"postmigrate", raw.RunStatePostmigrate}, + {"finish-migrate", raw.RunStateFinishMigrate}, + {"suspended", raw.RunStateSuspended}, + {"guest-panicked", raw.RunStateGuestPanicked}, + {"io-error", raw.RunStateIOError}, + {"internal-error", raw.RunStateInternalError}, + {"watchdog", raw.RunStateWatchdog}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + // Verify the state is a valid enum value + assert.NotEqual(t, raw.RunState(-1), tt.state, "state should be valid") + }) + } +} + +func TestStatusInfoFields(t *testing.T) { + // Test that StatusInfo has the expected structure + info := raw.StatusInfo{ + Running: true, + Singlestep: false, + Status: raw.RunStateRunning, + } + + assert.True(t, info.Running) + assert.False(t, info.Singlestep) + assert.Equal(t, raw.RunStateRunning, info.Status) +} diff --git a/lib/hypervisor/qemu/vsock.go b/lib/hypervisor/qemu/vsock.go new file mode 100644 index 00000000..6ee7c54d --- /dev/null +++ b/lib/hypervisor/qemu/vsock.go @@ -0,0 +1,254 @@ +package qemu + +import ( + "context" + "fmt" + "io" + "log/slog" + "net" + "time" + + "golang.org/x/sys/unix" + + "github.com/onkernel/hypeman/lib/hypervisor" +) + +const ( + // vsockDialTimeout is the timeout for connecting via AF_VSOCK + vsockDialTimeout = 5 * time.Second +) + +func init() { + hypervisor.RegisterVsockDialerFactory(hypervisor.TypeQEMU, NewVsockDialer) +} + +// VsockDialer implements hypervisor.VsockDialer for QEMU. +// QEMU with vhost-vsock-pci uses the kernel's native AF_VSOCK socket family. +// Connections are made using the guest's CID (Context ID) and port number. +type VsockDialer struct { + cid uint32 +} + +// NewVsockDialer creates a new VsockDialer for QEMU. +// The vsockSocket parameter is unused for QEMU (it uses CID instead). +// The vsockCID is the guest's Context ID assigned via vhost-vsock-pci. +func NewVsockDialer(vsockSocket string, vsockCID int64) hypervisor.VsockDialer { + return &VsockDialer{ + cid: uint32(vsockCID), + } +} + +// Key returns a unique identifier for this dialer, used for connection pooling. +func (d *VsockDialer) Key() string { + return fmt.Sprintf("qemu:%d", d.cid) +} + +// DialVsock connects to the guest on the specified port using AF_VSOCK. +// This uses the kernel's vsock infrastructure with the guest's CID. +func (d *VsockDialer) DialVsock(ctx context.Context, port int) (net.Conn, error) { + slog.DebugContext(ctx, "connecting to vsock via AF_VSOCK", "cid", d.cid, "port", port) + + // Create AF_VSOCK socket + fd, err := unix.Socket(unix.AF_VSOCK, unix.SOCK_STREAM, 0) + if err != nil { + return nil, fmt.Errorf("create vsock socket: %w", err) + } + + // Set up the sockaddr for the guest + sockaddr := &unix.SockaddrVM{ + CID: d.cid, + Port: uint32(port), + } + + // Use context deadline or default timeout + dialTimeout := vsockDialTimeout + if deadline, ok := ctx.Deadline(); ok { + if remaining := time.Until(deadline); remaining < dialTimeout { + dialTimeout = remaining + } + } + + // Set socket to non-blocking for timeout support + if err := unix.SetNonblock(fd, true); err != nil { + unix.Close(fd) + return nil, fmt.Errorf("set non-blocking: %w", err) + } + + // Attempt to connect + err = unix.Connect(fd, sockaddr) + if err != nil { + if err != unix.EINPROGRESS { + unix.Close(fd) + return nil, fmt.Errorf("connect to vsock cid=%d port=%d: %w", d.cid, port, err) + } + + // Wait for connection to complete using poll + deadline := time.Now().Add(dialTimeout) + for { + remaining := time.Until(deadline) + if remaining <= 0 { + unix.Close(fd) + return nil, fmt.Errorf("connect to vsock cid=%d port=%d: timeout after %v", d.cid, port, dialTimeout) + } + + // Poll for write readiness (indicates connection complete) + pollFds := []unix.PollFd{{ + Fd: int32(fd), + Events: unix.POLLOUT, + }} + + timeoutMs := int(remaining.Milliseconds()) + if timeoutMs < 1 { + timeoutMs = 1 + } + + n, err := unix.Poll(pollFds, timeoutMs) + if err != nil { + if err == unix.EINTR { + continue // Interrupted, retry + } + unix.Close(fd) + return nil, fmt.Errorf("poll vsock: %w", err) + } + + if n > 0 { + // Check for connection errors + errno, err := unix.GetsockoptInt(fd, unix.SOL_SOCKET, unix.SO_ERROR) + if err != nil { + unix.Close(fd) + return nil, fmt.Errorf("getsockopt: %w", err) + } + if errno != 0 { + unix.Close(fd) + return nil, fmt.Errorf("connect to vsock cid=%d port=%d: %w", d.cid, port, unix.Errno(errno)) + } + break // Connection successful + } + } + } + + // Set back to blocking mode for normal I/O + if err := unix.SetNonblock(fd, false); err != nil { + unix.Close(fd) + return nil, fmt.Errorf("set blocking: %w", err) + } + + slog.DebugContext(ctx, "vsock connection established", "cid", d.cid, "port", port) + + // Wrap the file descriptor in a net.Conn + return newVsockConn(fd, d.cid, uint32(port)) +} + +// vsockConn wraps a vsock file descriptor as a net.Conn +type vsockConn struct { + fd int + localCID uint32 + localPort uint32 + remoteCID uint32 + remotePort uint32 +} + +func newVsockConn(fd int, remoteCID, remotePort uint32) (*vsockConn, error) { + return &vsockConn{ + fd: fd, + localCID: unix.VMADDR_CID_HOST, + localPort: 0, // ephemeral + remoteCID: remoteCID, + remotePort: remotePort, + }, nil +} + +func (c *vsockConn) Read(b []byte) (int, error) { + n, err := unix.Read(c.fd, b) + // Ensure we never return negative n (violates io.Reader contract) + // This can happen when the vsock fd becomes invalid (VM died) + if n < 0 { + if err == nil { + err = io.EOF + } + return 0, err + } + return n, err +} + +func (c *vsockConn) Write(b []byte) (int, error) { + n, err := unix.Write(c.fd, b) + // Ensure we never return negative n (violates io.Writer contract) + // This can happen when the vsock fd becomes invalid (VM died) + if n < 0 { + if err == nil { + err = io.ErrClosedPipe + } + return 0, err + } + return n, err +} + +func (c *vsockConn) Close() error { + return unix.Close(c.fd) +} + +func (c *vsockConn) LocalAddr() net.Addr { + return &vsockAddr{cid: c.localCID, port: c.localPort} +} + +func (c *vsockConn) RemoteAddr() net.Addr { + return &vsockAddr{cid: c.remoteCID, port: c.remotePort} +} + +func (c *vsockConn) SetDeadline(t time.Time) error { + if t.IsZero() { + // Clear deadlines + if err := c.SetReadDeadline(t); err != nil { + return err + } + return c.SetWriteDeadline(t) + } + timeout := time.Until(t) + if timeout < 0 { + timeout = 0 + } + tv := unix.NsecToTimeval(timeout.Nanoseconds()) + if err := unix.SetsockoptTimeval(c.fd, unix.SOL_SOCKET, unix.SO_RCVTIMEO, &tv); err != nil { + return err + } + return unix.SetsockoptTimeval(c.fd, unix.SOL_SOCKET, unix.SO_SNDTIMEO, &tv) +} + +func (c *vsockConn) SetReadDeadline(t time.Time) error { + var tv unix.Timeval + if !t.IsZero() { + timeout := time.Until(t) + if timeout < 0 { + timeout = 0 + } + tv = unix.NsecToTimeval(timeout.Nanoseconds()) + } + return unix.SetsockoptTimeval(c.fd, unix.SOL_SOCKET, unix.SO_RCVTIMEO, &tv) +} + +func (c *vsockConn) SetWriteDeadline(t time.Time) error { + var tv unix.Timeval + if !t.IsZero() { + timeout := time.Until(t) + if timeout < 0 { + timeout = 0 + } + tv = unix.NsecToTimeval(timeout.Nanoseconds()) + } + return unix.SetsockoptTimeval(c.fd, unix.SOL_SOCKET, unix.SO_SNDTIMEO, &tv) +} + +// vsockAddr implements net.Addr for vsock addresses +type vsockAddr struct { + cid uint32 + port uint32 +} + +func (a *vsockAddr) Network() string { + return "vsock" +} + +func (a *vsockAddr) String() string { + return fmt.Sprintf("%d:%d", a.cid, a.port) +} diff --git a/lib/instances/create.go b/lib/instances/create.go index 77b40ed7..3938fff8 100644 --- a/lib/instances/create.go +++ b/lib/instances/create.go @@ -15,8 +15,8 @@ import ( "github.com/onkernel/hypeman/lib/logger" "github.com/onkernel/hypeman/lib/network" "github.com/onkernel/hypeman/lib/system" - "github.com/onkernel/hypeman/lib/vmm" "github.com/onkernel/hypeman/lib/volumes" + "go.opentelemetry.io/otel/attribute" "go.opentelemetry.io/otel/trace" "gvisor.dev/gvisor/pkg/cleanup" ) @@ -203,11 +203,32 @@ func (m *manager) createInstance( kernelVer := m.systemManager.GetDefaultKernelVersion() // 9. Get process manager for hypervisor type (needed for socket name) - hvType := hypervisor.TypeCloudHypervisor - pm, err := m.getProcessManager(hvType) + hvType := req.Hypervisor + if hvType == "" { + hvType = m.defaultHypervisor + } + + // Enrich logger and trace span with hypervisor type + log = log.With("hypervisor", string(hvType)) + ctx = logger.AddToContext(ctx, log) + if m.metrics != nil && m.metrics.tracer != nil { + span := trace.SpanFromContext(ctx) + if span.IsRecording() { + span.SetAttributes(attribute.String("hypervisor", string(hvType))) + } + } + + starter, err := m.getVMStarter(hvType) if err != nil { - log.ErrorContext(ctx, "failed to get process manager", "error", err) - return nil, fmt.Errorf("get process manager: %w", err) + log.ErrorContext(ctx, "failed to get vm starter", "error", err) + return nil, fmt.Errorf("get vm starter for %s: %w", hvType, err) + } + + // Get hypervisor version + hvVersion, err := starter.GetVersion(m.paths) + if err != nil { + log.WarnContext(ctx, "failed to get hypervisor version", "hypervisor", hvType, "error", err) + hvVersion = "unknown" } // 10. Validate, resolve, and auto-bind devices (GPU passthrough) @@ -280,8 +301,8 @@ func (m *manager) createInstance( StoppedAt: nil, KernelVersion: string(kernelVer), HypervisorType: hvType, - HypervisorVersion: string(vmm.V49_0), // Use latest - SocketPath: m.paths.InstanceSocket(id, pm.SocketName()), + HypervisorVersion: hvVersion, + SocketPath: m.paths.InstanceSocket(id, starter.SocketName()), DataDir: m.paths.InstanceDir(id), VsockCID: vsockCID, VsockSocket: vsockSocket, @@ -406,13 +427,13 @@ func (m *manager) createInstance( // Record metrics if m.metrics != nil { - m.recordDuration(ctx, m.metrics.createDuration, start, "success") - m.recordStateTransition(ctx, "stopped", string(StateRunning)) + m.recordDuration(ctx, m.metrics.createDuration, start, "success", hvType) + m.recordStateTransition(ctx, "stopped", string(StateRunning), hvType) } // Return instance with derived state finalInst := m.toInstance(ctx, meta) - log.InfoContext(ctx, "instance created successfully", "instance_id", id, "name", req.Name, "state", finalInst.State) + log.InfoContext(ctx, "instance created successfully", "instance_id", id, "name", req.Name, "state", finalInst.State, "hypervisor", hvType) return &finalInst, nil } @@ -527,27 +548,10 @@ func (m *manager) startAndBootVM( ) error { log := logger.FromContext(ctx) - // Get process manager for this hypervisor type - pm, err := m.getProcessManager(stored.HypervisorType) - if err != nil { - return fmt.Errorf("get process manager: %w", err) - } - - // Start VMM process and capture PID - log.DebugContext(ctx, "starting VMM process", "instance_id", stored.Id, "hypervisor", stored.HypervisorType, "version", stored.HypervisorVersion) - pid, err := pm.StartProcess(ctx, m.paths, stored.HypervisorVersion, stored.SocketPath) + // Get VM starter for this hypervisor type + starter, err := m.getVMStarter(stored.HypervisorType) if err != nil { - return fmt.Errorf("start vmm: %w", err) - } - - // Store the PID for later cleanup - stored.HypervisorPID = &pid - log.DebugContext(ctx, "VMM process started", "instance_id", stored.Id, "pid", pid) - - // Create hypervisor client - hv, err := m.getHypervisor(stored.SocketPath, stored.HypervisorType) - if err != nil { - return fmt.Errorf("create hypervisor client: %w", err) + return fmt.Errorf("get vm starter: %w", err) } // Build VM configuration @@ -557,20 +561,16 @@ func (m *manager) startAndBootVM( return fmt.Errorf("build vm config: %w", err) } - // Create VM in hypervisor - log.DebugContext(ctx, "creating VM in hypervisor", "instance_id", stored.Id) - if err := hv.CreateVM(ctx, vmConfig); err != nil { - return fmt.Errorf("create vm: %w", err) + // Start VM (handles process start, configuration, and boot) + log.DebugContext(ctx, "starting VM", "instance_id", stored.Id, "hypervisor", stored.HypervisorType, "version", stored.HypervisorVersion) + pid, hv, err := starter.StartVM(ctx, m.paths, stored.HypervisorVersion, stored.SocketPath, vmConfig) + if err != nil { + return fmt.Errorf("start vm: %w", err) } - // Transition: Created → Running (boot VM) - log.DebugContext(ctx, "booting VM", "instance_id", stored.Id) - if err := hv.BootVM(ctx); err != nil { - // Try to cleanup - hv.DeleteVM(ctx) - hv.Shutdown(ctx) - return fmt.Errorf("boot vm: %w", err) - } + // Store the PID for later cleanup + stored.HypervisorPID = &pid + log.DebugContext(ctx, "VM started", "instance_id", stored.Id, "pid", pid) // Optional: Expand memory to max if hotplug configured if inst.HotplugSize > 0 && hv.Capabilities().SupportsHotplugMemory { diff --git a/lib/instances/delete.go b/lib/instances/delete.go index d0ddb8cb..5840e7f1 100644 --- a/lib/instances/delete.go +++ b/lib/instances/delete.go @@ -7,6 +7,8 @@ import ( "syscall" "time" + "github.com/onkernel/hypeman/lib/guest" + "github.com/onkernel/hypeman/lib/hypervisor" "github.com/onkernel/hypeman/lib/logger" "github.com/onkernel/hypeman/lib/network" ) @@ -39,7 +41,12 @@ func (m *manager) deleteInstance( } } - // 3. If hypervisor might be running, force kill it + // 3. Close exec gRPC connection before killing hypervisor to prevent panic + if dialer, err := hypervisor.NewVsockDialer(inst.HypervisorType, inst.VsockSocket, inst.VsockCID); err == nil { + guest.CloseConn(dialer.Key()) + } + + // 4. If hypervisor might be running, force kill it // Also attempt kill for StateUnknown since we can't be sure if hypervisor is running if inst.State.RequiresVMM() || inst.State == StateUnknown { log.DebugContext(ctx, "stopping hypervisor", "instance_id", id, "state", inst.State) @@ -50,7 +57,7 @@ func (m *manager) deleteInstance( } } - // 4. Release network allocation + // 5. Release network allocation if inst.NetworkEnabled { log.DebugContext(ctx, "releasing network", "instance_id", id, "network", "default") if err := m.networkManager.ReleaseAllocation(ctx, networkAlloc); err != nil { @@ -59,7 +66,7 @@ func (m *manager) deleteInstance( } } - // 5. Detach and auto-unbind devices from VFIO + // 6. Detach and auto-unbind devices from VFIO if len(inst.Devices) > 0 && m.deviceManager != nil { for _, deviceID := range inst.Devices { log.DebugContext(ctx, "detaching device", "id", id, "device", deviceID) @@ -76,7 +83,7 @@ func (m *manager) deleteInstance( } } - // 5b. Detach volumes + // 6b. Detach volumes if len(inst.Volumes) > 0 { log.DebugContext(ctx, "detaching volumes", "instance_id", id, "count", len(inst.Volumes)) for _, volAttach := range inst.Volumes { @@ -87,7 +94,7 @@ func (m *manager) deleteInstance( } } - // 6. Delete all instance data + // 7. Delete all instance data log.DebugContext(ctx, "deleting instance data", "instance_id", id) if err := m.deleteInstanceData(id); err != nil { log.ErrorContext(ctx, "failed to delete instance data", "instance_id", id, "error", err) diff --git a/lib/instances/exec_test.go b/lib/instances/exec_test.go index 46109309..1efb471e 100644 --- a/lib/instances/exec_test.go +++ b/lib/instances/exec_test.go @@ -10,14 +10,15 @@ import ( "time" "github.com/onkernel/hypeman/lib/guest" + "github.com/onkernel/hypeman/lib/hypervisor" "github.com/onkernel/hypeman/lib/images" "github.com/onkernel/hypeman/lib/paths" "github.com/onkernel/hypeman/lib/system" "github.com/stretchr/testify/require" ) -// waitForGuestAgent polls until guest-agent is ready -func waitForGuestAgent(ctx context.Context, mgr *manager, instanceID string, timeout time.Duration) error { +// waitForExecAgent polls until exec-agent is ready +func waitForExecAgent(ctx context.Context, mgr *manager, instanceID string, timeout time.Duration) error { deadline := time.Now().Add(timeout) for time.Now().Before(deadline) { logs, err := collectLogs(ctx, mgr, instanceID, 100) @@ -89,12 +90,12 @@ func TestExecConcurrent(t *testing.T) { manager.DeleteInstance(ctx, inst.Id) }) - // Wait for guest-agent to be ready (retry here is OK - we're just waiting for startup) - err = waitForGuestAgent(ctx, manager, inst.Id, 15*time.Second) - require.NoError(t, err, "guest-agent should be ready") + // Wait for exec-agent to be ready (retry here is OK - we're just waiting for startup) + err = waitForExecAgent(ctx, manager, inst.Id, 15*time.Second) + require.NoError(t, err, "exec-agent should be ready") - // Verify guest-agent works with a simple command first - _, code, err := execCommand(ctx, inst.VsockSocket, "echo", "ready") + // Verify exec-agent works with a simple command first + _, code, err := execCommand(ctx, inst, "echo", "ready") require.NoError(t, err, "initial exec should work") require.Equal(t, 0, code, "initial exec should succeed") @@ -117,7 +118,7 @@ func TestExecConcurrent(t *testing.T) { for i := 1; i <= numIterations; i++ { // Write (no retry - must work first time) writeCmd := fmt.Sprintf("echo '%d-%d' > %s", workerID, i, filename) - output, code, err := execCommand(ctx, inst.VsockSocket, "/bin/sh", "-c", writeCmd) + output, code, err := execCommand(ctx, inst, "/bin/sh", "-c", writeCmd) if err != nil { errors <- fmt.Errorf("worker %d, iter %d: write error: %w", workerID, i, err) return @@ -128,7 +129,7 @@ func TestExecConcurrent(t *testing.T) { } // Read (no retry - must work first time) - output, code, err = execCommand(ctx, inst.VsockSocket, "cat", filename) + output, code, err = execCommand(ctx, inst, "cat", filename) if err != nil { errors <- fmt.Errorf("worker %d, iter %d: read error: %w", workerID, i, err) return @@ -180,7 +181,7 @@ func TestExecConcurrent(t *testing.T) { // Command that takes ~2 seconds and produces output cmd := fmt.Sprintf("sleep %d && echo 'stream-%d-done'", streamDuration, workerID) - output, code, err := execCommand(ctx, inst.VsockSocket, "/bin/sh", "-c", cmd) + output, code, err := execCommand(ctx, inst, "/bin/sh", "-c", cmd) if err != nil { streamErrors <- fmt.Errorf("stream worker %d: error: %w", workerID, err) return @@ -221,9 +222,12 @@ func TestExecConcurrent(t *testing.T) { t.Log("Phase 3: Testing exec with non-existent command...") // Test without TTY + dialer, err := hypervisor.NewVsockDialer(inst.HypervisorType, inst.VsockSocket, inst.VsockCID) + require.NoError(t, err) + start := time.Now() var stdout, stderr strings.Builder - _, err = guest.ExecIntoInstance(ctx, inst.VsockSocket, guest.ExecOptions{ + _, err = guest.ExecIntoInstance(ctx, dialer, guest.ExecOptions{ Command: []string{"nonexistent_command_asdfasdf"}, Stdout: &stdout, Stderr: &stderr, @@ -240,7 +244,7 @@ func TestExecConcurrent(t *testing.T) { start = time.Now() stdout.Reset() stderr.Reset() - _, err = guest.ExecIntoInstance(ctx, inst.VsockSocket, guest.ExecOptions{ + _, err = guest.ExecIntoInstance(ctx, dialer, guest.ExecOptions{ Command: []string{"nonexistent_command_xyz123"}, Stdout: &stdout, Stderr: &stderr, diff --git a/lib/instances/manager.go b/lib/instances/manager.go index 08980e80..915879d4 100644 --- a/lib/instances/manager.go +++ b/lib/instances/manager.go @@ -8,6 +8,7 @@ import ( "github.com/onkernel/hypeman/lib/devices" "github.com/onkernel/hypeman/lib/hypervisor" "github.com/onkernel/hypeman/lib/hypervisor/cloudhypervisor" + "github.com/onkernel/hypeman/lib/hypervisor/qemu" "github.com/onkernel/hypeman/lib/images" "github.com/onkernel/hypeman/lib/network" "github.com/onkernel/hypeman/lib/paths" @@ -57,12 +58,19 @@ type manager struct { metrics *Metrics // Hypervisor support - processManagers map[hypervisor.Type]hypervisor.ProcessManager + vmStarters map[hypervisor.Type]hypervisor.VMStarter + defaultHypervisor hypervisor.Type // Default hypervisor type when not specified in request } // NewManager creates a new instances manager. // If meter is nil, metrics are disabled. -func NewManager(p *paths.Paths, imageManager images.Manager, systemManager system.Manager, networkManager network.Manager, deviceManager devices.Manager, volumeManager volumes.Manager, limits ResourceLimits, meter metric.Meter, tracer trace.Tracer) Manager { +// defaultHypervisor specifies which hypervisor to use when not specified in requests. +func NewManager(p *paths.Paths, imageManager images.Manager, systemManager system.Manager, networkManager network.Manager, deviceManager devices.Manager, volumeManager volumes.Manager, limits ResourceLimits, defaultHypervisor hypervisor.Type, meter metric.Meter, tracer trace.Tracer) Manager { + // Validate and default the hypervisor type + if defaultHypervisor == "" { + defaultHypervisor = hypervisor.TypeCloudHypervisor + } + m := &manager{ paths: p, imageManager: imageManager, @@ -73,9 +81,11 @@ func NewManager(p *paths.Paths, imageManager images.Manager, systemManager syste limits: limits, instanceLocks: sync.Map{}, hostTopology: detectHostTopology(), // Detect and cache host topology - processManagers: map[hypervisor.Type]hypervisor.ProcessManager{ - hypervisor.TypeCloudHypervisor: cloudhypervisor.NewProcessManager(), + vmStarters: map[hypervisor.Type]hypervisor.VMStarter{ + hypervisor.TypeCloudHypervisor: cloudhypervisor.NewStarter(), + hypervisor.TypeQEMU: qemu.NewStarter(), }, + defaultHypervisor: defaultHypervisor, } // Initialize metrics if meter is provided @@ -90,22 +100,25 @@ func NewManager(p *paths.Paths, imageManager images.Manager, systemManager syste } // getHypervisor creates a hypervisor client for the given socket and type. +// Used for connecting to already-running VMs (e.g., for state queries). func (m *manager) getHypervisor(socketPath string, hvType hypervisor.Type) (hypervisor.Hypervisor, error) { switch hvType { case hypervisor.TypeCloudHypervisor: return cloudhypervisor.New(socketPath) + case hypervisor.TypeQEMU: + return qemu.New(socketPath) default: return nil, fmt.Errorf("unsupported hypervisor type: %s", hvType) } } -// getProcessManager returns the process manager for the given hypervisor type. -func (m *manager) getProcessManager(hvType hypervisor.Type) (hypervisor.ProcessManager, error) { - pm, ok := m.processManagers[hvType] +// getVMStarter returns the VM starter for the given hypervisor type. +func (m *manager) getVMStarter(hvType hypervisor.Type) (hypervisor.VMStarter, error) { + starter, ok := m.vmStarters[hvType] if !ok { - return nil, fmt.Errorf("no process manager for hypervisor type: %s", hvType) + return nil, fmt.Errorf("no VM starter for hypervisor type: %s", hvType) } - return pm, nil + return starter, nil } // getInstanceLock returns or creates a lock for a specific instance diff --git a/lib/instances/manager_test.go b/lib/instances/manager_test.go index 7efb8285..ccd89adc 100644 --- a/lib/instances/manager_test.go +++ b/lib/instances/manager_test.go @@ -57,7 +57,7 @@ func setupTestManager(t *testing.T) (*manager, string) { MaxTotalVcpus: 0, // unlimited MaxTotalMemory: 0, // unlimited } - mgr := NewManager(p, imageManager, systemManager, networkManager, deviceManager, volumeManager, limits, nil, nil).(*manager) + mgr := NewManager(p, imageManager, systemManager, networkManager, deviceManager, volumeManager, limits, "", nil, nil).(*manager) // Register cleanup to kill any orphaned Cloud Hypervisor processes t.Cleanup(func() { @@ -604,13 +604,18 @@ func TestBasicEndToEnd(t *testing.T) { var lastExitCode int var lastErr error + dialer, err := hypervisor.NewVsockDialer(inst.HypervisorType, inst.VsockSocket, inst.VsockCID) + if err != nil { + return "", -1, err + } + for attempt := 0; attempt < 5; attempt++ { if attempt > 0 { time.Sleep(200 * time.Millisecond) } var stdout, stderr bytes.Buffer - exit, err := guest.ExecIntoInstance(ctx, inst.VsockSocket, guest.ExecOptions{ + exit, err := guest.ExecIntoInstance(ctx, dialer, guest.ExecOptions{ Command: command, Stdout: &stdout, Stderr: &stderr, @@ -766,7 +771,7 @@ func TestStorageOperations(t *testing.T) { MaxTotalVcpus: 0, // unlimited MaxTotalMemory: 0, // unlimited } - manager := NewManager(p, imageManager, systemManager, networkManager, deviceManager, volumeManager, limits, nil, nil).(*manager) + manager := NewManager(p, imageManager, systemManager, networkManager, deviceManager, volumeManager, limits, "", nil, nil).(*manager) // Test metadata doesn't exist initially _, err := manager.loadMetadata("nonexistent") diff --git a/lib/instances/metrics.go b/lib/instances/metrics.go index 78901b98..e07c5983 100644 --- a/lib/instances/metrics.go +++ b/lib/instances/metrics.go @@ -4,6 +4,8 @@ import ( "context" "time" + "github.com/onkernel/hypeman/lib/hypervisor" + mw "github.com/onkernel/hypeman/lib/middleware" "go.opentelemetry.io/otel/attribute" "go.opentelemetry.io/otel/metric" "go.opentelemetry.io/otel/trace" @@ -90,13 +92,25 @@ func newInstanceMetrics(meter metric.Meter, tracer trace.Tracer, m *manager) (*M if err != nil { return nil } - stateCounts := make(map[string]int64) + // Count by state and hypervisor combination + type stateHypervisor struct { + state string + hypervisor string + } + counts := make(map[stateHypervisor]int64) for _, inst := range instances { - stateCounts[string(inst.State)]++ + key := stateHypervisor{ + state: string(inst.State), + hypervisor: string(inst.HypervisorType), + } + counts[key]++ } - for state, count := range stateCounts { + for key, count := range counts { o.ObserveInt64(instancesTotal, count, - metric.WithAttributes(attribute.String("state", state))) + metric.WithAttributes( + attribute.String("state", key.state), + attribute.String("hypervisor", key.hypervisor), + )) } return nil }, @@ -117,24 +131,41 @@ func newInstanceMetrics(meter metric.Meter, tracer trace.Tracer, m *manager) (*M }, nil } -// recordDuration records operation duration. -func (m *manager) recordDuration(ctx context.Context, histogram metric.Float64Histogram, start time.Time, status string) { +// getHypervisorFromContext extracts the hypervisor type from the resolved instance in context. +// Returns empty string if not available. +func getHypervisorFromContext(ctx context.Context) string { + if inst := mw.GetResolvedInstance[Instance](ctx); inst != nil { + return string(inst.HypervisorType) + } + return "" +} + +// recordDuration records operation duration with hypervisor label. +func (m *manager) recordDuration(ctx context.Context, histogram metric.Float64Histogram, start time.Time, status string, hvType hypervisor.Type) { if m.metrics == nil { return } duration := time.Since(start).Seconds() - histogram.Record(ctx, duration, - metric.WithAttributes(attribute.String("status", status))) + attrs := []attribute.KeyValue{ + attribute.String("status", status), + } + if hvType != "" { + attrs = append(attrs, attribute.String("hypervisor", string(hvType))) + } + histogram.Record(ctx, duration, metric.WithAttributes(attrs...)) } -// recordStateTransition records a state transition. -func (m *manager) recordStateTransition(ctx context.Context, fromState, toState string) { +// recordStateTransition records a state transition with hypervisor label. +func (m *manager) recordStateTransition(ctx context.Context, fromState, toState string, hvType hypervisor.Type) { if m.metrics == nil { return } - m.metrics.stateTransitions.Add(ctx, 1, - metric.WithAttributes( + attrs := []attribute.KeyValue{ attribute.String("from", fromState), attribute.String("to", toState), - )) + } + if hvType != "" { + attrs = append(attrs, attribute.String("hypervisor", string(hvType))) + } + m.metrics.stateTransitions.Add(ctx, 1, metric.WithAttributes(attrs...)) } diff --git a/lib/instances/network_test.go b/lib/instances/network_test.go index 72dce064..0ad25494 100644 --- a/lib/instances/network_test.go +++ b/lib/instances/network_test.go @@ -9,6 +9,7 @@ import ( "time" "github.com/onkernel/hypeman/lib/guest" + "github.com/onkernel/hypeman/lib/hypervisor" "github.com/onkernel/hypeman/lib/images" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" @@ -115,7 +116,7 @@ func TestCreateInstanceWithNetwork(t *testing.T) { // Test initial internet connectivity via exec t.Log("Testing initial internet connectivity via exec...") - output, exitCode, err := execCommand(ctx, inst.VsockSocket, "curl", "-s", "--connect-timeout", "10", "https://public-ping-bucket-kernel.s3.us-east-1.amazonaws.com/index.html") + output, exitCode, err := execCommand(ctx, inst, "curl", "-s", "--connect-timeout", "10", "https://public-ping-bucket-kernel.s3.us-east-1.amazonaws.com/index.html") if err != nil || exitCode != 0 { t.Logf("curl failed: exitCode=%d err=%v output=%s", exitCode, err, output) } @@ -182,7 +183,7 @@ func TestCreateInstanceWithNetwork(t *testing.T) { var restoreOutput string var restoreExitCode int for i := 0; i < 10; i++ { - restoreOutput, restoreExitCode, err = execCommand(ctx, inst.VsockSocket, "curl", "-s", "https://public-ping-bucket-kernel.s3.us-east-1.amazonaws.com/index.html") + restoreOutput, restoreExitCode, err = execCommand(ctx, inst, "curl", "-s", "https://public-ping-bucket-kernel.s3.us-east-1.amazonaws.com/index.html") if err == nil && restoreExitCode == 0 { break } @@ -196,7 +197,7 @@ func TestCreateInstanceWithNetwork(t *testing.T) { // Verify the original nginx process is still running (proves restore worked, not reboot) t.Log("Verifying nginx master process is still running...") - psOutput, psExitCode, err := execCommand(ctx, inst.VsockSocket, "ps", "aux") + psOutput, psExitCode, err := execCommand(ctx, inst, "ps", "aux") require.NoError(t, err) require.Equal(t, 0, psExitCode) require.Contains(t, psOutput, "nginx: master process", "nginx master should still be running") @@ -223,10 +224,15 @@ func TestCreateInstanceWithNetwork(t *testing.T) { } // execCommand runs a command in the instance via vsock and returns stdout+stderr, exit code, and error -func execCommand(ctx context.Context, vsockSocket string, command ...string) (string, int, error) { +func execCommand(ctx context.Context, inst *Instance, command ...string) (string, int, error) { + dialer, err := hypervisor.NewVsockDialer(inst.HypervisorType, inst.VsockSocket, inst.VsockCID) + if err != nil { + return "", -1, err + } + var stdout, stderr bytes.Buffer - exit, err := guest.ExecIntoInstance(ctx, vsockSocket, guest.ExecOptions{ + exit, err := guest.ExecIntoInstance(ctx, dialer, guest.ExecOptions{ Command: command, Stdin: nil, Stdout: &stdout, diff --git a/lib/instances/qemu_test.go b/lib/instances/qemu_test.go new file mode 100644 index 00000000..b7f5a36b --- /dev/null +++ b/lib/instances/qemu_test.go @@ -0,0 +1,538 @@ +package instances + +import ( + "bytes" + "context" + "fmt" + "io" + "net" + "net/http" + "os" + "path/filepath" + "strings" + "syscall" + "testing" + "time" + + "github.com/onkernel/hypeman/cmd/api/config" + "github.com/onkernel/hypeman/lib/devices" + "github.com/onkernel/hypeman/lib/guest" + "github.com/onkernel/hypeman/lib/hypervisor" + "github.com/onkernel/hypeman/lib/hypervisor/qemu" + "github.com/onkernel/hypeman/lib/images" + "github.com/onkernel/hypeman/lib/ingress" + "github.com/onkernel/hypeman/lib/network" + "github.com/onkernel/hypeman/lib/paths" + "github.com/onkernel/hypeman/lib/system" + "github.com/onkernel/hypeman/lib/volumes" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +// setupTestManagerForQEMU creates a manager configured to use QEMU as the default hypervisor +func setupTestManagerForQEMU(t *testing.T) (*manager, string) { + tmpDir := t.TempDir() + + cfg := &config.Config{ + DataDir: tmpDir, + BridgeName: "vmbr0", + SubnetCIDR: "10.100.0.0/16", + DNSServer: "1.1.1.1", + } + + p := paths.New(tmpDir) + imageManager, err := images.NewManager(p, 1, nil) + require.NoError(t, err) + + systemManager := system.NewManager(p) + networkManager := network.NewManager(p, cfg, nil) + deviceManager := devices.NewManager(p) + volumeManager := volumes.NewManager(p, 0, nil) // 0 = unlimited storage + limits := ResourceLimits{ + MaxOverlaySize: 100 * 1024 * 1024 * 1024, // 100GB + MaxVcpusPerInstance: 0, // unlimited + MaxMemoryPerInstance: 0, // unlimited + MaxTotalVcpus: 0, // unlimited + MaxTotalMemory: 0, // unlimited + } + mgr := NewManager(p, imageManager, systemManager, networkManager, deviceManager, volumeManager, limits, hypervisor.TypeQEMU, nil, nil).(*manager) + + // Register cleanup to kill any orphaned QEMU processes + t.Cleanup(func() { + cleanupOrphanedQEMUProcesses(t, mgr) + }) + + return mgr, tmpDir +} + +// cleanupOrphanedQEMUProcesses kills any QEMU processes from metadata +func cleanupOrphanedQEMUProcesses(t *testing.T, mgr *manager) { + metaFiles, err := mgr.listMetadataFiles() + if err != nil { + return + } + + for _, metaFile := range metaFiles { + id := filepath.Base(filepath.Dir(metaFile)) + meta, err := mgr.loadMetadata(id) + if err != nil { + continue + } + + if meta.HypervisorPID != nil { + pid := *meta.HypervisorPID + if err := syscall.Kill(pid, 0); err == nil { + t.Logf("Cleaning up orphaned QEMU process: PID %d (instance %s)", pid, id) + syscall.Kill(pid, syscall.SIGKILL) + WaitForProcessExit(pid, 1*time.Second) + } + } + } +} + +// waitForQEMUReady polls QEMU status via QMP until it's running or times out +func waitForQEMUReady(ctx context.Context, socketPath string, timeout time.Duration) error { + deadline := time.Now().Add(timeout) + + for time.Now().Before(deadline) { + client, err := qemu.New(socketPath) + if err != nil { + time.Sleep(100 * time.Millisecond) + continue + } + + info, err := client.GetVMInfo(ctx) + if err != nil { + time.Sleep(100 * time.Millisecond) + continue + } + + if info.State == hypervisor.StateRunning { + return nil + } + + time.Sleep(100 * time.Millisecond) + } + + return fmt.Errorf("QEMU VM did not reach running state within %v", timeout) +} + +// collectQEMULogs gets the last N lines of logs (non-streaming) +func collectQEMULogs(ctx context.Context, mgr *manager, instanceID string, n int) (string, error) { + logChan, err := mgr.StreamInstanceLogs(ctx, instanceID, n, false, LogSourceApp) + if err != nil { + return "", err + } + + var lines []string + for line := range logChan { + lines = append(lines, line) + } + + return strings.Join(lines, "\n"), nil +} + +// qemuInstanceResolver is a simple resolver for ingress tests +type qemuInstanceResolver struct { + ip string + exists bool +} + +func (r *qemuInstanceResolver) ResolveInstanceIP(ctx context.Context, nameOrID string) (string, error) { + if r.ip == "" { + return "", fmt.Errorf("instance not found: %s", nameOrID) + } + return r.ip, nil +} + +func (r *qemuInstanceResolver) InstanceExists(ctx context.Context, nameOrID string) (bool, error) { + return r.exists, nil +} + +func (r *qemuInstanceResolver) ResolveInstance(ctx context.Context, nameOrID string) (string, string, error) { + if !r.exists { + return "", "", fmt.Errorf("instance not found: %s", nameOrID) + } + return nameOrID, nameOrID, nil +} + +// TestQEMUBasicEndToEnd tests the complete instance lifecycle with QEMU. +// This is the primary integration test for QEMU support. +// It tests: create, get, list, logs, network, ingress, volumes, exec, and delete. +// It does NOT test: snapshot/standby, hot memory resize (not supported by QEMU in first pass). +func TestQEMUBasicEndToEnd(t *testing.T) { + // Require KVM access + if _, err := os.Stat("/dev/kvm"); os.IsNotExist(err) { + t.Fatal("/dev/kvm not available - ensure KVM is enabled and user is in 'kvm' group (sudo usermod -aG kvm $USER)") + } + + // Require QEMU to be installed + starter := qemu.NewStarter() + if _, err := starter.GetBinaryPath(nil, ""); err != nil { + t.Fatalf("QEMU not available: %v", err) + } + + manager, tmpDir := setupTestManagerForQEMU(t) + ctx := context.Background() + + // Get the image manager for image operations + imageManager, err := images.NewManager(paths.New(tmpDir), 1, nil) + require.NoError(t, err) + + // Pull nginx image + t.Log("Pulling nginx:alpine image...") + nginxImage, err := imageManager.CreateImage(ctx, images.CreateImageRequest{ + Name: "docker.io/library/nginx:alpine", + }) + require.NoError(t, err) + + // Wait for image to be ready + t.Log("Waiting for image build to complete...") + imageName := nginxImage.Name + for i := 0; i < 60; i++ { + img, err := imageManager.GetImage(ctx, imageName) + if err == nil && img.Status == images.StatusReady { + nginxImage = img + break + } + if err == nil && img.Status == images.StatusFailed { + t.Fatalf("Image build failed: %s", *img.Error) + } + time.Sleep(1 * time.Second) + } + require.Equal(t, images.StatusReady, nginxImage.Status, "Image should be ready after 60 seconds") + t.Log("Nginx image ready") + + // Ensure system files + systemManager := system.NewManager(paths.New(tmpDir)) + t.Log("Ensuring system files (downloads kernel and builds initrd)...") + err = systemManager.EnsureSystemFiles(ctx) + require.NoError(t, err) + t.Log("System files ready") + + // Create a volume to attach + p := paths.New(tmpDir) + volumeManager := volumes.NewManager(p, 0, nil) + t.Log("Creating volume...") + vol, err := volumeManager.CreateVolume(ctx, volumes.CreateVolumeRequest{ + Name: "test-data", + SizeGb: 1, + }) + require.NoError(t, err) + require.NotNil(t, vol) + t.Logf("Volume created: %s", vol.Id) + + // Verify volume file exists and is not attached + assert.FileExists(t, p.VolumeData(vol.Id)) + assert.Empty(t, vol.Attachments, "Volume should not be attached yet") + + // Initialize network + networkManager := network.NewManager(p, &config.Config{ + DataDir: tmpDir, + BridgeName: "vmbr0", + SubnetCIDR: "10.100.0.0/16", + DNSServer: "1.1.1.1", + }, nil) + t.Log("Initializing network...") + err = networkManager.Initialize(ctx, nil) + require.NoError(t, err) + t.Log("Network initialized") + + // Create instance with QEMU hypervisor + req := CreateInstanceRequest{ + Name: "test-nginx-qemu", + Image: "docker.io/library/nginx:alpine", + Size: 2 * 1024 * 1024 * 1024, // 2GB + HotplugSize: 512 * 1024 * 1024, // 512MB (unused by QEMU, but part of the request) + OverlaySize: 10 * 1024 * 1024 * 1024, // 10GB + Vcpus: 1, + NetworkEnabled: true, + Hypervisor: hypervisor.TypeQEMU, // Explicitly use QEMU + Env: map[string]string{ + "TEST_VAR": "test_value", + }, + Volumes: []VolumeAttachment{ + { + VolumeID: vol.Id, + MountPath: "/mnt/data", + Readonly: false, + }, + }, + } + + t.Log("Creating QEMU instance...") + inst, err := manager.CreateInstance(ctx, req) + require.NoError(t, err) + require.NotNil(t, inst) + t.Logf("Instance created: %s (hypervisor: %s)", inst.Id, inst.HypervisorType) + + // Verify instance fields + assert.NotEmpty(t, inst.Id) + assert.Equal(t, "test-nginx-qemu", inst.Name) + assert.Equal(t, "docker.io/library/nginx:alpine", inst.Image) + assert.Equal(t, StateRunning, inst.State) + assert.Equal(t, hypervisor.TypeQEMU, inst.HypervisorType) + assert.False(t, inst.HasSnapshot) + assert.NotEmpty(t, inst.KernelVersion) + + // Verify volume is attached to instance + assert.Len(t, inst.Volumes, 1, "Instance should have 1 volume attached") + assert.Equal(t, vol.Id, inst.Volumes[0].VolumeID) + assert.Equal(t, "/mnt/data", inst.Volumes[0].MountPath) + + // Verify volume shows as attached + vol, err = volumeManager.GetVolume(ctx, vol.Id) + require.NoError(t, err) + require.Len(t, vol.Attachments, 1, "Volume should be attached") + assert.Equal(t, inst.Id, vol.Attachments[0].InstanceID) + assert.Equal(t, "/mnt/data", vol.Attachments[0].MountPath) + + // Verify directories exist + assert.DirExists(t, p.InstanceDir(inst.Id)) + assert.FileExists(t, p.InstanceMetadata(inst.Id)) + assert.FileExists(t, p.InstanceOverlay(inst.Id)) + assert.FileExists(t, p.InstanceConfigDisk(inst.Id)) + + // Wait for VM to be fully running + err = waitForQEMUReady(ctx, inst.SocketPath, 10*time.Second) + require.NoError(t, err, "QEMU VM should reach running state") + + // Get instance + retrieved, err := manager.GetInstance(ctx, inst.Id) + require.NoError(t, err) + assert.Equal(t, inst.Id, retrieved.Id) + assert.Equal(t, StateRunning, retrieved.State) + + // List instances + instances, err := manager.ListInstances(ctx) + require.NoError(t, err) + assert.Len(t, instances, 1) + assert.Equal(t, inst.Id, instances[0].Id) + + // Poll for logs to contain nginx startup message + var logs string + foundNginxStartup := false + for i := 0; i < 50; i++ { + logs, err = collectQEMULogs(ctx, manager, inst.Id, 100) + require.NoError(t, err) + + if strings.Contains(logs, "start worker processes") { + foundNginxStartup = true + break + } + time.Sleep(100 * time.Millisecond) + } + + t.Logf("Instance logs (last 100 lines):\n%s", logs) + assert.True(t, foundNginxStartup, "Nginx should have started worker processes within 5 seconds") + + // Test ingress - route external traffic to nginx + t.Log("Testing ingress routing to nginx...") + + // Get random free ports + listener, err := net.Listen("tcp", "127.0.0.1:0") + require.NoError(t, err) + ingressPort := listener.Addr().(*net.TCPAddr).Port + listener.Close() + + adminListener, err := net.Listen("tcp", "127.0.0.1:0") + require.NoError(t, err) + adminPort := adminListener.Addr().(*net.TCPAddr).Port + adminListener.Close() + + t.Logf("Using random ports: ingress=%d, admin=%d", ingressPort, adminPort) + + // Create ingress manager + ingressConfig := ingress.Config{ + ListenAddress: "127.0.0.1", + AdminAddress: "127.0.0.1", + AdminPort: adminPort, + DNSPort: 0, + StopOnShutdown: true, + } + + instanceIP := inst.IP + require.NotEmpty(t, instanceIP, "Instance should have an IP address") + t.Logf("Instance IP: %s", instanceIP) + + resolver := &qemuInstanceResolver{ + ip: instanceIP, + exists: true, + } + + ingressManager := ingress.NewManager(p, ingressConfig, resolver, nil) + + // Initialize ingress manager (starts Caddy) + t.Log("Starting Caddy...") + err = ingressManager.Initialize(ctx) + require.NoError(t, err, "Ingress manager should initialize successfully") + + t.Cleanup(func() { + t.Log("Shutting down Caddy...") + if err := ingressManager.Shutdown(context.Background()); err != nil { + t.Logf("Warning: failed to shutdown ingress manager: %v", err) + } + }) + + // Create an ingress rule + t.Log("Creating ingress rule...") + ingressReq := ingress.CreateIngressRequest{ + Name: "test-nginx-ingress", + Rules: []ingress.IngressRule{ + { + Match: ingress.IngressMatch{ + Hostname: "test.local", + Port: ingressPort, + }, + Target: ingress.IngressTarget{ + Instance: "test-nginx-qemu", + Port: 80, + }, + }, + }, + } + ing, err := ingressManager.Create(ctx, ingressReq) + require.NoError(t, err) + require.NotNil(t, ing) + t.Logf("Ingress created: %s", ing.ID) + + // Make HTTP request through Caddy to nginx + t.Log("Making HTTP request through Caddy to nginx...") + client := &http.Client{Timeout: 2 * time.Second} + var resp *http.Response + var lastErr error + deadline := time.Now().Add(10 * time.Second) + for time.Now().Before(deadline) { + httpReq, err := http.NewRequest("GET", fmt.Sprintf("http://127.0.0.1:%d/", ingressPort), nil) + require.NoError(t, err) + httpReq.Host = "test.local" + + resp, lastErr = client.Do(httpReq) + if lastErr == nil && resp.StatusCode == http.StatusOK { + break + } + if resp != nil { + resp.Body.Close() + resp = nil + } + time.Sleep(200 * time.Millisecond) + } + require.NoError(t, lastErr, "HTTP request through Caddy should succeed") + require.NotNil(t, resp, "HTTP response should not be nil") + defer resp.Body.Close() + + assert.Equal(t, http.StatusOK, resp.StatusCode, "Should get 200 OK from nginx") + + body, err := io.ReadAll(resp.Body) + require.NoError(t, err) + assert.Contains(t, string(body), "nginx", "Response should contain nginx welcome page") + t.Logf("Got response from nginx through Caddy: %d bytes", len(body)) + + err = ingressManager.Delete(ctx, ing.ID) + require.NoError(t, err) + t.Log("Ingress deleted") + + // Test volume is accessible from inside the guest via exec + t.Log("Testing volume from inside guest via exec...") + + runCmd := func(command ...string) (string, int, error) { + var lastOutput string + var lastExitCode int + var lastErr error + + dialer, err := hypervisor.NewVsockDialer(inst.HypervisorType, inst.VsockSocket, inst.VsockCID) + if err != nil { + return "", -1, err + } + + for attempt := 0; attempt < 5; attempt++ { + if attempt > 0 { + time.Sleep(200 * time.Millisecond) + } + + var stdout, stderr bytes.Buffer + exit, err := guest.ExecIntoInstance(ctx, dialer, guest.ExecOptions{ + Command: command, + Stdout: &stdout, + Stderr: &stderr, + TTY: false, + }) + + output := stdout.String() + if stderr.Len() > 0 { + output += stderr.String() + } + output = strings.TrimSpace(output) + + if err != nil { + lastErr = err + lastOutput = output + lastExitCode = -1 + continue + } + + lastOutput = output + lastExitCode = exit.Code + lastErr = nil + + if output != "" || exit.Code == 0 { + return output, exit.Code, nil + } + } + + return lastOutput, lastExitCode, lastErr + } + + // Test volume in a single exec call + testContent := "hello-from-qemu-volume-test" + script := fmt.Sprintf(` + set -e + echo "=== Volume directory ===" + ls -la /mnt/data + echo "=== Writing test file ===" + echo '%s' > /mnt/data/test.txt + echo "=== Reading test file ===" + cat /mnt/data/test.txt + echo "=== Volume mount info ===" + df -h /mnt/data + `, testContent) + + output, exitCode, err := runCmd("sh", "-c", script) + require.NoError(t, err, "Volume test script should execute") + require.Equal(t, 0, exitCode, "Volume test script should succeed") + + require.Contains(t, output, "lost+found", "Volume should be ext4-formatted") + require.Contains(t, output, testContent, "Should be able to read written content") + require.Contains(t, output, "/dev/vd", "Volume should be mounted from block device") + t.Logf("Volume test output:\n%s", output) + t.Log("Volume read/write test passed!") + + // Delete instance + t.Log("Deleting instance...") + err = manager.DeleteInstance(ctx, inst.Id) + require.NoError(t, err) + + // Verify cleanup + assert.NoDirExists(t, p.InstanceDir(inst.Id)) + + // Verify instance no longer exists + _, err = manager.GetInstance(ctx, inst.Id) + assert.ErrorIs(t, err, ErrNotFound) + + // Verify volume is detached but still exists + vol, err = volumeManager.GetVolume(ctx, vol.Id) + require.NoError(t, err) + assert.Empty(t, vol.Attachments, "Volume should be detached after instance deletion") + assert.FileExists(t, p.VolumeData(vol.Id), "Volume file should still exist") + + // Delete volume + t.Log("Deleting volume...") + err = volumeManager.DeleteVolume(ctx, vol.Id) + require.NoError(t, err) + + // Verify volume is gone + _, err = volumeManager.GetVolume(ctx, vol.Id) + assert.ErrorIs(t, err, volumes.ErrNotFound) + + t.Log("QEMU instance lifecycle test complete!") +} diff --git a/lib/instances/resource_limits_test.go b/lib/instances/resource_limits_test.go index 9392ee0a..1895d87e 100644 --- a/lib/instances/resource_limits_test.go +++ b/lib/instances/resource_limits_test.go @@ -163,7 +163,7 @@ func createTestManager(t *testing.T, limits ResourceLimits) *manager { deviceMgr := devices.NewManager(p) volumeMgr := volumes.NewManager(p, 0, nil) - return NewManager(p, imageMgr, systemMgr, networkMgr, deviceMgr, volumeMgr, limits, nil, nil).(*manager) + return NewManager(p, imageMgr, systemMgr, networkMgr, deviceMgr, volumeMgr, limits, "", nil, nil).(*manager) } func TestResourceLimits_StructValues(t *testing.T) { @@ -267,7 +267,7 @@ func TestAggregateLimits_EnforcedAtRuntime(t *testing.T) { MaxTotalMemory: 6 * 1024 * 1024 * 1024, // aggregate: only 6GB total (allows first 2.5GB VM) } - mgr := NewManager(p, imageManager, systemManager, networkManager, deviceManager, volumeManager, limits, nil, nil).(*manager) + mgr := NewManager(p, imageManager, systemManager, networkManager, deviceManager, volumeManager, limits, "", nil, nil).(*manager) // Cleanup any orphaned processes on test end t.Cleanup(func() { diff --git a/lib/instances/restore.go b/lib/instances/restore.go index d4698bf0..5f7af9b6 100644 --- a/lib/instances/restore.go +++ b/lib/instances/restore.go @@ -6,6 +6,7 @@ import ( "os" "time" + "github.com/onkernel/hypeman/lib/hypervisor" "github.com/onkernel/hypeman/lib/logger" "go.opentelemetry.io/otel/trace" ) @@ -64,7 +65,8 @@ func (m *manager) restoreInstance( // 5. Transition: Standby → Paused (start hypervisor + restore) log.DebugContext(ctx, "restoring from snapshot", "instance_id", id, "snapshot_dir", snapshotDir) - if err := m.restoreFromSnapshot(ctx, stored, snapshotDir); err != nil { + pid, hv, err := m.restoreFromSnapshot(ctx, stored, snapshotDir) + if err != nil { log.ErrorContext(ctx, "failed to restore from snapshot", "instance_id", id, "error", err) // Cleanup network on failure if stored.NetworkEnabled { @@ -74,23 +76,15 @@ func (m *manager) restoreInstance( return nil, err } - // 6. Create hypervisor client for resumed VM - hv, err := m.getHypervisor(stored.SocketPath, stored.HypervisorType) - if err != nil { - log.ErrorContext(ctx, "failed to create hypervisor client", "instance_id", id, "error", err) - // Cleanup network on failure - if stored.NetworkEnabled { - netAlloc, _ := m.networkManager.GetAllocation(ctx, id) - m.networkManager.ReleaseAllocation(ctx, netAlloc) - } - return nil, fmt.Errorf("create hypervisor client: %w", err) - } + // Store the PID for later cleanup + stored.HypervisorPID = &pid - // 7. Transition: Paused → Running (resume) + // 6. Transition: Paused → Running (resume) log.DebugContext(ctx, "resuming VM", "instance_id", id) if err := hv.Resume(ctx); err != nil { log.ErrorContext(ctx, "failed to resume VM", "instance_id", id, "error", err) - // Cleanup network on failure + // Cleanup on failure + hv.Shutdown(ctx) if stored.NetworkEnabled { netAlloc, _ := m.networkManager.GetAllocation(ctx, id) m.networkManager.ReleaseAllocation(ctx, netAlloc) @@ -114,8 +108,8 @@ func (m *manager) restoreInstance( // Record metrics if m.metrics != nil { - m.recordDuration(ctx, m.metrics.restoreDuration, start, "success") - m.recordStateTransition(ctx, string(StateStandby), string(StateRunning)) + m.recordDuration(ctx, m.metrics.restoreDuration, start, "success", stored.HypervisorType) + m.recordStateTransition(ctx, string(StateStandby), string(StateRunning), stored.HypervisorType) } // Return instance with derived state (should be Running now) @@ -129,40 +123,22 @@ func (m *manager) restoreFromSnapshot( ctx context.Context, stored *StoredMetadata, snapshotDir string, -) error { +) (int, hypervisor.Hypervisor, error) { log := logger.FromContext(ctx) - // Get process manager for this hypervisor type - pm, err := m.getProcessManager(stored.HypervisorType) + // Get VM starter for this hypervisor type + starter, err := m.getVMStarter(stored.HypervisorType) if err != nil { - return fmt.Errorf("get process manager: %w", err) + return 0, nil, fmt.Errorf("get vm starter: %w", err) } - // Start hypervisor process and capture PID - log.DebugContext(ctx, "starting hypervisor process for restore", "instance_id", stored.Id, "hypervisor", stored.HypervisorType, "version", stored.HypervisorVersion) - pid, err := pm.StartProcess(ctx, m.paths, stored.HypervisorVersion, stored.SocketPath) + // Restore VM from snapshot (handles process start + restore) + log.DebugContext(ctx, "restoring VM from snapshot", "instance_id", stored.Id, "hypervisor", stored.HypervisorType, "version", stored.HypervisorVersion, "snapshot_dir", snapshotDir) + pid, hv, err := starter.RestoreVM(ctx, m.paths, stored.HypervisorVersion, stored.SocketPath, snapshotDir) if err != nil { - return fmt.Errorf("start hypervisor: %w", err) - } - - // Store the PID for later cleanup - stored.HypervisorPID = &pid - log.DebugContext(ctx, "hypervisor process started", "instance_id", stored.Id, "pid", pid) - - // Create hypervisor client - hv, err := m.getHypervisor(stored.SocketPath, stored.HypervisorType) - if err != nil { - return fmt.Errorf("create hypervisor client: %w", err) - } - - // Restore from snapshot - log.DebugContext(ctx, "invoking hypervisor restore API", "instance_id", stored.Id, "snapshot_dir", snapshotDir) - if err := hv.Restore(ctx, snapshotDir); err != nil { - log.ErrorContext(ctx, "restore API call failed", "instance_id", stored.Id, "error", err) - hv.Shutdown(ctx) // Cleanup - return fmt.Errorf("restore: %w", err) + return 0, nil, fmt.Errorf("restore vm: %w", err) } - log.DebugContext(ctx, "VM restored from snapshot successfully", "instance_id", stored.Id) - return nil + log.DebugContext(ctx, "VM restored from snapshot successfully", "instance_id", stored.Id, "pid", pid) + return pid, hv, nil } diff --git a/lib/instances/standby.go b/lib/instances/standby.go index b4391ff0..72ce467c 100644 --- a/lib/instances/standby.go +++ b/lib/instances/standby.go @@ -129,8 +129,8 @@ func (m *manager) standbyInstance( // Record metrics if m.metrics != nil { - m.recordDuration(ctx, m.metrics.standbyDuration, start, "success") - m.recordStateTransition(ctx, string(StateRunning), string(StateStandby)) + m.recordDuration(ctx, m.metrics.standbyDuration, start, "success", stored.HypervisorType) + m.recordStateTransition(ctx, string(StateRunning), string(StateStandby), stored.HypervisorType) } // Return instance with derived state (should be Standby now) diff --git a/lib/instances/start.go b/lib/instances/start.go index b57fd12b..36b15328 100644 --- a/lib/instances/start.go +++ b/lib/instances/start.go @@ -111,8 +111,8 @@ func (m *manager) startInstance( // Record metrics if m.metrics != nil { - m.recordDuration(ctx, m.metrics.startDuration, start, "success") - m.recordStateTransition(ctx, string(StateStopped), string(StateRunning)) + m.recordDuration(ctx, m.metrics.startDuration, start, "success", stored.HypervisorType) + m.recordStateTransition(ctx, string(StateStopped), string(StateRunning), stored.HypervisorType) } // Return instance with derived state (should be Running now) diff --git a/lib/instances/stop.go b/lib/instances/stop.go index 1d7ee112..eff32a66 100644 --- a/lib/instances/stop.go +++ b/lib/instances/stop.go @@ -84,8 +84,8 @@ func (m *manager) stopInstance( // Record metrics if m.metrics != nil { - m.recordDuration(ctx, m.metrics.stopDuration, start, "success") - m.recordStateTransition(ctx, string(StateRunning), string(StateStopped)) + m.recordDuration(ctx, m.metrics.stopDuration, start, "success", stored.HypervisorType) + m.recordStateTransition(ctx, string(StateRunning), string(StateStopped), stored.HypervisorType) } // Return instance with derived state (should be Stopped now) diff --git a/lib/instances/types.go b/lib/instances/types.go index 0f3356f1..5d7d05f4 100644 --- a/lib/instances/types.go +++ b/lib/instances/types.go @@ -85,6 +85,12 @@ type Instance struct { HasSnapshot bool // Derived from filesystem check } +// GetHypervisorType returns the hypervisor type as a string. +// This implements the middleware.HypervisorTyper interface for OTEL enrichment. +func (i *Instance) GetHypervisorType() string { + return string(i.HypervisorType) +} + // CreateInstanceRequest is the domain request for creating an instance type CreateInstanceRequest struct { Name string // Required @@ -97,6 +103,7 @@ type CreateInstanceRequest struct { NetworkEnabled bool // Whether to enable networking (uses default network) Devices []string // Device IDs or names to attach (GPU passthrough) Volumes []VolumeAttachment // Volumes to attach at creation time + Hypervisor hypervisor.Type // Optional: hypervisor type (defaults to config) } // AttachVolumeRequest is the domain request for attaching a volume (used for API compatibility) diff --git a/lib/instances/volumes_test.go b/lib/instances/volumes_test.go index cb8304be..6be9e24f 100644 --- a/lib/instances/volumes_test.go +++ b/lib/instances/volumes_test.go @@ -18,14 +18,14 @@ import ( "github.com/stretchr/testify/require" ) -// execWithRetry runs a command with retries until guest-agent is ready -func execWithRetry(ctx context.Context, vsockSocket string, command []string) (string, int, error) { +// execWithRetry runs a command with retries until exec-agent is ready +func execWithRetry(ctx context.Context, inst *Instance, command []string) (string, int, error) { var output string var code int var err error for i := 0; i < 10; i++ { - output, code, err = execCommand(ctx, vsockSocket, command...) + output, code, err = execCommand(ctx, inst, command...) if err == nil { return output, code, nil } @@ -105,13 +105,13 @@ func TestVolumeMultiAttachReadOnly(t *testing.T) { require.NoError(t, err) t.Logf("Writer instance created: %s", writerInst.Id) - // Wait for guest-agent - err = waitForGuestAgent(ctx, manager, writerInst.Id, 15*time.Second) - require.NoError(t, err, "guest-agent should be ready") + // Wait for exec-agent + err = waitForExecAgent(ctx, manager, writerInst.Id, 15*time.Second) + require.NoError(t, err, "exec-agent should be ready") // Write test file, sync, and verify in one command to ensure data persistence t.Log("Writing test file to volume...") - output, code, err := execWithRetry(ctx, writerInst.VsockSocket, []string{ + output, code, err := execWithRetry(ctx, writerInst, []string{ "/bin/sh", "-c", "echo 'Hello from writer' > /data/test.txt && sync && cat /data/test.txt", }) require.NoError(t, err) @@ -168,30 +168,30 @@ func TestVolumeMultiAttachReadOnly(t *testing.T) { require.NoError(t, err) assert.Len(t, vol.Attachments, 2, "Volume should have 2 attachments") - // Wait for guest-agent on both readers - err = waitForGuestAgent(ctx, manager, reader1.Id, 15*time.Second) - require.NoError(t, err, "reader-1 guest-agent should be ready") + // Wait for exec-agent on both readers + err = waitForExecAgent(ctx, manager, reader1.Id, 15*time.Second) + require.NoError(t, err, "reader-1 exec-agent should be ready") - err = waitForGuestAgent(ctx, manager, reader2.Id, 15*time.Second) - require.NoError(t, err, "reader-2 guest-agent should be ready") + err = waitForExecAgent(ctx, manager, reader2.Id, 15*time.Second) + require.NoError(t, err, "reader-2 exec-agent should be ready") // Verify data is readable from reader-1 t.Log("Verifying data from reader-1...") - output1, code, err := execWithRetry(ctx, reader1.VsockSocket, []string{"cat", "/data/test.txt"}) + output1, code, err := execWithRetry(ctx, reader1, []string{"cat", "/data/test.txt"}) require.NoError(t, err) require.Equal(t, 0, code) require.Contains(t, output1, "Hello from writer", "Reader 1 should see the file") // Verify data is readable from reader-2 (overlay mode) t.Log("Verifying data from reader-2 (overlay)...") - output2, code, err := execWithRetry(ctx, reader2.VsockSocket, []string{"cat", "/data/test.txt"}) + output2, code, err := execWithRetry(ctx, reader2, []string{"cat", "/data/test.txt"}) require.NoError(t, err) require.Equal(t, 0, code) assert.Contains(t, output2, "Hello from writer", "Reader 2 should see the file from base volume") // Verify overlay allows writes: append to the file and verify in one command t.Log("Verifying overlay allows writes (append to file)...") - output2, code, err = execWithRetry(ctx, reader2.VsockSocket, []string{ + output2, code, err = execWithRetry(ctx, reader2, []string{ "/bin/sh", "-c", "echo 'Appended by overlay' >> /data/test.txt && sync && cat /data/test.txt", }) require.NoError(t, err) @@ -201,7 +201,7 @@ func TestVolumeMultiAttachReadOnly(t *testing.T) { // Verify reader-1 does NOT see the appended data AND write fails (all in one command) t.Log("Verifying read-only enforcement and isolation on reader-1...") - output1, code, err = execWithRetry(ctx, reader1.VsockSocket, []string{ + output1, code, err = execWithRetry(ctx, reader1, []string{ "/bin/sh", "-c", "cat /data/test.txt && echo 'illegal' > /data/illegal.txt", }) require.NoError(t, err, "Exec should succeed even if write command fails") @@ -406,22 +406,22 @@ func TestVolumeFromArchive(t *testing.T) { require.NoError(t, err) t.Logf("Instance created: %s", inst.Id) - // Wait for guest-agent - err = waitForGuestAgent(ctx, manager, inst.Id, 15*time.Second) - require.NoError(t, err, "guest-agent should be ready") + // Wait for exec-agent + err = waitForExecAgent(ctx, manager, inst.Id, 15*time.Second) + require.NoError(t, err, "exec-agent should be ready") // Verify files from archive are present t.Log("Verifying archive files are accessible...") // Check greeting.txt - output, code, err := execWithRetry(ctx, inst.VsockSocket, []string{"cat", "/archive/greeting.txt"}) + output, code, err := execWithRetry(ctx, inst, []string{"cat", "/archive/greeting.txt"}) require.NoError(t, err) require.Equal(t, 0, code, "cat greeting.txt should succeed") assert.Equal(t, "Hello from archive!", strings.TrimSpace(output)) t.Log("✓ greeting.txt verified") // Check data/config.json - output, code, err = execWithRetry(ctx, inst.VsockSocket, []string{"cat", "/archive/data/config.json"}) + output, code, err = execWithRetry(ctx, inst, []string{"cat", "/archive/data/config.json"}) require.NoError(t, err) require.Equal(t, 0, code, "cat config.json should succeed") assert.Contains(t, output, `"key": "value"`) @@ -429,14 +429,14 @@ func TestVolumeFromArchive(t *testing.T) { t.Log("✓ data/config.json verified") // Check deeply nested file - output, code, err = execWithRetry(ctx, inst.VsockSocket, []string{"cat", "/archive/data/nested/deep.txt"}) + output, code, err = execWithRetry(ctx, inst, []string{"cat", "/archive/data/nested/deep.txt"}) require.NoError(t, err) require.Equal(t, 0, code, "cat deep.txt should succeed") assert.Equal(t, "Deep nested file content", strings.TrimSpace(output)) t.Log("✓ data/nested/deep.txt verified") // List directory to confirm structure - output, code, err = execWithRetry(ctx, inst.VsockSocket, []string{"find", "/archive", "-type", "f"}) + output, code, err = execWithRetry(ctx, inst, []string{"find", "/archive", "-type", "f"}) require.NoError(t, err) require.Equal(t, 0, code, "find should succeed") assert.Contains(t, output, "/archive/greeting.txt") diff --git a/lib/middleware/resolve.go b/lib/middleware/resolve.go index 75593370..705614a9 100644 --- a/lib/middleware/resolve.go +++ b/lib/middleware/resolve.go @@ -8,8 +8,16 @@ import ( "github.com/go-chi/chi/v5" "github.com/onkernel/hypeman/lib/logger" + "go.opentelemetry.io/otel/attribute" + "go.opentelemetry.io/otel/trace" ) +// HypervisorTyper is implemented by resources that have a hypervisor type. +// This allows the middleware to enrich logs/traces without importing the instances package. +type HypervisorTyper interface { + GetHypervisorType() string +} + // ResourceResolver is implemented by managers that support lookup by ID, name, or prefix. type ResourceResolver interface { // Resolve looks up a resource by ID, name, or ID prefix. @@ -115,6 +123,23 @@ func ResolveResource(resolvers Resolvers, errResponder ErrorResponder) func(http logKey = "image_name" } log := logger.FromContext(ctx).With(logKey, resolvedID) + + // For instances, also add hypervisor type to logs and traces + if resourceType == "instance" { + if hvTyper, ok := resource.(HypervisorTyper); ok { + hvType := hvTyper.GetHypervisorType() + if hvType != "" { + log = log.With("hypervisor", hvType) + + // Add to trace span if one exists + span := trace.SpanFromContext(ctx) + if span.IsRecording() { + span.SetAttributes(attribute.String("hypervisor", hvType)) + } + } + } + } + ctx = logger.AddToContext(ctx, log) next.ServeHTTP(w, r.WithContext(ctx)) diff --git a/lib/network/derive.go b/lib/network/derive.go index f568c035..53f8b773 100644 --- a/lib/network/derive.go +++ b/lib/network/derive.go @@ -9,7 +9,6 @@ import ( "github.com/onkernel/hypeman/lib/hypervisor" "github.com/onkernel/hypeman/lib/logger" - "github.com/onkernel/hypeman/lib/vmm" ) // instanceMetadata is the minimal metadata we need to derive allocations @@ -18,6 +17,8 @@ type instanceMetadata struct { Name string NetworkEnabled bool HypervisorType string + IP string // Assigned IP address + MAC string // Assigned MAC address } // deriveAllocation derives network allocation from CH or snapshot @@ -49,58 +50,38 @@ func (m *manager) deriveAllocation(ctx context.Context, instanceID string) (*All } netmask := fmt.Sprintf("%d.%d.%d.%d", ipNet.Mask[0], ipNet.Mask[1], ipNet.Mask[2], ipNet.Mask[3]) - // 4. Try to derive from running VM first - socketPath := m.paths.InstanceSocket(instanceID, hypervisor.SocketNameForType(hypervisor.Type(meta.HypervisorType))) - if fileExists(socketPath) { - client, err := vmm.NewVMM(socketPath) - if err == nil { - resp, err := client.GetVmInfoWithResponse(ctx) - if err == nil && resp.JSON200 != nil && resp.JSON200.Config.Net != nil && len(*resp.JSON200.Config.Net) > 0 { - nets := *resp.JSON200.Config.Net - net := nets[0] - if net.Ip != nil && net.Mac != nil && net.Tap != nil { - log.DebugContext(ctx, "derived allocation from running VM", "instance_id", instanceID) - return &Allocation{ - InstanceID: instanceID, - InstanceName: meta.Name, - Network: "default", - IP: *net.Ip, - MAC: *net.Mac, - TAPDevice: *net.Tap, - Gateway: defaultNet.Gateway, - Netmask: netmask, - State: "running", - }, nil - } + // 4. Use stored metadata to derive allocation (works for all hypervisors) + if meta.IP != "" && meta.MAC != "" { + tap := generateTAPName(instanceID) + + // Determine state based on socket existence and snapshot + socketPath := m.paths.InstanceSocket(instanceID, hypervisor.SocketNameForType(hypervisor.Type(meta.HypervisorType))) + state := "stopped" + if fileExists(socketPath) { + state = "running" + } else { + // Check for snapshot (standby state) + snapshotConfigJson := m.paths.InstanceSnapshotConfig(instanceID) + if fileExists(snapshotConfigJson) { + state = "standby" } } - } - // 5. Try to derive from snapshot - // Cloud Hypervisor creates config.json in the snapshot directory - snapshotConfigJson := m.paths.InstanceSnapshotConfig(instanceID) - if fileExists(snapshotConfigJson) { - vmConfig, err := m.parseVmJson(snapshotConfigJson) - if err == nil && vmConfig.Net != nil && len(*vmConfig.Net) > 0 { - nets := *vmConfig.Net - if nets[0].Ip != nil && nets[0].Mac != nil && nets[0].Tap != nil { - log.DebugContext(ctx, "derived allocation from snapshot", "instance_id", instanceID) - return &Allocation{ - InstanceID: instanceID, - InstanceName: meta.Name, - Network: "default", - IP: *nets[0].Ip, - MAC: *nets[0].Mac, - TAPDevice: *nets[0].Tap, - Gateway: defaultNet.Gateway, - Netmask: netmask, - State: "standby", - }, nil - } - } + log.DebugContext(ctx, "derived allocation from metadata", "instance_id", instanceID, "state", state) + return &Allocation{ + InstanceID: instanceID, + InstanceName: meta.Name, + Network: "default", + IP: meta.IP, + MAC: meta.MAC, + TAPDevice: tap, + Gateway: defaultNet.Gateway, + Netmask: netmask, + State: state, + }, nil } - // 6. No allocation (stopped or network not yet configured) + // 5. No allocation (network not yet configured) return nil, nil } @@ -164,22 +145,6 @@ func (m *manager) loadInstanceMetadata(instanceID string) (*instanceMetadata, er return &meta, nil } -// parseVmJson parses Cloud Hypervisor's config.json from snapshot -// Note: Despite the function name, this parses config.json (what CH actually creates) -func (m *manager) parseVmJson(path string) (*vmm.VmConfig, error) { - data, err := os.ReadFile(path) - if err != nil { - return nil, fmt.Errorf("read config.json: %w", err) - } - - var vmConfig vmm.VmConfig - if err := json.Unmarshal(data, &vmConfig); err != nil { - return nil, fmt.Errorf("unmarshal config.json: %w", err) - } - - return &vmConfig, nil -} - // fileExists checks if a file exists func fileExists(path string) bool { _, err := os.Stat(path) diff --git a/lib/oapi/oapi.go b/lib/oapi/oapi.go index e170ac25..129eaf9b 100644 --- a/lib/oapi/oapi.go +++ b/lib/oapi/oapi.go @@ -29,6 +29,12 @@ const ( BearerAuthScopes = "bearerAuth.Scopes" ) +// Defines values for CreateInstanceRequestHypervisor. +const ( + CreateInstanceRequestHypervisorCloudHypervisor CreateInstanceRequestHypervisor = "cloud-hypervisor" + CreateInstanceRequestHypervisorQemu CreateInstanceRequestHypervisor = "qemu" +) + // Defines values for DeviceType. const ( Gpu DeviceType = "gpu" @@ -49,6 +55,12 @@ const ( Ready ImageStatus = "ready" ) +// Defines values for InstanceHypervisor. +const ( + InstanceHypervisorCloudHypervisor InstanceHypervisor = "cloud-hypervisor" + InstanceHypervisorQemu InstanceHypervisor = "qemu" +) + // Defines values for InstanceState. const ( Created InstanceState = "Created" @@ -135,6 +147,9 @@ type CreateInstanceRequest struct { // HotplugSize Additional memory for hotplug (human-readable format like "3GB", "1G") HotplugSize *string `json:"hotplug_size,omitempty"` + // Hypervisor Hypervisor to use for this instance. Defaults to server configuration. + Hypervisor *CreateInstanceRequestHypervisor `json:"hypervisor,omitempty"` + // Image OCI image reference Image string `json:"image"` @@ -160,6 +175,9 @@ type CreateInstanceRequest struct { Volumes *[]VolumeMount `json:"volumes,omitempty"` } +// CreateInstanceRequestHypervisor Hypervisor to use for this instance. Defaults to server configuration. +type CreateInstanceRequestHypervisor string + // CreateVolumeRequest defines model for CreateVolumeRequest. type CreateVolumeRequest struct { // Id Optional custom identifier (auto-generated if not provided) @@ -348,6 +366,9 @@ type Instance struct { // HotplugSize Hotplug memory size (human-readable) HotplugSize *string `json:"hotplug_size,omitempty"` + // Hypervisor Hypervisor running this instance + Hypervisor *InstanceHypervisor `json:"hypervisor,omitempty"` + // Id Auto-generated unique identifier (CUID2 format) Id string `json:"id"` @@ -404,6 +425,9 @@ type Instance struct { Volumes *[]VolumeMount `json:"volumes,omitempty"` } +// InstanceHypervisor Hypervisor running this instance +type InstanceHypervisor string + // InstanceState Instance state: // - Created: VMM created but not started (Cloud Hypervisor native) // - Running: VM is actively running (Cloud Hypervisor native) @@ -8268,111 +8292,112 @@ func (sh *strictHandler) GetVolume(w http.ResponseWriter, r *http.Request, id st // Base64 encoded, gzipped, json marshaled Swagger object var swaggerSpec = []string{ - "H4sIAAAAAAAC/+w9DXMTO5J/RTW3W+vc2o4TPha8tXWVlwDPWwRSBPJu94UL8kzb1mNGGiSNE0Plv1+p", - "pfm0bE+AGLJQRRWOPZK6W/2tVs+nIBRJKjhwrYLhp0CFM0gofjzQmoazMxFnCbyCDxkobb5OpUhBagb4", - "UCIyri9SqmfmrwhUKFmqmeDBMDihekYuZyCBzHEWomYiiyMyBoLjIAq6AVzRJI0hGAa7Cde7EdU06AZ6", - "kZqvlJaMT4PrbiCBRoLHC7vMhGaxDoYTGivoNpY9NlMTqogZ0sMxxXxjIWKgPLjGGT9kTEIUDH+vovG2", - "eFiM/4BQm8UP5pTFdBzDEcxZCMtkCDMpgeuLSLI5yGVSHNrf4wUZi4xHxD5HOjyLY8ImhAsOOzVi8DmL", - "mKGEecQsHQy1zMBDmQhhumCRZwcOR8T+TEZHpDODq/oi+38bPwpWT8lpAsuT/pollPcMcQ1Y+fz4bHXu", - "5/d9MzORJNnFVIosXZ559PL4+A3BHwnPkjHI6oyP9ov5GNcwBWkmTEN2QaNIglJ+/PMfq7ANBoPBkO4P", - "B4P+wAflHHgk5EqS2p/9JN0bRLBmylYkdfMvkfTF2ehodEAOhUyFpDh2aaUGY1fJU8Wryjb1XfHx/6EE", - "qh3zr1QFftRe4gcak2ksxjSOFyTj7ENW45s+GRkR0CSVYs4iiLqE4g+EKUIzLXpT4CCphohMpEiIngGp", - "7C3pQH/a75Jzg27PbG6P7vcGg97gPKjvTny/N02zoBukVGuQBsD/+532Ph70/j3oPX5bfrzo997+9U++", - "jWzLcERMEE6HZyfflS7Jga1yYRPQ9Ry6ZpNXb98oodMb797hiDAzjkiYgARuMLHwRyJ8D7LPxG7MxpLK", - "xS6fMn41jKkGpevYrH92I34I2xrE+NSgfkPUGjKH7NaJxSXIkCogMRgGUV0SsSnTqkuoUdtUzUARY1P+", - "TkLKDc8qTaUmQhLgEblkekYoPlenQLLo0ZT1mAU16AYJvXoOfGrs5sN7S/xomLHjPvTe/nf+1c7/eFlS", - "ZjF4mPGVyDTjU4I/k4mQRM+YIiUMTEOC4/4kYRIMg//aLZ2BXecJ7ObUzWIwayWMj+ywvQISKiVd+Hct", - "B27d7ilN+Rq9YgXIg99RbtkUcdpSES0IRb8F8X128mbXiGRKldIzKbLprLorv+f64G2FFkvUrSPZDYDP", - "zXM0iphVbSc1cD3GtAr0Ez5nUvAEuCZzKplhvppx+hS8eHn05OLJi7NgaCgRZaHT9CcvX70OhsG9wWBQ", - "gauk50zoNM6mF4p9hJqbFNx79kvQBOSggJ8kkAi5QIq5OUhnVhePiZAJ1SRm74Gcm/nOA6PC9p41Fdc+", - "LrVs940SaaVfNigOGqeMw0rN0f1epP1SyPexoFFv7ysLOwdt5l5G8YX9gYSCT9g0sw6CE3sgzImZsX01", - "fgVuKBLVGMZ6mvXpf5uBnoGsSFg+pfnKWjocTnIIKxSpua5VJ3yJicUcZEwXHibeG3i4+DfJNO6oG0ci", - "pt4TM3gDC5vZLA8/GCwz8cDPxR6gPDD9YjjKyVQbSApA9vaP3cf9tnI1D9NM1UDab4LzAj1p447MmdQZ", - "jcnhyZuayvE61jZk86hdGxFWVa3b/4IfqCah0e2G/zRDK9DK1NiZMX5bVrx+62L1ymrrsiF89Xn4hcca", - "ZkqLhLAIuGYTZuK1hjPK6m5rfcfmIu6ZaBY1QEs1ZcFd9vyThZ3Kbsoq1ryYjpenPDUcyDiZsikdL3Td", - "2OwNlrfeT+h8fh+pV0XFlj0gutDCE+zl3DI6MnTMn20T8WIMfaHFxXzCPDMXmqr0vpkiYSMEd0xrpuil", - "IXMheZdczpjRbYrkREAVenZcdSL657xHDHBDclQsUExbTGmMiBF6a1o7QlaAYJxkCsh4sUMoOTvuk9cF", - "tH9RhFPN5pCnCWZUkTEAJxk3JgUiXB+TH1UAMmW8Paabw53DbjMKO+grCfdbn/y6SCGhnFyyOMZYK6Ga", - "hRiojVkDn8sZcLdRZiWjAHgh9f1zXuUsl5ppqvxugJoBoguqPR4rTJnSstQcStMkJZ1XTw/v3bv3uKmk", - "9x/0Bnu9vQev9wbDgfn376AbWOVqfAeqoefUzzaSJr65Dur6woW+VY1y+GZ0tO8sQn0d/fE+ffzo6orq", - "xw/ZpXr8MRnL6R/36FbSKn71dFTG7KSTKZC9XPUZrvJF6pWAeEUk/tkB9o0yOvaL9ebHYvfaPHkbOaCG", - "XsXECz7S/YwsTVMJ1uRqtY5+7chQx8d8a/yDkvMNOjxLDJwuXxKyyrQlXZ9IKaQnHyoizzoHaRqzEKW7", - "p1II2YSFBMwMxAwgnQQ1CxSeUp2sYxpdSGfJvSKtKYs9PFOJdOxi7knSMWo5yWLN0hjsb8ilrZwVxPwI", - "Z/JFiYxzkBeQk+cGMyWglDdYasQwOS7FI2hlIhhn06khSZV0x0yhcShtGoM4GtrYayOr4m6WgPnYq4pD", - "S254bqKvXgxziKtMYDWKATYREkjBJ3bTalgxPqcxiy4YTzMvS6wk5dNMootgJyV0LDKN7oDdsOoimDZD", - "N29iJM5LrCVy/Ao0tkcidUooTXXmwi4rXuK9oWe5nHi/cTvcJL5tGOVhdmMDEo8WOzw+sjo6FFxTxkGS", - "BDR1BzCVJAnm6oJu0DM8FVFIBCdiMvn7+rTJCi+uEJB1fsBhNXq4PR+ATV1Q0PRClIjnEJGEcjYBpYl7", - "srqymtH9Bw+HdBzu7d+LYHL/wcN+v+9bBriWi1Qw7lnqSfFbu63YtQmRXjlnX82+bB9uIZ3VBpdPwcnB", - "61+DYbCbKbkbi5DGu2rM+LDyd/Fn+QN+sH+OGfemwQqd24AUVYzTCCbisGJkHOcJZXHjLDLN4th9PzSY", - "cAgLhhSobDZGKX4X6oVhzZh9hIh4k+uaTo0vZTnuy7Lo3eBDBhlcpEIxu/qSI+N+MdHIOGNxRHBE9VxS", - "26/qse3+SvQrLiRGjDbiXHYkiyyNWdk849bMuGaxDZpqKz649/DR3waP9/Yrws24fng/aAVKoXYbmRrE", - "2f1aujwp8MhaUMMG9lMo+NxIBf6B8Bk9YxmnpsDz35Y241LI94xPLyLm4c7f7I8kYhJCjdnYzTIU7NI0", - "3cyK/oC+0GkF+hs8SHcQ4LEu31yTf07oVV/95fSfH/5Xnfztj70Pz8/O/jV/9s+jF+xfZ/HJyy/KMa8/", - "A/qmBzlrs2sYb9QOcNqyxzHVocfxmQmlV1DN/UK0IIkZ3CeHlJMxDM95jzxnGiSNh+Q8oCnrO2L2Q5Gc", - "B6QDVzTUdhQRnJipyAxoBHLHDD6xeXYz+FOeprhuzhEtOE1YSKQj8pgqE85yorJxJBLK+M45P+duLpIj", - "ojB9Yz5FJKSpziSYHSFhJuMFGUsaQnEuXS7eJZ9oml7vnHM9o5rAlZYGg5RKXRwY5yvgRjuobHrIPQ4R", - "mdM4A0VCJNQ5L+xHZEAwk2gqp6D7RUoW/f1GimYFUbwxuZC6lmV+NOh69pGY58xGxkxp4KQ4f2AKmZd0", - "8jOCR4Oa+D8aPNqciSx4aA37IXcvVynlTNlCPiwD49JWGV/MtE43lx2hvrEyQn59/frEkMH8f0ryiUpa", - "FFvcETxeEGriYlA2v6Zj9EncscxO4Muh2d1tidBr+7AZFqvNeDzBhcnr56dEg0wYt/q7ExpyTkz4DjbT", - "w5TKDCsySg4Oj5/s9FuUWSFtC/jX7OPrAsNGwj4/xlpOYuCI8hDC0LdLRkdd4045CS0dLcygPhWSxFbB", - "lHI9JG8U1M8zcKtsssfuZLwoS06sVj8PdvIZ06amGJJXhX9HC1CKQpaSGfIpS7nEac/5b4YxbHp3afZu", - "HVZMXLv4xak2TOZSTVzuBE3xalWwXvw9FEeZF7x5yngz2a4eT5rF/KxR7v2teyD3buaB3E5RwPIRP1UX", - "itNUzYReffBBSf4MgSumdM1nWN6glan65YKCusK3pQJrTjrblQZ8y7z591eWsLaQ4EurAZyL0a4YwMda", - "VT2TH9l99vl/N2Ce44oDpdiUQ0RGJ2WBXxmQ5tM3Uu6P9/t7Dx/19waD/t6gTXie0HDN2scHh+0XH+zb", - "gGVIx8MwGsLkC9IDbtusQaDxJV0ocp6b7PPA+ggV56DClM6st0pQLpdZfF5VRfPgY1PdxE3qJFppDyzI", - "WaH6T7FY5+Z6/8FKvb9xV00wDZsdMytEp/hwPuriJokrIKHI4oj/RZOxkTzrqkHkPEoF2nKKfZYp8oa/", - "5+KS11G3+Qsjvx8ykAtydnxcy3ZJmGSq3Zm80iJNV+6DSG+0DfsbzO9GaCplMdsohWlqwop+/eqFL9XQ", - "PD++sVzXIkSv8t3qigycDsNvW0QTDQ1nEDc7GWeaFLVwhuUOY5FFWEkg50xhLaZmc0CP+FXGOeNTMwPa", - "jND8Ei+ItN+vH3xCDfvlY1P8a/2I01mmI3HJcYyaZZqYvxBkg4Jzh9ZPYTl5SF4IHOMg7Rr13/Cr7OOU", - "R+PF8uNNH6xjo3XjnmshIcLFnFgOydNCFAthdsLbUeA+Wg3hjvnwCHPHuvEub+l2K+gGjupBN7AkDLpB", - "Thnz0WKInxD4oBs4QLyHvCdUz0Z8IpYd75uoLJdOz8Oc1CCplFEzEXAG0U6fvKzpLkc3TNDHCkiUgas5", - "sXSQ1JX5UBt8pFTPkDFxIOPTfj2l31ywjSKxMKyvMcJ13YNtfB7lTwG/lhnSyvrpitAyGdzKW2fqYsJi", - "aDOxhGkWU0nw+XYgq0USM/6+zexqkYxFzEJiBjQN0kTEsbi8MD+pfyAuO62wMwMuyrxHw8BY4FzWy25I", - "Y90ShX8YLHcaefTQWINdO34X74G1cSG959pPWQwkwXqGN5xdVRi9XgJzf3+w6thkxaS1A5N6Gd/+fc/B", - "yIaY27Gsz1JYg7Sqoi/Jbws2DvSZ0sa2uiI3UnmYdCBJ9SI/Usrt5c7NDORBMaGv4uJrJwEGj7/GMcSb", - "tecO/yE1olWfJF9kozeytKcrk33eIqzRUTOatSrIXTWtx6eNshGle/bg3ls0suZKq71bivrFJdqnWbMy", - "4AbXWFdZlFJybD6vvMe6SVGuSKvZArIKZhVIVu+NdUi/8M4vU/ll388kmYtBN2eurftj9G2vYIk8gDU+", - "06VkeNDsCGQJa0hQWIRls7M+Tj6mV8UKqJypIo1af4tHmdvFav+dPnmVV2KxST4FglH3Wfb8QW/7y9A5", - "Vy1vxrrb0XnI4xU8p3/WaLRVstVgznKN7voL2EZ1QZhJphenxiBYNhwDlSAPMsuGaCkQCfy6XBxPb66v", - "sSRv4qkWfwYcJAvJwckIuSShnE7Nlp0dk5hNIFyEMbjk+1LYgDeAXh6OevbUMM/VYe6WaSRIXmB9cDLC", - "2k6p7LqD/n4f73GJFDhNWTAM7vX3sHrVkAFR3K3cfXOOjxFENGWjyJncI/eMIa5KBVf2+f3BwNbece2U", - "Ky3LL3f/ULYexBpYVLZt7LCr9F+OUZdS2rkzILGkGwyn58hcd4P7g70bAbexfNIHwhtOMz0Tkn2EyCz6", - "4IYU+axFR1yD5DQmCuQcpCsorLJwMPy9zry/v71+2w1UliRULnLS+emWCuXhgur17MDKGCj9i4gWXw1f", - "3w3w67pAG+11vcSEX2+fc95bprkrSy9JZllsC7v9C42KY7WOK0ctjgtrte/fiunvD+7f/qKVKxNFoSwR", - "9pDSAvH49oE4FHwSs1CTXg6LuxVMaGxv4dQZ5K6og1cOakJzvCZ4ylxeaTbT5aZil+a9StYajUZHk+1Y", - "j2YblRuYkQKryiWFn5ZkE+scMRUa57LKLb2QppXGLaqU0yoXfWLRtfWVYrC54joPHeH3hclJqaQJaJAK", - "YVpxT5+UTU2Y+SHPs2CYa4PIujnpVmjY9CXfLnHs/ZUXljLetA1bUIpHDYX4DRVh47C2cmvvLnHzm2IX", - "81tK112/hnsG+vtizcH2vKD8GtS3ZPO7wlHPQOciUpDNaMFZcX1nFXu5Cz63uNFuBQ/ipyb6tFJtAbWH", - "hCVadigJZxC+twjhQeH6MHJkH9mGH2BvKd3A+jvwf5r7FoFjSat1weLInRzfXqxYazfVKlTc/2oQOAbz", - "EBmLusb5tQ97fE3Vgoc73yJm/M+OCps3J++QJJ1kcYztG9y1n/KuVlWf7n4y/kELPzmXtrW+yJtXz3vA", - "QxFB5IoPVzsk+dWMr+st2w2zqPxkkzbxFZIqZ4zVzugX7L89OSi77f15/6mrOf3z/lNbdfrnewdl073b", - "YZbBtlTztr3XO8x8xnlldaKharIXKDZ5e8VTW3H43E21m7h8BYA/vb42Xl+VXGsdv+LS4C26fvWGnFs+", - "JyiYzUdt/CmvZPzBXL7tpp4cR9ojUqzMqOXiXXEftpx0959sS6y7JHqu4oAVHFfVvy1zqKVArvUOctYd", - "HXXd1TZ7IS2VMGFX28uo5nBs3Ut0624/nXqQjNk0E5mq3j/Cm4ygyu5BNQV81/zX0jyv9GC/Yy4dbNN0", - "bN1B/cn3t+Q6NzfUKm97LLLJec6f2o7zXB7VtPeecwh/es+tvOcKudZ7z8XFntt0n+sd0bfuP+f85iO4", - "q6v8ET3oO+aVUu5y3JXD3pqOa+2glpeF19v+sqHv1g/6i8W375fmTSLuYg4Jb2Zir/vcEyxtzWpX8Hvj", - "h8F2dd/2XcC7zGLPqk1Z/M4WKqLdWEyrblfzCrEEmpT9K4h5mlBFThGw3ilwTZ7MDVb9c553cHmnRCZD", - "eEcKRiVaEAUxhNo1+o4FNrJWOD/eeX1H0/Rd0aloZ0ieYXlnhbp28Y4CyWhMQsGViO3d0XfzJHk3XK4T", - "Pzs+xkH4zMxWhL8bFs23CxlT5qlzfs5fgc4kV4hFTJUmL0jMOCjSMRsuRRxDRMYL8s7Qs4LfDraDMTPa", - "NiTx4pybEYxnoByWjE8Jh0s3IZuQd/ZSHt4/eGc7w6yU+udml76R5HdX3+e2uGhBJBLOdtcBbNeK6+L9", - "9nJh10q2XKq4RbE38F53+rSc6EKaeklKJxpbZTBt+ENk2ran9QFiKe8HZeUVn+VOulNiOb3ByjRN27Kv", - "AxO5eJ4ka3iYdGbll0pHItN/VToCaZuuOe5exdykQ0P7h6bvbYuwWmMae6PZRyqLoZ9UgW2EmF+Etn/N", - "kyToBg4ez8XmFpZEw5XeBaNWepasdZ3anHA5HjM7gwNJ5/T0yc5Pm9HSLUGS1ZW9I6DHcrgb9XhTzRu8", - "vbIP/PCeS9564Buz4faPIipQMGyKwqPxwr0Jo+indafuBOBGlpihvXN4eWUk/22ljLhWED+8jJT88YNL", - "SSgkdq1UeZeiu1O8VYk4KuLewQYyZWOWbh71nh0f76wSGtv4cKXIyJ/hsKuj/OFtCvbUuXvSYpuk0QKB", - "dclCIxB6ZYyex6yM23YZJtSwb9agyy0TsPeMWigNiQ3YJ1mMF9uwah1fZDXJx9lagS6+2sqwv309Y6Wv", - "yjkfw8TYwxSkWdsMN/NXYg9fWHuqaSG+J1YGv4+4FrsoYChH9SqqLTWCzxso+GKnoufDZ4P0FAPVem8f", - "RTr4DkMEc65IbD7srI10beOfm8W7t6nhitZWvlutlmcLZv4RNNyoodbyNm13Tq09g6qw5PoHN9qn1kS6", - "zsyL9KeVdz3jfvrEd9InxoOeApvOVNIQLa5yXQH9/q9rfLn7yX4YbTou1DScneXtp74PU+q61WxaJkfw", - "TgilwykC98rUrcukKBoK3dFrG/jaYocCpk6qB59+K2Ablf1o3P31a1yqdLxRhctWZat4HfH3IlvbtnwO", - "hrxcu0qPuyLmltNyTLRohLaVrs4rK/1cg+et1Pk51XKDKr8cg58FUS1q/CrEyhW8r32gIhRPcu3jfXKa", - "pamQWhF9KbCrq8KT1X+evnxBxiJaDEkxjhPb5NQxnOtO6V5gChF2CDRjj7F2lkp8IVBSmSAfmUropSLN", - "Ymy3jRcoHI2tsaJEU9mffiRUhjM2B09qo/r2+1stVmwq8m6Q5OjtGvSwJ2l90ubLYQtY6vtRx9Hmc9z7", - "8PDt47OixWQ+RaVP65hxKhdtm7Q2X/k/L8zqXXzj/zG9YkmWFO2Cn/1COu7VYfiWYHz3MZsUPAVXIUCk", - "8Bx+54adX5ebvrq98HRx3GoVa65NV1r4b1jBWraKM1uMr053TK6FIDGVU9j5Ye6JOVkrr4mNjhqXxO5g", - "7e08577Sz2hZbdsuwGjp999GpW0RfG63zvbs+/GJK9207uBlr3nhZq4q8P2+WHCwPZOw7cLeszucQ3kG", - "uUtdKerFCcyMPoZ5LkIakwjmEIsUe5vbZ4NukMnYdWoe7tp3f8+E0vjmwOD67fX/BwAA//+WhsS+pZgA", - "AA==", + "H4sIAAAAAAAC/+w9DXMTObJ/RTXvrs55ZztO+Djw1dWrbAKsrwikCGTf3YYX5Jm2rWVGGiSNE0Plv79S", + "S5ovj+0JEEMOqrZqTUYjtVr9re6eT0EoklRw4FoFw0+BCmeQUPx5oDUNZ2cizhJ4BR8yUNr8OZUiBakZ", + "4KBEZFxfpFTPzL8iUKFkqWaCB8PghOoZuZyBBDLHWYiaiSyOyBgIvgdR0A3giiZpDMEw2E243o2opkE3", + "0IvU/Elpyfg0uO4GEmgkeLywy0xoFutgOKGxgm5t2WMzNaGKmFd6+E4+31iIGCgPrnHGDxmTEAXD38vb", + "eJsPFuM/INRm8YM5ZTEdx3AEcxbCMhrCTErg+iKSbA5yGRWH9nm8IGOR8YjYcaTDszgmbEK44LBTQQaf", + "s4gZTJghZulgqGUGDZiJEKYLFjWcwOGI2MdkdEQ6M7iqLrL/t/GjYPWUnCawPOmvWUJ5zyDXgOXnx7Hl", + "uZ/fb5qZiSTJLqZSZOnyzKOXx8dvCD4kPEvGIMszPtrP52NcwxSkmTAN2QWNIglKNe/fPyzDNhgMBkO6", + "PxwM+oMmKOfAIyFXotQ+bkbp3iCCNVO2QqmbfwmlL85GR6MDcihkKiTFd5dWqhF2GT3lfZXJpnoqTfR/", + "KIFqR/wrRUHz1l7iDxqTaSzGNI4XJOPsQ1ahmz4ZGRbQJJViziKIuoTiA8IUoZkWvSlwkFRDRCZSJETP", + "gJTOlnSgP+13ybnZbs8cbo/u9waD3uA8qJ5OfL83TbOgG6RUa5AGwP/7nfY+HvT+Peg9flv8vOj33v71", + "T00H2ZbgiJggnG6fHX8qXeKBLVNhHdD1FLrmkFcf3yih0xuf3uGIMPMekTABCdzsxMIfifA9yD4TuzEb", + "SyoXu3zK+NUwphqUru5m/diN+0PY1myMT83Wb7i1Gs8huXVicQkypApIDIZAVJdEbMq06hJqxDZVM1DE", + "6JS/k5ByQ7NKU6mJkAR4RC6ZnhGK46oYSBY9mrIes6AG3SChV8+BT43efHhviR4NMXbcj97b//Z/2vmf", + "RpKUWQwNxPhKZJrxKcHHZCIk0TOmSAED05Dge3+SMAmGwX/tFsbArrMEdj12sxjMWgnjI/vaXg4JlZIu", + "mk/NA7fu9JSmfI1csQzUsL8jr9kUcdJSES0IRbsF9/vs5M2uYcmUKqVnUmTTWflUfvfy4G0JF0vYrW6y", + "GwCfm3E0ipgVbScVcBuUaRnoJ3zOpOAJcE3mVDJDfBXl9Cl48fLoycWTF2fB0GAiykIn6U9evnodDIN7", + "g8GgBFeBz5nQaZxNLxT7CBUzKbj37JegDshBDj9JIBFygRhzc5DOrMoeEyETqknM3gM5N/OdB0aE7T2r", + "C659XGoJCbNFCnLOlGgwjn7Nn5njyxSUadUSR58c2c3gCSuQxnoKBZ+waWY1Yd9AwbPEnGoYiyzqlZbs", + "Bh8gwWMuAG0YtGytGNHXSipuEHc0ThmHlfKu+73IqEsh38eCRr29ryyiOGgz9/IWX9gH1cN0BAD5+RuN", + "XeEy4AYjUYXMrX1cnf63GegZyJJc8FOaP1n9jK8TD2EJIxWDu+w6LLGemIOM6aKB9fYGDbz3m2QaT9S9", + "RyKm3hPz8gbGM7NZznswWGa9QTPvNQDVANMvhqKcJGgDSQ7I3v6x+7nfVhrMwzRTFZD26+C8QPvfGFFz", + "JnVGY3J48qYiKBvdAetoNigL68eWFYQ7/5weqCah0UiG/jRD3dVKQdqZ0etcVhfNOtHKldU6cYPT3eSX", + "5HZ2mCktEsIi4JpNmPEyayY0qxrb1RObi7hnfHCUAC3FlAV32V9JFnYqeyirSPNiOl6e8tRQIONkyqZ0", + "vNBVFbk3WD76ZkT7+ZtQvcqXt+QB0YUWDS6qp5bRkcGjH9vGT0fP/0KLi/mENcycS6rCZ2CKhLXAgSNa", + "M0UvDZkLJHTJ5YwZ2aaIRwKK0LPjsunTP+c9YoAbkqN8gXzafEqjRAzTW4OgI2QJCMZRP48XO4SSs+M+", + "eZ1D+xdFONVsDj64MaOKjAE4ybhRKRDh+hiyKQOQKWOjMl1/3bkZNg6ygxaecM/6xJgMCeXkksUxeogJ", + "1SxE93LMavu5nAF3B2VWMgKAF4bFOS9Tlgso1UV+N0DJANEF1Q12NkyZ0rKQHErTJCWdV08P792797gu", + "pPcf9AZ7vb0Hr/cGw4H5799BN7DC1dgOVEPPiZ9thHqa5jqoygvnsJclyuGb0dG+0wjVdfTH+/Txo6sr", + "qh8/ZJfq8cdkLKd/3KNbCQY1i6ejItJAOpkC2fOiz1BVU3yh5MaviB98dljgRnEo+4f16sfu7rUZeRuR", + "q5pcxXARDul+RmypLgQrfLVaRr92aKjux/zV2AcF5ZdcABflCVlp2gKvT6S0jkgtiiuihnUO0jRmIXJ3", + "T6UQsgkLCZgZiHmBdBKULJBbSlW0jml0IZ0mb2RpTVncQDMl/8wu5kaSjhHLSRZrlsZgnyGVtjJWcOdH", + "OFOTb8s4B3kBHj03mCkBpRqdpZoP4/eSD0EtE8E4m04NSsqoO2YKlUOh0xjE0dD6XhtJFU+zAKyJvMp7", + "aEkNz4331YthDnGZCKxEMcAmQgLJ6cQeWmVXjM9pzKILxtOskSRWovJpJtFEsJMSOhaZRnPAHlh5EQz2", + "oZk3MRzXiKwldPwKNLYXOVVMKE115twuy17ifdWZFu83HoebpOkYRt7Nrh1A0iDFDo+PrIwOBdeUcZAk", + "AU3dtVEptIMRxqAb9AxNRRQSwYmYTP6+PtizworLGWSdHXBY9h5uzwZgU+cU1K0QJeI5RCShnE1AaeJG", + "lldWM7r/4OGQjsO9/XsRTO4/eNjv95uWAa7lIhWMNyz1JH/W7ih2bUCkV8zZV7MvO4dbCMK12cun4OTg", + "9a/BMNjNlNyNRUjjXTVmfFj6d/7P4gH+sP8cM94YvMtlbg1SFDFOIhiPw7KRMZwnlMW1G9Q0i2P396HZ", + "CYcwJ0iBwmajl9JsQr0wpBmzjxCRxisBTafGlrIU92Wx/27wIYMMLlKhmF19yZBxT4w3Ms5YHBF8o3yb", + "qu2fqr7t/srtl0xI9Bitx7lsSOZRGrOyGePWzLhmsXWaKis+uPfw0d8Gj/f2S8zNuH54P2gFSi52a5Ea", + "3LN7Wpg8KfDIalBDBvZXKPjccAX+A+EzcsYSTkWA+2dLh3Ep5HvGpxcRa6DO3+xDEjEJocYY8mYeCnZp", + "mm4mxWaHPpdp+fY3WJDu+qJBu3xzSf45rld19ZfTf374X3Xytz/2Pjw/O/vX/Nk/j16wf53FJy+/KMa8", + "/ubqm14/rY2uob9RuXZqSx7HVIcNhs9MKL0Ca+4J0YIk5uU+OaScjGF4znvkOdMgaTwk5wFNWd8hsx+K", + "5DwgHbiiobZvEcGJmYrMgEYgd8zLJzbObl7+5MMU1/U5ogWnCQuJdEgeU2XcWU5UNo5EQhnfOefn3M1F", + "/EYUhm/Mr4iENNWZBHMiJMxkvCBjSUPIb9OLxbvkE03T651zrmdUE7jS0uwgpVLn19x+BTxoB5UND7nh", + "EJE5jTNQJEREnfNcf0QGBDOJpnIKup+HZNHer4VoViCl0ScXUleizI8G3YZzJGacOciYKQ2c5PcPTCHx", + "ko6/I3g0qLD/o8GjzZHInIbWkB9S93JulSfKFvxhCRiXtsL4YqZ1ujlZCuWN5RHy6+vXJwYN5v+nxE9U", + "4CI/4o7g8YJQ4xeDsvE1HaNN4q5ldoKmGJo93ZYbem0Hm9ditXkfT3Bh8vr5KdEgE8at/O6EBp0T476D", + "jfQwpTJDioySg8PjJzv9FslhiNsc/jXn+DrfYS1g76+xloMY+EZxCWHw2yWjo64xpxyHFoYWRlCfCkli", + "K2AKvh6SNwqq9xl4VDbYY08yXhSJMlaqnwc7fsa0LimG5FVu39EclDz9piAGP2XBlzjtOf/NEIYN7y7N", + "3q3CioFr57840YbBXKqJi52gKl4tCtazfwPGkecFr98y3oy3y9eTZrFm0ijO/tYtkHs3s0BuJ5VhOTGB", + "qgvFaapmQq+++KDEjyFwxZRWy2kArUL1y2kQVYFvExzW3HR+zYQGmXGOtw71bXz1VIVvGcf//tIk1iY2", + "fGl2gjN52iUnNJF6We75K8TPzkfoBqzh+uRAKTblEJHRSZEmWTjIfvraFcDj/f7ew0f9vcGgvzdoEy5I", + "aLhm7eODw/aLD/atAzWk42EYDWHyBeEKd2xWQdH4ki4UOfcmxHlgbZaSsVIiSmdmtAqYLqd9fF6WR/0i", + "ZlMex03yNlpJM0wQWqGKTjF56OZ66MFKPbTxVI1zD5sNRctEpzjYv3Vxk0AakFBkccT/osnYcJ41HSFy", + "Fq4CbSnFjmWKvOHvubjk1a3beIrh3w8ZyAU5Oz6uRN8kTDLVLkdAaZGmK89BpDc6hv0N5sBGaEppOttI", + "zalLwpJ8/eqJOOVQgb9OslTXImRQprvVGSI4HYYDbFJPNDSUQdzsZJxpkufmGZI7NFqelGwHmw+BFvor", + "a0aYGVBnhOZJvMjNi7Uvn1BDfv7dFP+1/o3TWaYjccnxHTXLNDH/QpDNFpx5tn4KS8lD8kLgOw7SrhH/", + "NTvPDqc8Gi+Wh9dtwo6NHhh3QQsJES7m2HJInuasmDOzY96OAvfTSgh37YhXqjvWrXAmmTutoBs4rAfd", + "wKIw6AYeM+an3SH+QuCDbuAAabx0PqF6NuITsewI3ERkufC+d7tSs0mljJiJgDOIdvrkZUV2ObzhhUGs", + "gEQZuBwYiwdJXdoRtc5QSvUMCRNfZHzar14x1BdsI0gsDOtznnBdN7CNzaOaQ9KvZYa4sga3IrQITrfy", + "Hpi6mLAY2kwsYZrFVBIc3w5ktUhixt+3mV0tkrGIWUjMC3WFNBFxLC4vzCP1D9zLTqvdmRcuijhMTcFY", + "4FwUzh5Ibd1iC/8wu9ypxfVDow127fu7WE3XxoRsvGd/ymIgCeZXvOHsqkTo1ZSc+/uDVdc4KyatXOBU", + "0wr37zdc1GyIATiSbdIUViGtyjBMfM1lLcGAKW10q0u6I6XBpANJqhf+isvry52bKciDfMKmDJCvHZQY", + "PP4a1yJv1t6D/IfkrJZtEr/IRmtk6UxXBh8bk8JGR3Vv1oogV7Bb9U9raSxK92wiQWMSy5rCYFuhi/LF", + "Bf6nWT1T4QbFwKs0SsE5Nr5YVANvEpQrwnw2oa20sxIkq8/GGqRfWDnNlC+Z/kyUOR90cyTdmj9G3vZy", + "kvAOrLGZLiXDi2+HIItYg4JcIyyrnfV+8jG9yldA4UwVqdUe2H0UsWasPtjpk1c+M4xN/BQIRtVm2Wt2", + "etuXlHuqWj6MdTXm3uVpZDwnf9ZItFW8VSPOYo3u+jJ2I7ogzCTTi1OjECwZjoFKkAeZJUPUFLgJ/HOx", + "ON4mXV9jiuCkIXv9GXCQLCQHJyOkkoRyOjVHdnZMYjaBcBHG4C4DltwGrEh6eTjq2VtMH6vDWDLTiBCf", + "8H1wMsJcU6nsuoP+fh+r4UQKnKYsGAb3+nuYTWvQgFvcLVUQOsPHMCKqslHkVO6RG2OQq1LBlR2/PxjY", + "XECunXClRTro7h/K5qdYBYvCto0edpUHyz7qUojdGwMSU8zBULrfzHU3uD/YuxFwG9M5m0B4w2mmZ0Ky", + "jxCZRR/cECOfteiIa5Ccxr7UD9zAgoSD4e9V4v397fXbbqCyJKFy4VHXjLdUqAYqKBe5B5bHQOlfRLT4", + "avttqqO/rjK0kV7XS0T49c7Z094yzl2afIEyS2JbOO1faJRf83Vcemx+fVnJxf9WRH9/cP/2Fy2VcOSJ", + "u0TYS1MLxOPbB+JQ8EnMQk16HhZXW01obKuCqgRyV8TBKwc1oX5fE7z1LgrDzXReVexS3/FlrdKo9YXZ", + "jvaoN6O5gRrJd1UqmvipSTaRzhFToTEuy9TSC2laan+jCj4tU9EnFl1bWykGGyuu0tAR/j1XOSmVNAEN", + "UiFMK7odkKI1DDMPfJwF3VzrRFbVSbeEw7ot+XaJYu+vLKDKeF03bEEoHtUE4jcUhLXL2lIV4V2i5jf5", + "Kfqqqetus4R7Bvr7Is3B9qwgX5b1Lcn8rlDUM9CeRXK0GSk4y8uJVpGXKzi6xYN2KzRs/NR4n5arLaD2", + "krDYln2VhDMI39sN4UXhejdyZIdsww6wVVM30P4O/J/qvoXjWOBqnbM4cjfHt+crVpp2tXIV978aBI7A", + "GpCMSV1jX4Zir6+pWvBw51v4jP/ZXmG9kvMOcdJJFsfYTsKVIRW1Y2V5uvvJ2Act7GTPbWttkTevnveA", + "hyKCyCUfrjZIfKnI17WW7YHZrfwkkzb+FaLKE8ZqY/QLzt/eHBQ9C/+8/9TlnP55/6nNOv3zvYOideHt", + "EMtgW6J529brHSY+Y7yyKtJQNNmCjk3WXj5qKwafq5y7icmXA/jT6mtj9ZXRtdbwy4sYb9H0q7Y13fI9", + "QU5sTdjGRz6T8Qcz+bYbenIUaa9IMTOjEot3yX3YuNPVY9kWXXeJ9VzGAcsprix/W8ZQC4Zcax140h0d", + "dV2pnS2QSyVM2NX2Iqoejq1biW7d7YdTD5Ixm2YiU+X6I6ysBFV0M6oI4LtmvxbqeaUF+x1T6WCbqmPr", + "BupPur8l07l+oFZ422uRTcazH7Ud47m4qmlvPXsIf1rPraznErrWW895Yc9tms/VvvJbt589vTUh3OVV", + "/ogW9B2zSil3Me7SZW9FxrU2UIti4fW6v2gwvPWL/nzx7dulvmnFXYwhYWUm9t73lmCha1abgt8bPQy2", + "K/u2bwLeZRJ7Vm4S02xsoSDajcW0bHbVS4gl0KRoREHMaEIVOUXAeqfANXkyN7vqn3PfUeadEpkM4R3J", + "CdV+hCOGULvG47HAxtoK58ea13c0Td/lnZN2huQZpneWsGsX7yiQjMYkFFyJ2NaOvpsnybvhcp742fEx", + "voRjZjYj/N0wbwae85gyo875OX8FOpNc4S5iqjR5QWLGQZGOOXAp4hgiMl6Qdwafpf3tYHsaM6NtixIv", + "zrl5g/EMlNsl41PC4dJNyCbknS3Kw/qDd7ZTzUquf25O6Rtxfnd1PbfdixZEIuJstx/A9rG4Lta3Fwu7", + "1rbFUnkVxd6gsdzp03KgC3HaiFI60dgqg2lDHyLTtl1uEyAW882grCzxWe7sOyWW0mukTNO0Lfk6MJGK", + "50myhoZJp+jnQpSORKb/qnQE0jaBc9S9irhJh4b2H5q+ty3LKh1mbEVzE6rsDptRFdjGjL4Q2v5rniSB", + "bXeT0KbC5haaRMOV3gUjVnoWrVWZWp9w2R8zJ4Mvks7p6ZOdnzqjpVmCKKsKe4fABs3hKuqxUq3ReXtl", + "B/zwlotvPfCNyXD7VxElKBg2ReHReOG+zJH397pTNQF4kMXOUN+5fTXyiH+2kkdcK4gfnkcK+vjBuSQU", + "ErtoKt+l6O4kb5U8jhK7d7CBTNGYpeu93rPj451VTGMbMa5kGfnTHXZ5lD+8TsGeOnePW2yTNJpvYF2w", + "0DCEXumje5+Vcdsuw7ga9ksfdLllAvaeUQulIbEO+ySLsbANs9bxw1oT/57NFejip7YM+dvPRZb6qpzz", + "MUyMPkxBmrXN62b+ku/R5Naeapqz74nlwe/Dr8UuCujKUb0Ka0uN6X0DhSbfKe/58NkgPUVHtdrbR5EO", + "flMRwZwrEpsfO2s9Xdv452b+7m1KuLy1VVNVq6XZnJh/BAk3qok136btzom1Z1BmFi9/8KCbxJpI16l5", + "kf7U8q5n3E+b+E7axHjRk++mM5U0RI2rXFfAZvvXNb7c/WR/jDZdF2oazs58+6nvQ5W6bjWblvEbvBNM", + "6fYUgfuE69Z5UuQNhe5o2QZ+RtltAUMn5YvPZi1gG5X9aNT99XNcyni8UYbLVnkr/zzy98Jb29Z8Dgaf", + "rl3Gx11hc0tpfida1FzbUlfnlZl+rsHzVvL8nGi5QZaf38HPhKgWOX4lZHkB39Q+UBGKN7l2eJ+cZmkq", + "pFZEXwrs6qrwZvWfpy9fkLGIFkOSv8eJbXLqCM51p3QfVIUIOwSad48xd5ZK/EBRUprAv5lK6KUizWJs", + "t40FFA7HVllRoqnsTz8SKsMZm0NDaKP8Nf5bTVasC/JukPjt7ZrtYU/S6qT1j9XmsFTPo7pHG89x3+ez", + "3yXJW0z6KUp9WseMU7lo26T1pcuWIGGmtEj8vKMj0ql9ZNs1sPWf4d75Hru5HtMrlmRJ3i742S+k4z5l", + "hl8txm8xs0lOU3AVAkQK7+F3btj5dbnpqzuLhi6OW81i9dJ0pYb/hhmsRas4c8T4KXdH5FoIElM5hZ0f", + "pk7M8VpRJjY6qhWJ3cHc27mnvsLOaJlt287BaGn330ambe58bjfP9uz7sYlL3bTuYLHXPDczVyX4fl8k", + "ONieSth2Yu/ZHY6hPANvUpeSenECM2MTwTwXIY1JBHOIRYq9ze3YoBtkMnadmoe79lvkM6E0fskwuH57", + "/f8BAAD//+6fgWjrmQAA", } // GetSwagger returns the content of the embedded swagger specification file diff --git a/lib/providers/providers.go b/lib/providers/providers.go index ecbeb708..82c9606c 100644 --- a/lib/providers/providers.go +++ b/lib/providers/providers.go @@ -9,6 +9,7 @@ import ( "github.com/c2h5oh/datasize" "github.com/onkernel/hypeman/cmd/api/config" "github.com/onkernel/hypeman/lib/devices" + "github.com/onkernel/hypeman/lib/hypervisor" "github.com/onkernel/hypeman/lib/images" "github.com/onkernel/hypeman/lib/ingress" "github.com/onkernel/hypeman/lib/instances" @@ -114,7 +115,8 @@ func ProvideInstanceManager(p *paths.Paths, cfg *config.Config, imageManager ima meter := otel.GetMeterProvider().Meter("hypeman") tracer := otel.GetTracerProvider().Tracer("hypeman") - return instances.NewManager(p, imageManager, systemManager, networkManager, deviceManager, volumeManager, limits, meter, tracer), nil + defaultHypervisor := hypervisor.Type(cfg.DefaultHypervisor) + return instances.NewManager(p, imageManager, systemManager, networkManager, deviceManager, volumeManager, limits, defaultHypervisor, meter, tracer), nil } // ProvideVolumeManager provides the volume manager diff --git a/openapi.yaml b/openapi.yaml index 7b25f121..c5d48dbe 100644 --- a/openapi.yaml +++ b/openapi.yaml @@ -160,6 +160,11 @@ components: description: Volumes to attach to the instance at creation time items: $ref: "#/components/schemas/VolumeMount" + hypervisor: + type: string + enum: [cloud-hypervisor, qemu] + description: Hypervisor to use for this instance. Defaults to server configuration. + example: cloud-hypervisor # Future: port_mappings, timeout_seconds Instance: @@ -254,6 +259,11 @@ components: type: boolean description: Whether a snapshot exists for this instance example: false + hypervisor: + type: string + enum: [cloud-hypervisor, qemu] + description: Hypervisor running this instance + example: cloud-hypervisor PathInfo: type: object