diff --git a/cmd/api/config/config.go b/cmd/api/config/config.go index 6d3026e9..12b22318 100644 --- a/cmd/api/config/config.go +++ b/cmd/api/config/config.go @@ -208,7 +208,7 @@ func Load() *Config { // Build system configuration MaxConcurrentSourceBuilds: getEnvInt("MAX_CONCURRENT_SOURCE_BUILDS", 2), - BuilderImage: getEnv("BUILDER_IMAGE", ""), + BuilderImage: getEnv("BUILDER_IMAGE", "hypeman/builder:latest"), RegistryURL: getEnv("REGISTRY_URL", "localhost:8080"), RegistryInsecure: getEnvBool("REGISTRY_INSECURE", false), RegistryCACertFile: getEnv("REGISTRY_CA_CERT_FILE", ""), // Path to CA cert for registry TLS diff --git a/lib/builds/builder_agent/main.go b/lib/builds/builder_agent/main.go index a74b2939..55bf202f 100644 --- a/lib/builds/builder_agent/main.go +++ b/lib/builds/builder_agent/main.go @@ -20,7 +20,6 @@ import ( "io" "log" "net" - "net/http" "os" "os/exec" "path/filepath" @@ -63,13 +62,12 @@ type SecretRef struct { // BuildResult is sent back to the host type BuildResult struct { - Success bool `json:"success"` - ImageDigest string `json:"image_digest,omitempty"` - Error string `json:"error,omitempty"` - Logs string `json:"logs,omitempty"` - Provenance BuildProvenance `json:"provenance"` - DurationMS int64 `json:"duration_ms"` - ErofsDiskPath string `json:"erofs_disk_path,omitempty"` + Success bool `json:"success"` + ImageDigest string `json:"image_digest,omitempty"` + Error string `json:"error,omitempty"` + Logs string `json:"logs,omitempty"` + Provenance BuildProvenance `json:"provenance"` + DurationMS int64 `json:"duration_ms"` } // BuildProvenance records build inputs @@ -543,19 +541,12 @@ func runBuildProcess() { log.Printf("=== Build Complete: %s ===", digest) provenance.Timestamp = time.Now() - // Try to create erofs disk by pulling the image from the registry. - // After the buildctl push, the image is in the registry. We pull the layers - // with curl, extract them, and run mkfs.erofs. This avoids the slow host-side - // umoci unpack step entirely. - erofsDiskPath := createErofsFromRegistry(config, digest) - setResult(BuildResult{ - Success: true, - ImageDigest: digest, - Logs: logWriter.String(), - Provenance: provenance, - DurationMS: duration, - ErofsDiskPath: erofsDiskPath, + Success: true, + ImageDigest: digest, + Logs: logWriter.String(), + Provenance: provenance, + DurationMS: duration, }) } @@ -1009,272 +1000,3 @@ func getBuildkitVersion() string { return strings.TrimSpace(string(out)) } -// createErofsFromRegistry pulls the image from the local registry, extracts layers, -// and creates an erofs disk. Returns the relative path on the source volume, or "" -// if any step fails (graceful fallback to host-side pipeline). -func createErofsFromRegistry(config *BuildConfig, digest string) string { - // Check if mkfs.erofs is available - if _, err := exec.LookPath("mkfs.erofs"); err != nil { - log.Printf("mkfs.erofs not available, skipping in-VM erofs creation") - return "" - } - - log.Println("=== Creating erofs disk from registry image ===") - start := time.Now() - - // Build the registry base URL - registryHost := config.RegistryURL - if strings.HasPrefix(registryHost, "https://") { - registryHost = strings.TrimPrefix(registryHost, "https://") - } else if strings.HasPrefix(registryHost, "http://") { - registryHost = strings.TrimPrefix(registryHost, "http://") - } - - scheme := "https" - if config.RegistryInsecure { - scheme = "http" - } - baseURL := fmt.Sprintf("%s://%s", scheme, registryHost) - repo := fmt.Sprintf("builds/%s", config.JobID) - - // Create a tmpfs-backed temp dir for layer extraction (source volume may be small). - // The extracted rootfs (~100-500MB) lives briefly in RAM, then mkfs.erofs compresses - // it down to a much smaller erofs file on the source volume. - exportDir := "/tmp/erofs-extract" - os.MkdirAll(exportDir, 0755) - mountCmd := exec.Command("mount", "-t", "tmpfs", "-o", "size=2G", "tmpfs", exportDir) - if out, err := mountCmd.CombinedOutput(); err != nil { - log.Printf("Warning: erofs creation failed (mount tmpfs): %v: %s", err, out) - return "" - } - defer func() { - exec.Command("umount", exportDir).Run() - os.Remove(exportDir) - }() - - // Create HTTP client (skip TLS verification for insecure registries) - client := &http.Client{Timeout: 120 * time.Second} - if config.RegistryInsecure { - client.Transport = &http.Transport{ - TLSClientConfig: nil, // default, no custom TLS - } - } - - // Get auth token - authHeader := "" - if config.RegistryToken != "" { - authHeader = "Bearer " + config.RegistryToken - } - - // Fetch manifest - manifestURL := fmt.Sprintf("%s/v2/%s/manifests/latest", baseURL, repo) - req, err := http.NewRequest("GET", manifestURL, nil) - if err != nil { - log.Printf("Warning: erofs creation failed (create manifest request): %v", err) - return "" - } - req.Header.Set("Accept", "application/vnd.oci.image.manifest.v1+json, application/vnd.docker.distribution.manifest.v2+json") - if authHeader != "" { - req.Header.Set("Authorization", authHeader) - } - - resp, err := client.Do(req) - if err != nil { - log.Printf("Warning: erofs creation failed (fetch manifest): %v", err) - return "" - } - defer resp.Body.Close() - if resp.StatusCode != 200 { - log.Printf("Warning: erofs creation failed (manifest status %d)", resp.StatusCode) - return "" - } - - // Parse the manifest - could be a direct manifest or an OCI index - var rawManifest json.RawMessage - if err := json.NewDecoder(resp.Body).Decode(&rawManifest); err != nil { - log.Printf("Warning: erofs creation failed (decode manifest): %v", err) - return "" - } - resp.Body.Close() - - // Check if it's a manifest list/index by looking for "manifests" key - var index struct { - MediaType string `json:"mediaType"` - Manifests []struct { - MediaType string `json:"mediaType"` - Digest string `json:"digest"` - Platform *struct { - Architecture string `json:"architecture"` - OS string `json:"os"` - } `json:"platform"` - } `json:"manifests"` - } - - type layerInfo struct { - MediaType string `json:"mediaType"` - Digest string `json:"digest"` - Size int64 `json:"size"` - } - var layers []layerInfo - - if err := json.Unmarshal(rawManifest, &index); err == nil && len(index.Manifests) > 0 { - // It's an OCI index - find the amd64/linux manifest - log.Printf("Manifest is an index with %d entries, resolving platform manifest...", len(index.Manifests)) - var platformDigest string - for _, m := range index.Manifests { - if m.Platform != nil && m.Platform.Architecture == "amd64" && m.Platform.OS == "linux" { - platformDigest = m.Digest - break - } - } - if platformDigest == "" && len(index.Manifests) > 0 { - // Fall back to first manifest - platformDigest = index.Manifests[0].Digest - } - if platformDigest == "" { - log.Printf("Warning: erofs creation failed: no suitable platform manifest found") - return "" - } - - // Fetch the platform-specific manifest - platURL := fmt.Sprintf("%s/v2/%s/manifests/%s", baseURL, repo, platformDigest) - platReq, err := http.NewRequest("GET", platURL, nil) - if err != nil { - log.Printf("Warning: erofs creation failed (create platform manifest request): %v", err) - return "" - } - platReq.Header.Set("Accept", "application/vnd.oci.image.manifest.v1+json, application/vnd.docker.distribution.manifest.v2+json") - if authHeader != "" { - platReq.Header.Set("Authorization", authHeader) - } - platResp, err := client.Do(platReq) - if err != nil { - log.Printf("Warning: erofs creation failed (fetch platform manifest): %v", err) - return "" - } - defer platResp.Body.Close() - if platResp.StatusCode != 200 { - log.Printf("Warning: erofs creation failed (platform manifest status %d)", platResp.StatusCode) - return "" - } - - var platManifest struct { - Layers []layerInfo `json:"layers"` - } - if err := json.NewDecoder(platResp.Body).Decode(&platManifest); err != nil { - log.Printf("Warning: erofs creation failed (decode platform manifest): %v", err) - return "" - } - layers = platManifest.Layers - } else { - // It's a direct manifest - var directManifest struct { - Layers []layerInfo `json:"layers"` - } - if err := json.Unmarshal(rawManifest, &directManifest); err != nil { - log.Printf("Warning: erofs creation failed (decode direct manifest): %v", err) - return "" - } - layers = directManifest.Layers - } - - log.Printf("Image has %d layers, extracting...", len(layers)) - - // Download and extract each layer - for i, layer := range layers { - blobURL := fmt.Sprintf("%s/v2/%s/blobs/%s", baseURL, repo, layer.Digest) - blobReq, err := http.NewRequest("GET", blobURL, nil) - if err != nil { - log.Printf("Warning: erofs creation failed (create blob request for layer %d): %v", i, err) - return "" - } - if authHeader != "" { - blobReq.Header.Set("Authorization", authHeader) - } - - blobResp, err := client.Do(blobReq) - if err != nil { - log.Printf("Warning: erofs creation failed (fetch layer %d): %v", i, err) - return "" - } - if blobResp.StatusCode != 200 { - blobResp.Body.Close() - log.Printf("Warning: erofs creation failed (layer %d status %d)", i, blobResp.StatusCode) - return "" - } - - // Determine decompression based on media type - tarFlags := "-xf" - if strings.Contains(layer.MediaType, "gzip") { - tarFlags = "-xzf" - } - // For zstd, use zstd pipe - if strings.Contains(layer.MediaType, "zstd") { - // Use zstd decompression via pipe - tarCmd := exec.Command("sh", "-c", fmt.Sprintf("zstd -d | tar -xf - -C %s", exportDir)) - tarCmd.Stdin = blobResp.Body - if out, err := tarCmd.CombinedOutput(); err != nil { - blobResp.Body.Close() - log.Printf("Warning: erofs creation failed (extract zstd layer %d): %v: %s", i, err, out) - return "" - } - } else { - tarCmd := exec.Command("tar", tarFlags, "-", "-C", exportDir) - tarCmd.Stdin = blobResp.Body - if out, err := tarCmd.CombinedOutput(); err != nil { - blobResp.Body.Close() - log.Printf("Warning: erofs creation failed (extract layer %d): %v: %s", i, err, out) - return "" - } - } - blobResp.Body.Close() - log.Printf(" Layer %d/%d extracted (%d bytes)", i+1, len(layers), layer.Size) - - // Process OCI whiteout files for THIS layer before extracting the next. - // Whiteouts must be applied per-layer: a whiteout in layer N deletes files - // from layers 0..N-1, but must not affect files added by layers N+1..last. - filepath.Walk(exportDir, func(path string, info os.FileInfo, err error) error { - if err != nil { - return nil - } - name := info.Name() - if strings.HasPrefix(name, ".wh.") { - if name == ".wh..wh..opq" { - // Opaque whiteout: remove all siblings - dir := filepath.Dir(path) - entries, _ := os.ReadDir(dir) - for _, e := range entries { - if e.Name() != ".wh..wh..opq" { - os.RemoveAll(filepath.Join(dir, e.Name())) - } - } - } else { - // Regular whiteout: remove the target file - target := filepath.Join(filepath.Dir(path), strings.TrimPrefix(name, ".wh.")) - os.RemoveAll(target) - } - os.Remove(path) - } - return nil - }) - } - - // Create erofs disk - erofsDst := config.SourcePath + "/.hypeman-rootfs.erofs" - log.Println("Running mkfs.erofs...") - erofsCmd := exec.Command("mkfs.erofs", "-zlz4", erofsDst, exportDir) - if erofsOut, erofsErr := erofsCmd.CombinedOutput(); erofsErr != nil { - log.Printf("Warning: erofs creation failed (mkfs.erofs): %v: %s", erofsErr, erofsOut) - return "" - } - - // Sync to ensure the erofs file is flushed to the block device before - // the host tries to mount and read the source volume. - syncCmd := exec.Command("sync") - syncCmd.Run() - - elapsed := time.Since(start) - log.Printf("erofs disk created at %s in %v", erofsDst, elapsed) - return ".hypeman-rootfs.erofs" -} - diff --git a/lib/builds/images/generic/Dockerfile b/lib/builds/images/generic/Dockerfile index 2dc047e4..5f485f33 100644 --- a/lib/builds/images/generic/Dockerfile +++ b/lib/builds/images/generic/Dockerfile @@ -33,9 +33,7 @@ FROM alpine:3.21 RUN apk add --no-cache \ ca-certificates \ git \ - curl \ - erofs-utils \ - zstd + curl # Copy BuildKit binaries from official image COPY --from=buildkit /usr/bin/buildctl /usr/bin/buildctl diff --git a/lib/builds/manager.go b/lib/builds/manager.go index 0396f832..960b70ca 100644 --- a/lib/builds/manager.go +++ b/lib/builds/manager.go @@ -266,7 +266,7 @@ func (m *manager) buildBuilderFromDockerfile(ctx context.Context) (string, error localTag := fmt.Sprintf("hypeman-builder-tmp:%d", time.Now().Unix()) m.logger.Info("building builder image with Docker", "tag", localTag) - buildCmd := exec.CommandContext(ctx, "docker", "build", "--network=host", "-t", localTag, "-f", dockerfilePath, ".") + buildCmd := exec.CommandContext(ctx, "docker", "build", "-t", localTag, "-f", dockerfilePath, ".") buildCmd.Env = dockerEnv if output, err := buildCmd.CombinedOutput(); err != nil { return "", fmt.Errorf("docker build: %s: %w", string(output), err) @@ -358,11 +358,7 @@ func (m *manager) waitForBuilderImageReady(ctx context.Context, imageRef string) case images.StatusReady: return nil case images.StatusFailed: - errDetail := "unknown" - if img.Error != nil { - errDetail = *img.Error - } - return fmt.Errorf("image conversion failed: %s", errDetail) + return fmt.Errorf("image conversion failed") } } time.Sleep(pollInterval) @@ -592,12 +588,11 @@ func (m *manager) runBuild(ctx context.Context, id string, req CreateBuildReques registryHost := stripRegistryScheme(m.config.RegistryURL) imageRef := fmt.Sprintf("%s/builds/%s", registryHost, id) - // If the pre-built erofs disk was successfully registered, skip the slow - // image conversion pipeline. Otherwise, fall back to waiting for the - // existing push → unpack → convert pipeline. - if result.ErofsDiskPath != "" { - m.logger.Info("using pre-built erofs disk, skipping image conversion wait", "id", id) - } else if err := m.waitForImageReady(buildCtx, id); err != nil { + // Wait for image to be ready before reporting build as complete. + // This fixes the race condition (KERNEL-863) where build reports "ready" + // but image conversion hasn't finished yet. + // Use buildCtx to respect the build timeout during image wait. + if err := m.waitForImageReady(buildCtx, id); err != nil { // Recalculate duration to include image wait time duration = time.Since(start) durationMS = duration.Milliseconds() @@ -655,14 +650,6 @@ func (m *manager) executeBuild(ctx context.Context, id string, req CreateBuildRe } defer m.volumeManager.DeleteVolume(context.Background(), sourceVolID) - // Ensure the source volume has enough free space for in-VM erofs creation. - // The volume is sized to fit the source content, but the builder agent needs - // extra space to extract image layers and create the erofs disk (~200-500MB). - if err := m.ensureMinVolumeSize(sourceVolID, 512*1024*1024); err != nil { - m.logger.Warn("failed to resize source volume for erofs optimization", "error", err) - // Non-fatal: build will still work, just without in-VM erofs optimization - } - // Create config volume with build.json for the builder agent configVolID := fmt.Sprintf("build-config-%s", id) configVolPath, err := m.createBuildConfigVolume(id, configVolID) @@ -739,112 +726,9 @@ func (m *manager) executeBuild(ctx context.Context, id string, req CreateBuildRe return nil, fmt.Errorf("wait for result: %w", err) } - // If the builder VM created a pre-built erofs disk, extract it from the - // source volume and register it directly as a ready image. This skips the - // slow host-side umoci unpack + mkfs.erofs conversion pipeline. - if result != nil && result.Success && result.ErofsDiskPath != "" { - if err := m.extractPrebuiltErofs(ctx, id, sourceVolID, result); err != nil { - m.logger.Warn("prebuilt erofs extraction failed, using fallback", "id", id, "error", err) - result.ErofsDiskPath = "" // Clear so runBuild uses fallback - } - } - return result, nil } -// ensureMinVolumeSize expands the source volume's ext4 image to at least minBytes -// so the builder agent has enough space for erofs creation. -func (m *manager) ensureMinVolumeSize(volID string, minBytes int64) error { - volPath := m.paths.VolumeData(volID) - - info, err := os.Stat(volPath) - if err != nil { - return fmt.Errorf("stat volume: %w", err) - } - - if info.Size() >= minBytes { - return nil // Already large enough - } - - // Expand the sparse file - if err := os.Truncate(volPath, minBytes); err != nil { - return fmt.Errorf("truncate volume: %w", err) - } - - // Resize the ext4 filesystem to fill the expanded file - resizeCmd := exec.Command("resize2fs", volPath) - if output, err := resizeCmd.CombinedOutput(); err != nil { - return fmt.Errorf("resize2fs: %w: %s", err, output) - } - - m.logger.Info("source volume resized for erofs optimization", "vol", volID, "size_mb", minBytes/1024/1024) - return nil -} - -// extractPrebuiltErofs mounts the source volume, copies the pre-built erofs disk -// to a temp location, and registers it as a ready image via the image manager. -func (m *manager) extractPrebuiltErofs(ctx context.Context, id, sourceVolID string, result *BuildResult) error { - volPath := m.paths.VolumeData(sourceVolID) - - // Create a temp mount point - mountPoint, err := os.MkdirTemp("", "hypeman-erofs-extract-*") - if err != nil { - return fmt.Errorf("create temp mount point: %w", err) - } - defer os.RemoveAll(mountPoint) - - // Mount the source volume read-only - mountCmd := exec.Command("mount", "-o", "loop,ro", volPath, mountPoint) - if output, err := mountCmd.CombinedOutput(); err != nil { - return fmt.Errorf("mount source volume: %w: %s", err, output) - } - defer func() { - umountCmd := exec.Command("umount", mountPoint) - umountCmd.Run() - }() - - // Sanitize the path from the VM to prevent path traversal (defense in depth) - if filepath.IsAbs(result.ErofsDiskPath) || strings.Contains(result.ErofsDiskPath, "..") { - return fmt.Errorf("invalid erofs disk path from VM: %s", result.ErofsDiskPath) - } - - // Check that the erofs file exists on the volume - erofsSrc := filepath.Join(mountPoint, result.ErofsDiskPath) - if _, err := os.Stat(erofsSrc); err != nil { - return fmt.Errorf("erofs disk not found on volume: %w", err) - } - - // Copy the erofs file to a temp location (so we can unmount the volume) - tmpErofs, err := os.CreateTemp("", "hypeman-prebuilt-*.erofs") - if err != nil { - return fmt.Errorf("create temp erofs file: %w", err) - } - tmpErofsPath := tmpErofs.Name() - tmpErofs.Close() - defer os.Remove(tmpErofsPath) // Clean up if RegisterPrebuiltImage moves it - - srcData, err := os.ReadFile(erofsSrc) - if err != nil { - return fmt.Errorf("read erofs from volume: %w", err) - } - if err := os.WriteFile(tmpErofsPath, srcData, 0644); err != nil { - return fmt.Errorf("write temp erofs: %w", err) - } - - // Register the pre-built image - registryHost := stripRegistryScheme(m.config.RegistryURL) - repo := fmt.Sprintf("%s/builds/%s", registryHost, id) - imageName := repo + ":latest" - - m.logger.Info("registering pre-built erofs image", "id", id, "digest", result.ImageDigest) - if err := m.imageManager.RegisterPrebuiltImage(ctx, repo, result.ImageDigest, imageName, tmpErofsPath); err != nil { - return fmt.Errorf("register prebuilt image: %w", err) - } - - m.logger.Info("pre-built erofs image registered successfully", "id", id) - return nil -} - // waitForResult waits for the build result from the builder agent via vsock func (m *manager) waitForResult(ctx context.Context, buildID string, inst *instances.Instance) (*BuildResult, error) { // Wait a bit for the VM to start and the builder agent to listen on vsock @@ -1059,11 +943,7 @@ func (m *manager) waitForImageReady(ctx context.Context, id string) error { m.logger.Debug("image is ready", "id", id, "image_ref", imageRef, "attempts", attempt+1) return nil case images.StatusFailed: - errDetail := "unknown" - if img.Error != nil { - errDetail = *img.Error - } - return fmt.Errorf("image conversion failed: %s", errDetail) + return fmt.Errorf("image conversion failed") case images.StatusPending, images.StatusPulling, images.StatusConverting: // Still processing, continue polling } diff --git a/lib/builds/manager_test.go b/lib/builds/manager_test.go index 070b6ef2..74e148e4 100644 --- a/lib/builds/manager_test.go +++ b/lib/builds/manager_test.go @@ -271,15 +271,6 @@ func (m *mockImageManager) ImportLocalImage(ctx context.Context, repo, reference return img, nil } -func (m *mockImageManager) RegisterPrebuiltImage(ctx context.Context, repo, digest, name string, diskSrcPath string) error { - m.images[name] = &images.Image{ - Name: name, - Digest: digest, - Status: images.StatusReady, - } - return nil -} - func (m *mockImageManager) GetImage(ctx context.Context, name string) (*images.Image, error) { if m.getImageErr != nil { return nil, m.getImageErr diff --git a/lib/builds/types.go b/lib/builds/types.go index 7ab120d9..247154b9 100644 --- a/lib/builds/types.go +++ b/lib/builds/types.go @@ -202,10 +202,6 @@ type BuildResult struct { // DurationMS is the build duration in milliseconds DurationMS int64 `json:"duration_ms"` - - // ErofsDiskPath is the relative path to a pre-built erofs disk on the source volume. - // When set, the host can skip the slow umoci unpack + mkfs.erofs conversion pipeline. - ErofsDiskPath string `json:"erofs_disk_path,omitempty"` } // DefaultBuildPolicy returns the default build policy diff --git a/lib/images/disk.go b/lib/images/disk.go index f07765ff..c7be831a 100644 --- a/lib/images/disk.go +++ b/lib/images/disk.go @@ -14,15 +14,12 @@ type ExportFormat string const ( FormatExt4 ExportFormat = "ext4" // Read-only ext4 (app images, default) - FormatErofs ExportFormat = "erofs" // Read-only compressed with LZ4 (faster creation, smaller images) + FormatErofs ExportFormat = "erofs" // Read-only compressed (future: when kernel supports it) FormatCpio ExportFormat = "cpio" // Uncompressed archive (initrd, fast boot) ) -// DefaultImageFormat is the default export format for OCI images. -// erofs is used because app rootfs disks are read-only in the VM (mounted -// as the lower layer of an overlayfs). erofs with LZ4 compression produces -// smaller disk images and is faster to create than ext4. -const DefaultImageFormat = FormatErofs +// DefaultImageFormat is the default export format for OCI images +const DefaultImageFormat = FormatExt4 // ExportRootfs exports rootfs directory in specified format (public for system manager) func ExportRootfs(rootfsDir, outputPath string, format ExportFormat) (int64, error) { @@ -187,15 +184,8 @@ func convertToExt4(rootfsDir, diskPath string) (int64, error) { return stat.Size(), nil } -// convertToErofs converts a rootfs directory to an erofs disk image using mkfs.erofs. -// If mkfs.erofs is not installed, it falls back to ext4 with a warning. +// convertToErofs converts a rootfs directory to an erofs disk image using mkfs.erofs func convertToErofs(rootfsDir, diskPath string) (int64, error) { - // Check if mkfs.erofs is available - if _, err := exec.LookPath("mkfs.erofs"); err != nil { - fmt.Fprintf(os.Stderr, "Warning: mkfs.erofs not found, falling back to ext4\n") - return convertToExt4(rootfsDir, diskPath) - } - // Ensure parent directory exists if err := os.MkdirAll(filepath.Dir(diskPath), 0755); err != nil { return 0, fmt.Errorf("create disk parent dir: %w", err) diff --git a/lib/images/disk_test.go b/lib/images/disk_test.go deleted file mode 100644 index 616277c9..00000000 --- a/lib/images/disk_test.go +++ /dev/null @@ -1,111 +0,0 @@ -package images - -import ( - "os" - "path/filepath" - "testing" - "time" -) - -// TestExportRootfsFormats exercises ext4 and erofs export with a realistic -// rootfs directory and compares creation time and output size. -func TestExportRootfsFormats(t *testing.T) { - // Build a small but representative rootfs tree - rootfsDir := t.TempDir() - populateTestRootfs(t, rootfsDir) - - formats := []ExportFormat{FormatExt4, FormatErofs} - results := make(map[ExportFormat]struct { - size int64 - duration time.Duration - }) - - for _, fmt := range formats { - t.Run(string(fmt), func(t *testing.T) { - outPath := filepath.Join(t.TempDir(), "rootfs."+string(fmt)) - - start := time.Now() - size, err := ExportRootfs(rootfsDir, outPath, fmt) - elapsed := time.Since(start) - - if err != nil { - t.Fatalf("ExportRootfs(%s) failed: %v", fmt, err) - } - if size == 0 { - t.Fatalf("ExportRootfs(%s) returned zero size", fmt) - } - - // Verify file exists and matches reported size - stat, err := os.Stat(outPath) - if err != nil { - t.Fatalf("stat output: %v", err) - } - if stat.Size() != size { - t.Errorf("reported size %d != actual size %d", size, stat.Size()) - } - - // Verify sector alignment (4096 bytes) - if size%4096 != 0 { - t.Errorf("output size %d is not sector-aligned (4096)", size) - } - - t.Logf("%s: size=%d bytes (%.1f KB), time=%v", fmt, size, float64(size)/1024, elapsed) - results[fmt] = struct { - size int64 - duration time.Duration - }{size, elapsed} - }) - } - - // Log comparison if both ran - if ext4, ok := results[FormatExt4]; ok { - if erofs, ok := results[FormatErofs]; ok { - ratio := float64(erofs.size) / float64(ext4.size) * 100 - t.Logf("erofs is %.0f%% the size of ext4 (ext4=%d, erofs=%d)", - ratio, ext4.size, erofs.size) - } - } -} - -// populateTestRootfs creates a small rootfs structure that resembles a real -// container image (directories, binaries, text files, symlinks). -func populateTestRootfs(t *testing.T, dir string) { - t.Helper() - - dirs := []string{ - "bin", "etc", "usr/bin", "usr/lib", "var/log", "tmp", - "boot-node/app/dist", "boot-node/app/node_modules/.pnpm", - } - for _, d := range dirs { - if err := os.MkdirAll(filepath.Join(dir, d), 0755); err != nil { - t.Fatal(err) - } - } - - // Simulate typical files: configs, scripts, compiled JS - files := map[string]int{ - "etc/passwd": 256, - "etc/hostname": 16, - "bin/sh": 128 * 1024, // 128KB "binary" - "usr/bin/node": 512 * 1024, // 512KB "binary" - "boot-node/app/package.json": 1024, - "boot-node/app/dist/index.js": 32 * 1024, - "boot-node/app/node_modules/.pnpm/a": 64 * 1024, - "boot-node/app/node_modules/.pnpm/b": 64 * 1024, - "boot-node/app/node_modules/.pnpm/c": 64 * 1024, - "var/log/boot.log": 0, - } - for name, size := range files { - data := make([]byte, size) - // Fill with non-zero data so compression has something to work with - for i := range data { - data[i] = byte(i % 251) - } - if err := os.WriteFile(filepath.Join(dir, name), data, 0644); err != nil { - t.Fatal(err) - } - } - - // Symlink - os.Symlink("sh", filepath.Join(dir, "bin/bash")) -} diff --git a/lib/images/manager.go b/lib/images/manager.go index d05f5dca..c423bf34 100644 --- a/lib/images/manager.go +++ b/lib/images/manager.go @@ -29,12 +29,6 @@ type Manager interface { // ImportLocalImage imports an image that was pushed to the local OCI cache. // Unlike CreateImage, it does not resolve from a remote registry. ImportLocalImage(ctx context.Context, repo, reference, digest string) (*Image, error) - // RegisterPrebuiltImage registers a pre-built erofs disk as a ready image. - // This skips the slow umoci unpack + mkfs.erofs conversion pipeline by using - // a disk that was already built inside the builder VM. - // The diskSrcPath is a temporary file that will be moved/copied to the image store. - // Metadata is extracted from the OCI cache (populated asynchronously by registry push). - RegisterPrebuiltImage(ctx context.Context, repo, digest, name string, diskSrcPath string) error GetImage(ctx context.Context, name string) (*Image, error) DeleteImage(ctx context.Context, name string) error RecoverInterruptedBuilds() @@ -192,110 +186,6 @@ func (m *manager) ImportLocalImage(ctx context.Context, repo, reference, digest return m.createAndQueueImage(ref) } -// RegisterPrebuiltImage registers a pre-built erofs disk as a ready image, -// skipping the slow umoci unpack + mkfs conversion pipeline. -func (m *manager) RegisterPrebuiltImage(ctx context.Context, repo, digestStr, imageName string, diskSrcPath string) error { - digestHex := strings.TrimPrefix(digestStr, "sha256:") - - m.createMu.Lock() - defer m.createMu.Unlock() - - // Check if this digest already exists and is ready (another path completed first) - if meta, err := readMetadata(m.paths, repo, digestHex); err == nil { - if meta.Status == StatusReady { - return nil // Already done - } - } - - // Create the digest directory - dir := digestDir(m.paths, repo, digestHex) - if err := os.MkdirAll(dir, 0755); err != nil { - return fmt.Errorf("create digest directory: %w", err) - } - - // Move/copy the pre-built erofs disk to the image store path - dstPath := digestPath(m.paths, repo, digestHex) - if err := os.Rename(diskSrcPath, dstPath); err != nil { - // Rename failed (cross-device), fall back to copy - srcData, readErr := os.ReadFile(diskSrcPath) - if readErr != nil { - return fmt.Errorf("read prebuilt disk: %w", readErr) - } - if writeErr := os.WriteFile(dstPath, srcData, 0644); writeErr != nil { - return fmt.Errorf("write prebuilt disk: %w", writeErr) - } - } - - // Align to sector boundary (required by hypervisors, same as convertToErofs in disk.go) - diskInfo, err := os.Stat(dstPath) - if err != nil { - return fmt.Errorf("stat prebuilt disk: %w", err) - } - if diskInfo.Size()%sectorSize != 0 { - alignedSize := alignToSector(diskInfo.Size()) - if err := os.Truncate(dstPath, alignedSize); err != nil { - return fmt.Errorf("align prebuilt disk to sector boundary: %w", err) - } - diskInfo, err = os.Stat(dstPath) - if err != nil { - return fmt.Errorf("stat aligned prebuilt disk: %w", err) - } - } - - // Extract metadata from OCI cache with retry. - // The registry push handler populates the OCI cache asynchronously, - // so it may not be available immediately after the build completes. - layoutTag := digestToLayoutTag(digestStr) - var containerMeta *containerMetadata - for attempt := 0; attempt < 20; attempt++ { - containerMeta, err = m.ociClient.extractOCIMetadata(layoutTag) - if err == nil { - break - } - select { - case <-ctx.Done(): - return fmt.Errorf("context cancelled waiting for OCI metadata: %w", ctx.Err()) - case <-time.After(500 * time.Millisecond): - } - } - if err != nil { - // Metadata not available - write with empty metadata rather than failing. - // The image is still bootable, just without entrypoint/cmd/env info. - containerMeta = &containerMetadata{ - Env: make(map[string]string), - } - fmt.Fprintf(os.Stderr, "Warning: could not extract OCI metadata for prebuilt image %s: %v\n", imageName, err) - } - - // Write metadata with status=ready - meta := &imageMetadata{ - Name: imageName, - Digest: digestStr, - Status: StatusReady, - SizeBytes: diskInfo.Size(), - Entrypoint: containerMeta.Entrypoint, - Cmd: containerMeta.Cmd, - Env: containerMeta.Env, - WorkingDir: containerMeta.WorkingDir, - CreatedAt: time.Now(), - } - - if err := writeMetadata(m.paths, repo, digestHex, meta); err != nil { - return fmt.Errorf("write metadata: %w", err) - } - - // Create tag symlink (e.g., "latest" -> digest hex) - // Parse the image name to extract the tag - normalized, parseErr := ParseNormalizedRef(imageName) - if parseErr == nil && normalized.Tag() != "" { - if symlinkErr := createTagSymlink(m.paths, repo, normalized.Tag(), digestHex); symlinkErr != nil { - fmt.Fprintf(os.Stderr, "Warning: failed to create tag symlink for prebuilt image: %v\n", symlinkErr) - } - } - - return nil -} - func (m *manager) createAndQueueImage(ref *ResolvedRef) (*Image, error) { meta := &imageMetadata{ Name: ref.String(), @@ -328,7 +218,6 @@ func (m *manager) buildImage(ctx context.Context, ref *ResolvedRef) { tempDir := filepath.Join(buildDir, "rootfs") if err := os.MkdirAll(buildDir, 0755); err != nil { - fmt.Fprintf(os.Stderr, "image build failed: create build dir: %v\n", err) m.updateStatusByDigest(ref, StatusFailed, fmt.Errorf("create build dir: %w", err)) m.recordBuildMetrics(ctx, buildStart, "failed") return @@ -344,7 +233,6 @@ func (m *manager) buildImage(ctx context.Context, ref *ResolvedRef) { // Pull the image (digest is always known, uses cache if already pulled) result, err := m.ociClient.pullAndExport(ctx, ref.String(), ref.Digest(), tempDir) if err != nil { - fmt.Fprintf(os.Stderr, "image build failed: pull and export %s: %v\n", ref.String(), err) m.updateStatusByDigest(ref, StatusFailed, fmt.Errorf("pull and export: %w", err)) m.recordPullMetrics(ctx, "failed") m.recordBuildMetrics(ctx, buildStart, "failed") @@ -366,9 +254,9 @@ func (m *manager) buildImage(ctx context.Context, ref *ResolvedRef) { m.updateStatusByDigest(ref, StatusConverting, nil) diskPath := digestPath(m.paths, ref.Repository(), ref.DigestHex()) + // Use default image format (ext4 for now, easy to switch to erofs later) diskSize, err := ExportRootfs(tempDir, diskPath, DefaultImageFormat) if err != nil { - fmt.Fprintf(os.Stderr, "image build failed: convert to %s for %s: %v\n", DefaultImageFormat, ref.String(), err) m.updateStatusByDigest(ref, StatusFailed, fmt.Errorf("convert to %s: %w", DefaultImageFormat, err)) return } diff --git a/lib/providers/providers.go b/lib/providers/providers.go index 6bbcb4dd..3ea7cf8b 100644 --- a/lib/providers/providers.go +++ b/lib/providers/providers.go @@ -265,8 +265,9 @@ func ProvideBuildManager(p *paths.Paths, cfg *config.Config, instanceManager ins if buildConfig.MaxConcurrentBuilds == 0 { buildConfig.MaxConcurrentBuilds = 2 } - // If BuilderImage is empty, ensureBuilderImage will build from embedded Dockerfile - // (requires Docker socket). No default is set to allow this path. + if buildConfig.BuilderImage == "" { + buildConfig.BuilderImage = "hypeman/builder:latest" + } if buildConfig.RegistryURL == "" { buildConfig.RegistryURL = "localhost:8080" } diff --git a/lib/system/init/mount.go b/lib/system/init/mount.go index 833dc61b..07894d01 100644 --- a/lib/system/init/mount.go +++ b/lib/system/init/mount.go @@ -85,7 +85,7 @@ func waitForDevice(device string, timeout time.Duration) error { } // setupOverlay sets up the overlay filesystem: -// - /dev/vda: readonly rootfs (erofs or ext4, auto-detected) +// - /dev/vda: readonly rootfs (ext4) // - /dev/vdb: writable overlay disk (ext4) // - /overlay/newroot: merged overlay filesystem func setupOverlay(log *Logger) error { @@ -105,9 +105,8 @@ func setupOverlay(log *Logger) error { } } - // Mount readonly rootfs from /dev/vda. - // Filesystem type is auto-detected to support both ext4 (legacy) and erofs (default). - if err := mount("/dev/vda", "/lower", "", "ro"); err != nil { + // Mount readonly rootfs from /dev/vda (ext4 filesystem) + if err := mount("/dev/vda", "/lower", "ext4", "ro"); err != nil { return fmt.Errorf("mount rootfs: %w", err) } log.Info("overlay", "mounted rootfs from /dev/vda") @@ -180,13 +179,9 @@ func bindMountsToNewRoot(log *Logger) error { return nil } -// mount executes a mount command. -// If fstype is empty, the kernel auto-detects the filesystem type. +// mount executes a mount command func mount(source, target, fstype, options string) error { - var args []string - if fstype != "" { - args = append(args, "-t", fstype) - } + args := []string{"-t", fstype} if options != "" { args = append(args, "-o", options) } diff --git a/scripts/e2e-erofs-benchmark.sh b/scripts/e2e-erofs-benchmark.sh deleted file mode 100755 index 7a47d445..00000000 --- a/scripts/e2e-erofs-benchmark.sh +++ /dev/null @@ -1,135 +0,0 @@ -#!/bin/bash -# E2E Benchmark: Pre-built erofs optimization -# Submits a build and measures time from submission to "ready" status. -# Run with: ./scripts/e2e-erofs-benchmark.sh [label] -set -e - -LABEL="${1:-benchmark}" -API_URL="${API_URL:-http://localhost:8084}" -TOKEN="${TOKEN}" - -if [ -z "$TOKEN" ]; then - cd "$(dirname "$0")/.." - TOKEN=$(./bin/godotenv -f .env go run ./cmd/gen-jwt -user-id e2e-bench 2>/dev/null | tail -1) -fi - -echo "=== E2E Build Benchmark: $LABEL ===" -echo "API: $API_URL" - -# Check API is up -if ! curl -s "$API_URL/health" | grep -q "ok"; then - echo "ERROR: API not reachable at $API_URL" - exit 1 -fi -echo "API server is running" - -# Create test source with a unique layer to avoid digest deduplication -UNIQUE_ID="$(date +%s%N)-$$" -TEST_DIR=$(mktemp -d) -cat > "$TEST_DIR/Dockerfile" << DOCKERFILE -FROM node:20-alpine -WORKDIR /app -COPY package.json index.js ./ -RUN echo "build-id: $UNIQUE_ID" > /app/.build-id -CMD ["node", "index.js"] -DOCKERFILE - -cat > "$TEST_DIR/package.json" << 'EOF' -{"name":"bench-app","version":"1.0.0","main":"index.js"} -EOF - -cat > "$TEST_DIR/index.js" << 'EOF' -console.log("Benchmark app running at", new Date().toISOString()); -EOF - -TARBALL=$(mktemp --suffix=.tar.gz) -tar -czf "$TARBALL" -C "$TEST_DIR" . -rm -rf "$TEST_DIR" - -DOCKERFILE_CONTENT=$(tar -xzf "$TARBALL" -O ./Dockerfile 2>/dev/null) - -# Submit build -echo "" -echo "Submitting build..." -SUBMIT_TS=$(date +%s%N) - -RESPONSE=$(curl -s -X POST "$API_URL/builds" \ - -H "Authorization: Bearer $TOKEN" \ - -F "source=@$TARBALL" \ - -F "dockerfile=$DOCKERFILE_CONTENT" \ - -F "cache_scope=e2e-bench") - -BUILD_ID=$(echo "$RESPONSE" | jq -r '.id // empty') -if [ -z "$BUILD_ID" ]; then - echo "ERROR: Failed to submit build" - echo "$RESPONSE" | jq . - rm -f "$TARBALL" - exit 1 -fi -echo "Build ID: $BUILD_ID" - -# Poll for completion -echo "Polling for completion..." -LAST_STATUS="" -while true; do - RESPONSE=$(curl -s "$API_URL/builds/$BUILD_ID" -H "Authorization: Bearer $TOKEN") - STATUS=$(echo "$RESPONSE" | jq -r '.status') - - if [ "$STATUS" != "$LAST_STATUS" ]; then - TS=$(date +%s%N) - ELAPSED_MS=$(( (TS - SUBMIT_TS) / 1000000 )) - echo " [${ELAPSED_MS}ms] Status: $STATUS" - LAST_STATUS="$STATUS" - fi - - case "$STATUS" in - "ready") - READY_TS=$(date +%s%N) - break - ;; - "failed") - echo "ERROR: Build failed!" - echo "$RESPONSE" | jq . - echo "" - echo "=== Build Logs ===" - curl -s "$API_URL/builds/$BUILD_ID/events" -H "Authorization: Bearer $TOKEN" | jq -r '.[] | select(.type=="log") | .content' 2>/dev/null || \ - curl -s "$API_URL/builds/$BUILD_ID/logs" -H "Authorization: Bearer $TOKEN" - rm -f "$TARBALL" - exit 1 - ;; - "cancelled") - echo "Build cancelled!" - rm -f "$TARBALL" - exit 1 - ;; - esac - sleep 0.5 -done - -# Calculate timing -TOTAL_MS=$(( (READY_TS - SUBMIT_TS) / 1000000 )) -DURATION_MS=$(echo "$RESPONSE" | jq -r '.duration_ms // "unknown"') -IMAGE_DIGEST=$(echo "$RESPONSE" | jq -r '.image_digest // "none"') - -echo "" -echo "=== RESULTS: $LABEL ===" -echo "Build ID: $BUILD_ID" -echo "Image digest: $IMAGE_DIGEST" -echo "Agent duration: ${DURATION_MS}ms (build inside VM)" -echo "Total duration: ${TOTAL_MS}ms (submit to ready)" -echo "Post-build wait: $(( TOTAL_MS - ${DURATION_MS:-0} ))ms (image conversion)" -echo "" - -# Check server logs for erofs path indicators -echo "=== Server Log Indicators ===" -echo "(Looking for pre-built erofs indicators in the build response)" -HAS_EROFS=$(echo "$RESPONSE" | jq -r '.erofs_disk_path // empty') -if [ -n "$HAS_EROFS" ]; then - echo "Pre-built erofs: YES ($HAS_EROFS)" -else - echo "Pre-built erofs: NO (used fallback pipeline)" -fi - -rm -f "$TARBALL" -echo "" -echo "=== Benchmark Complete ==="