diff --git a/.env.example b/.env.example index 161c5261..ea445648 100644 --- a/.env.example +++ b/.env.example @@ -1,5 +1,5 @@ # Required -JWT_SECRET='your-secret-key-here' +JWT_SECRET= # Data directory (default: /var/lib/hypeman) DATA_DIR=/var/lib/hypeman @@ -20,7 +20,8 @@ DATA_DIR=/var/lib/hypeman # Caddy / Ingress configuration # CADDY_LISTEN_ADDRESS=0.0.0.0 # CADDY_ADMIN_ADDRESS=127.0.0.1 -# CADDY_ADMIN_PORT=0 # 0 = random port (prevents conflicts on shared dev machines) +# CADDY_ADMIN_PORT=0 # 0 = random (for dev); install script sets to 2019 for production +# INTERNAL_DNS_PORT=0 # 0 = random (for dev); install script sets to 5353 for production # CADDY_STOP_ON_SHUTDOWN=false # Set to true if you want Caddy to stop when hypeman stops # ============================================================================= diff --git a/cmd/api/config/config.go b/cmd/api/config/config.go index a9f68db3..a3e5a556 100644 --- a/cmd/api/config/config.go +++ b/cmd/api/config/config.go @@ -88,6 +88,7 @@ type Config struct { CaddyListenAddress string // Address for Caddy to listen on CaddyAdminAddress string // Address for Caddy admin API CaddyAdminPort int // Port for Caddy admin API + InternalDNSPort int // Port for internal DNS server (used for dynamic upstreams) CaddyStopOnShutdown bool // Stop Caddy when hypeman shuts down // ACME / TLS configuration @@ -145,10 +146,12 @@ func Load() *Config { LogLevel: getEnv("LOG_LEVEL", "info"), // Caddy / Ingress configuration - CaddyListenAddress: getEnv("CADDY_LISTEN_ADDRESS", "0.0.0.0"), - CaddyAdminAddress: getEnv("CADDY_ADMIN_ADDRESS", "127.0.0.1"), - CaddyAdminPort: getEnvInt("CADDY_ADMIN_PORT", 0), // 0 = random port to prevent conflicts on shared dev machines - CaddyStopOnShutdown: getEnvBool("CADDY_STOP_ON_SHUTDOWN", false), + CaddyListenAddress: getEnv("CADDY_LISTEN_ADDRESS", "0.0.0.0"), + CaddyAdminAddress: getEnv("CADDY_ADMIN_ADDRESS", "127.0.0.1"), + CaddyAdminPort: getEnvInt("CADDY_ADMIN_PORT", 0), // 0 = random port to 
prevent conflicts on shared dev machines + InternalDNSPort: getEnvInt("INTERNAL_DNS_PORT", 0), // 0 = random port; used for dynamic upstream resolution + // Defaults to true, so Caddy is stopped when hypeman shuts down; set to false to leave Caddy running across hypeman restarts (e.g. if you update hypeman frequently) + CaddyStopOnShutdown: getEnvBool("CADDY_STOP_ON_SHUTDOWN", true), // ACME / TLS configuration AcmeEmail: getEnv("ACME_EMAIL", ""), diff --git a/lib/ingress/manager.go b/lib/ingress/manager.go index 49b17674..5cdbf19e 100644 --- a/lib/ingress/manager.go +++ b/lib/ingress/manager.go @@ -174,21 +174,34 @@ func (m *manager) Initialize(ctx context.Context) error { log.WarnContext(ctx, "TLS ingresses exist but ACME is not configured - TLS will not work") } - // Check if any TLS ingresses have hostnames not in the allowed domains list + // Filter out TLS ingresses with hostnames not in the allowed domains list + // to prevent Caddy from trying to obtain certificates for invalid domains + var validIngresses []Ingress for _, ing := range ingresses { + var validRules []IngressRule for _, rule := range ing.Rules { if rule.TLS && !m.config.ACME.IsDomainAllowed(rule.Match.Hostname) { - log.WarnContext(ctx, "existing TLS ingress has hostname not in allowed domains list", + log.WarnContext(ctx, "skipping TLS ingress rule with hostname not in allowed domains list", "ingress", ing.Name, "hostname", rule.Match.Hostname, "allowed_domains", m.config.ACME.AllowedDomains, ) + continue // Skip this rule } + validRules = append(validRules, rule) + } + if len(validRules) > 0 { + ing.Rules = validRules + validIngresses = append(validIngresses, ing) + } else { + log.WarnContext(ctx, "skipping ingress with no valid rules", + "ingress", ing.Name, + ) } } - // Generate and write config - if err := m.regenerateConfig(ctx, ingresses); err != nil { + // Generate and write config with only valid ingresses + if err := m.regenerateConfig(ctx, validIngresses); err != nil { return fmt.Errorf("regenerate config: %w", err) } diff --git a/lib/providers/providers.go 
b/lib/providers/providers.go index 70fc57dd..f6046ff7 100644 --- a/lib/providers/providers.go +++ b/lib/providers/providers.go @@ -137,11 +137,17 @@ func ProvideIngressManager(p *paths.Paths, cfg *config.Config, instanceManager i } } + // Use config value for internal DNS port, fall back to default (0 = random) if not set + internalDNSPort := cfg.InternalDNSPort + if internalDNSPort == 0 { + internalDNSPort = ingress.DefaultDNSPort + } + ingressConfig := ingress.Config{ ListenAddress: cfg.CaddyListenAddress, AdminAddress: cfg.CaddyAdminAddress, AdminPort: cfg.CaddyAdminPort, - DNSPort: ingress.DefaultDNSPort, + DNSPort: internalDNSPort, StopOnShutdown: cfg.CaddyStopOnShutdown, ACME: ingress.ACMEConfig{ Email: cfg.AcmeEmail, diff --git a/scripts/POC-README.md b/scripts/POC-README.md deleted file mode 100644 index 4e2162fd..00000000 --- a/scripts/POC-README.md +++ /dev/null @@ -1,125 +0,0 @@ -# Cloud Hypervisor POC - -Proof of concept for running 10 Chromium VMs simultaneously using Cloud Hypervisor with disk-based overlays, config disks, networking isolation, and standby/restore functionality. - -## Prerequisites - -Install cloud-hypervisor by [installing the pre-built binaries](https://www.cloudhypervisor.org/docs/prologue/quick-start/#use-pre-built-binaries). Make sure `ch-remote` and `cloud-hypervisor` are in path. - -```bash -ch-remote --version -cloud-hypervisor --version -``` - -Tested with version `v48.0.0` - -Note: Requires `kernel-images-private` cloned to home directory with `iproute2` installed in the Chromium headful image. 
- -Also, `lsof` and `lz4` needs to be installed on the host - -``` -sudo apt-get install -y lsof lz4 -``` - -## Setup - -Build kernel, initrd, and rootfs with config disk support: - -```bash -./scripts/build-initrd.sh -``` - -This creates: -- `data/system/vmlinux` - Linux kernel -- `data/system/initrd` - BusyBox init with disk-based overlay -- `data/images/chromium-headful/v1/rootfs.ext4` - Chromium rootfs (read-only, shared) - -Configure host network with bridge and guest isolation: - -```bash -./scripts/setup-host-network.sh -``` - -Create 10 VM configurations (IPs 192.168.100.10-19, isolated TAP devices, overlay disks, config disks): - -```bash -./scripts/setup-vms.sh -``` - -## Running VMs - -Start all 10 VMs: - -```bash -./scripts/start-all-vms.sh -``` - -Check VM status: - -```bash -./scripts/list-vms.sh -``` - -View VM logs: - -```bash -./scripts/logs-vm.sh # Show last 100 lines -./scripts/logs-vm.sh -f # Follow logs -``` - -SSH into a VM: - -```bash -./scripts/ssh-vm.sh # Password: root -``` - -Stop a VM: - -```bash -./scripts/stop-vm.sh -./scripts/stop-all-vms.sh # Stop all -``` - -## Standby / Restore - -Standby a VM (pause, snapshot, delete VMM): - -```bash -./scripts/standby-vm.sh -``` - -Restore a VM from snapshot: - -```bash -./scripts/restore-vm.sh -``` - -## Networking - -Enable port forwarding for WebRTC access (localhost:8080-8089 → guest VMs): - -```bash -./scripts/setup-port-forwarding.sh -``` - -Connect to a VM: - -```bash -./scripts/connect-guest.sh -``` - -## Volumes - -Create a persistent volume: - -```bash -./scripts/create-volume.sh -``` - -## Architecture - -- **Disk-based overlay**: Each VM has a 50GB sparse overlay disk on `/dev/vdb` (faster restore than tmpfs) -- **Config disk**: Each VM has a config disk on `/dev/vdc` with VM-specific settings (IP, MAC, envs) -- **Guest isolation**: VMs cannot communicate with each other (iptables + bridge_slave isolation) -- **Serial logging**: All VM output captured to 
`data/guests/guest-N/logs/console.log` -- **Shared rootfs**: Single read-only rootfs image shared across all VMs diff --git a/scripts/build-initrd.sh b/scripts/build-initrd.sh deleted file mode 100755 index 7f3f2b98..00000000 --- a/scripts/build-initrd.sh +++ /dev/null @@ -1,263 +0,0 @@ -#!/usr/bin/env bash -set -euo pipefail - -IMAGE='onkernel/chromium-headful-test:latest' -KERNEL_VERSION='ch-release-v6.12.8-20250613' -DIR=$(pwd) - -echo "========================================" -echo "Building Cloud Hypervisor POC Images" -echo "========================================" - -# ============================================ -# Download kernel if not present -# ============================================ -if [ ! -f "vmlinux" ]; then - echo "[INFO] Downloading kernel $KERNEL_VERSION..." - wget -q https://github.com/cloud-hypervisor/linux/releases/download/$KERNEL_VERSION/vmlinux-x86_64 -O vmlinux - echo "[INFO] Kernel downloaded successfully" -else - echo "[INFO] Kernel already exists, skipping download" -fi - -# ============================================ -# Build Docker image -# ============================================ -echo "[INFO] Building Docker image..." -cd ~/kernel-images-private/images/chromium-headful -./build-docker.sh -cd "$DIR" - -# ============================================ -# Extract rootfs from Docker image -# ============================================ -echo "[INFO] Extracting rootfs from Docker image..." -cid=$(docker create $IMAGE) -rm -rf rootfs || true -mkdir -p rootfs -docker export "$cid" | tar -C rootfs -xf - - -# Save metadata -echo "[INFO] Saving Docker metadata..." -docker inspect "$cid" > /tmp/docker-metadata.json -docker rm "$cid" - -# ============================================ -# Generate rootfs init script with config disk support -# ============================================ -echo "[INFO] Generating rootfs init script..." 
-cat > rootfs/init <<'EOF' -#!/bin/sh -set -x - -echo "init: start" > /dev/kmsg - -# All mounts are handled by overlay init - skip them entirely - -# Redirect stdout/stderr to serial console for logging -exec >/dev/ttyS0 2>&1 - -echo "init: mounting config disk" > /dev/kmsg - -# Mount config disk and source configuration -mkdir -p /mnt/config -mount -o ro /dev/vdc /mnt/config - -if [ -f /mnt/config/config.sh ]; then - echo "init: sourcing config from /mnt/config/config.sh" > /dev/kmsg - . /mnt/config/config.sh -else - echo "init: ERROR - config.sh not found on config disk!" > /dev/kmsg - /bin/sh -i - exit 1 -fi - -echo "init: configuring network" > /dev/kmsg - -# Configure network from config variables -ip link set lo up -ifconfig eth0 ${GUEST_IP} netmask ${GUEST_MASK} up -route add default gw ${GUEST_GW} -echo "nameserver ${GUEST_DNS}" > /etc/resolv.conf - -echo "init: network configured - IP: ${GUEST_IP}" > /dev/kmsg - -# Set up /dev symlinks for bash process substitution (Docker compatibility) -echo "init: setting up /dev symlinks" > /dev/kmsg -ln -sf /proc/self/fd /dev/fd 2>/dev/null || true -ln -sf /proc/self/fd/0 /dev/stdin 2>/dev/null || true -ln -sf /proc/self/fd/1 /dev/stdout 2>/dev/null || true -ln -sf /proc/self/fd/2 /dev/stderr 2>/dev/null || true - -# Set PATH for proper binary resolution -export PATH='/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin' -export HOME='/root' - -# Set up signal handlers for graceful shutdown -trap 'echo "init: received SIGTERM, shutting down..." > /dev/kmsg; exit 0' TERM -trap 'echo "init: received SIGINT, shutting down..." 
> /dev/kmsg; exit 0' INT - -echo "init: starting SSH server" > /dev/kmsg - -# Create SSH directory -mkdir -p /var/run/sshd -mkdir -p /root/.ssh -chmod 700 /root/.ssh - -# Set root password (can be overridden in config) -# POC ONLY -echo "init: setting root password" > /dev/kmsg -echo "root:root" | chpasswd - -# Generate host keys if they don't exist -echo "init: generating SSH host keys" > /dev/kmsg -if [ ! -f /etc/ssh/ssh_host_rsa_key ]; then - ssh-keygen -A -fi - -# Configure SSH to allow root password login (POC only - not for production!) -echo "init: configuring SSH for root password login" > /dev/kmsg -cat >> /etc/ssh/sshd_config <<'SSHEOF' - -# POC Configuration - Allow root login with password -PermitRootLogin yes -PasswordAuthentication yes -SSHEOF - -# Start SSH daemon -echo "init: starting sshd daemon" > /dev/kmsg -/usr/sbin/sshd -if [ $? -eq 0 ]; then - echo "init: SSH server started successfully on port 22" > /dev/kmsg -else - echo "init: ERROR - SSH server failed to start!" > /dev/kmsg -fi -echo "init: launching entrypoint from ${WORKDIR:-/}" > /dev/kmsg -echo "init: entrypoint=${ENTRYPOINT} cmd=${CMD}" > /dev/kmsg - -# Change to workdir (default to / if empty) -cd ${WORKDIR:-/} - -# Execute entrypoint with cmd as arguments (like Docker does) -# Using exec replaces this shell with the entrypoint, making it PID 1 -# When it exits, the VM will stop (just like a Docker container) -exec ${ENTRYPOINT} ${CMD} -EOF -chmod +x rootfs/init - -# ============================================ -# Create rootfs disk image -# ============================================ -echo "[INFO] Creating rootfs disk image..." 
-DISK_SIZE="4G" -rm -f rootfs.ext4 || true -truncate -s $DISK_SIZE rootfs.ext4 -mkfs.ext4 -d rootfs rootfs.ext4 -F -q - -echo "[INFO] Created rootfs.ext4 disk image ($(du -h rootfs.ext4 | cut -f1))" - -# ============================================ -# Build minimal initramfs from busybox for overlay setup -# ============================================ -echo "[INFO] Building initramfs from busybox..." -busybox_cid=$(docker create busybox:latest) -rm -rf initramfs-overlay || true -mkdir -p initramfs-overlay -docker export "$busybox_cid" | tar -C initramfs-overlay -xf - -docker rm "$busybox_cid" - -# Create overlay init script with disk-based overlay and config disk support -cat > initramfs-overlay/init <<'EOF' -#!/bin/sh -set -xe -echo "overlay-init: start" > /dev/kmsg - -# Mount essentials -mount -t proc none /proc -mount -t sysfs none /sys -mount -t devtmpfs none /dev - -# Setup /dev properly BEFORE moving it -mkdir -p /dev/pts /dev/shm -mount -t devpts devpts /dev/pts -chmod 1777 /dev/shm - -echo "overlay-init: mounted proc/sys/dev with pts/shm" > /dev/kmsg - -# Mount readonly base filesystem from disk (/dev/vda) -mkdir -p /lower -mount -o ro /dev/vda /lower - -echo "overlay-init: mounted readonly rootfs from /dev/vda" > /dev/kmsg - -# Mount writable overlay disk from /dev/vdb (disk-based for faster restore) -mkdir -p /overlay -mount -t ext4 /dev/vdb /overlay - -echo "overlay-init: mounted writable overlay disk from /dev/vdb" > /dev/kmsg - -# Prepare overlay directories on the disk -mkdir -p /overlay/upper /overlay/work /overlay/newroot - -# Build overlay filesystem -mount -t overlay \ - -o lowerdir=/lower,upperdir=/overlay/upper,workdir=/overlay/work \ - overlay /overlay/newroot - -echo "overlay-init: created disk-based overlay" > /dev/kmsg - -# Move mounts to new root -cd /overlay/newroot -mkdir -p proc sys dev -mount --move /proc proc -mount --move /sys sys -mount --move /dev dev - -echo "overlay-init: switching root to overlay" > /dev/kmsg - -# Switch to 
overlay root and run the app init -# Note: /dev/vdc (config disk) will be mounted by the rootfs init -# Don't redirect here - let the new init handle console setup -exec switch_root . /init -EOF - -chmod +x initramfs-overlay/init - -# Package as initramfs -echo "[INFO] Packaging initramfs..." -rm -f initrd || true -cd initramfs-overlay -find . | cpio -H newc -o 2>/dev/null > ../initrd -cd .. - -echo "[INFO] Created initrd from busybox ($(du -h initrd | cut -f1))" - -# ============================================ -# Create data directory structure -# ============================================ -echo "[INFO] Creating data directory structure..." -mkdir -p data/system -mkdir -p data/images/chromium-headful/v1 - -# Copy artifacts to data directory -echo "[INFO] Copying artifacts to data/..." -cp vmlinux data/system/ -cp initrd data/system/ -cp rootfs.ext4 data/images/chromium-headful/v1/ -cp /tmp/docker-metadata.json data/images/chromium-headful/v1/metadata.json - -echo "" -echo "========================================" -echo "Build Complete!" -echo "========================================" -echo "Artifacts created in data/:" -echo " - data/system/vmlinux" -echo " - data/system/initrd" -echo " - data/images/chromium-headful/v1/rootfs.ext4" -echo " - data/images/chromium-headful/v1/metadata.json" -echo "" -echo "Next steps:" -echo " 1. ./scripts/setup-host-network.sh # Configure host networking" -echo " 2. ./scripts/setup-vms.sh # Create 10 VM configs" -echo " 3. ./scripts/start-all-vms.sh # Start all VMs" -echo "========================================" diff --git a/scripts/connect-guest.sh b/scripts/connect-guest.sh deleted file mode 100755 index 787737bc..00000000 --- a/scripts/connect-guest.sh +++ /dev/null @@ -1,78 +0,0 @@ -#!/usr/bin/env bash -set -euo pipefail - -# Configuration -BASE_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." 
&& pwd)/data" - -# Check arguments -if [ $# -ne 1 ]; then - echo "Usage: $0 " - echo "Example: $0 5" - echo "" - echo "VM ID should be 1-10" - exit 1 -fi - -VM_NUM="$1" - -# Validate VM number -if ! [[ "$VM_NUM" =~ ^[0-9]+$ ]] || [ "$VM_NUM" -lt 1 ] || [ "$VM_NUM" -gt 10 ]; then - echo "[ERROR] Invalid VM ID: $VM_NUM" - echo "VM ID must be between 1 and 10" - exit 1 -fi - -VM_ID="guest-$VM_NUM" -VM_DIR="$BASE_DIR/guests/$VM_ID" - -# Check if VM exists -if [ ! -d "$VM_DIR" ]; then - echo "[ERROR] VM not found: $VM_ID" - exit 1 -fi - -# Load config -if [ -f "$VM_DIR/config.json" ]; then - GUEST_IP=$(jq -r '.ip' "$VM_DIR/config.json") -else - echo "[ERROR] Config not found for $VM_ID" - exit 1 -fi - -# Check if VM is running -SOCKET="$VM_DIR/ch.sock" -if [ ! -S "$SOCKET" ] || ! sudo ch-remote --api-socket "$SOCKET" info &>/dev/null; then - echo "[ERROR] VM $VM_ID is not running" - echo "Start it with: ./scripts/start-all-vms.sh" - exit 1 -fi - -# Calculate WebRTC port (assuming port forwarding is set up) -HOST_PORT=$((8080 + VM_NUM - 1)) # VM 1 -> 8080, VM 2 -> 8081, etc. 
-GUEST_PORT=8080 - -echo "========================================" -echo "Connect to VM: $VM_ID" -echo "========================================" -echo "" -echo "Guest IP: $GUEST_IP" -echo "" -echo "WebRTC Access (if port forwarding is enabled):" -echo " URL: http://localhost:$HOST_PORT" -echo " (Forwards to $GUEST_IP:$GUEST_PORT)" -echo "" -echo "Direct Network Access:" -echo " Guest IP: $GUEST_IP" -echo " WebRTC Port: $GUEST_PORT" -echo "" -echo "To enable port forwarding:" -echo " ./scripts/setup-port-forwarding.sh" -echo "" -echo "To test connectivity:" -echo " ping $GUEST_IP" -echo " curl http://$GUEST_IP:$GUEST_PORT" -echo "" -echo "To view logs:" -echo " ./scripts/logs-vm.sh $VM_NUM" -echo "========================================" - diff --git a/scripts/create-volume.sh b/scripts/create-volume.sh deleted file mode 100755 index b835834e..00000000 --- a/scripts/create-volume.sh +++ /dev/null @@ -1,80 +0,0 @@ -#!/usr/bin/env bash -set -euo pipefail - -# Configuration -BASE_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)/data" - -# Check arguments -if [ $# -ne 2 ]; then - echo "Usage: $0 " - echo "Example: $0 vol-test 10" - echo "" - echo "Creates a persistent volume of specified size" - exit 1 -fi - -VOL_ID="$1" -SIZE_GB="$2" - -# Validate size is a number -if ! [[ "$SIZE_GB" =~ ^[0-9]+$ ]]; then - echo "[ERROR] Size must be a positive integer (GB)" - exit 1 -fi - -VOL_DIR="$BASE_DIR/volumes/$VOL_ID" - -echo "========================================" -echo "Creating Volume: $VOL_ID" -echo "========================================" - -# Check if volume already exists -if [ -d "$VOL_DIR" ]; then - echo "[ERROR] Volume $VOL_ID already exists at $VOL_DIR" - exit 1 -fi - -# Create volume directory -echo "[INFO] Creating volume directory..." -mkdir -p "$VOL_DIR" - -DISK_PATH="$VOL_DIR/disk.raw" - -# Create sparse raw disk -echo "[INFO] Creating ${SIZE_GB}GB sparse disk..." 
-truncate -s "${SIZE_GB}G" "$DISK_PATH" - -# Format with ext4 -echo "[INFO] Formatting disk with ext4..." -mkfs.ext4 -q "$DISK_PATH" - -# Create metadata -TIMESTAMP=$(date -Iseconds) -cat > "$VOL_DIR/metadata.json" </dev/null 2>&1; then + error "This script requires root privileges. Please run as root or install sudo." + fi + # Try passwordless sudo first, then prompt from terminal if needed + if ! sudo -n true 2>/dev/null; then + info "Requesting sudo privileges..." + # Read password from /dev/tty (terminal) even when script is piped + if ! sudo -v < /dev/tty; then + error "Failed to obtain sudo privileges" + fi + fi + SUDO="sudo" +fi + +# Check for required commands +command -v curl >/dev/null 2>&1 || error "curl is required but not installed" +command -v tar >/dev/null 2>&1 || error "tar is required but not installed" +command -v systemctl >/dev/null 2>&1 || error "systemctl is required but not installed (systemd not available?)" +command -v setcap >/dev/null 2>&1 || error "setcap is required but not installed (install libcap2-bin)" +command -v openssl >/dev/null 2>&1 || error "openssl is required but not installed" + +# Additional checks for build-from-source mode +if [ -n "$BRANCH" ]; then + command -v git >/dev/null 2>&1 || error "git is required for BRANCH mode but not installed" + command -v go >/dev/null 2>&1 || error "go is required for BRANCH mode but not installed" + command -v make >/dev/null 2>&1 || error "make is required for BRANCH mode but not installed" +fi + +# Detect OS +OS=$(uname -s | tr '[:upper:]' '[:lower:]') +if [ "$OS" != "linux" ]; then + error "Hypeman only supports Linux (detected: $OS)" +fi + +# Detect architecture +ARCH=$(uname -m) +case $ARCH in + x86_64|amd64) + ARCH="amd64" + ;; + aarch64|arm64) + ARCH="arm64" + ;; + *) + error "Unsupported architecture: $ARCH (supported: amd64, arm64)" + ;; +esac + +info "Pre-flight checks passed" + +# ============================================================================= +# Create temp 
directory +# ============================================================================= + +TMP_DIR=$(mktemp -d) +trap 'rm -rf "$TMP_DIR"' EXIT + +# ============================================================================= +# Get binaries (either download release or build from source) +# ============================================================================= + +if [ -n "$BRANCH" ]; then + # Build from source mode + info "Building from source (branch: $BRANCH)..." + + BUILD_DIR="${TMP_DIR}/hypeman" + BUILD_LOG="${TMP_DIR}/build.log" + + # Clone repo (quiet); redirect to the build log instead of piping to tee so the exit status tested is git's + if ! git clone --branch "$BRANCH" --depth 1 -q "https://github.com/${REPO}.git" "$BUILD_DIR" >> "$BUILD_LOG" 2>&1; then + error "Failed to clone repository. Build log:\n$(cat "$BUILD_LOG")" + fi + + info "Building binaries (this may take a few minutes)..." + cd "$BUILD_DIR" + + # Build main binary (includes dependencies) - capture output, show on error + if ! make build >> "$BUILD_LOG" 2>&1; then + echo "" + echo -e "${RED}Build failed. Full build log:${NC}" + cat "$BUILD_LOG" + error "Build failed" + fi + cp "bin/hypeman" "${TMP_DIR}/${BINARY_NAME}" + + # Build hypeman-token (not included in make build) + if ! go build -o "${TMP_DIR}/hypeman-token" ./cmd/gen-jwt >> "$BUILD_LOG" 2>&1; then + echo "" + echo -e "${RED}Build failed. Full build log:${NC}" + cat "$BUILD_LOG" + error "Failed to build hypeman-token" + fi + + # Copy .env.example for config template + cp ".env.example" "${TMP_DIR}/.env.example" + + VERSION="$BRANCH (source)" + cd - > /dev/null + + info "Build complete" +else + # Download release mode + if [ -z "$VERSION" ]; then + info "Fetching latest version..." 
+ VERSION=$(curl -fsSL "https://api.github.com/repos/${REPO}/releases/latest" | grep '"tag_name"' | cut -d'"' -f4) + if [ -z "$VERSION" ]; then + error "Failed to fetch latest version" + fi + fi + info "Installing version: $VERSION" + + # Construct download URL + VERSION_NUM="${VERSION#v}" + ARCHIVE_NAME="hypeman_${VERSION_NUM}_${OS}_${ARCH}.tar.gz" + DOWNLOAD_URL="https://github.com/${REPO}/releases/download/${VERSION}/${ARCHIVE_NAME}" + + info "Downloading ${ARCHIVE_NAME}..." + if ! curl -fsSL "$DOWNLOAD_URL" -o "${TMP_DIR}/${ARCHIVE_NAME}"; then + error "Failed to download from ${DOWNLOAD_URL}" + fi + + info "Extracting..." + tar -xzf "${TMP_DIR}/${ARCHIVE_NAME}" -C "$TMP_DIR" +fi + +# ============================================================================= +# Stop existing service if running +# ============================================================================= + +if $SUDO systemctl is-active --quiet "$SERVICE_NAME" 2>/dev/null; then + info "Stopping existing ${SERVICE_NAME} service..." + $SUDO systemctl stop "$SERVICE_NAME" +fi + +# ============================================================================= +# Install binaries +# ============================================================================= + +info "Installing ${BINARY_NAME} to ${INSTALL_DIR}..." +$SUDO mkdir -p "$INSTALL_DIR" +$SUDO install -m 755 "${TMP_DIR}/${BINARY_NAME}" "${INSTALL_DIR}/${BINARY_NAME}" + +# Set capabilities for network operations +info "Setting capabilities..." +$SUDO setcap 'cap_net_admin,cap_net_bind_service=+eip' "${INSTALL_DIR}/${BINARY_NAME}" + +# Install hypeman-token binary +info "Installing hypeman-token to ${INSTALL_DIR}..." +$SUDO install -m 755 "${TMP_DIR}/hypeman-token" "${INSTALL_DIR}/hypeman-token" + +# Install wrapper script to /usr/local/bin for easy access +info "Installing hypeman-token wrapper to /usr/local/bin..." 
+$SUDO tee /usr/local/bin/hypeman-token > /dev/null << EOF +#!/bin/bash +# Wrapper script for hypeman-token that loads config from /etc/hypeman/config +set -a +source ${CONFIG_FILE} +set +a +exec ${INSTALL_DIR}/hypeman-token "\$@" +EOF +$SUDO chmod 755 /usr/local/bin/hypeman-token + +# ============================================================================= +# Create hypeman system user +# ============================================================================= + +if ! id "$SERVICE_USER" &>/dev/null; then + info "Creating system user: ${SERVICE_USER}..." + $SUDO useradd --system --no-create-home --shell /usr/sbin/nologin "$SERVICE_USER" +fi + +# Add hypeman user to kvm group for VM access +if getent group kvm &>/dev/null; then + $SUDO usermod -aG kvm "$SERVICE_USER" +fi + +# ============================================================================= +# Create directories +# ============================================================================= + +info "Creating data directory at ${DATA_DIR}..." +$SUDO mkdir -p "$DATA_DIR" +$SUDO chown "$SERVICE_USER:$SERVICE_USER" "$DATA_DIR" + +info "Creating config directory at ${CONFIG_DIR}..." +$SUDO mkdir -p "$CONFIG_DIR" + +# ============================================================================= +# Create config file (if it doesn't exist) +# ============================================================================= + +if [ ! -f "$CONFIG_FILE" ]; then + # Get config template (from local build or download from repo) + if [ -f "${TMP_DIR}/.env.example" ]; then + info "Using config template from source..." + cp "${TMP_DIR}/.env.example" "${TMP_DIR}/config" + else + info "Downloading config template..." + CONFIG_URL="https://raw.githubusercontent.com/${REPO}/${VERSION}/.env.example" + if ! curl -fsSL "$CONFIG_URL" -o "${TMP_DIR}/config"; then + error "Failed to download config template from ${CONFIG_URL}" + fi + fi + + # Generate random JWT secret + info "Generating JWT secret..." 
+ JWT_SECRET=$(openssl rand -hex 32) + sed -i "s/^JWT_SECRET=$/JWT_SECRET=${JWT_SECRET}/" "${TMP_DIR}/config" + + # Set fixed ports for production (instead of random ports used in dev) + # Replace entire line to avoid trailing comments being included in the value + sed -i "s/^# CADDY_ADMIN_PORT=.*/CADDY_ADMIN_PORT=2019/" "${TMP_DIR}/config" + sed -i "s/^# INTERNAL_DNS_PORT=.*/INTERNAL_DNS_PORT=5353/" "${TMP_DIR}/config" + + info "Installing config file at ${CONFIG_FILE}..." + $SUDO install -m 640 "${TMP_DIR}/config" "$CONFIG_FILE" + + # Set ownership: installing user owns the file, hypeman group can read it + # This allows CLI (running as user) and service (running as hypeman) to both read + INSTALL_USER="${SUDO_USER:-$(whoami)}" + $SUDO chown "${INSTALL_USER}:${SERVICE_USER}" "$CONFIG_FILE" +else + info "Config file already exists at ${CONFIG_FILE}, skipping..." +fi + +# ============================================================================= +# Install systemd service +# ============================================================================= + +info "Installing systemd service..." +$SUDO tee "${SYSTEMD_DIR}/${SERVICE_NAME}.service" > /dev/null << EOF +[Unit] +Description=Hypeman API Server +Documentation=https://github.com/onkernel/hypeman +After=network.target + +[Service] +Type=simple +User=${SERVICE_USER} +Group=${SERVICE_USER} +Environment="HOME=${DATA_DIR}" +EnvironmentFile=${CONFIG_FILE} +ExecStart=${INSTALL_DIR}/${BINARY_NAME} +Restart=on-failure +RestartSec=5 + +# Security hardening +ProtectSystem=strict +ProtectHome=true +PrivateTmp=true +ReadWritePaths=${DATA_DIR} +# Note: NoNewPrivileges=true is omitted because we need capabilities + +# Capabilities for network operations +AmbientCapabilities=CAP_NET_ADMIN CAP_NET_BIND_SERVICE +CapabilityBoundingSet=CAP_NET_ADMIN CAP_NET_BIND_SERVICE + +[Install] +WantedBy=multi-user.target +EOF + +# Reload systemd +info "Reloading systemd..." 
+$SUDO systemctl daemon-reload + +# Enable service +info "Enabling ${SERVICE_NAME} service..." +$SUDO systemctl enable "$SERVICE_NAME" + +# Start service +info "Starting ${SERVICE_NAME} service..." +$SUDO systemctl start "$SERVICE_NAME" + +# ============================================================================= +# Install Hypeman CLI +# ============================================================================= + +CLI_REPO="onkernel/hypeman-cli" + +if [ -z "$CLI_VERSION" ]; then + info "Fetching latest CLI version..." + CLI_VERSION=$(curl -fsSL "https://api.github.com/repos/${CLI_REPO}/releases/latest" | grep '"tag_name"' | cut -d'"' -f4) + if [ -z "$CLI_VERSION" ]; then + warn "Failed to fetch latest CLI version, skipping CLI installation" + CLI_VERSION="" + fi +fi + +if [ -n "$CLI_VERSION" ]; then + info "Installing Hypeman CLI version: $CLI_VERSION" + + CLI_VERSION_NUM="${CLI_VERSION#v}" + CLI_ARCHIVE_NAME="hypeman_${CLI_VERSION_NUM}_${OS}_${ARCH}.tar.gz" + CLI_DOWNLOAD_URL="https://github.com/${CLI_REPO}/releases/download/${CLI_VERSION}/${CLI_ARCHIVE_NAME}" + + info "Downloading CLI ${CLI_ARCHIVE_NAME}..." + if curl -fsSL "$CLI_DOWNLOAD_URL" -o "${TMP_DIR}/${CLI_ARCHIVE_NAME}"; then + info "Extracting CLI..." + mkdir -p "${TMP_DIR}/cli" + tar -xzf "${TMP_DIR}/${CLI_ARCHIVE_NAME}" -C "${TMP_DIR}/cli" + + # Install CLI binary + info "Installing hypeman CLI to ${INSTALL_DIR}..." + $SUDO install -m 755 "${TMP_DIR}/cli/hypeman" "${INSTALL_DIR}/hypeman-cli" + + # Install wrapper script to /usr/local/bin for PATH access + info "Installing hypeman wrapper to /usr/local/bin..." 
+ $SUDO tee /usr/local/bin/hypeman > /dev/null << WRAPPER +#!/bin/bash +# Wrapper script for hypeman CLI that auto-generates API token +set -a +source ${CONFIG_FILE} +set +a +export HYPEMAN_API_KEY=\$(${INSTALL_DIR}/hypeman-token -user-id "cli-user-\$(whoami)" 2>/dev/null) +exec ${INSTALL_DIR}/hypeman-cli "\$@" +WRAPPER + $SUDO chmod 755 /usr/local/bin/hypeman + else + warn "Failed to download CLI from ${CLI_DOWNLOAD_URL}, skipping CLI installation" + fi +fi + +# ============================================================================= +# Done +# ============================================================================= + +echo "" +echo -e "${PURPLE}" +cat << 'EOF' + ██╗ ██╗ ██╗ ██╗ ██████╗ ███████╗ ███╗ ███╗ █████╗ ███╗ ██╗ + ██║ ██║ ╚██╗ ██╔╝ ██╔══██╗ ██╔════╝ ████╗ ████║ ██╔══██╗ ████╗ ██║ + ███████║ ╚████╔╝ ██████╔╝ █████╗ ██╔████╔██║ ███████║ ██╔██╗ ██║ + ██╔══██║ ╚██╔╝ ██╔═══╝ ██╔══╝ ██║╚██╔╝██║ ██╔══██║ ██║╚██╗██║ + ██║ ██║ ██║ ██║ ███████╗ ██║ ╚═╝ ██║ ██║ ██║ ██║ ╚████║ + ╚═╝ ╚═╝ ╚═╝ ╚═╝ ╚══════╝ ╚═╝ ╚═╝ ╚═╝ ╚═╝ ╚═╝ ╚═══╝ +EOF +echo -e "${NC}" +info "Hypeman installed successfully!" 
+echo "" +echo " API Binary: ${INSTALL_DIR}/${BINARY_NAME}" +echo " CLI: /usr/local/bin/hypeman" +echo " Token tool: /usr/local/bin/hypeman-token" +echo " Config: ${CONFIG_FILE}" +echo " Data: ${DATA_DIR}" +echo " Service: ${SERVICE_NAME}.service" +echo "" +echo "" +echo "Next steps:" +echo " - (Optional) Edit ${CONFIG_FILE} to configure your installation" +echo "" +echo "Get Started:" +echo "╭──────────────────────────────────────────╮" +echo "│ hypeman pull nginx:alpine │" +echo "│ hypeman run nginx:alpine │" +echo "│ hypeman logs │" +echo "│ hypeman exec -it /bin/sh │" +echo "│ hypeman --help │" +echo "╰──────────────────────────────────────────╯" +echo "" diff --git a/scripts/list-vms.sh b/scripts/list-vms.sh deleted file mode 100755 index 3651a363..00000000 --- a/scripts/list-vms.sh +++ /dev/null @@ -1,74 +0,0 @@ -#!/usr/bin/env bash -set -euo pipefail - -# Configuration -BASE_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)/data" - -echo "========================================" -echo "VM Status" -echo "========================================" -printf "%-10s %-10s %-15s %-8s %-10s\n" "VM ID" "STATUS" "IP" "PID" "UPTIME" -echo "------------------------------------------------------------------------" - -if [ ! -d "$BASE_DIR/guests" ]; then - echo "No VMs found. Run ./scripts/setup-vms.sh first." - exit 0 -fi - -for i in {1..10}; do - VM_ID="guest-$i" - VM_DIR="$BASE_DIR/guests/$VM_ID" - - if [ ! -d "$VM_DIR" ]; then - continue - fi - - SOCKET="$VM_DIR/ch.sock" - - # Load config - if [ -f "$VM_DIR/config.json" ]; then - GUEST_IP=$(jq -r '.ip' "$VM_DIR/config.json" 2>/dev/null || echo "unknown") - else - GUEST_IP="unknown" - fi - - # Check if VM is running - if [ -S "$SOCKET" ]; then - if sudo ch-remote --api-socket "$SOCKET" info &>/dev/null; then - STATUS="RUNNING" - - # Get PID - PID=$(sudo lsof -t "$SOCKET" 2>/dev/null || echo "?") - - # Get uptime - if [ "$PID" != "?" 
]; then - UPTIME=$(ps -p "$PID" -o etime= 2>/dev/null | tr -d ' ' || echo "?") - else - UPTIME="?" - fi - else - STATUS="STOPPED" - PID="-" - UPTIME="-" - # Clean up stale socket - sudo rm -f "$SOCKET" 2>/dev/null || true - fi - else - STATUS="STOPPED" - PID="-" - UPTIME="-" - fi - - printf "%-10s %-10s %-15s %-8s %-10s\n" "$VM_ID" "$STATUS" "$GUEST_IP" "$PID" "$UPTIME" -done - -echo "------------------------------------------------------------------------" -echo "" -echo "Commands:" -echo " Start all: ./scripts/start-all-vms.sh" -echo " Stop VM: ./scripts/stop-vm.sh " -echo " SSH: ./scripts/ssh-vm.sh (password: root)" -echo " Logs: ./scripts/logs-vm.sh " -echo " Standby: ./scripts/standby-vm.sh " -echo "========================================" - diff --git a/scripts/logs-vm.sh b/scripts/logs-vm.sh deleted file mode 100755 index 06f97075..00000000 --- a/scripts/logs-vm.sh +++ /dev/null @@ -1,52 +0,0 @@ -#!/usr/bin/env bash -set -euo pipefail - -# Configuration -BASE_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)/data" - -# Check arguments -if [ $# -lt 1 ]; then - echo "Usage: $0 [follow]" - echo "Example: $0 5" - echo "Example: $0 5 -f # Follow logs" - echo "" - echo "VM ID should be 1-10" - exit 1 -fi - -VM_NUM="$1" -FOLLOW_FLAG="${2:-}" - -# Validate VM number -if ! [[ "$VM_NUM" =~ ^[0-9]+$ ]] || [ "$VM_NUM" -lt 1 ] || [ "$VM_NUM" -gt 10 ]; then - echo "[ERROR] Invalid VM ID: $VM_NUM" - echo "VM ID must be between 1 and 10" - exit 1 -fi - -VM_ID="guest-$VM_NUM" -VM_DIR="$BASE_DIR/guests/$VM_ID" -LOG_FILE="$VM_DIR/logs/console.log" - -# Check if log file exists -if [ ! -f "$LOG_FILE" ]; then - echo "[ERROR] Log file not found: $LOG_FILE" - echo "VM $VM_ID may not have been started yet." 
- exit 1 -fi - -echo "========================================" -echo "Logs for VM: $VM_ID" -echo "Log file: $LOG_FILE" -echo "========================================" -echo "" - -# Show logs -if [ "$FOLLOW_FLAG" = "-f" ] || [ "$FOLLOW_FLAG" = "--follow" ]; then - tail -f "$LOG_FILE" -else - tail -n 100 "$LOG_FILE" - echo "" - echo "Use '$0 $VM_NUM -f' to follow logs in real-time" -fi - diff --git a/scripts/restore-vm.sh b/scripts/restore-vm.sh deleted file mode 100755 index 0a3e23c9..00000000 --- a/scripts/restore-vm.sh +++ /dev/null @@ -1,275 +0,0 @@ -#!/usr/bin/env bash -set -euo pipefail - -# Configuration -BASE_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)/data" - -# Check arguments -if [ $# -ne 1 ]; then - echo "Usage: $0 " - echo "Example: $0 5" - echo "" - echo "VM ID should be 1-10" - exit 1 -fi - -VM_NUM="$1" - -# Validate VM number -if ! [[ "$VM_NUM" =~ ^[0-9]+$ ]] || [ "$VM_NUM" -lt 1 ] || [ "$VM_NUM" -gt 10 ]; then - echo "[ERROR] Invalid VM ID: $VM_NUM" - echo "VM ID must be between 1 and 10" - exit 1 -fi - -VM_ID="guest-$VM_NUM" -VM_DIR="$BASE_DIR/guests/$VM_ID" -SOCKET="$VM_DIR/ch.sock" -SNAPSHOT_DIR="$VM_DIR/snapshots/snapshot-latest" - -echo "========================================" -echo "Restore VM: $VM_ID" -echo "========================================" - -# Start overall timing -RESTORE_START=$(date +%s%3N) - -# Check if VM directory exists -if [ ! -d "$VM_DIR" ]; then - echo "[ERROR] VM directory not found: $VM_DIR" - exit 1 -fi - -# Check if snapshot exists -if [ ! -d "$SNAPSHOT_DIR" ]; then - echo "[ERROR] Snapshot not found: $SNAPSHOT_DIR" - echo "Please create a snapshot first with: ./scripts/standby-vm.sh $VM_NUM" - exit 1 -fi - -# Check if VM is already running -if [ -S "$SOCKET" ]; then - if sudo ch-remote --api-socket "$SOCKET" info &>/dev/null; then - echo "[ERROR] VM $VM_ID is already running" - echo "Stop it first or use a different VM ID" - exit 1 - else - # Stale socket - echo "[WARN] Removing stale socket..." 
- sudo rm -f "$SOCKET" - fi -fi - -# Recreate TAP device for restore -echo "[INFO] Setting up TAP device for restore..." -TAP_START=$(date +%s%3N) - -TAP=$(jq -r '.tap' "$VM_DIR/config.json" 2>/dev/null) -BRIDGE="vmbr0" - -if [ -z "$TAP" ]; then - echo "[ERROR] Could not read TAP device name from config.json" - exit 1 -fi - -# Remove TAP if it exists in a bad state -if ip link show "$TAP" &>/dev/null; then - echo "[INFO] Removing existing TAP device $TAP..." - sudo ip link set "$TAP" down 2>/dev/null || true - sudo ip link delete "$TAP" 2>/dev/null || true -fi - -# Create fresh TAP device -echo "[INFO] Creating TAP device $TAP..." -sudo ip tuntap add "$TAP" mode tap user "$(whoami)" -sudo ip link set "$TAP" up -sudo ip link set "$TAP" master "$BRIDGE" -sudo ip link set "$TAP" type bridge_slave isolated on -echo "[INFO] TAP device $TAP ready" - -TAP_END=$(date +%s%3N) -TAP_TIME=$((TAP_END - TAP_START)) - -LOG_FILE="$VM_DIR/logs/console.log" - -echo "[INFO] Starting cloud-hypervisor for restore..." -echo " Socket: $SOCKET" -echo " Snapshot: $SNAPSHOT_DIR" - -# Start cloud-hypervisor with just the API socket (no VM config yet) -# The restore will load everything from the snapshot -CH_START=$(date +%s%3N) - -sudo nohup cloud-hypervisor \ - --api-socket "$SOCKET" \ - > "$VM_DIR/ch-restore-stdout.log" 2>&1 & - -CH_PID=$! -echo "[INFO] Cloud-hypervisor started with PID: $CH_PID" - -# Wait for socket to be ready (poll every 0.01s, timeout after 1s) -echo "[INFO] Waiting for API socket to be ready..." 
-SOCKET_READY=false -for i in {1..100}; do - if [ -S "$SOCKET" ]; then - echo "[INFO] Socket ready" - SOCKET_READY=true - break - fi - sleep 0.01 -done - -if [ "$SOCKET_READY" = false ]; then - echo "[ERROR] Socket not created after 1 second" - sudo kill "$CH_PID" 2>/dev/null || true - exit 1 -fi - -CH_END=$(date +%s%3N) -CH_TIME=$((CH_END - CH_START)) - -# Decompress memory-ranges to tmpfs (memory) if compressed -MEMORY_FILE="$SNAPSHOT_DIR/memory-ranges" -COMPRESSED_FILE="$SNAPSHOT_DIR/memory-ranges.lz4" -TMPFS_SNAPSHOT="/dev/shm/ch-restore-$VM_ID" -DECOMPRESS_TIME=0 - -# Clean up any old tmpfs snapshot -rm -rf "$TMPFS_SNAPSHOT" 2>/dev/null || true - -if [ -f "$COMPRESSED_FILE" ]; then - echo "[INFO] Decompressing snapshot to tmpfs (memory)..." - DECOMPRESS_START=$(date +%s%3N) - - # Create tmpfs directory for this restore - mkdir -p "$TMPFS_SNAPSHOT" - - # Decompress directly to tmpfs (in memory, not disk!) - need sudo for root-owned file - sudo lz4 -d "$COMPRESSED_FILE" "$TMPFS_SNAPSHOT/memory-ranges" - - # Copy other snapshot files to tmpfs (small files, fast) - sudo cp "$SNAPSHOT_DIR/state.json" "$TMPFS_SNAPSHOT/" - sudo cp "$SNAPSHOT_DIR/config.json" "$TMPFS_SNAPSHOT/" - - # Fix permissions so cloud-hypervisor can read - sudo chmod 644 "$TMPFS_SNAPSHOT"/* - - DECOMPRESS_END=$(date +%s%3N) - DECOMPRESS_TIME=$((DECOMPRESS_END - DECOMPRESS_START)) - - COMPRESSED_SIZE=$(sudo stat -c%s "$COMPRESSED_FILE" 2>/dev/null || echo "0") - COMPRESSED_SIZE_MB=$((COMPRESSED_SIZE / 1048576)) - echo "[INFO] Decompressed ${COMPRESSED_SIZE_MB}MB to memory in $((DECOMPRESS_TIME))ms" - - # Use tmpfs snapshot location - SNAPSHOT_DIR_ACTUAL="$TMPFS_SNAPSHOT" -else - echo "[INFO] Using uncompressed snapshot from disk" - SNAPSHOT_DIR_ACTUAL="$SNAPSHOT_DIR" -fi - -# Restore from snapshot (now in tmpfs/memory if compressed) -echo "[INFO] Restoring from snapshot..." -SNAPSHOT_START=$(date +%s%3N) - -SNAPSHOT_URL="file://$SNAPSHOT_DIR_ACTUAL" -if ! 
sudo ch-remote --api-socket "$SOCKET" restore "source_url=$SNAPSHOT_URL"; then - echo "[ERROR] Failed to restore from snapshot" - echo "[INFO] Cleaning up..." - sudo kill "$CH_PID" 2>/dev/null || true - sudo rm -f "$SOCKET" - rm -rf "$TMPFS_SNAPSHOT" 2>/dev/null || true - exit 1 -fi - -SNAPSHOT_END=$(date +%s%3N) -SNAPSHOT_TIME=$((SNAPSHOT_END - SNAPSHOT_START)) - -echo "[INFO] Snapshot restored successfully" - -# Resume the VM -echo "[INFO] Resuming VM..." -RESUME_START=$(date +%s%3N) - -if ! sudo ch-remote --api-socket "$SOCKET" resume; then - echo "[ERROR] Failed to resume VM" - echo "[INFO] Cleaning up..." - sudo kill "$CH_PID" 2>/dev/null || true - sudo rm -f "$SOCKET" - exit 1 -fi - -RESUME_END=$(date +%s%3N) -RESUME_TIME=$((RESUME_END - RESUME_START)) - -echo "[INFO] VM resumed successfully" - -# Clean up tmpfs snapshot if used -if [ -d "$TMPFS_SNAPSHOT" ]; then - echo "[INFO] Cleaning up tmpfs snapshot..." - rm -rf "$TMPFS_SNAPSHOT" -fi - -# Restore memory to full 4GB (virtio-mem hot-plug) -echo "[INFO] Restoring memory to 4GB..." 
-MEMORY_RESTORE_START=$(date +%s%3N) -TARGET_MEMORY="4294967296" # 4GB in bytes - -if sudo ch-remote --api-socket "$SOCKET" resize --memory "$TARGET_MEMORY" 2>&1; then - MEMORY_RESTORE_END=$(date +%s%3N) - MEMORY_RESTORE_TIME=$((MEMORY_RESTORE_END - MEMORY_RESTORE_START)) - - # Verify memory was restored - FINAL_SIZE=$(sudo ch-remote --api-socket "$SOCKET" info 2>/dev/null | jq -r '.config.memory.size' || echo "0") - FINAL_SIZE_MB=$((FINAL_SIZE / 1048576)) - echo "[INFO] Memory restored to ${FINAL_SIZE_MB}MB ($((MEMORY_RESTORE_TIME))ms)" -else - echo "[WARN] Could not restore memory to 4GB, VM running at reduced size" - MEMORY_RESTORE_TIME=0 -fi - -# Log the operation -TIMESTAMP=$(date -Iseconds) -echo "$TIMESTAMP - VM $VM_ID restored from standby" >> "$VM_DIR/standby.log" - -# Verify it's running (no sleep needed) -if sudo ch-remote --api-socket "$SOCKET" info &>/dev/null; then - RESTORE_END=$(date +%s%3N) - TOTAL_TIME=$((RESTORE_END - RESTORE_START)) - - # Convert milliseconds to seconds with 3 decimal places - format_time() { - local ms=$1 - local sec=$((ms / 1000)) - local msec=$((ms % 1000)) - printf "%d.%03d" $sec $msec - } - - echo "" - echo "========================================" - echo "Restore Complete!" 
- echo "========================================" - echo "VM: $VM_ID" - echo "PID: $CH_PID" - echo "Time: $TIMESTAMP" - echo "" - echo "Timing Breakdown:" - echo " TAP device setup: $(format_time $TAP_TIME)s" - echo " Cloud-hypervisor init: $(format_time $CH_TIME)s" - echo " LZ4 decompress→tmpfs: $(format_time $DECOMPRESS_TIME)s" - echo " Snapshot restore: $(format_time $SNAPSHOT_TIME)s" - echo " VM resume: $(format_time $RESUME_TIME)s" - echo " Memory resize (→4GB): $(format_time $MEMORY_RESTORE_TIME)s" - echo " ─────────────────────────────────" - echo " Total restore time: $(format_time $TOTAL_TIME)s" - echo "" - echo "Memory: ${FINAL_SIZE_MB}MB / 4096MB" - echo "" - echo "View logs: ./scripts/logs-vm.sh $VM_NUM" - echo "Check status: ./scripts/list-vms.sh" - echo "========================================" -else - echo "[ERROR] VM doesn't seem to be responding after restore" - exit 1 -fi - diff --git a/scripts/setup-host-network.sh b/scripts/setup-host-network.sh deleted file mode 100755 index af98e935..00000000 --- a/scripts/setup-host-network.sh +++ /dev/null @@ -1,91 +0,0 @@ -#!/usr/bin/env bash -set -euo pipefail - -# --- config --- -BR=vmbr0 -SUBNET=192.168.100.0/24 -BR_IP=192.168.100.1/24 # host's gateway IP on the bridge -# Replace with your real uplink iface (route -n or ip r | grep default) -UPLINK=eth0 - -echo "[INFO] Starting host network setup..." -echo "[INFO] Configuration: bridge=$BR, subnet=$SUBNET, uplink=$UPLINK" - -# 1) bridge -echo "[INFO] Setting up bridge interface '$BR'..." -if ! ip link show "$BR" &>/dev/null; then - echo "[INFO] Creating bridge '$BR'..." - sudo ip link add "$BR" type bridge - echo "[INFO] Bridge '$BR' created successfully" -else - echo "[INFO] Bridge '$BR' already exists, skipping creation" -fi -echo "[INFO] Bringing up bridge '$BR'..." -sudo ip link set "$BR" up || true -echo "[INFO] Bridge '$BR' is up" - -# 2) assign host IP on bridge (idempotent) -echo "[INFO] Assigning IP address '$BR_IP' to bridge '$BR'..." -if ! 
ip -br addr show "$BR" | grep -q "${BR_IP%/*}"; then - sudo ip addr add "$BR_IP" dev "$BR" - echo "[INFO] IP address '$BR_IP' assigned to bridge '$BR'" -else - echo "[INFO] IP address '$BR_IP' already assigned to bridge '$BR', skipping" -fi - -# 3) IP forwarding -echo "[INFO] Enabling IP forwarding..." -sudo sysctl -w net.ipv4.ip_forward=1 >/dev/null -echo "[INFO] IP forwarding enabled" - -# 4) NAT (iptables) — add only if missing -echo "[INFO] Configuring iptables NAT and forwarding rules..." -if ! sudo iptables -t nat -C POSTROUTING -s "$SUBNET" -o "$UPLINK" -j MASQUERADE 2>/dev/null; then - echo "[INFO] Adding MASQUERADE rule for subnet $SUBNET on $UPLINK..." - sudo iptables -t nat -A POSTROUTING -s "$SUBNET" -o "$UPLINK" -j MASQUERADE - echo "[INFO] MASQUERADE rule added" -else - echo "[INFO] MASQUERADE rule already exists, skipping" -fi - -# 5) Guest isolation and security rules -echo "[INFO] Configuring guest isolation and forwarding rules..." - -# Allow established connections from uplink to bridge -if ! sudo iptables -C FORWARD -i "$UPLINK" -o "$BR" -m state --state RELATED,ESTABLISHED -j ACCEPT 2>/dev/null; then - echo "[INFO] Adding FORWARD rule for RELATED,ESTABLISHED traffic from $UPLINK to $BR..." - sudo iptables -A FORWARD -i "$UPLINK" -o "$BR" -m state --state RELATED,ESTABLISHED -j ACCEPT - echo "[INFO] FORWARD rule (RELATED,ESTABLISHED) added" -else - echo "[INFO] FORWARD rule (RELATED,ESTABLISHED) already exists, skipping" -fi - -# Allow new outbound connections from bridge to uplink -if ! sudo iptables -C FORWARD -i "$BR" -o "$UPLINK" -m state --state NEW,RELATED,ESTABLISHED -j ACCEPT 2>/dev/null; then - echo "[INFO] Adding FORWARD rule for outbound traffic from $BR to $UPLINK..." 
- sudo iptables -A FORWARD -i "$BR" -o "$UPLINK" -m state --state NEW,RELATED,ESTABLISHED -j ACCEPT - echo "[INFO] FORWARD rule added" -else - echo "[INFO] FORWARD rule already exists, skipping" -fi - -# Drop all other forwarded traffic (guest-to-guest isolation) -# This prevents VMs from talking to each other -# Note: Layer 2 isolation is handled per-TAP with bridge_slave isolated on -echo "[INFO] Checking FORWARD policy..." -CURRENT_POLICY=$(sudo iptables -L FORWARD | grep "^Chain FORWARD" | awk '{print $4}' | tr -d '()') -if [ "$CURRENT_POLICY" != "DROP" ]; then - echo "[INFO] Setting FORWARD policy to DROP for guest isolation..." - sudo iptables -P FORWARD DROP - echo "[INFO] FORWARD policy set to DROP" -else - echo "[INFO] FORWARD policy already set to DROP, skipping" -fi - -echo "" -echo "[SUCCESS] Host network ready!" -echo " Bridge: $BR (${BR_IP})" -echo " NAT via: $UPLINK" -echo " Guest isolation: ENABLED" -echo "" -echo "Next: Run scripts/setup-vms.sh to create VM TAP devices and configs" diff --git a/scripts/setup-port-forwarding.sh b/scripts/setup-port-forwarding.sh deleted file mode 100755 index 98d184e4..00000000 --- a/scripts/setup-port-forwarding.sh +++ /dev/null @@ -1,50 +0,0 @@ -#!/usr/bin/env bash -set -euo pipefail - -# Configuration -UPLINK="${UPLINK:-eth0}" # Can be overridden with env var -SUBNET_BASE="192.168.100" -HOST_PORT_START=8080 -GUEST_PORT=8080 - -echo "========================================" -echo "Setting up Port Forwarding for WebRTC" -echo "========================================" -echo "Uplink: $UPLINK" -echo "Forwarding: localhost:8080-8089 -> VMs:8080" -echo "" - -# Setup DNAT rules for each VM -for i in {1..10}; do - HOST_PORT=$((HOST_PORT_START + i - 1)) # 8080, 8081, ..., 8089 - GUEST_IP="${SUBNET_BASE}.$((9 + i))" # 192.168.100.10-19 - - # Check if rule already exists - if ! 
sudo iptables -t nat -C PREROUTING -p tcp --dport "$HOST_PORT" -j DNAT --to-destination "${GUEST_IP}:${GUEST_PORT}" 2>/dev/null; then - echo "[INFO] Adding port forward: localhost:$HOST_PORT -> $GUEST_IP:$GUEST_PORT" - sudo iptables -t nat -A PREROUTING -p tcp --dport "$HOST_PORT" -j DNAT --to-destination "${GUEST_IP}:${GUEST_PORT}" - else - echo "[INFO] Port forward already exists: localhost:$HOST_PORT -> $GUEST_IP:$GUEST_PORT" - fi - - # Also add rule for accessing from the host itself (localhost) - if ! sudo iptables -t nat -C OUTPUT -p tcp --dport "$HOST_PORT" -d 127.0.0.1 -j DNAT --to-destination "${GUEST_IP}:${GUEST_PORT}" 2>/dev/null; then - sudo iptables -t nat -A OUTPUT -p tcp --dport "$HOST_PORT" -d 127.0.0.1 -j DNAT --to-destination "${GUEST_IP}:${GUEST_PORT}" - fi -done - -echo "" -echo "========================================" -echo "Port Forwarding Configured!" -echo "========================================" -echo "" -echo "Access VMs via WebRTC:" -for i in {1..10}; do - HOST_PORT=$((HOST_PORT_START + i - 1)) - GUEST_IP="${SUBNET_BASE}.$((9 + i))" - echo " VM $i (guest-$i): http://localhost:$HOST_PORT -> $GUEST_IP:$GUEST_PORT" -done -echo "" -echo "Test with: curl http://localhost:8080" -echo "========================================" - diff --git a/scripts/setup-vms.sh b/scripts/setup-vms.sh deleted file mode 100755 index 10a1fbad..00000000 --- a/scripts/setup-vms.sh +++ /dev/null @@ -1,189 +0,0 @@ -#!/usr/bin/env bash -set -euo pipefail - -# Configuration -NUM_VMS=10 -BASE_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)/data" -BRIDGE="vmbr0" -SUBNET_BASE="192.168.100" -MAC_PREFIX="52:55:00:d1:55" - -echo "========================================" -echo "Setting up $NUM_VMS VMs" -echo "========================================" -echo "" -echo "WARNING: This will wipe all existing VM data!" 
-echo " - VM overlay disks will be recreated" -echo " - Config disks will be regenerated" -echo " - TAP devices will be recreated" -echo " - All logs and snapshots will be deleted" -echo "" - -# Check if base images exist -if [ ! -f "$BASE_DIR/system/vmlinux" ] || [ ! -f "$BASE_DIR/system/initrd" ]; then - echo "[ERROR] Base system images not found!" - echo "Please run ./build-initrd.sh first to create the base images." - exit 1 -fi - -if [ ! -f "$BASE_DIR/images/chromium-headful/v1/rootfs.ext4" ]; then - echo "[ERROR] Rootfs image not found!" - echo "Please run ./build-initrd.sh first to create the rootfs image." - exit 1 -fi - -# Load metadata for environment variables -METADATA_FILE="$BASE_DIR/images/chromium-headful/v1/metadata.json" - -# Create volumes directory -echo "[INFO] Creating volumes directory..." -mkdir -p "$BASE_DIR/volumes" - -# Create fresh hosts file -echo "[INFO] Creating fresh hosts file..." -cat > "$BASE_DIR/hosts" < "$CONFIG_DIR/config.sh" </dev/null | while read -r env_line; do - echo "export $env_line" >> "$CONFIG_DIR/config.sh" - done - - # Add runtime-specific overrides and additional envs - cat >> "$CONFIG_DIR/config.sh" </dev/null; then - echo " Removing existing TAP device..." - sudo ip link delete "$TAP" || true - fi - - # Create fresh TAP device - echo " Creating TAP device..." - sudo ip tuntap add "$TAP" mode tap user "$(whoami)" - - # Configure TAP device - echo " Configuring TAP device..." - sudo ip link set "$TAP" up - - # Attach to bridge - sudo ip link set "$TAP" master "$BRIDGE" - - # Enable guest isolation on layer 2 (prevents guests from talking to each other on bridge) - sudo ip link set "$TAP" type bridge_slave isolated on - - # Write metadata file - cat > "$VM_DIR/config.json" <> "$BASE_DIR/hosts" - - echo " VM $i setup complete!" -done - -echo "" -echo "========================================" -echo "VM Setup Complete!" 
-echo "========================================" -echo "Created fresh setup for $NUM_VMS VMs:" -echo " IPs: ${SUBNET_BASE}.10 - ${SUBNET_BASE}.19" -echo " MACs: ${MAC_PREFIX}:01 - ${MAC_PREFIX}:0a" -echo " TAPs: tap-guest-1 - tap-guest-10" -echo "" -echo "VM data stored in: $BASE_DIR/guests/" -echo "Hosts file created: $BASE_DIR/hosts" -echo "" -echo "All VMs have fresh overlay disks and configs from Docker metadata." -echo "" -echo "Next step: Run ./scripts/start-all-vms.sh to boot all VMs" -echo "========================================" - diff --git a/scripts/ssh-vm.sh b/scripts/ssh-vm.sh deleted file mode 100755 index f0419d0a..00000000 --- a/scripts/ssh-vm.sh +++ /dev/null @@ -1,63 +0,0 @@ -#!/usr/bin/env bash -set -euo pipefail - -# Configuration -BASE_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)/data" -SUBNET_BASE="192.168.100" - -# Check arguments -if [ $# -lt 1 ]; then - echo "Usage: $0 [ssh-args]" - echo "Example: $0 5" - echo "Example: $0 5 -L 8080:localhost:8080" - echo "" - echo "VM ID should be 1-10" - echo "Default password: root" - exit 1 -fi - -VM_NUM="$1" -shift # Remove first argument, keep the rest for SSH - -# Validate VM number -if ! [[ "$VM_NUM" =~ ^[0-9]+$ ]] || [ "$VM_NUM" -lt 1 ] || [ "$VM_NUM" -gt 10 ]; then - echo "[ERROR] Invalid VM ID: $VM_NUM" - echo "VM ID must be between 1 and 10" - exit 1 -fi - -VM_ID="guest-$VM_NUM" -VM_DIR="$BASE_DIR/guests/$VM_ID" - -# Check if VM exists -if [ ! -d "$VM_DIR" ]; then - echo "[ERROR] VM not found: $VM_ID" - echo "Run ./scripts/setup-vms.sh first" - exit 1 -fi - -# Load config to get IP -if [ -f "$VM_DIR/config.json" ]; then - GUEST_IP=$(jq -r '.ip' "$VM_DIR/config.json") -else - # Fallback calculation - GUEST_IP="${SUBNET_BASE}.$((9 + VM_NUM))" -fi - -# Check if VM is running -SOCKET="$VM_DIR/ch.sock" -if [ ! -S "$SOCKET" ] || ! 
sudo ch-remote --api-socket "$SOCKET" info &>/dev/null 2>&1; then - echo "[ERROR] VM $VM_ID is not running" - echo "Start it with: ./scripts/start-all-vms.sh" - exit 1 -fi - -echo "Connecting to VM $VM_ID ($GUEST_IP)..." -echo "Password: root" -echo "" - -# SSH to the VM with any additional arguments passed -# -o StrictHostKeyChecking=no to avoid host key prompts on first connect -# -o UserKnownHostsFile=/dev/null to avoid saving host keys -ssh -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null root@"$GUEST_IP" "$@" - diff --git a/scripts/standby-vm.sh b/scripts/standby-vm.sh deleted file mode 100755 index b8cd4ee9..00000000 --- a/scripts/standby-vm.sh +++ /dev/null @@ -1,249 +0,0 @@ -#!/usr/bin/env bash -set -euo pipefail - -# Configuration -BASE_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)/data" - -# Check arguments -if [ $# -lt 1 ]; then - echo "Usage: $0 [--compress]" - echo "Example: $0 5" - echo "Example: $0 5 --compress" - echo "" - echo "VM ID should be 1-10" - echo "Options:" - echo " --compress Compress snapshot with LZ4 (slower snapshot, faster restore on slow disks)" - exit 1 -fi - -VM_NUM="$1" -COMPRESS_FLAG="${2:-}" - -# Validate VM number -if ! [[ "$VM_NUM" =~ ^[0-9]+$ ]] || [ "$VM_NUM" -lt 1 ] || [ "$VM_NUM" -gt 10 ]; then - echo "[ERROR] Invalid VM ID: $VM_NUM" - echo "VM ID must be between 1 and 10" - exit 1 -fi - -VM_ID="guest-$VM_NUM" -VM_DIR="$BASE_DIR/guests/$VM_ID" -SOCKET="$VM_DIR/ch.sock" -SNAPSHOT_DIR="$VM_DIR/snapshots/snapshot-latest" - -echo "========================================" -echo "Standby VM: $VM_ID" -echo "========================================" - -# Start overall timing -STANDBY_START=$(date +%s%3N) - -# Check if VM directory exists -if [ ! -d "$VM_DIR" ]; then - echo "[ERROR] VM directory not found: $VM_DIR" - echo "Please run ./scripts/setup-vms.sh first." - exit 1 -fi - -# Check if VM is running -if [ ! 
-S "$SOCKET" ]; then - echo "[ERROR] VM $VM_ID is not running (socket not found)" - exit 1 -fi - -# Check if we can communicate with the VM -if ! sudo ch-remote --api-socket "$SOCKET" info &>/dev/null; then - echo "[ERROR] Cannot communicate with VM $VM_ID" - echo "Socket exists but VM is not responding" - exit 1 -fi - -# Create snapshot directory -echo "[INFO] Preparing snapshot directory..." -rm -rf "$SNAPSHOT_DIR" || true -mkdir -p "$SNAPSHOT_DIR" - -# Try to reduce memory before snapshot (virtio-mem hot-unplug) -echo "[INFO] Attempting to reduce memory to 1GB for snapshot..." -MEMORY_RESIZE_START=$(date +%s%3N) -TARGET_SIZE="1073741824" # 1GB in bytes - -# Resize memory down to 1GB (virtio-mem will unplug what it can safely) -RESIZE_OUTPUT=$(sudo ch-remote --api-socket "$SOCKET" resize --memory "$TARGET_SIZE" 2>&1) || true - -MEMORY_RESIZE_END=$(date +%s%3N) -MEMORY_RESIZE_TIME=$((MEMORY_RESIZE_END - MEMORY_RESIZE_START)) - -# Check what was actually achieved -ACTUAL_SIZE=$(sudo ch-remote --api-socket "$SOCKET" info 2>/dev/null | jq -r '.config.memory.size' || echo "unknown") -ACTUAL_SIZE_MB=$((ACTUAL_SIZE / 1048576)) - -echo "[INFO] Memory resize completed in $((MEMORY_RESIZE_TIME))ms" -echo "[INFO] Memory size: ${ACTUAL_SIZE_MB}MB (target was 1024MB)" - -if [ "$ACTUAL_SIZE_MB" -gt 1024 ]; then - echo "[WARN] Could not reduce to target - guest using more than 1GB" - echo "[WARN] Snapshot will be larger than optimal" -fi - -# TODO: why do we need this sleep? -# For some reason, if I immediately pause the VM then snapshot, -# the memory isn't yet reduced to 1GB in terms of snapshot size. -sleep 2 - -# Pause the VM -echo "[INFO] Pausing VM..." -PAUSE_START=$(date +%s%3N) -if ! sudo ch-remote --api-socket "$SOCKET" pause; then - echo "[ERROR] Failed to pause VM" - exit 1 -fi -PAUSE_END=$(date +%s%3N) -PAUSE_TIME=$((PAUSE_END - PAUSE_START)) -echo "[INFO] VM paused successfully ($((PAUSE_TIME))ms)" - -# Create snapshot -echo "[INFO] Creating snapshot..." 
-SNAPSHOT_START=$(date +%s%3N) -SNAPSHOT_URL="file://$SNAPSHOT_DIR" -if ! sudo ch-remote --api-socket "$SOCKET" snapshot "$SNAPSHOT_URL"; then - echo "[ERROR] Failed to create snapshot" - echo "[INFO] Attempting to resume VM..." - sudo ch-remote --api-socket "$SOCKET" resume || true - exit 1 -fi -SNAPSHOT_END=$(date +%s%3N) -SNAPSHOT_TIME=$((SNAPSHOT_END - SNAPSHOT_START)) -echo "[INFO] Snapshot created successfully ($((SNAPSHOT_TIME))ms)" - -# Optionally compress memory-ranges (only if --compress flag provided) -COMPRESS_TIME=0 -ORIGINAL_SIZE_MB=0 -COMPRESSED_SIZE_MB=0 -RATIO="1.0" - -if [ "$COMPRESS_FLAG" = "--compress" ]; then - echo "[INFO] Compressing snapshot with LZ4 (fast mode)..." - COMPRESS_START=$(date +%s%3N) - MEMORY_FILE="$SNAPSHOT_DIR/memory-ranges" - - if [ -f "$MEMORY_FILE" ]; then - # Get original size - ORIGINAL_SIZE=$(sudo stat -c%s "$MEMORY_FILE") - ORIGINAL_SIZE_MB=$((ORIGINAL_SIZE / 1048576)) - - # Compress with LZ4 fast mode (-1), remove original (need sudo - file owned by root) - sudo lz4 -1 --rm "$MEMORY_FILE" "$MEMORY_FILE.lz4" - - COMPRESS_END=$(date +%s%3N) - COMPRESS_TIME=$((COMPRESS_END - COMPRESS_START)) - - # Get compressed size and ratio - COMPRESSED_SIZE=$(sudo stat -c%s "$MEMORY_FILE.lz4") - COMPRESSED_SIZE_MB=$((COMPRESSED_SIZE / 1048576)) - RATIO=$(awk "BEGIN {printf \"%.1f\", $ORIGINAL_SIZE / $COMPRESSED_SIZE}") - - echo "[INFO] Compressed ${ORIGINAL_SIZE_MB}MB → ${COMPRESSED_SIZE_MB}MB (${RATIO}x, $((COMPRESS_TIME))ms)" - else - echo "[WARN] memory-ranges file not found, skipping compression" - fi -else - echo "[INFO] Skipping compression (use --compress flag to enable)" -fi - -# Get cloud-hypervisor PID by finding which process has the overlay disk open -echo "[INFO] Stopping cloud-hypervisor process..." -OVERLAY_DISK="$VM_DIR/overlay.raw" -CH_PID=$(sudo lsof -t "$OVERLAY_DISK" 2>/dev/null | head -1) - -if [ -z "$CH_PID" ]; then - echo "[ERROR] Could not find cloud-hypervisor PID!" 
- echo "[ERROR] No process has $OVERLAY_DISK open" - echo "[ERROR] The VM might already be stopped or check manually:" - echo " ps aux | grep cloud-hypervisor | grep $VM_ID" - exit 1 -fi - -echo "[INFO] Found cloud-hypervisor process (PID: $CH_PID)" -sudo kill "$CH_PID" - -# Wait for process to die -for i in {1..10}; do - if ! sudo kill -0 "$CH_PID" 2>/dev/null; then - echo "[INFO] Process terminated" - break - fi - sleep 0.5 -done - -# Force kill if still alive -if sudo kill -0 "$CH_PID" 2>/dev/null; then - echo "[WARN] Process still alive, force killing..." - sudo kill -9 "$CH_PID" - sleep 1 -fi - -# Wait for file locks to be released -echo "[INFO] Waiting for disk locks to be released..." -sleep 2 - -echo "[INFO] Cloud-hypervisor process stopped (PID: $CH_PID)" - -# Remove socket -sudo rm -f "$SOCKET" || true - -# Get TAP device name from config and remove it -echo "[INFO] Cleaning up TAP device..." -TAP=$(jq -r '.tap' "$VM_DIR/config.json" 2>/dev/null || echo "") - -if [ -n "$TAP" ] && ip link show "$TAP" &>/dev/null; then - echo "[INFO] Removing TAP device $TAP..." - sudo ip link set "$TAP" down 2>/dev/null || true - sudo ip link delete "$TAP" 2>/dev/null || true - echo "[INFO] TAP device $TAP removed" -else - echo "[INFO] TAP device not found or already removed" -fi - -# Log the operation -echo "[INFO] Logging standby operation..." -TIMESTAMP=$(date -Iseconds) -STANDBY_END=$(date +%s%3N) -TOTAL_TIME=$((STANDBY_END - STANDBY_START)) - -# Get snapshot size -SNAPSHOT_SIZE=$(du -sh "$SNAPSHOT_DIR" 2>/dev/null | cut -f1 || echo "unknown") - -echo "$TIMESTAMP - VM $VM_ID entered standby - Memory: ${ACTUAL_SIZE_MB}MB, Snapshot: $SNAPSHOT_SIZE" >> "$VM_DIR/standby.log" - -# Format time helper -format_time() { - local ms=$1 - local sec=$((ms / 1000)) - local msec=$((ms % 1000)) - printf "%d.%03d" $sec $msec -} - -echo "" -echo "========================================" -echo "Standby Complete!" 
-echo "========================================" -echo "VM: $VM_ID" -echo "Snapshot: $SNAPSHOT_DIR" -echo "Snapshot size: $SNAPSHOT_SIZE" -echo "Time: $TIMESTAMP" -echo "" -echo "Timing Breakdown:" -echo " Memory resize: $(format_time $MEMORY_RESIZE_TIME)s (→ ${ACTUAL_SIZE_MB}MB)" -echo " VM pause: $(format_time $PAUSE_TIME)s" -echo " Snapshot save: $(format_time $SNAPSHOT_TIME)s" -if [ "$COMPRESS_FLAG" = "--compress" ] && [ "$COMPRESS_TIME" -gt 0 ]; then - echo " LZ4 compress: $(format_time $COMPRESS_TIME)s (${ORIGINAL_SIZE_MB}MB → ${COMPRESSED_SIZE_MB}MB, ${RATIO}x)" -fi -echo " Process stop: (included above)" -echo " ─────────────────────────────────" -echo " Total time: $(format_time $TOTAL_TIME)s" -echo "" -echo "To restore: ./scripts/restore-vm.sh $VM_NUM" -echo "========================================" - diff --git a/scripts/start-all-vms.sh b/scripts/start-all-vms.sh deleted file mode 100755 index f91f27b3..00000000 --- a/scripts/start-all-vms.sh +++ /dev/null @@ -1,136 +0,0 @@ -#!/usr/bin/env bash -set -euo pipefail - -# Configuration -NUM_VMS=10 -BASE_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)/data" -SUBNET_BASE="192.168.100" - -echo "========================================" -echo "Starting $NUM_VMS VMs" -echo "========================================" - -# Verify data directory exists -if [ ! -d "$BASE_DIR/guests" ]; then - echo "[ERROR] VM data directory not found!" - echo "Please run ./scripts/setup-vms.sh first." - exit 1 -fi - -# Verify images exist -if [ ! -f "$BASE_DIR/system/vmlinux" ]; then - echo "[ERROR] Kernel not found at $BASE_DIR/system/vmlinux" - exit 1 -fi - -if [ ! -f "$BASE_DIR/system/initrd" ]; then - echo "[ERROR] Initrd not found at $BASE_DIR/system/initrd" - exit 1 -fi - -if [ ! 
-f "$BASE_DIR/images/chromium-headful/v1/rootfs.ext4" ]; then - echo "[ERROR] Rootfs not found at $BASE_DIR/images/chromium-headful/v1/rootfs.ext4" - exit 1 -fi - -ROOTFS="$BASE_DIR/images/chromium-headful/v1/rootfs.ext4" -KERNEL="$BASE_DIR/system/vmlinux" -INITRD="$BASE_DIR/system/initrd" - -# Start each VM -for i in $(seq 1 $NUM_VMS); do - VM_ID="guest-$i" - VM_DIR="$BASE_DIR/guests/$VM_ID" - - # Load config - if [ ! -f "$VM_DIR/config.json" ]; then - echo "[ERROR] Config not found for $VM_ID" - continue - fi - - GUEST_IP=$(jq -r '.ip' "$VM_DIR/config.json") - MAC=$(jq -r '.mac' "$VM_DIR/config.json") - TAP=$(jq -r '.tap' "$VM_DIR/config.json") - SOCKET="$VM_DIR/ch.sock" - LOG_FILE="$VM_DIR/logs/console.log" - OVERLAY_DISK="$VM_DIR/overlay.raw" - CONFIG_DISK="$VM_DIR/config.ext4" - - # Check if VM is already running - if [ -S "$SOCKET" ]; then - if sudo ch-remote --api-socket "$SOCKET" info &>/dev/null; then - echo "[SKIP] VM $i ($VM_ID) is already running" - continue - else - # Socket exists but VM not responding, clean it up - echo "[WARN] Stale socket found for $VM_ID, removing..." - sudo rm -f "$SOCKET" - fi - fi - - echo "" - echo "[INFO] Starting VM $i ($VM_ID)..." - echo " IP: $GUEST_IP" - echo " TAP: $TAP" - echo " Socket: $SOCKET" - echo " Log: $LOG_FILE" - - # Verify TAP device exists - if ! ip link show "$TAP" &>/dev/null; then - echo "[ERROR] TAP device $TAP not found! Run ./scripts/setup-vms.sh first." 
- continue - fi - - # Start cloud-hypervisor in background - # Note: Using nohup and & to run in background, redirecting output to avoid terminal clutter - # Using virtio-mem: 1GB base + 3GB hotplug = 4GB max, can resize down for snapshots - sudo nohup cloud-hypervisor \ - --kernel "$KERNEL" \ - --initramfs "$INITRD" \ - --cmdline 'console=ttyS0' \ - --cpus boot=2 \ - --memory size=1G,hotplug_method=virtio-mem,hotplug_size=3G \ - --disk path="$ROOTFS",readonly=on path="$OVERLAY_DISK" path="$CONFIG_DISK",readonly=on \ - --net "tap=$TAP,ip=$GUEST_IP,mask=255.255.255.0,mac=$MAC" \ - --serial "file=$LOG_FILE" \ - --console off \ - --api-socket "$SOCKET" \ - > "$VM_DIR/ch-stdout.log" 2>&1 & - - CH_PID=$! - echo " Started with PID: $CH_PID" - - # Give it a moment to start - sleep 0.5 - - # Verify it's running - if ! kill -0 $CH_PID 2>/dev/null; then - echo "[ERROR] VM $i failed to start! Check $VM_DIR/ch-stdout.log" - else - echo " VM $i started successfully!" - - # Expand memory to full 4GB using virtio-mem - echo " Expanding memory to 4GB..." - if sudo ch-remote --api-socket "$SOCKET" resize --memory 4294967296 2>/dev/null; then - echo " Memory expanded to 4GB" - else - echo " [WARN] Could not expand memory, running at 1GB" - fi - fi -done - -echo "" -echo "========================================" -echo "VM Startup Complete!" 
-echo "========================================" -echo "" -echo "Use ./scripts/list-vms.sh to see running VMs" -echo "Use ./scripts/ssh-vm.sh to SSH into a VM (password: root)" -echo "Use ./scripts/logs-vm.sh to view VM logs" -echo "Use ./scripts/connect-guest.sh to access a VM" -echo "" -echo "To test standby/restore:" -echo " ./scripts/standby-vm.sh 5" -echo " ./scripts/restore-vm.sh 5" -echo "========================================" - diff --git a/scripts/stop-all-vms.sh b/scripts/stop-all-vms.sh deleted file mode 100755 index 63a45607..00000000 --- a/scripts/stop-all-vms.sh +++ /dev/null @@ -1,20 +0,0 @@ -#!/usr/bin/env bash -set -euo pipefail - -echo "========================================" -echo "Stopping All VMs" -echo "========================================" - -SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" - -# Stop each VM -for i in {1..10}; do - echo "" - "$SCRIPT_DIR/stop-vm.sh" "$i" -done - -echo "" -echo "========================================" -echo "All VMs stopped" -echo "========================================" - diff --git a/scripts/stop-vm.sh b/scripts/stop-vm.sh deleted file mode 100755 index febda08b..00000000 --- a/scripts/stop-vm.sh +++ /dev/null @@ -1,75 +0,0 @@ -#!/usr/bin/env bash -set -euo pipefail - -# Configuration -BASE_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)/data" - -# Check arguments -if [ $# -ne 1 ]; then - echo "Usage: $0 " - echo "Example: $0 5" - echo "" - echo "VM ID should be 1-10" - exit 1 -fi - -VM_NUM="$1" - -# Validate VM number -if ! [[ "$VM_NUM" =~ ^[0-9]+$ ]] || [ "$VM_NUM" -lt 1 ] || [ "$VM_NUM" -gt 10 ]; then - echo "[ERROR] Invalid VM ID: $VM_NUM" - echo "VM ID must be between 1 and 10" - exit 1 -fi - -VM_ID="guest-$VM_NUM" -VM_DIR="$BASE_DIR/guests/$VM_ID" -SOCKET="$VM_DIR/ch.sock" - -echo "[INFO] Stopping VM: $VM_ID" - -# Check if VM is running -if [ ! -S "$SOCKET" ]; then - echo "[INFO] VM $VM_ID is not running" - exit 0 -fi - -# Check if we can communicate with the VM -if ! 
sudo ch-remote --api-socket "$SOCKET" info &>/dev/null; then
-    echo "[WARN] VM $VM_ID socket exists but not responding"
-    echo "[INFO] Cleaning up stale socket..."
-    sudo rm -f "$SOCKET"
-    exit 0
-fi
-
-# Try graceful shutdown via API
-echo "[INFO] Sending shutdown command..."
-if sudo ch-remote --api-socket "$SOCKET" shutdown 2>/dev/null; then
-    echo "[INFO] Shutdown command sent, waiting for VM to stop..."
-
-    # Wait up to 30 seconds for graceful shutdown
-    for i in {1..60}; do
-        if ! sudo ch-remote --api-socket "$SOCKET" info &>/dev/null; then
-            echo "[INFO] VM stopped gracefully"
-            sudo rm -f "$SOCKET" 2>/dev/null || true
-            exit 0
-        fi
-        sleep 0.5
-    done
-
-    echo "[WARN] VM did not stop gracefully, force killing..."
-fi
-
-# Force kill
-PID=$(sudo lsof -t "$SOCKET" 2>/dev/null || echo "")
-if [ -n "$PID" ]; then
-    echo "[INFO] Force stopping cloud-hypervisor (PID: $PID)..."
-    sudo kill -9 "$PID" || true
-    sleep 1
-fi
-
-# Clean up socket
-sudo rm -f "$SOCKET" 2>/dev/null || true
-
-echo "[INFO] VM $VM_ID stopped"
-
diff --git a/scripts/uninstall.sh b/scripts/uninstall.sh
new file mode 100755
index 00000000..acc22143
--- /dev/null
+++ b/scripts/uninstall.sh
@@ -0,0 +1,170 @@
+#!/bin/bash
+#
+# Hypeman Uninstall Script
+#
+# Usage:
+#   curl -fsSL https://raw.githubusercontent.com/onkernel/hypeman/main/scripts/uninstall.sh | bash
+#   (options must reach bash, not curl: "curl ... | KEEP_DATA=false bash")
+# Options (via environment variables):
+#   KEEP_DATA=false   - Remove data directory (/var/lib/hypeman); the default "true" keeps it
+#   KEEP_CONFIG=true  - Keep config directory (/etc/hypeman); the default "false" removes it
+#   The hypeman system user is removed only when KEEP_DATA is not "true".
+
+set -e  # abort on the first unguarded command failure
+
+INSTALL_DIR="/opt/hypeman"
+DATA_DIR="/var/lib/hypeman"
+CONFIG_DIR="/etc/hypeman"
+SYSTEMD_DIR="/etc/systemd/system"
+SERVICE_NAME="hypeman"
+SERVICE_USER="hypeman"
+
+# Colors for output (24-bit true color, except YELLOW which is a basic ANSI code)
+RED='\033[38;2;255;110;110m'
+GREEN='\033[38;2;92;190;83m'
+YELLOW='\033[0;33m'
+PURPLE='\033[38;2;172;134;249m'
+NC='\033[0m' # No Color
+# Logging helpers: each prints a colored tag plus $1; error() also exits with status 1.
+info() { echo -e "${GREEN}[INFO]${NC} $1"; }
+warn() { echo -e "${YELLOW}[WARN]${NC} $1"; }
+error() { echo -e "${RED}[ERROR]${NC} $1"; exit 1; }
+
+# =============================================================================
+# Pre-flight checks
+# =============================================================================
+
+info "Running pre-flight checks..."
+
+# Check for root or sudo access
+SUDO=""  # stays empty when already root, so "$SUDO cmd" runs cmd directly
+if [ "$EUID" -ne 0 ]; then
+    if ! command -v sudo >/dev/null 2>&1; then
+        error "This script requires root privileges. Please run as root or install sudo."
+    fi
+    # Try passwordless sudo first, then prompt from the terminal if needed.
+    # The prompt reads /dev/tty because stdin is the script itself under "curl | bash".
+    if ! sudo -n true 2>/dev/null; then
+        info "Requesting sudo privileges..."
+        if ! sudo -v < /dev/tty; then
+            error "Failed to obtain sudo privileges"
+        fi
+    fi
+    SUDO="sudo"
+fi
+
+# =============================================================================
+# Stop and disable service
+# =============================================================================
+
+if $SUDO systemctl is-active --quiet "$SERVICE_NAME" 2>/dev/null; then
+    info "Stopping ${SERVICE_NAME} service..."
+    $SUDO systemctl stop "$SERVICE_NAME"
+fi
+
+if $SUDO systemctl is-enabled --quiet "$SERVICE_NAME" 2>/dev/null; then
+    info "Disabling ${SERVICE_NAME} service..."
+    $SUDO systemctl disable "$SERVICE_NAME"
+fi
+
+# =============================================================================
+# Remove systemd service (unit file, then reload so systemd forgets it)
+# =============================================================================
+
+if [ -f "${SYSTEMD_DIR}/${SERVICE_NAME}.service" ]; then
+    info "Removing systemd service..."
+    $SUDO rm -f "${SYSTEMD_DIR}/${SERVICE_NAME}.service"
+    $SUDO systemctl daemon-reload
+fi
+
+# =============================================================================
+# Remove binaries and wrappers
+# =============================================================================
+
+info "Removing binaries..."
+
+# Remove wrapper scripts from /usr/local/bin (rm -f: no error if already absent)
+$SUDO rm -f /usr/local/bin/hypeman
+$SUDO rm -f /usr/local/bin/hypeman-token
+
+# Remove install directory
+if [ -d "$INSTALL_DIR" ]; then
+    $SUDO rm -rf "$INSTALL_DIR"
+fi
+
+# =============================================================================
+# Handle data directory (kept unless KEEP_DATA is set to anything but "true")
+# =============================================================================
+
+if [ -d "$DATA_DIR" ]; then
+    if [ "${KEEP_DATA:-true}" = "true" ]; then
+        info "Keeping data directory: ${DATA_DIR}"
+    else
+        info "Removing data directory: ${DATA_DIR}"
+        $SUDO rm -rf "$DATA_DIR"
+    fi
+fi
+
+# =============================================================================
+# Handle config directory (removed unless KEEP_CONFIG is exactly "true")
+# =============================================================================
+
+if [ -d "$CONFIG_DIR" ]; then
+    if [ "${KEEP_CONFIG:-false}" = "true" ]; then
+        warn "Keeping config directory: ${CONFIG_DIR}"
+    else
+        info "Removing config directory: ${CONFIG_DIR}"
+        $SUDO rm -rf "$CONFIG_DIR"
+    fi
+fi
+
+# =============================================================================
+# Remove hypeman user (kept together with the data directory when KEEP_DATA is "true")
+# =============================================================================
+# A userdel failure is reported but stays non-fatal: everything else is already removed.
+if id "$SERVICE_USER" &>/dev/null; then
+    if [ "${KEEP_DATA:-true}" = "true" ]; then
+        info "Keeping system user: ${SERVICE_USER} (data is preserved)"
+    else
+        info "Removing system user: ${SERVICE_USER}"
+        $SUDO userdel "$SERVICE_USER" 2>/dev/null || warn "Could not remove system user ${SERVICE_USER} (a process may still be running as it); remove manually: sudo userdel ${SERVICE_USER}"
+    fi
+fi
+
+# =============================================================================
+# Done: banner, summary of what was preserved, and follow-up hints
+# =============================================================================
+
+echo ""
+echo -e "${PURPLE}"
+cat << 'EOF'
+ ██╗ ██╗ ██╗ ██╗ ██████╗ ███████╗ ███╗ ███╗ █████╗ ███╗ ██╗
+ ██║ ██║ ╚██╗ ██╔╝ ██╔══██╗ ██╔════╝ ████╗ ████║ ██╔══██╗ ████╗ ██║
+ ███████║ ╚████╔╝ ██████╔╝ █████╗ ██╔████╔██║ ███████║ ██╔██╗ ██║
+ ██╔══██║ ╚██╔╝ ██╔═══╝ ██╔══╝ ██║╚██╔╝██║ ██╔══██║ ██║╚██╗██║
+ ██║ ██║ ██║ ██║ ███████╗ ██║ ╚═╝ ██║ ██║ ██║ ██║ ╚████║
+ ╚═╝ ╚═╝ ╚═╝ ╚═╝ ╚══════╝ ╚═╝ ╚═╝ ╚═╝ ╚═╝ ╚═╝ ╚═══╝
+EOF
+echo -e "${NC}"
+info "Hypeman uninstalled successfully!"
+echo ""
+
+if [ "${KEEP_DATA:-true}" = "true" ] && [ -d "$DATA_DIR" ]; then
+    info "Data directory preserved: ${DATA_DIR}"
+    echo " To remove: sudo rm -rf ${DATA_DIR}"
+    echo ""
+fi
+
+if [ "${KEEP_CONFIG:-false}" = "true" ] && [ -d "$CONFIG_DIR" ]; then
+    info "Config directory preserved: ${CONFIG_DIR}"
+    echo " To remove: sudo rm -rf ${CONFIG_DIR}"
+    echo ""
+fi
+
+warn "Note: Caddy or Cloud Hypervisor processes may still be running."
+echo " Check with: ps aux | grep -E 'caddy|cloud-h'"
+echo " Kill all: sudo pkill -f caddy; sudo pkill -f cloud-h"
+echo ""
+
+echo "To reinstall:"
+echo " curl -fsSL https://raw.githubusercontent.com/onkernel/hypeman/main/scripts/install.sh | bash"
+echo ""