From e815c5c3bb9e6a7f4163b7a1d24a43e3c2fa13aa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Edwin=20T=C3=B6r=C3=B6k?= Date: Wed, 26 Nov 2025 10:39:58 +0000 Subject: [PATCH 01/25] CA-420968: compute the amount of physical cores available on a NUMA node set MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Could also compute it by multiplying it with [threads_per_core], but I'm not sure how that'd interact with [smt=false] in Xen. Also to future-proof this I wouldn't want to rely on an entirely symmetrical architecture (although it'd be very rare to have anything other than 2 on x86-64, or to have hyperthreading on in one socket, and off in another). Note that core ids are not unique (there is a core `0` on both socket 0 and socket 1 for example), so only work with number of cores in the topology code. Could've created a CoreSocketSet instead (assuming that no higher grouping than sockets would exist in the future), but for now don't make too many assumptions about topology. No functional change. Signed-off-by: Edwin Török --- ocaml/xenopsd/lib/topology.ml | 21 ++++++++++++++------- ocaml/xenopsd/lib/topology.mli | 28 ++++++++++++++++++++++------ ocaml/xenopsd/test/test_topology.ml | 24 ++++++++++++++++++------ ocaml/xenopsd/xc/domain.ml | 14 ++++++++++++-- 4 files changed, 66 insertions(+), 21 deletions(-) diff --git a/ocaml/xenopsd/lib/topology.ml b/ocaml/xenopsd/lib/topology.ml index a2cd401a0cc..7234153505c 100644 --- a/ocaml/xenopsd/lib/topology.ml +++ b/ocaml/xenopsd/lib/topology.ml @@ -28,19 +28,20 @@ module CPUSet = struct end module NUMAResource = struct - type t = {affinity: CPUSet.t; memfree: int64} + type t = {affinity: CPUSet.t; cores: int; memfree: int64} - let make ~affinity ~memfree = + let make ~affinity ~cores ~memfree = if memfree < 0L then invalid_arg (Printf.sprintf "NUMAResource: memory cannot be negative: %Ld" memfree) ; - {affinity; memfree} + {affinity; cores; memfree} - let empty = {affinity= CPUSet.empty; memfree= 0L} + let empty = {affinity= CPUSet.empty; cores= 0; memfree= 0L} let union a b = make ~affinity:(CPUSet.union a.affinity b.affinity) + ~cores:(a.cores + b.cores) ~memfree:(Int64.add a.memfree b.memfree) let min_memory r1 r2 = {r1 with memfree= min r1.memfree r2.memfree} @@ -50,6 +51,7 @@ module NUMAResource = struct Dump.record [ Dump.field "affinity" (fun t -> t.affinity) CPUSet.pp_dump + ; Dump.field "cores" (fun t -> t.cores) int ; Dump.field "memfree" (fun t -> t.memfree) int64 ] ) @@ -134,6 +136,7 @@ module NUMA = struct distances: int array array ; cpu_to_node: node array ; node_cpus: CPUSet.t array + ; node_cores: int array ; all: CPUSet.t ; node_usage: int array (** Usage across nodes is meant to be balanced when choosing candidates for a VM *) @@ -203,7 +206,7 @@ module NUMA = struct |> seq_sort ~cmp:dist_cmp |> Seq.map (fun ((_, avg), nodes) -> (avg, Seq.map (fun n -> Node n) nodes)) - let make ~distances ~cpu_to_node = + let make ~distances ~cpu_to_node ~node_cores = let ( let* ) = Option.bind in let node_cpus = Array.map (fun _ -> CPUSet.empty) distances in @@ -256,6 +259,7 @@ module NUMA = struct distances ; cpu_to_node= Array.map node_of_int cpu_to_node ; node_cpus + ; node_cores ; all ; node_usage= Array.map (fun _ -> 0) distances ; candidates @@ -265,6 +269,8 @@ module NUMA = struct let cpuset_of_node t (Node i) = t.node_cpus.(i) + let coreset_of_node t (Node i) = t.node_cores.(i) + let node_of_cpu t i = t.cpu_to_node.(i) let nodes t = @@ -278,8 +284,8 @@ module NUMA = struct {t with node_cpus; all} let 
resource t node ~memory = - let affinity = cpuset_of_node t node in - NUMAResource.make ~affinity ~memfree:memory + let affinity = cpuset_of_node t node and cores = coreset_of_node t node in + NUMAResource.make ~affinity ~cores ~memfree:memory let candidates t = t.candidates @@ -316,6 +322,7 @@ module NUMA = struct ; Dump.field "node_cpus" (fun t -> t.node_cpus) (Dump.array CPUSet.pp_dump) + ; Dump.field "node_cores" (fun t -> t.node_cores) (Dump.array int) ] ) end diff --git a/ocaml/xenopsd/lib/topology.mli b/ocaml/xenopsd/lib/topology.mli index f1bd6f9f569..d9263b58325 100644 --- a/ocaml/xenopsd/lib/topology.mli +++ b/ocaml/xenopsd/lib/topology.mli @@ -29,10 +29,21 @@ end module NUMAResource : sig (** A NUMA node providing CPU and memory resources *) - type t = private {affinity: CPUSet.t; memfree: int64} - - val make : affinity:CPUSet.t -> memfree:int64 -> t - (** [make ~affinity ~memfree] constructs a resource requiring affinity to be + type t = private { + affinity: CPUSet.t + (** logical CPUs. This is the smallest unit of scheduling available, + e.g. a hyperthread. + This can be used directly as a soft-, or hard-affinity mask. *) + ; cores: int + (** number of physical cores fully contained in this node, each containing threads_per_core CPUs, + although some of them may be disabled if [smt=false] *) + ; memfree: int64 + (** free (not reserved, not in use) memory available on this NUMA + node or set of NUMA nodes *) + } + + val make : affinity:CPUSet.t -> cores:int -> memfree:int64 -> t + (** [make ~affinity ~cores ~memfree] constructs a resource requiring affinity to be non-empty and memfree to be > 0. A zero request is allowed due to [shrink]. * *) @@ -78,8 +89,12 @@ module NUMA : sig (** A NUMA node index. Distinct from an int to avoid mixing with CPU numbers *) type node = private Node of int - val make : distances:int array array -> cpu_to_node:int array -> t option - (** [make distances cpu_to_node] stores the topology. [distances] is a square + val make : + distances:int array array + -> cpu_to_node:int array + -> node_cores:int array + -> t option + (** [make distances cpu_to_node node_cores] stores the topology. [distances] is a square matrix [d] where [d.(i).(j)] is an approximation to how much slower it is to access memory from node [j] when running on node [i]. Distances are normalized to 10, [d.(i).(i)] must equal to 10, and all values must be >= @@ -94,6 +109,7 @@ module NUMA : sig in Xen and then to -1 by the bindings). [cpu_to_nodes.(i)] = NUMA node of CPU [i] + [node_cores.(i)] = number of cores on NUMA node [i] NUMA nodes without any CPUs are accepted (to handle hard affinities). diff --git a/ocaml/xenopsd/test/test_topology.ml b/ocaml/xenopsd/test/test_topology.ml index d9945ed8018..629d42343b4 100644 --- a/ocaml/xenopsd/test/test_topology.ml +++ b/ocaml/xenopsd/test/test_topology.ml @@ -58,25 +58,35 @@ module Distances = struct (numa, distances) end -let make_numa_common ~cores_per_numa (distances : Distances.t) = +let make_numa_common ~logical_per_physical ~cores_per_numa + (distances : Distances.t) = + (* cores_per_numa refers to logical cores, i.e. 
cpus *) let numa, distances = distances in let cpu_to_node = - Array.init (cores_per_numa * numa) (fun core -> core / cores_per_numa) + Array.init (cores_per_numa * numa) (fun cpu -> cpu / cores_per_numa) + and node_cores = + (* core here refers to physical *) + Array.init numa (fun _ -> cores_per_numa / logical_per_physical) in Option.map (fun d -> (cores_per_numa * numa, d)) - (NUMA.make ~distances ~cpu_to_node) + (NUMA.make ~distances ~cpu_to_node ~node_cores) let make_numa ~numa ~cores = let cores_per_numa = cores / numa in - match make_numa_common ~cores_per_numa (Distances.example numa) with + match + make_numa_common ~logical_per_physical:2 ~cores_per_numa + (Distances.example numa) + with | None -> Alcotest.fail "Synthetic matrix can't fail to load" | Some d -> d let make_numa_amd ~cores_per_numa = - match make_numa_common ~cores_per_numa Distances.opteron with + match + make_numa_common ~cores_per_numa ~logical_per_physical:2 Distances.opteron + with | None -> Alcotest.fail "Synthetic matrix can't fail to load" | Some d -> @@ -304,7 +314,9 @@ let distances_tests = in let test_of_spec (name, distances, expected) = let test () = - let numa_t = make_numa_common ~cores_per_numa:1 distances in + let numa_t = + make_numa_common ~logical_per_physical:1 ~cores_per_numa:1 distances + in match (expected, numa_t) with | None, None -> () diff --git a/ocaml/xenopsd/xc/domain.ml b/ocaml/xenopsd/xc/domain.ml index 4af94d7b96c..3548d51493f 100644 --- a/ocaml/xenopsd/xc/domain.ml +++ b/ocaml/xenopsd/xc/domain.ml @@ -908,8 +908,18 @@ let numa_hierarchy = lazy (let xcext = get_handle () in let distances = (numainfo xcext).distances in - let cpu_to_node = cputopoinfo xcext |> Array.map (fun t -> t.node) in - NUMA.make ~distances ~cpu_to_node + let topoinfo = cputopoinfo xcext in + let core t = t.core and node t = t.node in + let cpu_to_node = topoinfo |> Array.map node + and node_cores = + let module IntSet = Set.Make (Int) in + let a = Array.make (Array.length distances) IntSet.empty in + Array.iter + (fun t -> a.(node t) <- IntSet.add (core t) a.(node t)) + topoinfo ; + Array.map IntSet.cardinal a + in + NUMA.make ~distances ~cpu_to_node ~node_cores ) let numa_mutex = Mutex.create () From ff659cfb6664751705cb2d1dd72bea3b63011784 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Edwin=20T=C3=B6r=C3=B6k?= Date: Thu, 27 Nov 2025 10:44:47 +0000 Subject: [PATCH 02/25] CA-420968: ensure compatibility between NUMARequest.fits and plan MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The planner explicitly looks at the NUMARequest fields and checks that they are non-zero. However if more fields get added in the future this leads to an assertion failure, where the planner thinks it has found a solution, but NUMARequest.fits returns false. Ensure consistency: use `fits` in the planner to check that we've reached a solution. If the remaining request doesn't fit into an empty node, then the request is not empty. 
Signed-off-by: Edwin Török --- ocaml/xenopsd/lib/softaffinity.ml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ocaml/xenopsd/lib/softaffinity.ml b/ocaml/xenopsd/lib/softaffinity.ml index 1e7231506da..10fbdbea786 100644 --- a/ocaml/xenopsd/lib/softaffinity.ml +++ b/ocaml/xenopsd/lib/softaffinity.ml @@ -39,7 +39,7 @@ let plan host nodes ~vm = (Fmt.to_to_string NUMAResource.pp_dump allocated) (Fmt.to_to_string NUMARequest.pp_dump remaining) avg ; - if remaining.NUMARequest.memory > 0L || remaining.NUMARequest.vcpus > 0 then + if not (NUMARequest.fits remaining NUMAResource.empty) then (* [vm] doesn't fit on these nodes *) None else From 3671ba0e7e1364aa76304b61be46f7bfbbf7b3a2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Edwin=20T=C3=B6r=C3=B6k?= Date: Wed, 26 Nov 2025 15:20:28 +0000 Subject: [PATCH 03/25] CA-420968: track number of physical cores during a NUMA planning request MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The requested number of cores is still 0, so no functional change. Signed-off-by: Edwin Török --- ocaml/xenopsd/lib/topology.ml | 20 +++++++++++++++++--- ocaml/xenopsd/lib/topology.mli | 8 ++++---- ocaml/xenopsd/lib/xenops_server.ml | 2 ++ ocaml/xenopsd/test/test_topology.ml | 2 +- ocaml/xenopsd/xc/domain.ml | 11 ++++++----- 5 files changed, 30 insertions(+), 13 deletions(-) diff --git a/ocaml/xenopsd/lib/topology.ml b/ocaml/xenopsd/lib/topology.ml index 7234153505c..f4291ec06f3 100644 --- a/ocaml/xenopsd/lib/topology.ml +++ b/ocaml/xenopsd/lib/topology.ml @@ -58,23 +58,36 @@ module NUMAResource = struct end module NUMARequest = struct - type t = {memory: int64; vcpus: int} + type t = {memory: int64; vcpus: int; cores: int} - let make ~memory ~vcpus = + let make ~memory ~vcpus ~cores = if Int64.compare memory 0L < 0 then invalid_arg (Printf.sprintf "NUMARequest: memory must be > 0: %Ld" memory) ; if vcpus < 0 then invalid_arg (Printf.sprintf "vcpus cannot be negative: %d" vcpus) ; - {memory; vcpus} + if cores < 0 then + invalid_arg (Printf.sprintf "cores cannot be negative: %d" cores) ; + {memory; vcpus; cores} let fits requested available = + (* this is a hard constraint: a VM cannot boot if it doesn't have + enough memory *) Int64.compare requested.memory available.NUMAResource.memfree <= 0 + (* this is a soft constraint: a VM can still boot if the (soft) affinity + constraint is not met, although if hard affinity is used this is a hard + constraint too *) && CPUSet.(cardinal available.NUMAResource.affinity >= requested.vcpus) + && (* this is an optional constraint: it is desirable to be able to leave + hyperthread siblings idle, when the system is not busy. + However requested.cores can also be 0. 
+ *) + available.NUMAResource.cores >= requested.cores let shrink a b = make ~memory:(max 0L (Int64.sub a.memory b.NUMAResource.memfree)) ~vcpus:(max 0 (a.vcpus - CPUSet.cardinal b.NUMAResource.affinity)) + ~cores:(max 0 (a.cores - b.NUMAResource.cores)) let pp_dump = Fmt.( @@ -82,6 +95,7 @@ module NUMARequest = struct [ Dump.field "memory" (fun t -> t.memory) int64 ; Dump.field "vcpus" (fun t -> t.vcpus) int + ; Dump.field "cores" (fun t -> t.cores) int ] ) end diff --git a/ocaml/xenopsd/lib/topology.mli b/ocaml/xenopsd/lib/topology.mli index d9263b58325..8211ffa4ec2 100644 --- a/ocaml/xenopsd/lib/topology.mli +++ b/ocaml/xenopsd/lib/topology.mli @@ -62,11 +62,11 @@ end module NUMARequest : sig (** A (VM) requesting resources *) - type t = private {memory: int64; vcpus: int} + type t = private {memory: int64; vcpus: int; cores: int} - val make : memory:int64 -> vcpus:int -> t - (**[make ~memory ~vcpus] constructs a request. [memory] and [vcpus] must be - strictly positive. *) + val make : memory:int64 -> vcpus:int -> cores:int -> t + (**[make ~memory ~vcpus ~cores] constructs a request. [memory], [vcpus] and + [cores] must be strictly positive. *) val fits : t -> NUMAResource.t -> bool (** [fits requested available] checks whether the [available] resources can diff --git a/ocaml/xenopsd/lib/xenops_server.ml b/ocaml/xenopsd/lib/xenops_server.ml index 6a06b36ba14..97c01d89c94 100644 --- a/ocaml/xenopsd/lib/xenops_server.ml +++ b/ocaml/xenopsd/lib/xenops_server.ml @@ -3627,6 +3627,8 @@ let affinity_of_numa_affinity_policy = let open Xenops_interface.Host in function Any | Best_effort -> Soft | Best_effort_hard -> Hard +let cores_of_numa_affinity_policy _policy ~vcpus:_ = 0 + module HOST = struct let stat _ dbg = Debug.with_thread_associated dbg diff --git a/ocaml/xenopsd/test/test_topology.ml b/ocaml/xenopsd/test/test_topology.ml index 629d42343b4..f3d40d0f42d 100644 --- a/ocaml/xenopsd/test/test_topology.ml +++ b/ocaml/xenopsd/test/test_topology.ml @@ -216,7 +216,7 @@ let test_allocate ?(mem = default_mem) (expected_cores, h) ~vms () = |> List.fold_left (fun (costs_old, costs_new, plans) i -> D.debug "Planning VM %d" i ; - let vm = NUMARequest.make ~memory:mem ~vcpus:vm_cores in + let vm = NUMARequest.make ~memory:mem ~vcpus:vm_cores ~cores:0 in match Softaffinity.plan h nodes ~vm with | None -> Alcotest.fail "No NUMA plan" diff --git a/ocaml/xenopsd/xc/domain.ml b/ocaml/xenopsd/xc/domain.ml index 3548d51493f..a2da2345e4b 100644 --- a/ocaml/xenopsd/xc/domain.ml +++ b/ocaml/xenopsd/xc/domain.ml @@ -945,7 +945,7 @@ let set_affinity = function | Xenops_server.Soft -> Xenctrlext.vcpu_setaffinity_soft -let numa_placement domid ~vcpus ~memory affinity = +let numa_placement domid ~vcpus ~cores ~memory affinity = let open Xenctrlext in let open Topology in with_lock numa_mutex (fun () -> @@ -959,7 +959,7 @@ let numa_placement domid ~vcpus ~memory affinity = numa_meminfo ~f:(fun node m -> NUMA.resource host node ~memory:m.memfree) in - let vm = NUMARequest.make ~memory ~vcpus in + let vm = NUMARequest.make ~memory ~vcpus ~cores in let nodea = match !numa_resources with | None -> @@ -1096,10 +1096,11 @@ let build_pre ~xc ~xs ~vcpus ~memory ~hard_affinity domid = D.debug "VM has hard affinity set, skipping NUMA optimization" ; None ) else - let affinity = - Xenops_server.affinity_of_numa_affinity_policy pin + let affinity = Xenops_server.affinity_of_numa_affinity_policy pin + and cores = + Xenops_server.cores_of_numa_affinity_policy pin ~vcpus in - numa_placement domid ~vcpus + numa_placement domid 
~vcpus ~cores ~memory:(Int64.mul memory.xen_max_mib 1048576L) affinity |> Option.map fst From 5d32507881544c05246fbfec7e0a511dd9971f6a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Edwin=20T=C3=B6r=C3=B6k?= Date: Wed, 26 Nov 2025 16:09:40 +0000 Subject: [PATCH 04/25] CA-420968: introduce an explicit name for the current NUMA policy: Prio_mem_only MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The current NUMA policy prioritizes reducing cross-NUMA node memory traffic by picking the smallest set of NUMA nodes that fit a VM. It doesn't look at how this affects CPU overload within a NUMA node, or whether the local bandwidth of each NUMA node is balanced or not. Give this policy an explicit name, `Prio_mem_only`, and when the "compat" setting in `xenopsd.conf` is used (`numa-placement=true`), then explicitly use this policy instead of Best-effort. Currently Best-effort is still equivalent to this policy, but that'll change in a follow-up commit. Introduce a new xenopsd.conf entry `numa-best-effort-prio-mem-only`, which can be used to explicitly revert best effort to the current policy. (currently this is a no-op, because there is only one best-effort policy). Future policies should also look at CPU overload. No functional change. Signed-off-by: Edwin Török --- ocaml/xapi-idl/xen/xenops_interface.ml | 2 ++ ocaml/xenopsd/lib/xenops_server.ml | 9 +++++++-- ocaml/xenopsd/lib/xenopsd.ml | 9 +++++++++ ocaml/xenopsd/xc/domain.ml | 2 +- ocaml/xenopsd/xc/xenops_server_xen.ml | 8 +++++++- 5 files changed, 26 insertions(+), 4 deletions(-) diff --git a/ocaml/xapi-idl/xen/xenops_interface.ml b/ocaml/xapi-idl/xen/xenops_interface.ml index a67c51b0131..9ddcd9753a1 100644 --- a/ocaml/xapi-idl/xen/xenops_interface.ml +++ b/ocaml/xapi-idl/xen/xenops_interface.ml @@ -494,6 +494,8 @@ module Host = struct node, and soft-pins its VCPUs to the node, if possible. Otherwise behaves like Any. 
*) | Best_effort_hard (** Like Best_effort, but hard-pins the VCPUs *) + | Prio_mem_only + (** Prioritizes reducing memory bandwidth, ignores CPU overload *) [@@deriving rpcty] type numa_affinity_policy_opt = numa_affinity_policy option [@@deriving rpcty] diff --git a/ocaml/xenopsd/lib/xenops_server.ml b/ocaml/xenopsd/lib/xenops_server.ml index 97c01d89c94..e9fab3b4482 100644 --- a/ocaml/xenopsd/lib/xenops_server.ml +++ b/ocaml/xenopsd/lib/xenops_server.ml @@ -3622,12 +3622,17 @@ let string_of_numa_affinity_policy = "best-effort" | Best_effort_hard -> "best-effort-hard" + | Prio_mem_only -> + "prio-mem-only" let affinity_of_numa_affinity_policy = let open Xenops_interface.Host in - function Any | Best_effort -> Soft | Best_effort_hard -> Hard + function + | Any | Best_effort | Prio_mem_only -> Soft | Best_effort_hard -> Hard -let cores_of_numa_affinity_policy _policy ~vcpus:_ = 0 +let cores_of_numa_affinity_policy policy ~vcpus:_ = + let open Xenops_interface.Host in + match policy with _ -> 0 module HOST = struct let stat _ dbg = diff --git a/ocaml/xenopsd/lib/xenopsd.ml b/ocaml/xenopsd/lib/xenopsd.ml index d4a08e92be7..c5242073237 100644 --- a/ocaml/xenopsd/lib/xenopsd.ml +++ b/ocaml/xenopsd/lib/xenopsd.ml @@ -70,6 +70,8 @@ let pvinpvh_xen_cmdline = ref "pv-shim console=xen" let numa_placement_compat = ref true +let numa_best_effort_prio_mem_only = ref false + (* O(N^2) operations, until we get a xenstore cache, so use a small number here *) let vm_guest_agent_xenstore_quota = ref 128 @@ -263,6 +265,13 @@ let options = , (fun () -> string_of_bool !numa_placement_compat) , "NUMA-aware placement of VMs (deprecated, use XAPI setting)" ) + ; ( "numa-best-effort-prio-mem-only" + , Arg.Bool (fun x -> numa_best_effort_prio_mem_only := x) + , (fun () -> string_of_bool !numa_best_effort_prio_mem_only) + , "Revert to the previous 'best effort' NUMA policy, where we only \ + filtered NUMA nodes based on available memory. 
Only use if there are \ + issues with the new best effort policy" + ) ; ( "pci-quarantine" , Arg.Bool (fun b -> pci_quarantine := b) , (fun () -> string_of_bool !pci_quarantine) diff --git a/ocaml/xenopsd/xc/domain.ml b/ocaml/xenopsd/xc/domain.ml index a2da2345e4b..90df781d8f2 100644 --- a/ocaml/xenopsd/xc/domain.ml +++ b/ocaml/xenopsd/xc/domain.ml @@ -1090,7 +1090,7 @@ let build_pre ~xc ~xs ~vcpus ~memory ~hard_affinity domid = match !Xenops_server.numa_placement with | Any -> None - | (Best_effort | Best_effort_hard) as pin -> + | (Best_effort | Best_effort_hard | Prio_mem_only) as pin -> log_reraise (Printf.sprintf "NUMA placement") (fun () -> if hard_affinity <> [] then ( D.debug "VM has hard affinity set, skipping NUMA optimization" ; diff --git a/ocaml/xenopsd/xc/xenops_server_xen.ml b/ocaml/xenopsd/xc/xenops_server_xen.ml index 5274569ef4e..1ea719479d1 100644 --- a/ocaml/xenopsd/xc/xenops_server_xen.ml +++ b/ocaml/xenopsd/xc/xenops_server_xen.ml @@ -5389,7 +5389,13 @@ let init () = ) ; Device.Backend.init () ; Xenops_server.default_numa_affinity_policy := - if !Xenopsd.numa_placement_compat then Best_effort else Any ; + if !Xenopsd.numa_placement_compat then + if !Xenopsd.numa_best_effort_prio_mem_only then + Prio_mem_only + else + Best_effort + else + Any ; info "Default NUMA affinity policy is '%s'" Xenops_server.(string_of_numa_affinity_policy !default_numa_affinity_policy) ; Xenops_server.numa_placement := !Xenops_server.default_numa_affinity_policy ; From b0978541760225bdad31b69e034398ae0c750b25 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Edwin=20T=C3=B6r=C3=B6k?= Date: Wed, 26 Nov 2025 16:14:54 +0000 Subject: [PATCH 05/25] CA-420968: avoid large performance hit on small NUMA nodes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit NUMA optimized placement can have a large performance hit on machines with small NUMA nodes and VMs with a large number of vCPUs. For example a machine that has 2 sockets, which can run at most 32 vCPUs in a single socket (NUMA node), and a VM with 32 vCPUs. Usually Xen would try to spread the load across actual cores, and avoid the hyperthread siblings, e.g. using CPUs 0,2,4,etc. But when NUMA placement is used all the vCPUs must be in the same NUMA node. If that NUMA node doesn't have enough cores, then Xen will have no choice but to use CPUs 0,1,2,3,etc. Hyperthread siblings share resources, and if you try to use both at the same time you get a big performance hit, depending on the workload. Avoid this by "requesting" cores=vcpus for each VM, which will make the placement algorithm choose the next size up in terms of NUMA nodes (i.e. instead of 1 NUMA node, use 2,3 as needed, falling back to using all nodes if needed). The potential gain from reducing memory latency with a NUMA optimized placement (~20% on Intel Memory Latency Checker: Idle latency) is outweighed by the potential loss due to reduced CPU capacity (40%-75% on OpenSSL, POV-Ray, and OpenVINO), so this is the correct trade-off. If the NUMA node is large enough, or if the VMs have a small number of vCPUs then we still try to use a single NUMA node as we did previously. The performance difference can be reproduced and verified easily by running `openssl speed -multi 32 rsa4096` on a 32 vCPU VM on a host that has 2 NUMA nodes, with 32 PCPUs each, and 2 threads per core. This introduces a policy that can control whether we want to filter out NUMA nodes with too few cores. 
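As a worked example of the filter (hypothetical host: 2 NUMA nodes, 32
logical CPUs per node, 2 threads per core, i.e. 16 physical cores per node):

    let () =
      let vcpus = 32 in
      (* best effort now requests cores = vcpus for the VM *)
      let requested_cores = vcpus in
      let cores_per_node = 32 / 2 in
      (* a single node is filtered out... *)
      assert (requested_cores > cores_per_node) ;
      (* ...so the planner widens the plan to both nodes *)
      assert (requested_cores <= 2 * cores_per_node)
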
Although we want to enable this filter by default, we still want an "escape hatch" to turn it off if we find problems with it. That is why the "compat" setting (numa_placement=true) in xenopsd.conf reverts back to the old policy, which is now named explicitly as Prio_mem_only. There could still be workloads where optimizing for memory bandwidth makes more sense (although that is a property of the NUMA node, not of individual VMs), so although it might be desirable for this to be a VM policy, it cannot, because it affects other VMs too. TODO: when sched-gran=core this should be turned off. That always has the performance hit, so might as well use smaller NUMA nodes if available. For now this isn't exposed yet as a XAPI-level policy, because that requires more changes (to also sort by free cores on a node, and to also sort at the pool level by free cpus on a host). Once we have those changes we can introduce a new policy `prio_core_mem` to sort by free cores first, then by free memory, and requires cores>=vcpus (i.e. cpus>=vcpus*threads_per_cores) when choosing a node. This changes the default to the new setting, which should be equal or an improvement in the general case. An "escape hatch" to revert to the previous behaviour is to set `numa-placement=true` in xenopsd.conf, and the XAPI host-level policy to 'default_policy'. Signed-off-by: Edwin Török --- ocaml/xapi-idl/xen/xenops_interface.ml | 4 +++- ocaml/xenopsd/lib/xenops_server.ml | 4 ++-- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/ocaml/xapi-idl/xen/xenops_interface.ml b/ocaml/xapi-idl/xen/xenops_interface.ml index 9ddcd9753a1..3920736afd5 100644 --- a/ocaml/xapi-idl/xen/xenops_interface.ml +++ b/ocaml/xapi-idl/xen/xenops_interface.ml @@ -492,7 +492,9 @@ module Host = struct | Best_effort (** Best-effort placement. Assigns the memory of the VM to a single node, and soft-pins its VCPUs to the node, if possible. Otherwise - behaves like Any. *) + behaves like Any. + The node(s) need to have enough cores to run all the vCPUs of the VM + *) | Best_effort_hard (** Like Best_effort, but hard-pins the VCPUs *) | Prio_mem_only (** Prioritizes reducing memory bandwidth, ignores CPU overload *) diff --git a/ocaml/xenopsd/lib/xenops_server.ml b/ocaml/xenopsd/lib/xenops_server.ml index e9fab3b4482..9703f4c2a93 100644 --- a/ocaml/xenopsd/lib/xenops_server.ml +++ b/ocaml/xenopsd/lib/xenops_server.ml @@ -3630,9 +3630,9 @@ let affinity_of_numa_affinity_policy = function | Any | Best_effort | Prio_mem_only -> Soft | Best_effort_hard -> Hard -let cores_of_numa_affinity_policy policy ~vcpus:_ = +let cores_of_numa_affinity_policy policy ~vcpus = let open Xenops_interface.Host in - match policy with _ -> 0 + match policy with Any | Prio_mem_only -> 0 | _ -> vcpus module HOST = struct let stat _ dbg = From 2dbbbd0e9571c3338dc8d080f0efdd100c08999e Mon Sep 17 00:00:00 2001 From: Guillaume Date: Thu, 27 Nov 2025 11:26:04 +0100 Subject: [PATCH 06/25] [doc] add missing command to xs-trace Signed-off-by: Guillaume --- doc/content/toolstack/features/Tracing/index.md | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/doc/content/toolstack/features/Tracing/index.md b/doc/content/toolstack/features/Tracing/index.md index c54441bbb68..4c90c570699 100644 --- a/doc/content/toolstack/features/Tracing/index.md +++ b/doc/content/toolstack/features/Tracing/index.md @@ -81,14 +81,17 @@ and also assist newcomers in onboarding to the project. By default, traces are generated locally in the `/var/log/dt` directory. 
You can copy or forward these traces to another location or endpoint using the `xs-trace` tool. For example, if you have -a *Jaeger* server running locally, you can run: +a *Jaeger* server running locally, you can copy a trace to an endpoint by running: ```sh -xs-trace /var/log/dt/ http://127.0.0.1:9411/api/v2/spans +xs-trace cp /var/log/dt/ http://127.0.0.1:9411/api/v2/spans ``` You will then be able to visualize the traces in Jaeger. +The `xs-trace` tool also supports trace files in `.ndjson` and compressed `.zst` formats, so +you can copy or forward these files directly as well. + ### Tagging Trace Sessions for Easier Search #### Specific attributes From 2809d72786961624a8dd36eb25a7f76cb1f1a3ca Mon Sep 17 00:00:00 2001 From: Pau Ruiz Safont Date: Thu, 19 Jun 2025 13:27:20 +0100 Subject: [PATCH 07/25] numa_placement: use Seq instead of List MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This partially applies the following commit to reduce the complexity of a Xen-4.20 patch: > xenopsd-xc: do not try keep track of free memory when planning NUMA nodes (CA-411684) > > Free memory is now properly accounted for because the memory pages are claimed > within the NUMA mutex, so there's no need to have double tracking. > > On top of that, this code never increased the free memory, which means that it > always reached a point where it was impossible to allocate a domain into a > single numa node. > Signed-off-by: Pau Ruiz Safont However it doesn't actually drop the free memory accounting code, so: No functional change Signed-off-by: Edwin Török --- ocaml/xenopsd/xc/domain.ml | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/ocaml/xenopsd/xc/domain.ml b/ocaml/xenopsd/xc/domain.ml index 4af94d7b96c..83556cd8c1e 100644 --- a/ocaml/xenopsd/xc/domain.ml +++ b/ocaml/xenopsd/xc/domain.ml @@ -942,20 +942,19 @@ let numa_placement domid ~vcpus ~memory affinity = let ( let* ) = Option.bind in let xcext = get_handle () in let* host = Lazy.force numa_hierarchy in - let numa_meminfo = (numainfo xcext).memory |> Array.to_list in + let numa_meminfo = (numainfo xcext).memory |> Array.to_seq in let nodes = - ListLabels.map2 - (NUMA.nodes host |> List.of_seq) - numa_meminfo - ~f:(fun node m -> NUMA.resource host node ~memory:m.memfree) + Seq.map2 + (fun node m -> NUMA.resource host node ~memory:m.memfree) + (NUMA.nodes host) numa_meminfo in let vm = NUMARequest.make ~memory ~vcpus in let nodea = match !numa_resources with | None -> - Array.of_list nodes + Array.of_seq nodes | Some a -> - Array.map2 NUMAResource.min_memory (Array.of_list nodes) a + Array.map2 NUMAResource.min_memory (Array.of_seq nodes) a in numa_resources := Some nodea ; let memory_plan = From 98d5077c8da01047068dcf0a27be7808b507d6cf Mon Sep 17 00:00:00 2001 From: Ming Lu Date: Fri, 28 Nov 2025 13:12:29 +0800 Subject: [PATCH 08/25] CA-419908: Move module Watcher ahead for future use in module VM Signed-off-by: Ming Lu --- ocaml/xenopsd/xc/xenops_server_xen.ml | 818 +++++++++++++------------- 1 file changed, 409 insertions(+), 409 deletions(-) diff --git a/ocaml/xenopsd/xc/xenops_server_xen.ml b/ocaml/xenopsd/xc/xenops_server_xen.ml index 5274569ef4e..4f73cff49b3 100644 --- a/ocaml/xenopsd/xc/xenops_server_xen.ml +++ b/ocaml/xenopsd/xc/xenops_server_xen.ml @@ -26,6 +26,12 @@ open D module RRDD = Rrd_client.Client module StringSet = Set.Make (String) +module IntMap = Map.Make (struct + type t = int + + let compare = compare +end) + let finally = 
Xapi_stdext_pervasives.Pervasiveext.finally let with_lock = Xapi_stdext_threads.Threadext.Mutex.execute @@ -1166,84 +1172,421 @@ let dm_of ~vm = let vtpm_of ~vm = match vm.Vm.ty with Vm.HVM h -> h.tpm | _ -> None -module VM = struct - open Vm +module Actions = struct + (* CA-76600: the rtc/timeoffset needs to be maintained over a migrate. *) + let store_rtc_timeoffset vm timeoffset = + let _ = + DB.update vm + (Option.map (function {VmExtra.persistent} as extra -> + ( match persistent with + | {VmExtra.ty= Some (Vm.HVM hvm_info); _} -> + let platformdata = + ("timeoffset", timeoffset) + :: List.remove_assoc "timeoffset" persistent.platformdata + in + let persistent = + { + persistent with + VmExtra.ty= Some (Vm.HVM {hvm_info with Vm.timeoffset}) + ; platformdata + } + in + debug "VM = %s; rtc/timeoffset <- %s" vm timeoffset ; + VmExtra.{persistent} + | _ -> + extra + ) + ) + ) + in + () - let will_be_hvm vm = match vm.ty with HVM _ -> true | _ -> false + let xenbus_connected = Xenbus_utils.(int_of Connected) |> string_of_int - let profile_of ~vm = - if will_be_hvm vm then - Some (choose_qemu_dm vm.Xenops_interface.Vm.platformdata) - else - None + let maybe_update_pv_drivers_detected ~xc ~xs domid path = + let vm = get_uuid ~xc domid |> Uuidx.to_string in + Option.iter + (function + | {VmExtra.persistent} -> ( + try + let value = xs.Xs.read path in + let pv_drivers_detected = + match + ( value = xenbus_connected + , persistent.VmExtra.pv_drivers_detected + ) + with + | true, false -> + (* State "connected" (4) means that PV drivers are present for + this device *) + debug "VM = %s; found PV driver evidence on %s (value = %s)" + vm path value ; + true + | false, true -> + (* This device is not connected, while earlier we detected PV + drivers. We conclude that drivers are still present if any + other device is connected. *) + let devices = Device_common.list_frontends ~xs domid in + let found = + (* Return `true` as soon as a device in state 4 is found. *) + List.exists + (fun device -> + try + xs.Xs.read + (Device_common.backend_state_path_of_device ~xs + device + ) + = xenbus_connected + with Xs_protocol.Enoent _ -> false + ) + devices + in + if not found then (* No devices in state "connected" (4) *) + debug "VM = %s; lost PV driver evidence" vm ; + found + | _ -> + (* No change *) + persistent.VmExtra.pv_drivers_detected + in + let updated = + DB.update vm + (Option.map (function {VmExtra.persistent} -> + let persistent = + {persistent with VmExtra.pv_drivers_detected} + in + VmExtra.{persistent} + ) + ) + in + if updated then + Updates.add (Dynamic.Vm vm) internal_updates + with Xs_protocol.Enoent _ -> + warn "Watch event on %s fired but couldn't read from it" path ; + () + (* the path must have disappeared immediately after the watch fired. + Let's treat this as if we never saw it. 
*) + ) + ) + (DB.read vm) - let dm_of ~vm = dm_of ~vm:vm.Vm.id + let interesting_paths_for_domain domid uuid = + let open Printf in + [ + sprintf "/local/domain/%d/attr" domid + ; sprintf "/local/domain/%d/data/ts" domid + ; sprintf "/local/domain/%d/data/service" domid + ; sprintf "/local/domain/%d/data/pvs_target" domid + ; sprintf "/local/domain/%d/memory/target" domid + ; sprintf "/local/domain/%d/memory/uncooperative" domid + ; sprintf "/local/domain/%d/console/vnc-port" domid + ; sprintf "/local/domain/%d/console/tc-port" domid + ; Service.Qemu.pidxenstore_path_signal domid + ; sprintf "/local/domain/%d/control" domid + ; sprintf "/local/domain/%d/device" domid + ; sprintf "/local/domain/%d/rrd" domid + ; sprintf "/local/domain/%d/vm-data" domid + ; sprintf "/local/domain/%d/feature" domid + ; sprintf "/vm/%s/rtc/timeoffset" uuid + ; sprintf "/local/domain/%d/xenserver/attr" domid + ] - let compute_overhead persistent vcpu_max memory_static_max shadow_multiplier = - let open VmExtra in - let static_max_mib = Memory.mib_of_bytes_used memory_static_max in - let model = - match persistent.ty with - | Some (PV _) -> - Memory.Linux.overhead_mib - | Some (PVinPVH _) -> - Memory.PVinPVH.overhead_mib - | Some (HVM _ | PVH _) -> - Memory.HVM.overhead_mib - | None -> - failwith - "cannot compute memory overhead: unable to determine domain type" + let watch_token domid = Printf.sprintf "xenopsd-xc:domain-%d" domid + + let watches_of_device dev = + let interesting_backend_keys = + [ + "kthread-pid" + ; "tapdisk-pid" + ; "shutdown-done" + ; "hotplug-status" + ; "params" + ; "state" + ] in - model static_max_mib vcpu_max shadow_multiplier |> Memory.bytes_of_mib + let open Device_common in + let be = dev.backend.domid in + let fe = dev.frontend.domid in + let kind = string_of_kind dev.backend.kind in + let devid = dev.frontend.devid in + List.map + (fun k -> + Printf.sprintf "/local/domain/%d/backend/%s/%d/%d/%s" be kind fe devid k + ) + interesting_backend_keys - let shutdown_reason = function - | Reboot -> - Domain.Reboot - | PowerOff -> - Domain.PowerOff - | Suspend -> - Domain.Suspend - | Halt -> - Domain.Halt - | S3Suspend -> - Domain.S3Suspend + let unmanaged_domain domid id = domid > 0 && not (DB.exists id) - (* We compute our initial target at memory reservation time, done before the - domain is created. We consume this information later when the domain is - built. *) - let set_initial_target ~xs domid initial_target = - xs.Xs.write - (Printf.sprintf "/local/domain/%d/memory/initial-target" domid) - (Int64.to_string initial_target) + let found_running_domain _domid id = + Updates.add (Dynamic.Vm id) internal_updates - let get_initial_target ~xs domid = - Int64.of_string - (xs.Xs.read - (Printf.sprintf "/local/domain/%d/memory/initial-target" domid) + let device_watches = ref IntMap.empty + + let domain_appeared _xc _xs domid = + device_watches := IntMap.add domid [] !device_watches + + let domain_disappeared _xc xs domid = + let token = watch_token domid in + List.iter + (fun d -> + List.iter (Xenstore_watch.unwatch ~xs token) (watches_of_device d) ) + (try IntMap.find domid !device_watches with Not_found -> []) ; + device_watches := IntMap.remove domid !device_watches ; + (* Anyone blocked on a domain/device operation which won't happen because + the domain just shutdown should be cancelled here. 
*) + debug "Cancelling watches for: domid %d" domid ; + Cancel_utils.on_shutdown ~xs domid ; + (* Finally, discard any device caching for the domid destroyed *) + DeviceCache.discard device_cache domid - let domain_type_path domid = - Printf.sprintf "/local/domain/%d/domain-type" domid + let qemu_disappeared di xc xs = + match !Xenopsd.action_after_qemu_crash with + | None -> + () + | Some action -> ( + debug "action-after-qemu-crash=%s" action ; + match action with + | "poweroff" -> + (* we do not expect a HVM guest to survive qemu disappearing, so + kill the VM *) + Domain.set_action_request ~xs di.Xenctrl.domid (Some "poweroff") + | "pause" -> + (* useful for debugging qemu *) + Domain.pause ~xc di.Xenctrl.domid + | _ -> + () + ) - let set_domain_type ~xs domid vm = - let domain_type = - match vm.ty with - | HVM _ -> - "hvm" - | PV _ -> - "pv" - | PVinPVH _ -> - "pv-in-pvh" - | PVH _ -> - "pvh" - in - xs.Xs.write (domain_type_path domid) domain_type + let add_device_watch xs dev = + let open Device_common in + debug "Adding watches for: %s" (string_of_device dev) ; + let domid = dev.frontend.domid in + let token = watch_token domid in + List.iter (Xenstore_watch.watch ~xs token) (watches_of_device dev) ; + device_watches := + IntMap.add domid + (dev :: IntMap.find domid !device_watches) + !device_watches - let get_domain_type ~xs di = - try - match xs.Xs.read (domain_type_path di.Xenctrl.domid) with - | "hvm" -> - Domain_HVM + let remove_device_watch xs dev = + let open Device_common in + debug "Removing watches for: %s" (string_of_device dev) ; + let domid = dev.frontend.domid in + let current = IntMap.find domid !device_watches in + let token = watch_token domid in + List.iter (Xenstore_watch.unwatch ~xs token) (watches_of_device dev) ; + device_watches := + IntMap.add domid (List.filter (fun x -> x <> dev) current) !device_watches + + let watch_fired xc xs path domains watches = + let look_for_different_devices domid = + if not (Xenstore_watch.IntSet.mem domid watches) then + debug "Ignoring frontend device watch on unmanaged domain: %d" domid + else if not (IntMap.mem domid !device_watches) then + warn + "Xenstore watch fired, but no entry for domid %d in device watches \ + list" + domid + else + let devices = IntMap.find domid !device_watches in + let devices' = Device_common.list_frontends ~xs domid in + let old_devices = + Xapi_stdext_std.Listext.List.set_difference devices devices' + in + let new_devices = + Xapi_stdext_std.Listext.List.set_difference devices' devices + in + List.iter (add_device_watch xs) new_devices ; + List.iter (remove_device_watch xs) old_devices + in + let uuid_of_domain di = + let string_of_domain_handle handle = + Array.to_list handle |> List.map string_of_int |> String.concat "; " + in + match Uuidx.of_int_array di.Xenctrl.handle with + | Some x -> + x + | None -> + failwith + (Printf.sprintf "VM handle for domain %i is an invalid uuid: %a" + di.Xenctrl.domid + (fun () -> string_of_domain_handle) + di.Xenctrl.handle + ) + in + let fire_event_on_vm domid = + let d = int_of_string domid in + let open Xenstore_watch in + if not (IntMap.mem d domains) then + debug "Ignoring watch on shutdown domain %d" d + else + let di = IntMap.find d domains in + let id = Uuidx.to_string (uuid_of_domain di) in + Updates.add (Dynamic.Vm id) internal_updates + in + let fire_event_on_device domid kind devid = + let d = int_of_string domid in + let open Xenstore_watch in + if not (IntMap.mem d domains) then + debug "Ignoring watch on shutdown domain %d" d + else + let di = 
IntMap.find d domains in + let id = Uuidx.to_string (uuid_of_domain di) in + let update = + match kind with + | "vbd" | "vbd3" | "qdisk" | "9pfs" -> + let devid' = + devid + |> int_of_string + |> Device_number.of_xenstore_key + |> Device_number.to_linux_device + in + Some (Dynamic.Vbd (id, devid')) + | "vif" -> + Some (Dynamic.Vif (id, devid)) + | x -> + debug "Unknown device kind: '%s'" x ; + None + in + Option.iter (fun x -> Updates.add x internal_updates) update + in + let fire_event_on_qemu domid = + let d = int_of_string domid in + let open Xenstore_watch in + if not (IntMap.mem d domains) then + debug "Ignoring qemu-pid-signal watch on shutdown domain %d" d + else + let signal = + try Some (xs.Xs.read (Service.Qemu.pidxenstore_path_signal d)) + with _ -> None + in + match signal with + | None -> + () + | Some signal -> + debug "Received unexpected qemu-pid-signal %s for domid %d" signal d ; + let di = IntMap.find d domains in + let id = Uuidx.to_string (uuid_of_domain di) in + qemu_disappeared di xc xs ; + Updates.add (Dynamic.Vm id) internal_updates + in + match Astring.String.cuts ~empty:false ~sep:"/" path with + | "local" + :: "domain" + :: domid + :: "backend" + :: kind + :: frontend + :: devid + :: key -> + debug + "Watch on backend domid: %s kind: %s -> frontend domid: %s devid: %s" + domid kind frontend devid ; + fire_event_on_device frontend kind devid ; + (* If this event was a state change then this might be the first time we + see evidence of PV drivers *) + if key = ["state"] then + maybe_update_pv_drivers_detected ~xc ~xs (int_of_string frontend) path + | "local" :: "domain" :: frontend :: "device" :: _ -> + look_for_different_devices (int_of_string frontend) + | ["local"; "domain"; domid; "qemu-pid-signal"] -> + fire_event_on_qemu domid + | "local" :: "domain" :: domid :: _ -> + fire_event_on_vm domid + | ["vm"; uuid; "rtc"; "timeoffset"] -> + let timeoffset = try Some (xs.Xs.read path) with _ -> None in + Option.iter + (fun timeoffset -> + (* Store the rtc/timeoffset for migrate *) + store_rtc_timeoffset uuid timeoffset ; + (* Tell the higher-level toolstack about this too *) + Updates.add (Dynamic.Vm uuid) internal_updates + ) + timeoffset + | _ -> + debug "Ignoring unexpected watch: %s" path +end + +module Watcher = Xenstore_watch.WatchXenstore (Actions) + +module VM = struct + open Vm + + let will_be_hvm vm = match vm.ty with HVM _ -> true | _ -> false + + let profile_of ~vm = + if will_be_hvm vm then + Some (choose_qemu_dm vm.Xenops_interface.Vm.platformdata) + else + None + + let dm_of ~vm = dm_of ~vm:vm.Vm.id + + let compute_overhead persistent vcpu_max memory_static_max shadow_multiplier = + let open VmExtra in + let static_max_mib = Memory.mib_of_bytes_used memory_static_max in + let model = + match persistent.ty with + | Some (PV _) -> + Memory.Linux.overhead_mib + | Some (PVinPVH _) -> + Memory.PVinPVH.overhead_mib + | Some (HVM _ | PVH _) -> + Memory.HVM.overhead_mib + | None -> + failwith + "cannot compute memory overhead: unable to determine domain type" + in + model static_max_mib vcpu_max shadow_multiplier |> Memory.bytes_of_mib + + let shutdown_reason = function + | Reboot -> + Domain.Reboot + | PowerOff -> + Domain.PowerOff + | Suspend -> + Domain.Suspend + | Halt -> + Domain.Halt + | S3Suspend -> + Domain.S3Suspend + + (* We compute our initial target at memory reservation time, done before the + domain is created. We consume this information later when the domain is + built. 
*) + let set_initial_target ~xs domid initial_target = + xs.Xs.write + (Printf.sprintf "/local/domain/%d/memory/initial-target" domid) + (Int64.to_string initial_target) + + let get_initial_target ~xs domid = + Int64.of_string + (xs.Xs.read + (Printf.sprintf "/local/domain/%d/memory/initial-target" domid) + ) + + let domain_type_path domid = + Printf.sprintf "/local/domain/%d/domain-type" domid + + let set_domain_type ~xs domid vm = + let domain_type = + match vm.ty with + | HVM _ -> + "hvm" + | PV _ -> + "pv" + | PVinPVH _ -> + "pv-in-pvh" + | PVH _ -> + "pvh" + in + xs.Xs.write (domain_type_path domid) domain_type + + let get_domain_type ~xs di = + try + match xs.Xs.read (domain_type_path di.Xenctrl.domid) with + | "hvm" -> + Domain_HVM | "pv" -> Domain_PV | "pv-in-pvh" -> @@ -4957,349 +5300,6 @@ module UPDATES = struct let get last timeout = Updates.get "UPDATES.get" last timeout internal_updates end -module IntMap = Map.Make (struct - type t = int - - let compare = compare -end) - -module Actions = struct - (* CA-76600: the rtc/timeoffset needs to be maintained over a migrate. *) - let store_rtc_timeoffset vm timeoffset = - let _ = - DB.update vm - (Option.map (function {VmExtra.persistent} as extra -> - ( match persistent with - | {VmExtra.ty= Some (Vm.HVM hvm_info); _} -> - let platformdata = - ("timeoffset", timeoffset) - :: List.remove_assoc "timeoffset" persistent.platformdata - in - let persistent = - { - persistent with - VmExtra.ty= Some (Vm.HVM {hvm_info with Vm.timeoffset}) - ; platformdata - } - in - debug "VM = %s; rtc/timeoffset <- %s" vm timeoffset ; - VmExtra.{persistent} - | _ -> - extra - ) - ) - ) - in - () - - let xenbus_connected = Xenbus_utils.(int_of Connected) |> string_of_int - - let maybe_update_pv_drivers_detected ~xc ~xs domid path = - let vm = get_uuid ~xc domid |> Uuidx.to_string in - Option.iter - (function - | {VmExtra.persistent} -> ( - try - let value = xs.Xs.read path in - let pv_drivers_detected = - match - ( value = xenbus_connected - , persistent.VmExtra.pv_drivers_detected - ) - with - | true, false -> - (* State "connected" (4) means that PV drivers are present for - this device *) - debug "VM = %s; found PV driver evidence on %s (value = %s)" - vm path value ; - true - | false, true -> - (* This device is not connected, while earlier we detected PV - drivers. We conclude that drivers are still present if any - other device is connected. *) - let devices = Device_common.list_frontends ~xs domid in - let found = - (* Return `true` as soon as a device in state 4 is found. *) - List.exists - (fun device -> - try - xs.Xs.read - (Device_common.backend_state_path_of_device ~xs - device - ) - = xenbus_connected - with Xs_protocol.Enoent _ -> false - ) - devices - in - if not found then (* No devices in state "connected" (4) *) - debug "VM = %s; lost PV driver evidence" vm ; - found - | _ -> - (* No change *) - persistent.VmExtra.pv_drivers_detected - in - let updated = - DB.update vm - (Option.map (function {VmExtra.persistent} -> - let persistent = - {persistent with VmExtra.pv_drivers_detected} - in - VmExtra.{persistent} - ) - ) - in - if updated then - Updates.add (Dynamic.Vm vm) internal_updates - with Xs_protocol.Enoent _ -> - warn "Watch event on %s fired but couldn't read from it" path ; - () - (* the path must have disappeared immediately after the watch fired. - Let's treat this as if we never saw it. 
*) - ) - ) - (DB.read vm) - - let interesting_paths_for_domain domid uuid = - let open Printf in - [ - sprintf "/local/domain/%d/attr" domid - ; sprintf "/local/domain/%d/data/ts" domid - ; sprintf "/local/domain/%d/data/service" domid - ; sprintf "/local/domain/%d/data/pvs_target" domid - ; sprintf "/local/domain/%d/memory/target" domid - ; sprintf "/local/domain/%d/memory/uncooperative" domid - ; sprintf "/local/domain/%d/console/vnc-port" domid - ; sprintf "/local/domain/%d/console/tc-port" domid - ; Service.Qemu.pidxenstore_path_signal domid - ; sprintf "/local/domain/%d/control" domid - ; sprintf "/local/domain/%d/device" domid - ; sprintf "/local/domain/%d/rrd" domid - ; sprintf "/local/domain/%d/vm-data" domid - ; sprintf "/local/domain/%d/feature" domid - ; sprintf "/vm/%s/rtc/timeoffset" uuid - ; sprintf "/local/domain/%d/xenserver/attr" domid - ] - - let watch_token domid = Printf.sprintf "xenopsd-xc:domain-%d" domid - - let watches_of_device dev = - let interesting_backend_keys = - [ - "kthread-pid" - ; "tapdisk-pid" - ; "shutdown-done" - ; "hotplug-status" - ; "params" - ; "state" - ] - in - let open Device_common in - let be = dev.backend.domid in - let fe = dev.frontend.domid in - let kind = string_of_kind dev.backend.kind in - let devid = dev.frontend.devid in - List.map - (fun k -> - Printf.sprintf "/local/domain/%d/backend/%s/%d/%d/%s" be kind fe devid k - ) - interesting_backend_keys - - let unmanaged_domain domid id = domid > 0 && not (DB.exists id) - - let found_running_domain _domid id = - Updates.add (Dynamic.Vm id) internal_updates - - let device_watches = ref IntMap.empty - - let domain_appeared _xc _xs domid = - device_watches := IntMap.add domid [] !device_watches - - let domain_disappeared _xc xs domid = - let token = watch_token domid in - List.iter - (fun d -> - List.iter (Xenstore_watch.unwatch ~xs token) (watches_of_device d) - ) - (try IntMap.find domid !device_watches with Not_found -> []) ; - device_watches := IntMap.remove domid !device_watches ; - (* Anyone blocked on a domain/device operation which won't happen because - the domain just shutdown should be cancelled here. 
*) - debug "Cancelling watches for: domid %d" domid ; - Cancel_utils.on_shutdown ~xs domid ; - (* Finally, discard any device caching for the domid destroyed *) - DeviceCache.discard device_cache domid - - let qemu_disappeared di xc xs = - match !Xenopsd.action_after_qemu_crash with - | None -> - () - | Some action -> ( - debug "action-after-qemu-crash=%s" action ; - match action with - | "poweroff" -> - (* we do not expect a HVM guest to survive qemu disappearing, so - kill the VM *) - Domain.set_action_request ~xs di.Xenctrl.domid (Some "poweroff") - | "pause" -> - (* useful for debugging qemu *) - Domain.pause ~xc di.Xenctrl.domid - | _ -> - () - ) - - let add_device_watch xs dev = - let open Device_common in - debug "Adding watches for: %s" (string_of_device dev) ; - let domid = dev.frontend.domid in - let token = watch_token domid in - List.iter (Xenstore_watch.watch ~xs token) (watches_of_device dev) ; - device_watches := - IntMap.add domid - (dev :: IntMap.find domid !device_watches) - !device_watches - - let remove_device_watch xs dev = - let open Device_common in - debug "Removing watches for: %s" (string_of_device dev) ; - let domid = dev.frontend.domid in - let current = IntMap.find domid !device_watches in - let token = watch_token domid in - List.iter (Xenstore_watch.unwatch ~xs token) (watches_of_device dev) ; - device_watches := - IntMap.add domid (List.filter (fun x -> x <> dev) current) !device_watches - - let watch_fired xc xs path domains watches = - let look_for_different_devices domid = - if not (Xenstore_watch.IntSet.mem domid watches) then - debug "Ignoring frontend device watch on unmanaged domain: %d" domid - else if not (IntMap.mem domid !device_watches) then - warn - "Xenstore watch fired, but no entry for domid %d in device watches \ - list" - domid - else - let devices = IntMap.find domid !device_watches in - let devices' = Device_common.list_frontends ~xs domid in - let old_devices = - Xapi_stdext_std.Listext.List.set_difference devices devices' - in - let new_devices = - Xapi_stdext_std.Listext.List.set_difference devices' devices - in - List.iter (add_device_watch xs) new_devices ; - List.iter (remove_device_watch xs) old_devices - in - let uuid_of_domain di = - let string_of_domain_handle handle = - Array.to_list handle |> List.map string_of_int |> String.concat "; " - in - match Uuidx.of_int_array di.Xenctrl.handle with - | Some x -> - x - | None -> - failwith - (Printf.sprintf "VM handle for domain %i is an invalid uuid: %a" - di.Xenctrl.domid - (fun () -> string_of_domain_handle) - di.Xenctrl.handle - ) - in - let fire_event_on_vm domid = - let d = int_of_string domid in - let open Xenstore_watch in - if not (IntMap.mem d domains) then - debug "Ignoring watch on shutdown domain %d" d - else - let di = IntMap.find d domains in - let id = Uuidx.to_string (uuid_of_domain di) in - Updates.add (Dynamic.Vm id) internal_updates - in - let fire_event_on_device domid kind devid = - let d = int_of_string domid in - let open Xenstore_watch in - if not (IntMap.mem d domains) then - debug "Ignoring watch on shutdown domain %d" d - else - let di = IntMap.find d domains in - let id = Uuidx.to_string (uuid_of_domain di) in - let update = - match kind with - | "vbd" | "vbd3" | "qdisk" | "9pfs" -> - let devid' = - devid - |> int_of_string - |> Device_number.of_xenstore_key - |> Device_number.to_linux_device - in - Some (Dynamic.Vbd (id, devid')) - | "vif" -> - Some (Dynamic.Vif (id, devid)) - | x -> - debug "Unknown device kind: '%s'" x ; - None - in - Option.iter (fun x 
-> Updates.add x internal_updates) update - in - let fire_event_on_qemu domid = - let d = int_of_string domid in - let open Xenstore_watch in - if not (IntMap.mem d domains) then - debug "Ignoring qemu-pid-signal watch on shutdown domain %d" d - else - let signal = - try Some (xs.Xs.read (Service.Qemu.pidxenstore_path_signal d)) - with _ -> None - in - match signal with - | None -> - () - | Some signal -> - debug "Received unexpected qemu-pid-signal %s for domid %d" signal d ; - let di = IntMap.find d domains in - let id = Uuidx.to_string (uuid_of_domain di) in - qemu_disappeared di xc xs ; - Updates.add (Dynamic.Vm id) internal_updates - in - match Astring.String.cuts ~empty:false ~sep:"/" path with - | "local" - :: "domain" - :: domid - :: "backend" - :: kind - :: frontend - :: devid - :: key -> - debug - "Watch on backend domid: %s kind: %s -> frontend domid: %s devid: %s" - domid kind frontend devid ; - fire_event_on_device frontend kind devid ; - (* If this event was a state change then this might be the first time we - see evidence of PV drivers *) - if key = ["state"] then - maybe_update_pv_drivers_detected ~xc ~xs (int_of_string frontend) path - | "local" :: "domain" :: frontend :: "device" :: _ -> - look_for_different_devices (int_of_string frontend) - | ["local"; "domain"; domid; "qemu-pid-signal"] -> - fire_event_on_qemu domid - | "local" :: "domain" :: domid :: _ -> - fire_event_on_vm domid - | ["vm"; uuid; "rtc"; "timeoffset"] -> - let timeoffset = try Some (xs.Xs.read path) with _ -> None in - Option.iter - (fun timeoffset -> - (* Store the rtc/timeoffset for migrate *) - store_rtc_timeoffset uuid timeoffset ; - (* Tell the higher-level toolstack about this too *) - Updates.add (Dynamic.Vm uuid) internal_updates - ) - timeoffset - | _ -> - debug "Ignoring unexpected watch: %s" path -end - -module Watcher = Xenstore_watch.WatchXenstore (Actions) - (* Here we analyse common startup errors in more detail and suggest the most likely fixes (e.g. switch to root, start missing service) *) From 19e5c076ef737e7389bcfb5378a0ad1855c7325c Mon Sep 17 00:00:00 2001 From: Philippe Coval Date: Fri, 28 Nov 2025 10:31:12 +0100 Subject: [PATCH 09/25] s/xe-syslog-reconfigure: Keep disclaimer in remote.conf The behaviour is same, except that the file will remain (with empty rules) even if the forwarding log has been disabled (and then prevent the disclaimer loss). Wording slightly modified to reduce the risk of concurrent management of this file, if the file is still edited by other mean, it will be done knowingly (that x-s-r may override any change soon or later). For the record remote.conf was introduced in: 468eb75dddfea6db512a8bfb4860ff2042efab66 . I am assuming that the presence of file is not checked elsewhere that in xen-api (currently only s/x-s-r is referencing this file). 
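For illustration, with forwarding enabled (hypothetical remote host shown),
the regenerated file would contain:

    # /etc/rsyslog.d/remote.conf is managed by xe-syslog-reconfigure (do not edit)
    *.* @192.0.2.10

and with forwarding disabled only the disclaimer line remains, so the file
(and its warning) now persists either way.
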
Signed-off-by: Philippe Coval --- scripts/xe-syslog-reconfigure | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/scripts/xe-syslog-reconfigure b/scripts/xe-syslog-reconfigure index de84881d688..fa5ad0b7969 100644 --- a/scripts/xe-syslog-reconfigure +++ b/scripts/xe-syslog-reconfigure @@ -15,11 +15,9 @@ do done +echo "# /etc/rsyslog.d/remote.conf is managed by xe-syslog-reconfigure (do not edit)" > /etc/rsyslog.d/remote.conf if [ $remote -eq 1 ]; then - echo "# /etc/rsyslog.d/remote.conf is auto-generated by xe-syslog-reconfigure" > /etc/rsyslog.d/remote.conf echo "*.* @$host" >> /etc/rsyslog.d/remote.conf -else - rm -f /etc/rsyslog.d/remote.conf fi systemctl restart rsyslog From 77c57b602d15b7a3e92c508eb786fd9e9f63f7cc Mon Sep 17 00:00:00 2001 From: Pau Ruiz Safont Date: Fri, 28 Nov 2025 13:18:30 +0000 Subject: [PATCH 10/25] ocaml/xapi: remove unused xmlrpc_sexpr module The methods were not safe and thankfully unused as well Signed-off-by: Pau Ruiz Safont --- ocaml/xapi/xmlrpc_sexpr.ml | 171 ------------------------------------- quality-gate.sh | 4 +- 2 files changed, 2 insertions(+), 173 deletions(-) delete mode 100644 ocaml/xapi/xmlrpc_sexpr.ml diff --git a/ocaml/xapi/xmlrpc_sexpr.ml b/ocaml/xapi/xmlrpc_sexpr.ml deleted file mode 100644 index d241491cdc3..00000000000 --- a/ocaml/xapi/xmlrpc_sexpr.ml +++ /dev/null @@ -1,171 +0,0 @@ -(* - * Copyright (C) 2006-2009 Citrix Systems Inc. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU Lesser General Public License as published - * by the Free Software Foundation; version 2.1 only. with the special - * exception on linking described in file LICENSE. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - *) -(** Functions for converting between xml-rpc and a more - compact representation based on s-expressions. -*) - -open Xml -open Xapi_stdext_std.Xstringext - -(** Accepts an xml-rpc tree of type xml.xml - with contents [child1] [child2] ... [childn] - where: - tag is an xml tag. - - child is an xml tag or a pcdata. - and converts it to an sexpr tree of type SExpr.t - with contents (tag child1 child2 ... childn) - where: - tag is an SExpr.String - - child is an SExpr.t (String or Node) - exception: - - 'member' tags are not in sexpr because they - are basically redundant information inside struct children. - security notes: - 1. there is no verification that the incoming xml-rpc tree - conforms to the xml-rpc specification. an incorrect xml-rpc tree - might result in an unexpected sexpr mapping. therefore, this - function should not be used to process unsanitized/untrusted xml-rpc trees. 
-*) -let xmlrpc_to_sexpr (root : xml) = - let rec visit (h : int) (xml_lt : xml list) = - match (h, xml_lt) with - | _, [] -> - [] - | _, PCData text :: _ -> - let text = String.trim text in - [SExpr.String text] - (* empty s have default value '' *) - | h, Element ("value", _, []) :: siblings -> - SExpr.String "" :: visit h siblings - (* ,, tags: ignore them and go to children *) - | h, Element ("data", _, children) :: siblings - | h, Element ("value", _, children) :: siblings - | h, Element ("name", _, children) :: siblings -> - visit (h + 1) children @ visit h siblings - (* tags *) - | h, Element ("member", _, children) :: siblings -> ( - let (mychildren : SExpr.t list) = visit (h + 1) children in - let anode = SExpr.Node mychildren in - let (mysiblings : SExpr.t list) = visit h siblings in - match mychildren with - (*name & value?*) - | [SExpr.String _; _] -> - (*is name a string?*) - anode :: mysiblings - (*then add member anode*) - | _ -> - mysiblings - (*ignore incorrect member*) - ) - (*ignore incorrect member*) - (* any other element *) - | h, Element (tag, _, children) :: siblings -> - let tag = String.trim tag in - let mytag = SExpr.String tag in - let (mychildren : SExpr.t list) = visit (h + 1) children in - let anode = SExpr.Node (mytag :: mychildren) in - let (mysiblings : SExpr.t list) = visit h siblings in - anode :: mysiblings - in - List.hd (visit 0 [root]) - -(** Accepts a tree of s-expressions of type SExpr.t - with contents (tag child1 child2 ... childn) - where: - tag is an SExpr.String - - child is an SExpr.t (String or Node) - and converts it to an xml-rpc tree of type xml.xml - with contents [child1] [child2] ... [childn] - where: - tag is an xml tag. - - child is an xml tag or a pcdata. - exception: - - 'member' tags are not in sexpr because they - are redundant information inside struct children. - security notes: - 1. there is no verification that the incoming sexpr trees - conforms to the output of xmlrpc_to_sexpr. an incorrect sexpr tree - might result in an unexpected xml-rpc mapping. therefore, this - function should not be used to process unsanitized/untrusted sexpr trees. 
-*) -let sexpr_to_xmlrpc (root : SExpr.t) = - let encase_with (container : string) (el : xml) = - Element (container, [], [el]) - in - let is_not_empty_tag (el : xml) = - match el with Element ("", _, _) -> false | _ -> true - in - let rec visit (h : int) (parent : SExpr.t) (sexpr : SExpr.t) = - match (h, parent, sexpr) with - (* sexpr representing a struct with member tags *) - | ( h - , SExpr.Node (SExpr.String "struct" :: _) - , SExpr.Node (SExpr.String name :: avalue :: _) ) -> ( - match avalue with - | SExpr.String "" -> - Element - ( "member" - , [] - , [Element ("name", [], [PCData name]); Element ("value", [], [])] - ) - | SExpr.String value -> - Element - ( "member" - , [] - , [ - Element ("name", [], [PCData name]) - ; Element ("value", [], [PCData value]) - ] - ) - | SExpr.Node _ as somenode -> - Element - ( "member" - , [] - , [ - Element ("name", [], [PCData name]) - ; Element - ("value", [], [visit (h + 1) (SExpr.String "member") somenode]) - ] - ) - | _ -> - Element ("WRONG_SEXPR_MEMBER", [], []) - ) - (* member tag without values - wrong format - defaults to empty value *) - | _, SExpr.Node (SExpr.String "struct" :: _), SExpr.Node [SExpr.String name] - -> - Element - ( "member" - , [] - , [Element ("name", [], [PCData name]); Element ("value", [], [])] - ) - (* sexpr representing array tags *) - | h, _, SExpr.Node (SExpr.String "array" :: values) -> - let xmlvalues = List.map (visit (h + 1) sexpr) values in - Element - ( "array" - , [] - , [Element ("data", [], List.map (encase_with "value") xmlvalues)] - ) - (* sexpr representing any other tag with children *) - | h, _, SExpr.Node (SExpr.String tag :: atail) -> - let xmlvalues = List.map (visit (h + 1) sexpr) atail in - let xml_noemptytags = List.filter is_not_empty_tag xmlvalues in - Element (tag, [], xml_noemptytags) - (* sexpr representing a pcdata *) - | _, _, SExpr.String s -> - PCData s - (* sexpr representing a nameless tag *) - | _, _, SExpr.Node [] -> - Element ("EMPTY_SEXPR", [], []) - (* otherwise, we reached a senseless sexpr *) - | _ -> - Element ("WRONG_SEXPR", [], []) - in - encase_with "value" (visit 0 (SExpr.Node []) root) diff --git a/quality-gate.sh b/quality-gate.sh index 6785610ff30..50aa143ad42 100755 --- a/quality-gate.sh +++ b/quality-gate.sh @@ -3,7 +3,7 @@ set -e list-hd () { - N=268 + N=267 LIST_HD=$(git grep -r --count 'List.hd' -- **/*.ml | cut -d ':' -f 2 | paste -sd+ - | bc) if [ "$LIST_HD" -eq "$N" ]; then echo "OK counted $LIST_HD List.hd usages" @@ -25,7 +25,7 @@ verify-cert () { } mli-files () { - N=460 + N=459 X="ocaml/tests" X+="|ocaml/quicktest" X+="|ocaml/message-switch/core_test" From 36f2a476f70a4883a4dd94359b2599b75563f4c8 Mon Sep 17 00:00:00 2001 From: Pau Ruiz Safont Date: Fri, 28 Nov 2025 15:04:36 +0000 Subject: [PATCH 11/25] ocaml/libs/http-lib: remove unused mime module Signed-off-by: Pau Ruiz Safont --- ocaml/libs/http-lib/mime.ml | 57 ------------------------------------ ocaml/libs/http-lib/mime.mli | 22 -------------- quality-gate.sh | 2 +- 3 files changed, 1 insertion(+), 80 deletions(-) delete mode 100644 ocaml/libs/http-lib/mime.ml delete mode 100644 ocaml/libs/http-lib/mime.mli diff --git a/ocaml/libs/http-lib/mime.ml b/ocaml/libs/http-lib/mime.ml deleted file mode 100644 index e8dabaca132..00000000000 --- a/ocaml/libs/http-lib/mime.ml +++ /dev/null @@ -1,57 +0,0 @@ -(* - * Copyright (C) 2006-2009 Citrix Systems Inc. 
- * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU Lesser General Public License as published - * by the Free Software Foundation; version 2.1 only. with the special - * exception on linking described in file LICENSE. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - *) -(* MIME handling for HTTP responses *) - -open Printf - -(** Map extension to MIME type *) -type t = (string, string) Hashtbl.t - -let lowercase = Astring.String.Ascii.lowercase - -(** Parse an Apache-format mime.types file and return mime_t *) -let mime_of_file file = - let h = Hashtbl.create 1024 in - Xapi_stdext_unix.Unixext.readfile_line - (fun line -> - if not (Astring.String.is_prefix ~affix:"#" line) then - match Astring.String.fields ~empty:false line with - | [] | [_] -> - () - | mime :: exts -> - List.iter (fun e -> Hashtbl.add h (lowercase e) mime) exts - ) - file ; - h - -let string_of_mime m = - String.concat "," (Hashtbl.fold (fun k v a -> sprintf "{%s:%s}" k v :: a) m []) - -let default_mime = "text/plain" - -(** Map a file extension to a MIME type *) -let mime_of_ext mime ext = - Option.value (Hashtbl.find_opt mime (lowercase ext)) ~default:default_mime - -(** Figure out a mime type from a full filename *) -let mime_of_file_name mime fname = - (* split filename into dot components *) - let ext = - match Astring.String.cuts ~sep:"." fname with - | [] | [_] -> - "" - | x -> - List.hd (List.rev x) - in - mime_of_ext mime ext diff --git a/ocaml/libs/http-lib/mime.mli b/ocaml/libs/http-lib/mime.mli deleted file mode 100644 index 4566fe15b0f..00000000000 --- a/ocaml/libs/http-lib/mime.mli +++ /dev/null @@ -1,22 +0,0 @@ -(* - * Copyright (C) 2006-2009 Citrix Systems Inc. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU Lesser General Public License as published - * by the Free Software Foundation; version 2.1 only. with the special - * exception on linking described in file LICENSE. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - *) -type t - -val mime_of_file : string -> t - -val string_of_mime : t -> string - -val mime_of_ext : t -> string -> string - -val mime_of_file_name : t -> string -> string diff --git a/quality-gate.sh b/quality-gate.sh index 50aa143ad42..018e498c85b 100755 --- a/quality-gate.sh +++ b/quality-gate.sh @@ -3,7 +3,7 @@ set -e list-hd () { - N=267 + N=266 LIST_HD=$(git grep -r --count 'List.hd' -- **/*.ml | cut -d ':' -f 2 | paste -sd+ - | bc) if [ "$LIST_HD" -eq "$N" ]; then echo "OK counted $LIST_HD List.hd usages" From 5b5631c95a6e8fd8fd507011cb2bb96a56d4545e Mon Sep 17 00:00:00 2001 From: Pau Ruiz Safont Date: Fri, 28 Nov 2025 15:50:48 +0000 Subject: [PATCH 12/25] ocaml: reduce and guard all users of Pool.get_all |> List.hd During the bringup of a new platform for XCP-ng, one of the users hit an exception because the database doesn't have any pool record. This change makes sure that there's more information in the exception.
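As a sketch, the guarded idiom replaces a bare List.hd (which fails with an uninformative Failure "hd") with an explicit match:

    (* before *)
    let get_pool ~__context = List.hd (Db.Pool.get_all ~__context)

    (* after: same result, but a descriptive error on an empty database *)
    let get_pool ~__context =
      match Db.Pool.get_all ~__context with
      | [] ->
          raise (Failure "Helpers.get_pool: No pool available")
      | pool :: _ ->
          pool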
Also changes other places that use Pool.get_all, to make it obvious that the result is matched and that List.hd is less likely to be used. Signed-off-by: Pau Ruiz Safont --- ocaml/tests/test_event.ml | 8 ++++---- ocaml/tests/test_ha_vm_failover.ml | 2 +- ocaml/tests/test_host.ml | 4 ++-- ocaml/tests/test_host_helpers.ml | 2 +- ocaml/tests/test_platformdata.ml | 2 +- ocaml/tests/test_vdi_cbt.ml | 2 +- ocaml/xapi/db_gc.ml | 3 +-- ocaml/xapi/dbsync_master.ml | 3 +-- ocaml/xapi/helpers.ml | 7 ++++++- ocaml/xapi/message_forwarding.ml | 9 ++++++++- ocaml/xapi/xapi_pool_patch.ml | 2 +- quality-gate.sh | 2 +- 12 files changed, 28 insertions(+), 18 deletions(-) diff --git a/ocaml/tests/test_event.ml b/ocaml/tests/test_event.ml index 821bb3bb52d..6ae77c62402 100644 --- a/ocaml/tests/test_event.ml +++ b/ocaml/tests/test_event.ml @@ -117,7 +117,7 @@ let event_next_test () = let __context, _ = event_setup_common () in let () = Xapi_event.register ~__context ~classes:["pool"] in let wait_hdl = Delay.make () in - let pool = Db.Pool.get_all ~__context |> List.hd in + let pool = Helpers.get_pool ~__context in let key = "event_next_test" in ( try Db.Pool.remove_from_other_config ~__context ~self:pool ~key with _ -> () @@ -146,7 +146,7 @@ let event_next_test () = let wait_for_pool_key __context key = let token = ref "" in let finished = ref false in - let pool = Db.Pool.get_all ~__context |> List.hd in + let pool = Helpers.get_pool ~__context in while not !finished do let events = Xapi_event.from ~__context ~classes:["pool"] ~token:!token ~timeout:10. @@ -160,7 +160,7 @@ let wait_for_pool_key __context key = let event_from_test () = let __context, _ = event_setup_common () in let wait_hdl = Delay.make () in - let pool = Db.Pool.get_all ~__context |> List.hd in + let pool = Helpers.get_pool ~__context in let key = "event_from_test" in ( try Db.Pool.remove_from_other_config ~__context ~self:pool ~key with _ -> () @@ -180,7 +180,7 @@ let event_from_test () = let event_from_parallel_test () = let __context, _ = event_setup_common () in - let pool = Db.Pool.get_all ~__context |> List.hd in + let pool = Helpers.get_pool ~__context in let key = "event_next_test" in ( try Db.Pool.remove_from_other_config ~__context ~self:pool ~key with _ -> () diff --git a/ocaml/tests/test_ha_vm_failover.ml b/ocaml/tests/test_ha_vm_failover.ml index fe915563e18..7fa1c62ceb3 100644 --- a/ocaml/tests/test_ha_vm_failover.ml +++ b/ocaml/tests/test_ha_vm_failover.ml @@ -250,7 +250,7 @@ let setup ~__context {master; slaves; ha_host_failures_to_tolerate; cluster} = let host = List.nth (Db.Host.get_all ~__context) i in Test_common.make_cluster_host ~__context ~host () |> ignore done ; - let pool = Db.Pool.get_all ~__context |> List.hd in + let pool = Helpers.get_pool ~__context in Db.Pool.set_master ~__context ~self:pool ~value:master_ref ; Db.Pool.set_ha_enabled ~__context ~self:pool ~value:true ; Db.Pool.set_ha_host_failures_to_tolerate ~__context ~self:pool diff --git a/ocaml/tests/test_host.ml b/ocaml/tests/test_host.ml index bb869d292c0..beb5588e66d 100644 --- a/ocaml/tests/test_host.ml +++ b/ocaml/tests/test_host.ml @@ -33,7 +33,7 @@ let add_host __context name = let setup_test () = (* Create an unlicensed pool *) let __context = make_test_database () in - let pool = Db.Pool.get_all ~__context |> List.hd in + let pool = Helpers.get_pool ~__context in Db.Pool.set_restrictions ~__context ~self:pool ~value:(Features.to_assoc_list []) ; (* Add hosts until we're at the maximum unlicensed pool size *) @@ -58,7 +58,7 @@ let test_host_join_restriction
() = ) (fun () -> ignore (add_host __context "badhost")) ; (* License the pool *) - let pool = Db.Pool.get_all ~__context |> List.hd in + let pool = Helpers.get_pool ~__context in Db.Pool.set_restrictions ~__context ~self:pool ~value:(Features.to_assoc_list [Features.Pool_size]) ; (* Adding hosts should now work *) diff --git a/ocaml/tests/test_host_helpers.ml b/ocaml/tests/test_host_helpers.ml index d8ea5a25d0c..1b782c5a4da 100644 --- a/ocaml/tests/test_host_helpers.ml +++ b/ocaml/tests/test_host_helpers.ml @@ -149,7 +149,7 @@ let test_rpu_suppression () = let __context, calls, host1, host2, watcher, token = setup_test_oc_watcher () in - let pool = Db.Pool.get_all ~__context |> List.hd in + let pool = Helpers.get_pool ~__context in Db.Pool.add_to_other_config ~__context ~self:pool ~key:Xapi_globs.rolling_upgrade_in_progress ~value:"true" ; Db.Host.set_multipathing ~__context ~self:host1 ~value:false ; diff --git a/ocaml/tests/test_platformdata.ml b/ocaml/tests/test_platformdata.ml index 36611a5cd5a..f5d591a750e 100644 --- a/ocaml/tests/test_platformdata.ml +++ b/ocaml/tests/test_platformdata.ml @@ -97,7 +97,7 @@ module Licensing = struct let test_nested_virt_licensing (platform, should_raise) () = let __context = Test_common.make_test_database () in - let pool = Db.Pool.get_all ~__context |> List.hd in + let pool = Helpers.get_pool ~__context in let test_checks = if should_raise then Alcotest.check_raises diff --git a/ocaml/tests/test_vdi_cbt.ml b/ocaml/tests/test_vdi_cbt.ml index 54ae411ac97..4f86dc737b8 100644 --- a/ocaml/tests/test_vdi_cbt.ml +++ b/ocaml/tests/test_vdi_cbt.ml @@ -142,7 +142,7 @@ let test_cbt_enable_disable () = let test_set_metadata_of_pool_doesnt_allow_cbt_metadata_vdi () = let __context = Test_common.make_test_database () in let self = Test_common.make_vdi ~__context ~_type:`cbt_metadata () in - let pool = Db.Pool.get_all ~__context |> List.hd in + let pool = Helpers.get_pool ~__context in Alcotest.check_raises "VDI.set_metadata_of_pool should throw VDI_INCOMPATIBLE_TYPE for a \ cbt_metadata VDI" diff --git a/ocaml/xapi/db_gc.ml b/ocaml/xapi/db_gc.ml index b1bcf8fc953..12e0284125d 100644 --- a/ocaml/xapi/db_gc.ml +++ b/ocaml/xapi/db_gc.ml @@ -152,8 +152,7 @@ let detect_rolling_upgrade ~__context = Helpers.Checks.RPU.pool_has_different_host_platform_versions ~__context in (* Check the current state of the Pool as indicated by the Pool.other_config:rolling_upgrade_in_progress *) - let pools = Db.Pool.get_all ~__context in - match pools with + match Db.Pool.get_all ~__context with | [] -> debug "Ignoring absence of pool record in detect_rolling_upgrade: this is \ diff --git a/ocaml/xapi/dbsync_master.ml b/ocaml/xapi/dbsync_master.ml index 26657d758eb..dbf6080f458 100644 --- a/ocaml/xapi/dbsync_master.ml +++ b/ocaml/xapi/dbsync_master.ml @@ -25,8 +25,7 @@ open Recommendations (* create pool record (if master and not one already there) *) let create_pool_record ~__context = - let pools = Db.Pool.get_all ~__context in - if pools = [] then + if Db.Pool.get_all ~__context = [] then Db.Pool.create ~__context ~ref:(Ref.make ()) ~uuid:(Uuidx.to_string (Uuidx.make ())) ~name_label:"" ~name_description:"" diff --git a/ocaml/xapi/helpers.ml b/ocaml/xapi/helpers.ml index 492b054cd28..32fb3c97d89 100644 --- a/ocaml/xapi/helpers.ml +++ b/ocaml/xapi/helpers.ml @@ -142,7 +142,12 @@ let checknull f = try f () with _ -> "" let ignore_invalid_ref f (x : 'a Ref.t) = try Ref.to_option (f x) with Db_exn.DBCache_NotFound _ -> None -let get_pool ~__context = List.hd (Db.Pool.get_all 
~__context) +let get_pool ~__context = + match Db.Pool.get_all ~__context with + | [] -> + raise (Failure "Helpers.get_pool: No pool available") + | pool :: _ -> + pool let get_master ~__context = Db.Pool.get_master ~__context ~self:(get_pool ~__context) diff --git a/ocaml/xapi/message_forwarding.ml b/ocaml/xapi/message_forwarding.ml index af2b1aa1458..060195e120a 100644 --- a/ocaml/xapi/message_forwarding.ml +++ b/ocaml/xapi/message_forwarding.ml @@ -363,8 +363,15 @@ functor with _ -> "invalid" let current_pool_uuid ~__context = + let get_pool_record () = + match Db.Pool.get_all_records ~__context with + | [] -> + raise (Failure "current_pool_uuid: no pool available") + | (_, pool) :: _ -> + pool + in if Pool_role.is_master () then - let _, pool = List.hd (Db.Pool.get_all_records ~__context) in + let pool = get_pool_record () in Printf.sprintf "%s%s" pool.API.pool_uuid (add_brackets pool.API.pool_name_label) else diff --git a/ocaml/xapi/xapi_pool_patch.ml b/ocaml/xapi/xapi_pool_patch.ml index 4c30792b7f0..a1d006e688a 100644 --- a/ocaml/xapi/xapi_pool_patch.ml +++ b/ocaml/xapi/xapi_pool_patch.ml @@ -61,7 +61,7 @@ let pool_patch_upload_handler (req : Http.Request.t) s _ = | Some _ -> query (* There was already an SR specified *) | None -> - let pool = Db.Pool.get_all ~__context |> List.hd in + let pool = Helpers.get_pool ~__context in let default_SR = Db.Pool.get_default_SR ~__context ~self:pool in ("sr_id", Ref.string_of default_SR) :: query in diff --git a/quality-gate.sh b/quality-gate.sh index 018e498c85b..c7965c34f0e 100755 --- a/quality-gate.sh +++ b/quality-gate.sh @@ -3,7 +3,7 @@ set -e list-hd () { - N=266 + N=253 LIST_HD=$(git grep -r --count 'List.hd' -- **/*.ml | cut -d ':' -f 2 | paste -sd+ - | bc) if [ "$LIST_HD" -eq "$N" ]; then echo "OK counted $LIST_HD List.hd usages" From 69679832d93b8550cdc9de69ec396551d97e0879 Mon Sep 17 00:00:00 2001 From: Ming Lu Date: Fri, 28 Nov 2025 12:50:37 +0800 Subject: [PATCH 13/25] CA-419908: Update xenstore watcher to refresh domains when VM is renamed The xenstore watcher maintains a map from domid to VM UUID. This map is used to dispatch the xenstore events. When the VM is renamed, its UUID changes. Hence this map needs to be refreshed. Otherwise, xenstore events cannot be dispatched to the renamed VM.
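The refresh is implemented as a one-shot atomic flag: the rename path sets it, and the watch loop consumes it with Atomic.exchange so that the domid-to-UUID map is rebuilt at most once per wakeup:

    let need_refresh_domains = Atomic.make false

    let mark_refresh_domains () = Atomic.set need_refresh_domains true

    (* in the watch loop: consume the flag and rescan if it was set *)
    if Atomic.exchange need_refresh_domains false then
      look_for_different_domains ()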
Signed-off-by: Ming Lu --- ocaml/libs/ezxenstore/watch/ez_xenstore_watch.ml | 14 +++++++++++--- ocaml/xenopsd/xc/xenops_server_xen.ml | 3 ++- 2 files changed, 13 insertions(+), 4 deletions(-) diff --git a/ocaml/libs/ezxenstore/watch/ez_xenstore_watch.ml b/ocaml/libs/ezxenstore/watch/ez_xenstore_watch.ml index e552ecb1e5a..b90f3e621ce 100644 --- a/ocaml/libs/ezxenstore/watch/ez_xenstore_watch.ml +++ b/ocaml/libs/ezxenstore/watch/ez_xenstore_watch.ml @@ -115,6 +115,10 @@ module Make (Debug : DEBUG) = struct in List.map fst (IntMap.bindings c) + let need_refresh_domains = Atomic.make false + + let mark_refresh_domains () = Atomic.set need_refresh_domains true + let with_xc_and_xs f = Xenctrl.with_intf (fun xc -> with_xs (fun xs -> f xc xs)) @@ -196,9 +200,13 @@ module Make (Debug : DEBUG) = struct in let process_one_watch c (path, _token) = - if path = _introduceDomain || path = _releaseDomain then - look_for_different_domains () - else + if + Atomic.exchange need_refresh_domains false + || path = _introduceDomain + || path = _releaseDomain + then + look_for_different_domains () ; + if path <> _introduceDomain && path <> _releaseDomain then Client.immediate c (fun h -> let xs = Xs.ops h in Actions.watch_fired xc xs path !domains !watches diff --git a/ocaml/xenopsd/xc/xenops_server_xen.ml b/ocaml/xenopsd/xc/xenops_server_xen.ml index 4f73cff49b3..101568106af 100644 --- a/ocaml/xenopsd/xc/xenops_server_xen.ml +++ b/ocaml/xenopsd/xc/xenops_server_xen.ml @@ -1995,7 +1995,8 @@ module VM = struct ) ; debug "Moving xenstore tree" ; Domain.move_xstree ~xs di.Xenctrl.domid old_name new_name ; - DB.rename old_name new_name + DB.rename old_name new_name ; + Watcher.mark_refresh_domains () in Option.iter rename_domain (di_of_uuid ~xc (uuid_of_string old_name)) From 1b7f1490335ca7856c0ed3a68cfde4b1ed70eb79 Mon Sep 17 00:00:00 2001 From: Ming Lu Date: Mon, 1 Dec 2025 10:58:18 +0800 Subject: [PATCH 14/25] Refactor IntMap to use built-in Int module in Map.Make Signed-off-by: Ming Lu --- ocaml/xenopsd/xc/xenops_server_xen.ml | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/ocaml/xenopsd/xc/xenops_server_xen.ml b/ocaml/xenopsd/xc/xenops_server_xen.ml index 101568106af..3e7bd2a3584 100644 --- a/ocaml/xenopsd/xc/xenops_server_xen.ml +++ b/ocaml/xenopsd/xc/xenops_server_xen.ml @@ -25,12 +25,7 @@ module D = Debug.Make (struct let name = service_name end) open D module RRDD = Rrd_client.Client module StringSet = Set.Make (String) - -module IntMap = Map.Make (struct - type t = int - - let compare = compare -end) +module IntMap = Map.Make (Int) let finally = Xapi_stdext_pervasives.Pervasiveext.finally From c1b1311d246c06f922cb6f72abfe011820342505 Mon Sep 17 00:00:00 2001 From: Lin Liu Date: Wed, 26 Nov 2025 07:36:07 +0000 Subject: [PATCH 15/25] CP-309847: Make HTTP/80 configurable - Introduce https_only argument for Host.create - Set https_only from configuration for installation - Keep https_only from joining host during pool join Signed-off-by: Lin Liu --- ocaml/idl/datamodel_host.ml | 9 +++++++++ ocaml/tests/common/test_common.ml | 10 +++++----- ocaml/tests/test_host.ml | 1 + ocaml/xapi/dbsync_slave.ml | 1 + ocaml/xapi/xapi_globs.ml | 7 +++++++ ocaml/xapi/xapi_host.ml | 4 ++-- ocaml/xapi/xapi_host.mli | 1 + ocaml/xapi/xapi_pool.ml | 2 ++ 8 files changed, 28 insertions(+), 7 deletions(-) diff --git a/ocaml/idl/datamodel_host.ml b/ocaml/idl/datamodel_host.ml index 29b5610b226..f35c6e95103 100644 --- a/ocaml/idl/datamodel_host.ml +++ b/ocaml/idl/datamodel_host.ml @@ -1398,6 +1398,14 @@ let 
create_params = ; param_release= numbered_release "25.32.0-next" ; param_default= Some (VMap []) } + ; { + param_type= Bool + ; param_name= "https_only" + ; param_doc= + "updates firewall to open or close port 80 depending on the value" + ; param_release= numbered_release "25.38.0-next" + ; param_default= Some (VBool false) + } ] let create = @@ -1416,6 +1424,7 @@ let create = --console_idle_timeout --ssh_auto_mode options to allow them to be \ configured for new host" ) + ; (Changed, "25.38.0-next", "Added --https_only to disable http") ] ~versioned_params:create_params ~doc:"Create a new host record" ~result:(Ref _host, "Reference to the newly created host object.") diff --git a/ocaml/tests/common/test_common.ml b/ocaml/tests/common/test_common.ml index 09f6a3b465a..7fc190f43c7 100644 --- a/ocaml/tests/common/test_common.ml +++ b/ocaml/tests/common/test_common.ml @@ -175,7 +175,7 @@ let make_host ~__context ?(uuid = make_uuid ()) ?(name_label = "host") ?(last_software_update = Date.epoch) ?(last_update_hash = "") ?(ssh_enabled = true) ?(ssh_enabled_timeout = 0L) ?(ssh_expiry = Date.epoch) ?(console_idle_timeout = 0L) ?(ssh_auto_mode = false) ?(secure_boot = false) - () = + ?(https_only = false) () = let host = Xapi_host.create ~__context ~uuid ~name_label ~name_description ~hostname ~address ~external_auth_type ~external_auth_service_name @@ -184,6 +184,7 @@ let make_host ~__context ?(uuid = make_uuid ()) ?(name_label = "host") ~last_update_hash ~ssh_enabled ~ssh_enabled_timeout ~ssh_expiry ~console_idle_timeout ~ssh_auto_mode ~secure_boot ~software_version:(Xapi_globs.software_version ()) + ~https_only in Db.Host.set_cpu_info ~__context ~self:host ~value:default_cpu_info ; host @@ -194,15 +195,14 @@ let make_host2 ~__context ?(ref = Ref.make ()) ?(uuid = make_uuid ()) ?(external_auth_type = "") ?(external_auth_service_name = "") ?(external_auth_configuration = []) ?(license_params = []) ?(edition = "free") ?(license_server = []) ?(local_cache_sr = Ref.null) - ?(chipset_info = []) ?(ssl_legacy = false) () = + ?(chipset_info = []) ?(ssl_legacy = false) ?(https_only = false) () = let pool = Helpers.get_pool ~__context in let tls_verification_enabled = Db.Pool.get_tls_verification_enabled ~__context ~self:pool in Db.Host.create ~__context ~ref ~current_operations:[] ~allowed_operations:[] ~software_version:(Xapi_globs.software_version ()) - ~https_only:false ~enabled:false - ~aPI_version_major:Datamodel_common.api_version_major + ~enabled:false ~aPI_version_major:Datamodel_common.api_version_major ~aPI_version_minor:Datamodel_common.api_version_minor ~aPI_version_vendor:Datamodel_common.api_version_vendor ~aPI_version_vendor_implementation: @@ -224,7 +224,7 @@ let make_host2 ~__context ?(ref = Ref.make ()) ?(uuid = make_uuid ()) ~pending_guidances_recommended:[] ~pending_guidances_full:[] ~last_update_hash:"" ~ssh_enabled:true ~ssh_enabled_timeout:0L ~ssh_expiry:Date.epoch ~console_idle_timeout:0L ~ssh_auto_mode:false - ~secure_boot:false ; + ~secure_boot:false ~https_only ; ref let make_pif ~__context ~network ~host ?(device = "eth0") diff --git a/ocaml/tests/test_host.ml b/ocaml/tests/test_host.ml index bb869d292c0..45ca0d3c2ea 100644 --- a/ocaml/tests/test_host.ml +++ b/ocaml/tests/test_host.ml @@ -27,6 +27,7 @@ let add_host __context name = ~ssh_enabled:true ~ssh_enabled_timeout:0L ~ssh_expiry:Clock.Date.epoch ~console_idle_timeout:0L ~ssh_auto_mode:false ~secure_boot:false ~software_version:(Xapi_globs.software_version ()) + ~https_only:false ) (* Creates an unlicensed pool with the 
maximum number of hosts *) diff --git a/ocaml/xapi/dbsync_slave.ml b/ocaml/xapi/dbsync_slave.ml index 91bea2d25b4..ff325b7259e 100644 --- a/ocaml/xapi/dbsync_slave.ml +++ b/ocaml/xapi/dbsync_slave.ml @@ -66,6 +66,7 @@ let create_localhost ~__context info = ~console_idle_timeout:Constants.default_console_idle_timeout ~ssh_auto_mode:!Xapi_globs.ssh_auto_mode_default ~secure_boot:false ~software_version:[] + ~https_only:!Xapi_globs.https_only in () diff --git a/ocaml/xapi/xapi_globs.ml b/ocaml/xapi/xapi_globs.ml index 5d4fe609b52..161273c83f9 100644 --- a/ocaml/xapi/xapi_globs.ml +++ b/ocaml/xapi/xapi_globs.ml @@ -1133,6 +1133,8 @@ let xapi_requests_cgroup = let genisoimage_path = ref "/usr/bin/genisoimage" +let https_only = ref false + (* Event.{from,next} batching delays *) let make_batching name ~delay_before ~delay_between = let name = Printf.sprintf "%s_delay" name in @@ -1834,6 +1836,11 @@ let other_options = , (fun () -> string_of_int !max_span_depth) , "The maximum depth to which spans are recorded in a trace in Tracing" ) + ; ( "https-only-default" + , Arg.Set https_only + , (fun () -> string_of_bool !https_only) + , "Only expose HTTPS service, disable HTTP/80 in firewall when set to true" + ) ; ( "firewall-backend" , Arg.String (fun s -> diff --git a/ocaml/xapi/xapi_host.ml b/ocaml/xapi/xapi_host.ml index ee446592bb9..b9f105610dd 100644 --- a/ocaml/xapi/xapi_host.ml +++ b/ocaml/xapi/xapi_host.ml @@ -1029,7 +1029,7 @@ let create ~__context ~uuid ~name_label ~name_description:_ ~hostname ~address ~license_params ~edition ~license_server ~local_cache_sr ~chipset_info ~ssl_legacy:_ ~last_software_update ~last_update_hash ~ssh_enabled ~ssh_enabled_timeout ~ssh_expiry ~console_idle_timeout ~ssh_auto_mode - ~secure_boot ~software_version = + ~secure_boot ~software_version ~https_only = (* fail-safe. We already test this on the joining host, but it's racy, so multiple concurrent pool-join might succeed. 
Note: we do it in this order to avoid a problem checking restrictions during the initial setup of the database *) @@ -1064,7 +1064,7 @@ let create ~__context ~uuid ~name_label ~name_description:_ ~hostname ~address (* no or multiple pools *) in Db.Host.create ~__context ~ref:host ~current_operations:[] - ~allowed_operations:[] ~https_only:false ~software_version ~enabled:false + ~allowed_operations:[] ~https_only ~software_version ~enabled:false ~aPI_version_major:Datamodel_common.api_version_major ~aPI_version_minor:Datamodel_common.api_version_minor ~aPI_version_vendor:Datamodel_common.api_version_vendor diff --git a/ocaml/xapi/xapi_host.mli b/ocaml/xapi/xapi_host.mli index 316ee9f6edf..b20f4ef3fe9 100644 --- a/ocaml/xapi/xapi_host.mli +++ b/ocaml/xapi/xapi_host.mli @@ -138,6 +138,7 @@ val create : -> ssh_auto_mode:bool -> secure_boot:bool -> software_version:(string * string) list + -> https_only:bool -> [`host] Ref.t val destroy : __context:Context.t -> self:API.ref_host -> unit diff --git a/ocaml/xapi/xapi_pool.ml b/ocaml/xapi/xapi_pool.ml index cbb39e28adb..752d822135f 100644 --- a/ocaml/xapi/xapi_pool.ml +++ b/ocaml/xapi/xapi_pool.ml @@ -1033,6 +1033,7 @@ let rec create_or_get_host_on_master __context rpc session_id (host_ref, host) : create_or_get_sr_on_master __context rpc session_id (my_local_cache_sr, my_local_cache_sr_rec) in + debug "Creating host object on master" ; let ref = Client.Host.create ~rpc ~session_id ~uuid:my_uuid @@ -1060,6 +1061,7 @@ let rec create_or_get_host_on_master __context rpc session_id (host_ref, host) : ~ssh_auto_mode:host.API.host_ssh_auto_mode ~secure_boot:host.API.host_secure_boot ~software_version:host.API.host_software_version + ~https_only:host.API.host_https_only in (* Copy other-config into newly created host record: *) no_exn From 4da3c01208a9729ba28cd2a5326ee97e8d530f12 Mon Sep 17 00:00:00 2001 From: Rob Hoes Date: Fri, 24 Feb 2023 14:49:39 +0000 Subject: [PATCH 16/25] CP-31566 define xenopsd fast resume operation Define and implement an operation that uses Xen's fast resume to resume a domain. This operation is currently not used but has been tested. It is accessible from the xenopsd CLI ("xenops-cli") for experiments.
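For example (assuming a suspended VM known to xenopsd as "vm1"):

    xenops-cli fast-resume vm1

This queues Atomic (VM_fast_resume id), which reaches the Xen backend's VM.resume: Xenctrl.domain_resume_fast, followed by re-introducing the xenstore ring and, for HVM guests, resuming the device model.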
Signed-off-by: Christian Lindig --- ocaml/xapi-idl/xen/xenops_interface.ml | 4 ++++ ocaml/xenopsd/cli/main.ml | 20 ++++++++++++++++++++ ocaml/xenopsd/cli/xn.ml | 9 +++++++++ ocaml/xenopsd/cli/xn.mli | 3 +++ ocaml/xenopsd/lib/xenops_server.ml | 10 ++++++++++ ocaml/xenopsd/lib/xenops_server_plugin.ml | 2 ++ ocaml/xenopsd/lib/xenops_server_skeleton.ml | 2 ++ ocaml/xenopsd/xc/domain.ml | 13 +++++++++++++ ocaml/xenopsd/xc/domain.mli | 10 ++++++++++ ocaml/xenopsd/xc/xenops_server_xen.ml | 20 ++++++++++++++++++++ 10 files changed, 93 insertions(+) diff --git a/ocaml/xapi-idl/xen/xenops_interface.ml b/ocaml/xapi-idl/xen/xenops_interface.ml index 3920736afd5..f27b4ec00b8 100644 --- a/ocaml/xapi-idl/xen/xenops_interface.ml +++ b/ocaml/xapi-idl/xen/xenops_interface.ml @@ -855,6 +855,10 @@ module XenopsAPI (R : RPC) = struct declare "VM.resume" [] (debug_info_p @-> vm_id_p @-> disk_p @-> returning task_id_p err) + let fast_resume = + declare "VM.fast_resume" [] + (debug_info_p @-> vm_id_p @-> returning task_id_p err) + let s3suspend = declare "VM.s3suspend" [] (debug_info_p @-> vm_id_p @-> returning task_id_p err) diff --git a/ocaml/xenopsd/cli/main.ml b/ocaml/xenopsd/cli/main.ml index a8111444880..54842be4b31 100644 --- a/ocaml/xenopsd/cli/main.ml +++ b/ocaml/xenopsd/cli/main.ml @@ -317,6 +317,25 @@ let resume_cmd = , Cmd.info "resume" ~sdocs:_common_options ~doc ~man ) +let fast_resume_cmd = + let vm = vm_arg "resumed" in + let doc = "fast-resume a VM" in + let man = + [ + `S "DESCRIPTION" + ; `P "Fast-resume a VM." + ; `P + {|The suspended domain will be resumed + and the VM will be left in a Running state.|} + ; `S "ERRORS" + ; `P "Something about the current power state." + ] + @ help + in + ( Term.(ret (const Xn.fast_resume $ common_options_t $ vm)) + , Cmd.info "fast-resume" ~sdocs:_common_options ~doc ~man + ) + let pause_cmd = let vm = vm_arg "paused" in let doc = "pause a VM" in @@ -491,6 +510,7 @@ let cmds = ; reboot_cmd ; suspend_cmd ; resume_cmd + ; fast_resume_cmd ; pause_cmd ; unpause_cmd ; import_cmd diff --git a/ocaml/xenopsd/cli/xn.ml b/ocaml/xenopsd/cli/xn.ml index 24fecb9cf09..03c8db2e31c 100644 --- a/ocaml/xenopsd/cli/xn.ml +++ b/ocaml/xenopsd/cli/xn.ml @@ -873,6 +873,15 @@ let suspend _copts disk x = let suspend copts disk x = diagnose_error (need_vm (suspend copts disk) x) +let fast_resume _copts x = + let open Vm in + let vm, _ = find_by_name x in + Client.VM.fast_resume dbg vm.id + |> wait_for_task dbg + |> success_task ignore_task + +let fast_resume copts x = diagnose_error (need_vm (fast_resume copts) x) + let resume _copts disk x = (* We don't currently store where the suspend image is *) let disk = diff --git a/ocaml/xenopsd/cli/xn.mli b/ocaml/xenopsd/cli/xn.mli index 0acd3551e09..615f5c868b2 100644 --- a/ocaml/xenopsd/cli/xn.mli +++ b/ocaml/xenopsd/cli/xn.mli @@ -47,6 +47,9 @@ val resume : -> string option -> [> `Error of bool * string | `Ok of unit] +val fast_resume : + 'a -> string option -> [> `Error of bool * string | `Ok of unit] + val console_connect : 'a -> string option -> [> `Error of bool * string | `Ok of unit] diff --git a/ocaml/xenopsd/lib/xenops_server.ml b/ocaml/xenopsd/lib/xenops_server.ml index 9703f4c2a93..54d528829ff 100644 --- a/ocaml/xenopsd/lib/xenops_server.ml +++ b/ocaml/xenopsd/lib/xenops_server.ml @@ -164,6 +164,7 @@ type atomic = (** takes suspend data, plus optionally vGPU state data *) | VM_restore of (Vm.id * data * data option) (** takes suspend data, plus optionally vGPU state data *) + | VM_fast_resume of Vm.id | VM_delay of (Vm.id * 
float) (** used to suppress fast reboot loops *) | VM_rename of (Vm.id * Vm.id * rename_when) | VM_import_metadata of (Vm.id * Metadata.t) @@ -279,6 +280,8 @@ let rec name_of_atomic = function "VM_save" | VM_restore _ -> "VM_restore" + | VM_fast_resume _ -> + "VM_fast_resume" | VM_delay _ -> "VM_delay" | VM_rename _ -> @@ -2377,6 +2380,9 @@ let rec perform_atomic ~progress_callback ?result (op : atomic) let extras = [] in B.VM.restore t progress_callback (VM_DB.read_exn id) vbds vifs data vgpu_data extras + | VM_fast_resume id -> + debug "VM.fast_resume %s" id ; + B.VM.resume t (VM_DB.read_exn id) | VM_delay (id, t) -> debug "VM %s: waiting for %.2f before next VM action" id t ; Thread.delay t @@ -2669,6 +2675,7 @@ and trigger_cleanup_after_failure_atom op t = | VM_s3resume id | VM_save (id, _, _, _) | VM_restore (id, _, _) + | VM_fast_resume id | VM_delay (id, _) | VM_softreboot id -> immediate_operation dbg id (VM_check_state id) @@ -3828,6 +3835,8 @@ module VM = struct let resume _ dbg id disk = queue_operation dbg id (VM_resume (id, Disk disk)) + let fast_resume _ dbg id = queue_operation dbg id (Atomic (VM_fast_resume id)) + let s3suspend _ dbg id = queue_operation dbg id (Atomic (VM_s3suspend id)) let s3resume _ dbg id = queue_operation dbg id (Atomic (VM_s3resume id)) @@ -4409,6 +4418,7 @@ let _ = Server.VM.reboot (VM.reboot ()) ; Server.VM.suspend (VM.suspend ()) ; Server.VM.resume (VM.resume ()) ; + Server.VM.fast_resume (VM.fast_resume ()) ; Server.VM.s3suspend (VM.s3suspend ()) ; Server.VM.s3resume (VM.s3resume ()) ; Server.VM.export_metadata (VM.export_metadata ()) ; diff --git a/ocaml/xenopsd/lib/xenops_server_plugin.ml b/ocaml/xenopsd/lib/xenops_server_plugin.ml index e4a61bb9ac8..6cee8a58f05 100644 --- a/ocaml/xenopsd/lib/xenops_server_plugin.ml +++ b/ocaml/xenopsd/lib/xenops_server_plugin.ml @@ -159,6 +159,8 @@ module type S = sig -> string list -> unit + val resume : Xenops_task.task_handle -> Vm.t -> unit + val s3suspend : Xenops_task.task_handle -> Vm.t -> unit val s3resume : Xenops_task.task_handle -> Vm.t -> unit diff --git a/ocaml/xenopsd/lib/xenops_server_skeleton.ml b/ocaml/xenopsd/lib/xenops_server_skeleton.ml index 1a42aafafb4..d812910fd27 100644 --- a/ocaml/xenopsd/lib/xenops_server_skeleton.ml +++ b/ocaml/xenopsd/lib/xenops_server_skeleton.ml @@ -97,6 +97,8 @@ module VM = struct let restore _ _ _ _ _ _ _ = unimplemented __FUNCTION__ + let resume _ _ = unimplemented __FUNCTION__ + let s3suspend _ _ = unimplemented __FUNCTION__ let s3resume _ _ = unimplemented __FUNCTION__ diff --git a/ocaml/xenopsd/xc/domain.ml b/ocaml/xenopsd/xc/domain.ml index 90df781d8f2..cf124131bd2 100644 --- a/ocaml/xenopsd/xc/domain.ml +++ b/ocaml/xenopsd/xc/domain.ml @@ -1364,6 +1364,19 @@ let build (task : Xenops_task.task_handle) ~xc ~xs ~store_domid ~console_domid build_post ~xc ~xs ~target_mib ~static_max_mib domid domain_type store_mfn store_port local_stuff vm_stuff +let resume_post ~xc ~xs domid = + let dom_path = xs.Xs.getdomainpath domid in + let store_mfn_s = xs.Xs.read (dom_path ^ "/store/ring-ref") in + let store_mfn = Nativeint.of_string store_mfn_s in + let store_port = int_of_string (xs.Xs.read (dom_path ^ "/store/port")) in + xs.Xs.introduce domid store_mfn store_port + +let resume (task : Xenops_task.task_handle) ~xc ~xs ~qemu_domid ~domain_type + domid = + Xenctrl.domain_resume_fast xc domid ; + resume_post ~xc ~xs domid ; + if domain_type = `hvm then Device.Dm.resume task ~xs ~qemu_domid domid + type suspend_flag = Live | Debug let dm_flags = diff --git 
a/ocaml/xenopsd/xc/domain.mli b/ocaml/xenopsd/xc/domain.mli index 40f154561a3..574782fdcec 100644 --- a/ocaml/xenopsd/xc/domain.mli +++ b/ocaml/xenopsd/xc/domain.mli @@ -242,6 +242,16 @@ val build : -> unit (** Restore a domain using the info provided *) +val resume : + Xenops_task.Xenops_task.task_handle + -> xc:Xenctrl.handle + -> xs:Ezxenstore_core.Xenstore.Xs.xsh + -> qemu_domid:int + -> domain_type:[`hvm | `pv | `pvh] + -> domid + -> unit +(** Fast resume *) + val restore : Xenops_task.Xenops_task.task_handle -> xc:Xenctrl.handle diff --git a/ocaml/xenopsd/xc/xenops_server_xen.ml b/ocaml/xenopsd/xc/xenops_server_xen.ml index 16031f211de..03a4c0b641f 100644 --- a/ocaml/xenopsd/xc/xenops_server_xen.ml +++ b/ocaml/xenopsd/xc/xenops_server_xen.ml @@ -3021,6 +3021,26 @@ module VM = struct Domain.shutdown ~xc ~xs di.Xenctrl.domid Domain.S3Suspend ) + let resume t vm = + on_domain t vm (fun xc xs task vm di -> + let domid = di.Xenctrl.domid in + let qemu_domid = this_domid ~xs in + let domain_type = + match get_domain_type ~xs di with + | Vm.Domain_HVM -> + `hvm + | Vm.Domain_PV -> + `pv + | Vm.Domain_PVinPVH -> + `pvh + | Vm.Domain_PVH -> + `pvh + | Vm.Domain_undefined -> + failwith "undefined domain type: cannot resume" + in + Domain.resume task ~xc ~xs ~qemu_domid ~domain_type domid + ) + let s3resume t vm = (* XXX: TODO: monitor the guest's response; track the s3 state *) on_domain t vm (fun xc _xs _task _vm di -> From 88ece8963889d1b07b0a992e0138678c18657000 Mon Sep 17 00:00:00 2001 From: Christian Lindig Date: Tue, 2 Dec 2025 16:00:47 +0000 Subject: [PATCH 17/25] fixup! CP-31566 define xenopsd fast resume operation Signed-off-by: Christian Lindig --- ocaml/xenopsd/xc/domain.ml | 2 +- ocaml/xenopsd/xc/xenops_server_xen.ml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/ocaml/xenopsd/xc/domain.ml b/ocaml/xenopsd/xc/domain.ml index cf124131bd2..57008388db7 100644 --- a/ocaml/xenopsd/xc/domain.ml +++ b/ocaml/xenopsd/xc/domain.ml @@ -1364,7 +1364,7 @@ let build (task : Xenops_task.task_handle) ~xc ~xs ~store_domid ~console_domid build_post ~xc ~xs ~target_mib ~static_max_mib domid domain_type store_mfn store_port local_stuff vm_stuff -let resume_post ~xc ~xs domid = +let resume_post ~xc:_ ~xs domid = let dom_path = xs.Xs.getdomainpath domid in let store_mfn_s = xs.Xs.read (dom_path ^ "/store/ring-ref") in let store_mfn = Nativeint.of_string store_mfn_s in diff --git a/ocaml/xenopsd/xc/xenops_server_xen.ml b/ocaml/xenopsd/xc/xenops_server_xen.ml index 03a4c0b641f..8b4d0a4b40a 100644 --- a/ocaml/xenopsd/xc/xenops_server_xen.ml +++ b/ocaml/xenopsd/xc/xenops_server_xen.ml @@ -3022,7 +3022,7 @@ module VM = struct ) let resume t vm = - on_domain t vm (fun xc xs task vm di -> + on_domain t vm (fun xc xs task _vm di -> let domid = di.Xenctrl.domid in let qemu_domid = this_domid ~xs in let domain_type = From f255a466d0147a0f8ad68d90876f6b97f77b837f Mon Sep 17 00:00:00 2001 From: Christian Lindig Date: Wed, 3 Dec 2025 15:55:41 +0000 Subject: [PATCH 18/25] increase max supported NVMe request size MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The current default value for the NVMe MDTS parameter exposed in QEMU emulated NVMe devices is 7 (max 512KiB requests). However, there seems to be an internal Windows Server 2025 issue that possibly triggers when splitting bigger requests into smaller ones in the NVMe Windows driver.
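(MDTS is a power-of-two multiplier of the controller's minimum memory page size, 4KiB here, so MDTS=7 allows 2^7 * 4KiB = 512KiB per request, while MDTS=9 allows 2^9 * 4KiB = 2MiB.)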
Increase the exposed MDTS value on the emulated QEMU NVMe device to 9 (max 2MiB request size), as that seems to drop the reproduction rate of the issue. Discussion is ongoing with Microsoft to get the issue identified and possibly sorted on their end. For the time being apply this mitigation in qemu-wrapper as a workaround. Signed-off-by: Roger Pau Monné Signed-off-by: Christian Lindig --- ocaml/xenopsd/xc/device.ml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ocaml/xenopsd/xc/device.ml b/ocaml/xenopsd/xc/device.ml index 73f136feca4..9beeecf436b 100644 --- a/ocaml/xenopsd/xc/device.ml +++ b/ocaml/xenopsd/xc/device.ml @@ -2774,7 +2774,7 @@ module Backend = struct ] (* 4 and 5 are NICs, and we can only have two, 6 is platform *) - let extra_args = ["-device"; "nvme,serial=nvme0,id=nvme0,addr=7"] + let extra_args = ["-device"; "nvme,serial=nvme0,mdts=9,id=nvme0,addr=7"] end module XenPV = struct let addr ~xs:_ ~domid:_ _ ~nics:_ = 6 end From f905d643745945f4eb262973d40eabaeaeeba35d Mon Sep 17 00:00:00 2001 From: Gang Ji Date: Wed, 3 Dec 2025 18:47:23 +0800 Subject: [PATCH 19/25] CA-420533: Only clear RestartVM guidance on up-to-date hosts During rolling pool upgrade (RPU), RestartVM guidance should only be cleared when a VM restarts on a host that has been updated to match the coordinator's software version. Previously, the guidance was cleared whenever a VM restarted, regardless of the host's update status. This commit ensures that RestartVM guidance persists until the VM restarts on an up-to-date host, this provides accurate feedback to administrators about which VMs still need restarting after RPU. Also adds unit tests covering 6 scenarios: * VM start on updated vs old host (via xenopsd) * Suspended VM resume on updated vs old host * VM halt on updated vs old host (via force_state_reset) Signed-off-by: Gang Ji --- ocaml/tests/test_xapi_xenops.ml | 216 +++++++++++++++++++++++++++++++- ocaml/xapi/xapi_vm_lifecycle.ml | 3 + ocaml/xapi/xapi_xenops.ml | 9 +- 3 files changed, 225 insertions(+), 3 deletions(-) diff --git a/ocaml/tests/test_xapi_xenops.ml b/ocaml/tests/test_xapi_xenops.ml index e1f1bf048e2..42f0bb5708d 100644 --- a/ocaml/tests/test_xapi_xenops.ml +++ b/ocaml/tests/test_xapi_xenops.ml @@ -3,6 +3,92 @@ open Test_common module D = Debug.Make (struct let name = "test_xapi_xenops" end) open D +module Date = Clock.Date + +(** Helper to create a Xenops VM state for testing *) +let make_xenops_state ~power_state ?(last_start_time = 0.0) () = + let open Xenops_interface.Vm in + { + power_state + ; domids= [0] + ; consoles= [] + ; memory_target= 0L + ; memory_actual= 0L + ; memory_limit= 0L + ; vcpu_target= 1 + ; shadow_multiplier_target= 1.0 + ; rtc_timeoffset= "" + ; uncooperative_balloon_driver= false + ; guest_agent= [] + ; xsdata_state= [] + ; pv_drivers_detected= false + ; last_start_time + ; hvm= false + ; nomigrate= false + ; nested_virt= false + ; domain_type= Domain_PV + ; featureset= "" + } + +(** Helper to set up VM for testing: sets pending guidances, resident host, and power state *) +let setup_vm_for_test ~__context ~vm ~guidances ~resident_on ~power_state = + Db.VM.set_pending_guidances ~__context ~self:vm ~value:guidances ; + Db.VM.set_resident_on ~__context ~self:vm ~value:resident_on ; + Db.VM.set_power_state ~__context ~self:vm ~value:power_state + +(** Helper to check pending guidances after an operation *) +let check_pending_guidances ~__context ~vm ~expect_restart_vm + ~expect_restart_device_model ~test_description = + let remaining = 
Db.VM.get_pending_guidances ~__context ~self:vm in + Alcotest.(check bool) + (Printf.sprintf "restart_vm guidance %s - %s" + (if expect_restart_vm then "present" else "cleared") + test_description + ) + expect_restart_vm + (List.mem `restart_vm remaining) ; + Alcotest.(check bool) + (Printf.sprintf "restart_device_model guidance %s - %s" + (if expect_restart_device_model then "present" else "cleared") + test_description + ) + expect_restart_device_model + (List.mem `restart_device_model remaining) + +(** Helper to simulate a VM state update via update_vm_internal *) +let simulate_vm_state_update ~__context ~vm ~previous_power_state + ~new_power_state ~localhost = + let previous_state = make_xenops_state ~power_state:previous_power_state () in + let new_state = + make_xenops_state ~power_state:new_power_state ~last_start_time:100.0 () + in + let vm_uuid = Db.VM.get_uuid ~__context ~self:vm in + let metrics = Db.VM.get_metrics ~__context ~self:vm in + Db.VM_metrics.set_start_time ~__context ~self:metrics + ~value:(Date.of_unix_time 50.0) ; + ignore + (Xapi_xenops.update_vm_internal ~__context ~id:vm_uuid ~self:vm + ~previous:(Some previous_state) ~info:(Some new_state) ~localhost + ) + +(** Helper to set host software version *) +let set_host_software_version ~__context ~host ~platform_version ~xapi_version = + Db.Host.remove_from_software_version ~__context ~self:host + ~key:Xapi_globs._platform_version ; + Db.Host.add_to_software_version ~__context ~self:host + ~key:Xapi_globs._platform_version ~value:platform_version ; + Db.Host.remove_from_software_version ~__context ~self:host + ~key:Xapi_globs._xapi_version ; + Db.Host.add_to_software_version ~__context ~self:host + ~key:Xapi_globs._xapi_version ~value:xapi_version + +(** Helper to get the pool from the test database *) +let get_pool ~__context = + match Db.Pool.get_all ~__context with + | pool :: _ -> + pool + | [] -> + failwith "No pool found in test database" let simulator_setup = ref false @@ -187,4 +273,132 @@ let test_xapi_restart () = ) unsetup_simulator -let test = [("test_xapi_restart", `Quick, test_xapi_restart)] +(** Test that RestartVM guidance is only cleared when VM starts on up-to-date host *) +let test_pending_guidance_vm_start () = + let __context = make_test_database () in + Context.set_test_rpc __context (Mock_rpc.rpc __context) ; + + let localhost = Helpers.get_localhost ~__context in + let host2 = make_host ~__context ~name_label:"host2" ~hostname:"host2" () in + + (* Set up software versions - localhost is up-to-date, host2 is not *) + set_host_software_version ~__context ~host:localhost ~platform_version:"1.2.3" + ~xapi_version:"4.5.6" ; + set_host_software_version ~__context ~host:host2 ~platform_version:"1.2.2" + ~xapi_version:"4.5.5" ; + + (* Set localhost as the pool coordinator *) + let pool = get_pool ~__context in + Db.Pool.set_master ~__context ~self:pool ~value:localhost ; + + let vm = make_vm ~__context () in + + (* Set up VM guidances - both restart_vm and restart_device_model *) + let guidances = [`restart_vm; `restart_device_model] in + + (* Test 1: VM starting on up-to-date host - should clear restart_vm *) + setup_vm_for_test ~__context ~vm ~guidances ~resident_on:localhost + ~power_state:`Halted ; + simulate_vm_state_update ~__context ~vm + ~previous_power_state:Xenops_interface.Halted + ~new_power_state:Xenops_interface.Running ~localhost ; + check_pending_guidances ~__context ~vm ~expect_restart_vm:false + ~expect_restart_device_model:false + ~test_description:"VM started on up-to-date host" ; + 
+ (* Test 2: VM starting on old host - should NOT clear restart_vm *) + setup_vm_for_test ~__context ~vm ~guidances ~resident_on:host2 + ~power_state:`Halted ; + simulate_vm_state_update ~__context ~vm + ~previous_power_state:Xenops_interface.Halted + ~new_power_state:Xenops_interface.Running ~localhost:host2 ; + check_pending_guidances ~__context ~vm ~expect_restart_vm:true + ~expect_restart_device_model:false + ~test_description:"VM started on old host" + +(** Test that NO guidance is cleared when suspended VM resumes *) +let test_pending_guidance_vm_resume () = + let __context = make_test_database () in + Context.set_test_rpc __context (Mock_rpc.rpc __context) ; + + let localhost = Helpers.get_localhost ~__context in + let host2 = make_host ~__context ~name_label:"host2" ~hostname:"host2" () in + + (* Set up software versions - localhost is up-to-date, host2 is not *) + set_host_software_version ~__context ~host:localhost ~platform_version:"1.2.3" + ~xapi_version:"4.5.6" ; + set_host_software_version ~__context ~host:host2 ~platform_version:"1.2.2" + ~xapi_version:"4.5.5" ; + + (* Set localhost as the pool coordinator *) + let pool = get_pool ~__context in + Db.Pool.set_master ~__context ~self:pool ~value:localhost ; + + (* Test 1: Suspended VM resumed on up-to-date host - should NOT clear any guidance *) + let vm = make_vm ~__context () in + let guidances = [`restart_vm; `restart_device_model] in + setup_vm_for_test ~__context ~vm ~guidances ~resident_on:localhost + ~power_state:`Suspended ; + simulate_vm_state_update ~__context ~vm + ~previous_power_state:Xenops_interface.Suspended + ~new_power_state:Xenops_interface.Running ~localhost ; + check_pending_guidances ~__context ~vm ~expect_restart_vm:true + ~expect_restart_device_model:true + ~test_description:"suspended VM resumed on up-to-date host" ; + + (* Test 2: Suspended VM resumed on old host - should NOT clear any guidance *) + setup_vm_for_test ~__context ~vm ~guidances ~resident_on:host2 + ~power_state:`Suspended ; + simulate_vm_state_update ~__context ~vm + ~previous_power_state:Xenops_interface.Suspended + ~new_power_state:Xenops_interface.Running ~localhost:host2 ; + check_pending_guidances ~__context ~vm ~expect_restart_vm:true + ~expect_restart_device_model:true + ~test_description:"suspended VM resumed on old host" + +(** Test that RestartVM guidance is always cleared when VM is halted *) +let test_pending_guidance_vm_halt () = + let __context = make_test_database () in + Context.set_test_rpc __context (Mock_rpc.rpc __context) ; + + let localhost = Helpers.get_localhost ~__context in + let host2 = make_host ~__context ~name_label:"host2" ~hostname:"host2" () in + + (* Set up software versions - localhost is up-to-date, host2 is not *) + set_host_software_version ~__context ~host:localhost ~platform_version:"1.2.3" + ~xapi_version:"4.5.6" ; + set_host_software_version ~__context ~host:host2 ~platform_version:"1.2.2" + ~xapi_version:"4.5.5" ; + + (* Set localhost as the pool coordinator *) + let pool = get_pool ~__context in + Db.Pool.set_master ~__context ~self:pool ~value:localhost ; + + let vm = make_vm ~__context () in + let guidances = [`restart_vm; `restart_device_model] in + + (* Test 1: VM halted on up-to-date host - should clear both guidances *) + setup_vm_for_test ~__context ~vm ~guidances ~resident_on:localhost + ~power_state:`Running ; + Xapi_vm_lifecycle.force_state_reset_keep_current_operations ~__context + ~self:vm ~value:`Halted ; + check_pending_guidances ~__context ~vm ~expect_restart_vm:false + 
~expect_restart_device_model:false + ~test_description:"VM halted on up-to-date host" ; + + (* Test 2: VM halted on old host - should ALSO clear both guidances + because VM.start_on will enforce host version check on next start *) + setup_vm_for_test ~__context ~vm ~guidances ~resident_on:host2 + ~power_state:`Running ; + Xapi_vm_lifecycle.force_state_reset_keep_current_operations ~__context + ~self:vm ~value:`Halted ; + check_pending_guidances ~__context ~vm ~expect_restart_vm:false + ~expect_restart_device_model:false ~test_description:"VM halted on old host" + +let test = + [ + ("test_xapi_restart", `Quick, test_xapi_restart) + ; ("test_pending_guidance_vm_start", `Quick, test_pending_guidance_vm_start) + ; ("test_pending_guidance_vm_resume", `Quick, test_pending_guidance_vm_resume) + ; ("test_pending_guidance_vm_halt", `Quick, test_pending_guidance_vm_halt) + ] diff --git a/ocaml/xapi/xapi_vm_lifecycle.ml b/ocaml/xapi/xapi_vm_lifecycle.ml index 14290421fb4..6d1ce9a537f 100644 --- a/ocaml/xapi/xapi_vm_lifecycle.ml +++ b/ocaml/xapi/xapi_vm_lifecycle.ml @@ -909,6 +909,9 @@ let force_state_reset_keep_current_operations ~__context ~self ~value:state = (* Blank the requires_reboot flag *) Db.VM.set_requires_reboot ~__context ~self ~value:false ; remove_pending_guidance ~__context ~self ~value:`restart_device_model ; + (* Always remove RestartVM guidance when VM becomes Halted: VM.start_on checks + host version via assert_host_has_highest_version_in_pool, preventing the VM + from starting on an outdated host, so it will necessarily start on an up-to-date host *) remove_pending_guidance ~__context ~self ~value:`restart_vm ) ; (* Do not clear resident_on for VM and VGPU in a checkpoint operation *) diff --git a/ocaml/xapi/xapi_xenops.ml b/ocaml/xapi/xapi_xenops.ml index 9b12bcec5a6..0ea29ea4cf7 100644 --- a/ocaml/xapi/xapi_xenops.ml +++ b/ocaml/xapi/xapi_xenops.ml @@ -2350,8 +2350,13 @@ let update_vm_internal ~__context ~id ~self ~previous ~info ~localhost = then ( Xapi_vm_lifecycle.remove_pending_guidance ~__context ~self ~value:`restart_device_model ; - Xapi_vm_lifecycle.remove_pending_guidance ~__context ~self - ~value:`restart_vm + (* Only remove RestartVM guidance if host is up-to-date with coordinator *) + if + Helpers.Checks.RPU.are_host_versions_same_on_master ~__context + ~host:localhost + then + Xapi_vm_lifecycle.remove_pending_guidance ~__context ~self + ~value:`restart_vm ) ) ; create_guest_metrics_if_needed () ; From bb705d1334f4e8df49dba1a7a3c732b236746f97 Mon Sep 17 00:00:00 2001 From: Andrii Sultanov Date: Tue, 18 Nov 2025 08:35:44 +0000 Subject: [PATCH 20/25] qcow-stream-tool: Add read_headers command It returns info on the allocated clusters in a JSON. 
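For example (the values are made up), the emitted JSON looks like:

    {"virtual_size": 10737418240, "cluster_bits": 16, "data_clusters": [0, 5, 42]}

where each entry of data_clusters is a cluster index, i.e. the cluster's virtual address shifted right by cluster_bits.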
Signed-off-by: Andrii Sultanov --- ocaml/qcow-stream-tool/dune | 4 ++ ocaml/qcow-stream-tool/qcow_stream_tool.ml | 69 +++++++++++++++++++--- 2 files changed, 66 insertions(+), 7 deletions(-) diff --git a/ocaml/qcow-stream-tool/dune b/ocaml/qcow-stream-tool/dune index 4daf3469dc5..436dd58681c 100644 --- a/ocaml/qcow-stream-tool/dune +++ b/ocaml/qcow-stream-tool/dune @@ -7,5 +7,9 @@ qcow-stream cmdliner unix + lwt.unix + lwt + qcow-types + yojson ) ) diff --git a/ocaml/qcow-stream-tool/qcow_stream_tool.ml b/ocaml/qcow-stream-tool/qcow_stream_tool.ml index 7158867c248..41b57c9a366 100644 --- a/ocaml/qcow-stream-tool/qcow_stream_tool.ml +++ b/ocaml/qcow-stream-tool/qcow_stream_tool.ml @@ -1,11 +1,53 @@ +open Cmdliner + module Impl = struct let stream_decode output = Qcow_stream.stream_decode Unix.stdin output ; `Ok () + + let read_headers qcow_path = + let open Lwt.Syntax in + let t = + let* fd = Lwt_unix.openfile qcow_path [Unix.O_RDONLY] 0 in + let* virtual_size, cluster_bits, _, data_cluster_map = + Qcow_stream.start_stream_decode fd + in + let clusters = Qcow_types.Cluster.Map.bindings data_cluster_map in + let clusters = + List.map + (fun (_, virt_address) -> + let ( >> ) = Int64.shift_right_logical in + let address = + Int64.to_int (virt_address >> Int32.to_int cluster_bits) + in + `Int address + ) + clusters + in + let json = + `Assoc + [ + ("virtual_size", `Int (Int64.to_int virtual_size)) + ; ("cluster_bits", `Int (Int32.to_int cluster_bits)) + ; ("data_clusters", `List clusters) + ] + in + let json_string = Yojson.to_string json in + let* () = Lwt_io.print json_string in + let* () = Lwt_io.flush Lwt_io.stdout in + Lwt.return_unit + in + Lwt_main.run t ; `Ok () end module Cli = struct - open Cmdliner + let output default = + let doc = Printf.sprintf "Path to the output file." in + Arg.(value & pos 0 string default & info [] ~doc) + + let input = + let doc = Printf.sprintf "Path to the input file." in + Arg.(required & pos 0 (some string) None & info [] ~doc) let stream_decode_cmd = let doc = "decode qcow2 formatted data from stdin and write a raw image" in @@ -15,15 +57,28 @@ module Cli = struct ; `P "Decode qcow2 formatted data from stdin and write to a raw file." ] in - let output default = - let doc = Printf.sprintf "Path to the output file." in - Arg.(value & pos 0 string default & info [] ~doc) - in Cmd.v (Cmd.info "stream_decode" ~doc ~man) Term.(ret (const Impl.stream_decode $ output "test.raw")) - let main () = Cmd.eval stream_decode_cmd + let read_headers_cmd = + let doc = + "Determine allocated clusters by parsing qcow2 file at the provided \ + path. Returns JSON like the following: {'virtual_size': X, \ + 'cluster_bits': Y, 'data_clusters': [1,2,3]}" + in + let man = [`S "DESCRIPTION"; `P doc] in + Cmd.v + (Cmd.info "read_headers" ~doc ~man) + Term.(ret (const Impl.read_headers $ input)) + + let cmds = [stream_decode_cmd; read_headers_cmd] end -let () = exit (Cli.main ()) +let info = + let doc = "minimal CLI for qcow-stream" in + Cmd.info "qcow-stream-tool" ~version:"1.0.0" ~doc + +let () = + let cmd = Cmd.group info Cli.cmds in + exit (Cmd.eval cmd) From 5ec13cc5c23dcab31309dcfdd8f751a307db6d7f Mon Sep 17 00:00:00 2001 From: Andrii Sultanov Date: Tue, 18 Nov 2025 08:40:13 +0000 Subject: [PATCH 21/25] python3: Use pre-parsed cluster allocation data in qcow2-to-stdout On export, instead of reading the whole raw disk, consult the JSON (if provided), and only allocate the clusters that are present in the table. 
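A sketch of the intended wiring (the paths and the raw-input positional are illustrative; --json-header tells the script that stdin carries the pre-parsed JSON):

    qcow-stream-tool read_headers /path/to/disk.qcow2 \
        | python3 qcow2-to-stdout.py --json-header /path/to/disk.raw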
This is analogous to vhd-tool's handling of export, and greatly speeds up the export of sparse disks. Signed-off-by: Andrii Sultanov --- python3/libexec/qcow2-to-stdout.py | 135 ++++++++++++++++++++++++----- 1 file changed, 112 insertions(+), 23 deletions(-) diff --git a/python3/libexec/qcow2-to-stdout.py b/python3/libexec/qcow2-to-stdout.py index b0638bc5904..4ce1cc72b56 100755 --- a/python3/libexec/qcow2-to-stdout.py +++ b/python3/libexec/qcow2-to-stdout.py @@ -24,6 +24,7 @@ # clusters. For the sake of simplicity the code sometimes talks about # refcount tables and L1 tables when referring to those clusters. +import json import argparse import math import os @@ -91,7 +92,9 @@ def write_features(cluster, offset, data_file_name): def write_qcow2_content(input_file, cluster_size, refcount_bits, - data_file_name, data_file_raw, diff_file_name): + data_file_name, data_file_raw, diff_file_name, + virtual_size, nonzero_clusters, + diff_virtual_size, diff_nonzero_clusters): # Some basic values l1_entries_per_table = cluster_size // 8 l2_entries_per_table = cluster_size // 8 @@ -102,8 +105,12 @@ def write_qcow2_content(input_file, cluster_size, refcount_bits, fd = os.open(input_file, os.O_RDONLY) # Virtual disk size, number of data clusters and L1 entries - block_device_size = os.lseek(fd, 0, os.SEEK_END) - disk_size = align_up(block_device_size, 512) + if virtual_size is None: + block_device_size = os.lseek(fd, 0, os.SEEK_END) + disk_size = align_up(block_device_size, 512) + else: + block_device_size = virtual_size + disk_size = virtual_size total_data_clusters = math.ceil(disk_size / cluster_size) l1_entries = math.ceil(total_data_clusters / l2_entries_per_table) allocated_l1_tables = math.ceil(l1_entries / l1_entries_per_table) @@ -118,6 +125,28 @@ def write_qcow2_content(input_file, cluster_size, refcount_bits, allocated_l2_tables = 0 allocated_data_clusters = 0 + def allocate_cluster(idx): + nonlocal allocated_data_clusters + nonlocal allocated_l2_tables + bitmap_set(l2_bitmap, idx) + allocated_data_clusters += 1 + # Allocated data clusters also need their corresponding L1 entry and L2 table + l1_idx = math.floor(idx / l2_entries_per_table) + if not bitmap_is_set(l1_bitmap, l1_idx): + bitmap_set(l1_bitmap, l1_idx) + allocated_l2_tables += 1 + + # Allocates a cluster in the appropriate bitmaps if it's different + # from cluster_to_compare_with + def check_cluster_allocate(idx, cluster, cluster_to_compare_with): + # If the last cluster is smaller than cluster_size pad it with zeroes + if len(cluster) < cluster_size: + cluster += bytes(cluster_size - len(cluster)) + # If a cluster has different data from the cluster_to_compare_with then it + # must be allocated in the output file and its L2 entry must be set + if cluster != cluster_to_compare_with: + allocate_cluster(idx) + if data_file_raw: # If data_file_raw is set then all clusters are allocated and # we don't need to read the input file at all.
@@ -126,26 +155,39 @@ def write_qcow2_content(input_file, cluster_size, refcount_bits, bitmap_set(l1_bitmap, idx) for idx in range(total_data_clusters): bitmap_set(l2_bitmap, idx) - else: - # Allocates a cluster in the appropriate bitmaps if it's different - # from cluster_to_compare_with - def check_cluster_allocate(idx, cluster, cluster_to_compare_with): - nonlocal allocated_data_clusters - nonlocal allocated_l2_tables - # If the last cluster is smaller than cluster_size pad it with zeroes - if len(cluster) < cluster_size: - cluster += bytes(cluster_size - len(cluster)) - # If a cluster has different data from the cluster_to_compare_with then it - # must be allocated in the output file and its L2 entry must be set - if cluster != cluster_to_compare_with: - bitmap_set(l2_bitmap, idx) - allocated_data_clusters += 1 - # Allocated data clusters also need their corresponding L1 entry and L2 table - l1_idx = math.floor(idx / l2_entries_per_table) - if not bitmap_is_set(l1_bitmap, l1_idx): - bitmap_set(l1_bitmap, l1_idx) - allocated_l2_tables += 1 + elif nonzero_clusters is not None: + if diff_file_name: + if diff_virtual_size is None or diff_nonzero_clusters is None: + sys.exit("[Error] QCOW headers for the diff file were not provided.") + # Read all the clusters that differ from the diff_file_name + diff_fd = os.open(diff_file_name, os.O_RDONLY) + last_diff_cluster = align_up(diff_virtual_size, cluster_size) // cluster_size + # In case input_file is bigger than diff_file_name, first check + # if clusters from diff_file_name differ, and then check if the + # rest contain data + diff_nonzero_clusters_set = set(diff_nonzero_clusters) + for cluster in nonzero_clusters: + if cluster >= last_diff_cluster: + allocate_cluster(cluster) + elif cluster in diff_nonzero_clusters_set: + # If a cluster has different data from the original_cluster + # then it must be allocated + cluster_data = os.pread(fd, cluster_size, cluster_size * cluster) + original_cluster = os.pread(diff_fd, cluster_size, cluster_size * cluster) + check_cluster_allocate(cluster, cluster_data, original_cluster) + diff_nonzero_clusters_set.remove(cluster) + else: + allocate_cluster(cluster) + + # These are not present in the original file + for cluster in diff_nonzero_clusters_set: + allocate_cluster(cluster) + else: + for cluster in nonzero_clusters: + allocate_cluster(cluster) + + else: zero_cluster = bytes(cluster_size) last_cluster = align_up(block_device_size, cluster_size) // cluster_size if diff_file_name: @@ -384,11 +426,54 @@ def main(): help="enable data_file_raw on the generated image (implies -d)", action="store_true", ) + parser.add_argument( + "--json-header", + dest="json_header", + help="stdin contains a JSON of pre-parsed QCOW2 information " "(virtual_size, data_clusters, cluster_bits)", + action="store_true", + ) + parser.add_argument( + "--json-header-diff", + dest="json_header_diff", + metavar="json_header_diff", + help="File descriptor that contains a JSON of pre-parsed QCOW2 " "information for the diff_file_name", + type=int, + default=None, + ) args = parser.parse_args() if args.data_file_raw: args.data_file = True + virtual_size = None + nonzero_clusters = None + diff_virtual_size = None + diff_nonzero_clusters = None + if args.json_header: + json_header = json.load(sys.stdin) + try: + virtual_size = json_header['virtual_size'] + source_cluster_size = 2 ** json_header['cluster_bits'] + if source_cluster_size != args.cluster_size: + args.cluster_size = source_cluster_size + nonzero_clusters = 
json_header['data_clusters'] + except KeyError as e: + raise RuntimeError(f'Incomplete JSON - missing value for {str(e)}') from e + if args.json_header_diff: + f = os.fdopen(args.json_header_diff) + json_header = json.load(f) + try: + diff_virtual_size = json_header['virtual_size'] + if 2 ** json_header['cluster_bits'] == args.cluster_size: + diff_nonzero_clusters = json_header['data_clusters'] + else: + sys.exit(f"[Error] Cluster sizes in the files being compared are " + f"different: {2**json_header['cluster_bits']} vs. {args.cluster_size}") + except KeyError as e: + raise RuntimeError(f'Incomplete JSON for the diff - missing value for {str(e)}') from e + if not os.path.exists(args.input_file): sys.exit(f"[Error] {args.input_file} does not exist.") @@ -413,7 +498,11 @@ def main(): args.refcount_bits, data_file_name, args.data_file_raw, - args.diff_file_name + args.diff_file_name, + virtual_size, + nonzero_clusters, + diff_virtual_size, + diff_nonzero_clusters ) From 15f80881f08df8b1dc1244ffcff58c5f0af4c90d Mon Sep 17 00:00:00 2001 From: Andrii Sultanov Date: Tue, 18 Nov 2025 08:30:50 +0000 Subject: [PATCH 22/25] vhd_tool_wrapper: Make vhd_of_device generic Take the expected driver type as a parameter, to allow this helper to be used by qcow code as well. Signed-off-by: Andrii Sultanov --- ocaml/xapi/vhd_tool_wrapper.ml | 23 +++++++++++++---------- 1 file changed, 13 insertions(+), 10 deletions(-) diff --git a/ocaml/xapi/vhd_tool_wrapper.ml b/ocaml/xapi/vhd_tool_wrapper.ml index 73f25785eb8..f3f791fe251 100644 --- a/ocaml/xapi/vhd_tool_wrapper.ml +++ b/ocaml/xapi/vhd_tool_wrapper.ml @@ -149,25 +149,27 @@ let find_backend_device path = raise Not_found with _ -> None -(** [vhd_of_device path] returns (Some vhd) where 'vhd' is the vhd leaf backing a particular device [path] or None. - [path] may either be a blktap2 device *or* a blkfront device backed by a blktap2 device. If the latter then - the script must be run in the same domain as blkback. *) -let vhd_of_device path = +(** [backing_file_of_device path] returns (Some backing_file) where 'backing_file' + is the leaf backing a particular device [path] (with a driver of type + [driver]) or None. [path] may either be a blktap2 device *or* a blkfront + device backed by a blktap2 device. If the latter then the script must be + run in the same domain as blkback. 
*) +let backing_file_of_device path ~driver = let tapdisk_of_path path = try match Tapctl.of_device (Tapctl.create ()) path with - | _, _, Some ("vhd", vhd) -> - Some vhd + | _, _, Some (typ, backing_file) when typ = driver -> + Some backing_file | _, _, _ -> raise Not_found with | Tapctl.Not_blktap -> ( debug "Device %s is not controlled by blktap" path ; - (* Check if it is a VHD behind a NBD deivce *) + (* Check if it is a [driver] behind a NBD device *) Stream_vdi.(get_nbd_device path |> image_behind_nbd_device) |> function - | Some ("vhd", vhd) -> - debug "%s is a VHD behind NBD device %s" vhd path ; - Some vhd + | Some (typ, backing_file) when typ = driver -> + debug "%s is a %s behind NBD device %s" backing_file driver path ; + Some backing_file | _ -> None ) @@ -182,6 +184,7 @@ let vhd_of_device path = let send progress_cb ?relative_to (protocol : string) (dest_format : string) (s : Unix.file_descr) (path : string) (size : Int64.t) (prefix : string) = + let vhd_of_device = backing_file_of_device ~driver:"vhd" in let s' = Uuidx.(to_string (make ())) in let source_format, source = match (Stream_vdi.get_nbd_device path, vhd_of_device path, relative_to) with From 368596819c29c1682b2d07528b979615a094de00 Mon Sep 17 00:00:00 2001 From: Andrii Sultanov Date: Tue, 18 Nov 2025 08:42:33 +0000 Subject: [PATCH 23/25] qcow_tool_wrapper: Read headers of QCOW2-backed VDIs on export Pass the JSON output of read_headers into qcow2-to-stdout to handle the export further. Signed-off-by: Andrii Sultanov --- ocaml/xapi/qcow_tool_wrapper.ml | 55 +++++++++++++++++++++++++++++---- 1 file changed, 49 insertions(+), 6 deletions(-) diff --git a/ocaml/xapi/qcow_tool_wrapper.ml b/ocaml/xapi/qcow_tool_wrapper.ml index 30d0eb63811..cd42ca123d3 100644 --- a/ocaml/xapi/qcow_tool_wrapper.ml +++ b/ocaml/xapi/qcow_tool_wrapper.ml @@ -16,14 +16,15 @@ module D = Debug.Make (struct let name = __MODULE__ end) open D -let run_qcow_tool qcow_tool ?input_fd ?output_fd (_progress_cb : int -> unit) - (args : string list) = +let run_qcow_tool qcow_tool ?(replace_fds = []) ?input_fd ?output_fd + (_progress_cb : int -> unit) (args : string list) = info "Executing %s %s" qcow_tool (String.concat " " args) ; let open Forkhelpers in match with_logfile_fd "qcow-tool" (fun log_fd -> let pid = - safe_close_and_exec input_fd output_fd (Some log_fd) [] qcow_tool args + safe_close_and_exec input_fd output_fd (Some log_fd) replace_fds + qcow_tool args in let _, status = waitpid pid in if status <> Unix.WEXITED 0 then ( @@ -46,14 +47,56 @@ let update_task_progress (__context : Context.t) (x : int) = let receive (progress_cb : int -> unit) (unix_fd : Unix.file_descr) (path : string) = - let args = [path] in + let args = ["stream_decode"; path] in let qcow_tool = !Xapi_globs.qcow_stream_tool in run_qcow_tool qcow_tool progress_cb args ~input_fd:unix_fd +let read_header qcow_path = + let args = ["read_headers"; qcow_path] in + let qcow_tool = !Xapi_globs.qcow_stream_tool in + let pipe_reader, pipe_writer = Unix.pipe ~cloexec:true () in + + let progress_cb _ = () in + Xapi_stdext_pervasives.Pervasiveext.finally + (fun () -> run_qcow_tool qcow_tool progress_cb args ~output_fd:pipe_writer) + (fun () -> Unix.close pipe_writer) ; + pipe_reader + let send ?relative_to (progress_cb : int -> unit) (unix_fd : Unix.file_descr) (path : string) (_size : Int64.t) = + let qcow_of_device = + Vhd_tool_wrapper.backing_file_of_device ~driver:"qcow2" + in + let qcow_path = qcow_of_device path in + + (* If VDI is backed by QCOW, parse the header to determine 
nonzero clusters + to avoid reading all of the raw disk *) + let input_fd = Option.map read_header qcow_path in + + (* Parse the header of the VDI we are diffing against as well *) + let relative_to_qcow_path = Option.bind relative_to qcow_of_device in + let diff_fd = Option.map read_header relative_to_qcow_path in + + let unique_string = Uuidx.(to_string (make ())) in let args = - [path] @ match relative_to with None -> [] | Some vdi -> ["--diff"; vdi] + [path] + @ (match relative_to with None -> [] | Some vdi -> ["--diff"; vdi]) + @ ( match relative_to_qcow_path with + | None -> + [] + | Some _ -> + ["--json-header-diff"; unique_string] + ) + @ match qcow_path with None -> [] | Some _ -> ["--json-header"] in let qcow_tool = !Xapi_globs.qcow_to_stdout in - run_qcow_tool qcow_tool progress_cb args ~output_fd:unix_fd + let replace_fds = Option.map (fun fd -> [(unique_string, fd)]) diff_fd in + Xapi_stdext_pervasives.Pervasiveext.finally + (fun () -> + run_qcow_tool qcow_tool progress_cb args ?input_fd ~output_fd:unix_fd + ?replace_fds + ) + (fun () -> + Option.iter Unix.close input_fd ; + Option.iter Unix.close diff_fd + ) From 89140761d71277f233133d6d1f655ecfcbb16864 Mon Sep 17 00:00:00 2001 From: Andrii Sultanov Date: Wed, 26 Nov 2025 10:44:29 +0000 Subject: [PATCH 24/25] qcow_tool_wrapper: Implement parse_header to determine allocated clusters Translates JSON from qcow-stream-tool to OCaml types. This is currently unused, but will be used in stream_vdi and vhd_tool_wrapper in the future. Signed-off-by: Andrii Sultanov --- ocaml/xapi/qcow_tool_wrapper.ml | 14 ++++++++++++++ ocaml/xapi/qcow_tool_wrapper.mli | 2 ++ 2 files changed, 16 insertions(+) diff --git a/ocaml/xapi/qcow_tool_wrapper.ml b/ocaml/xapi/qcow_tool_wrapper.ml index cd42ca123d3..e3cd13d469b 100644 --- a/ocaml/xapi/qcow_tool_wrapper.ml +++ b/ocaml/xapi/qcow_tool_wrapper.ml @@ -62,6 +62,20 @@ let read_header qcow_path = (fun () -> Unix.close pipe_writer) ; pipe_reader +let parse_header qcow_path = + let pipe_reader = read_header qcow_path in + let ic = Unix.in_channel_of_descr pipe_reader in + let buf = Buffer.create 4096 in + let json = Yojson.Basic.from_channel ~buf ~fname:"qcow_header.json" ic in + In_channel.close ic ; + let cluster_size = + 1 lsl Yojson.Basic.Util.(member "cluster_bits" json |> to_int) + in + let cluster_list = + Yojson.Basic.Util.(member "data_clusters" json |> to_list |> List.map to_int) + in + (cluster_size, cluster_list) + let send ?relative_to (progress_cb : int -> unit) (unix_fd : Unix.file_descr) (path : string) (_size : Int64.t) = let qcow_of_device = diff --git a/ocaml/xapi/qcow_tool_wrapper.mli b/ocaml/xapi/qcow_tool_wrapper.mli index 51c3c626567..c1c4a6426af 100644 --- a/ocaml/xapi/qcow_tool_wrapper.mli +++ b/ocaml/xapi/qcow_tool_wrapper.mli @@ -23,3 +23,5 @@ val send : -> string -> int64 -> unit + +val parse_header : string -> int * int list From fe45173872b07bc3288ce6cb064e441e4a8a1067 Mon Sep 17 00:00:00 2001 From: Changlei Li Date: Wed, 10 Dec 2025 17:29:17 +0800 Subject: [PATCH 25/25] Update lifecycle Signed-off-by: Changlei Li --- ocaml/idl/datamodel_lifecycle.ml | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/ocaml/idl/datamodel_lifecycle.ml b/ocaml/idl/datamodel_lifecycle.ml index 38b68763627..ef14813ad97 100644 --- a/ocaml/idl/datamodel_lifecycle.ml +++ b/ocaml/idl/datamodel_lifecycle.ml @@ -98,15 +98,15 @@ let prototyped_of_field = function | "SM", "host_pending_features" -> Some "24.37.0" | "host", "timezone" -> - Some "25.37.0-next" + 
Some "25.39.0-next" | "host", "ntp_custom_servers" -> - Some "25.37.0-next" + Some "25.39.0-next" | "host", "ntp_mode" -> - Some "25.37.0-next" + Some "25.39.0-next" | "host", "secure_boot" -> Some "25.31.0" | "host", "max_cstate" -> - Some "25.37.0-next" + Some "25.39.0-next" | "host", "ssh_auto_mode" -> Some "25.27.0" | "host", "console_idle_timeout" -> @@ -234,21 +234,21 @@ let prototyped_of_message = function | "VTPM", "create" -> Some "22.26.0" | "host", "set_servertime" -> - Some "25.37.0-next" + Some "25.39.0-next" | "host", "get_ntp_synchronized" -> - Some "25.37.0-next" + Some "25.39.0-next" | "host", "list_timezones" -> - Some "25.37.0-next" + Some "25.39.0-next" | "host", "set_timezone" -> - Some "25.37.0-next" + Some "25.39.0-next" | "host", "get_ntp_servers_status" -> - Some "25.37.0-next" + Some "25.39.0-next" | "host", "set_ntp_custom_servers" -> - Some "25.37.0-next" + Some "25.39.0-next" | "host", "set_ntp_mode" -> - Some "25.37.0-next" + Some "25.39.0-next" | "host", "set_max_cstate" -> - Some "25.37.0-next" + Some "25.39.0-next" | "host", "update_firewalld_service_status" -> Some "25.34.0" | "host", "get_tracked_user_agents" ->