@@ -1276,22 +1276,80 @@ let update_vm ~__context id =
12761276 let a = Opt. map (fun x -> f (snd x)) info in
12771277 let b = Opt. map f previous in
12781278 a <> b in
1279+ (* Helpers to create and update guest metrics when needed *)
1280+ let lookup state key =
1281+ if List. mem_assoc key state.guest_agent then Some (List. assoc key state.guest_agent) else None in
1282+ let list state dir =
1283+ let dir = if dir.[0 ] = '/' then String. sub dir 1 (String. length dir - 1 ) else dir in
1284+ let results = Listext.List. filter_map (fun (path , value ) ->
1285+ if String. startswith dir path then begin
1286+ let rest = String. sub path (String. length dir) (String. length path - (String. length dir)) in
1287+ match List. filter (fun x -> x <> " " ) (String. split '/' rest) with
1288+ | x :: _ -> Some x
1289+ | _ -> None
1290+ end else None
1291+ ) state.guest_agent |> Listext.List. setify in
1292+ results in
1293+ let create_guest_metrics_if_needed () =
1294+ let gm = Db.VM. get_guest_metrics ~__context ~self in
1295+ if gm = Ref. null then
1296+ Opt. iter
1297+ (fun (_ , state ) ->
1298+ List. iter
1299+ (fun domid ->
1300+ try
1301+ let new_gm_ref = Xapi_guest_agent. create_and_set_guest_metrics (lookup state) (list state) ~__context ~domid ~uuid: id in
1302+ debug " xenopsd event: created guest metrics %s for VM %s" (Ref. string_of new_gm_ref) id
1303+ with e ->
1304+ error " Caught %s: while creating VM %s guest metrics" (Printexc. to_string e) id
1305+ ) state.domids
1306+ ) info in
1307+ let check_guest_agent () =
1308+ Opt. iter
1309+ (fun (_ , state ) ->
1310+ Opt. iter (fun oldstate ->
1311+ let old_ga = oldstate.guest_agent in
1312+ let new_ga = state.guest_agent in
1313+
1314+ (* Remove memory keys *)
1315+ let ignored_keys = [ " data/meminfo_free" ; " data/updated" ; " data/update_cnt" ] in
1316+ let remove_ignored ga =
1317+ List. fold_left (fun acc k -> List. filter (fun x -> fst x <> k) acc) ga ignored_keys in
1318+ let old_ga = remove_ignored old_ga in
1319+ let new_ga = remove_ignored new_ga in
1320+ if new_ga <> old_ga then begin
1321+ debug " Will update VM.allowed_operations because guest_agent has changed." ;
1322+ should_update_allowed_operations := true
1323+ end else begin
1324+ debug " Supressing VM.allowed_operations update because guest_agent data is largely the same"
1325+ end
1326+ ) previous;
1327+ List. iter
1328+ (fun domid ->
1329+ try
1330+ debug " xenopsd event: Updating VM %s domid %d guest_agent" id domid;
1331+ Xapi_guest_agent. all (lookup state) (list state) ~__context ~domid ~uuid: id
1332+ with e ->
1333+ error " Caught %s: while updating VM %s guest_agent" (Printexc. to_string e) id
1334+ ) state.domids
1335+ ) info in
12791336 (* Notes on error handling: if something fails we log and continue, to
1280- maximise the amount of state which is correctly synced. If something
1281- does fail then we may end up permanently out-of-sync until either a
1282- process restart or an event is generated. We may wish to periodically
1283- inject artificial events IF there has been an event sync failure? *)
1337+ maximise the amount of state which is correctly synced. If something
1338+ does fail then we may end up permanently out-of-sync until either a
1339+ process restart or an event is generated. We may wish to periodically
1340+ inject artificial events IF there has been an event sync failure? *)
12841341 if different (fun x -> x.power_state) then begin
12851342 try
12861343 debug " Will update VM.allowed_operations because power_state has changed." ;
12871344 should_update_allowed_operations := true ;
12881345 let power_state = xenapi_of_xenops_power_state (Opt. map (fun x -> (snd x).power_state) info) in
12891346 debug " xenopsd event: Updating VM %s power_state <- %s" id (Record_util. power_state_to_string power_state);
12901347 (* This will mark VBDs, VIFs as detached and clear resident_on
1291- if the VM has permanently shutdown. current-operations
1292- should not be reset as there maybe a checkpoint is ongoing*)
1348+ if the VM has permanently shutdown. current-operations
1349+ should not be reset as there maybe a checkpoint is ongoing*)
12931350 Xapi_vm_lifecycle. force_state_reset_keep_current_operations ~__context ~self ~value: power_state;
12941351
1352+ if power_state = `Running then create_guest_metrics_if_needed () ;
12951353 if power_state = `Suspended || power_state = `Halted then begin
12961354 Xapi_network. detach_for_vm ~__context ~host: localhost ~vm: self;
12971355 Storage_access. reset ~__context ~vm: self;
@@ -1406,48 +1464,6 @@ let update_vm ~__context id =
14061464 with e ->
14071465 error " Caught %s: while updating VM %s rtc/timeoffset" (Printexc. to_string e) id
14081466 end ;
1409- let check_guest_agent () =
1410- Opt. iter
1411- (fun (_ , state ) ->
1412- Opt. iter (fun oldstate ->
1413- let old_ga = oldstate.guest_agent in
1414- let new_ga = state.guest_agent in
1415-
1416- (* Remove memory keys *)
1417- let ignored_keys = [ " data/meminfo_free" ; " data/updated" ; " data/update_cnt" ] in
1418- let remove_ignored ga =
1419- List. fold_left (fun acc k -> List. filter (fun x -> fst x <> k) acc) ga ignored_keys in
1420- let old_ga = remove_ignored old_ga in
1421- let new_ga = remove_ignored new_ga in
1422- if new_ga <> old_ga then begin
1423- debug " Will update VM.allowed_operations because guest_agent has changed." ;
1424- should_update_allowed_operations := true
1425- end else begin
1426- debug " Supressing VM.allowed_operations update because guest_agent data is largely the same"
1427- end
1428- ) previous;
1429- List. iter
1430- (fun domid ->
1431- let lookup key =
1432- if List. mem_assoc key state.guest_agent then Some (List. assoc key state.guest_agent) else None in
1433- let list dir =
1434- let dir = if dir.[0 ] = '/' then String. sub dir 1 (String. length dir - 1 ) else dir in
1435- let results = Listext.List. filter_map (fun (path , value ) ->
1436- if String. startswith dir path then begin
1437- let rest = String. sub path (String. length dir) (String. length path - (String. length dir)) in
1438- match List. filter (fun x -> x <> " " ) (String. split '/' rest) with
1439- | x :: _ -> Some x
1440- | _ -> None
1441- end else None
1442- ) state.guest_agent |> Listext.List. setify in
1443- results in
1444- try
1445- debug " xenopsd event: Updating VM %s domid %d guest_agent" id domid;
1446- Xapi_guest_agent. all lookup list ~__context ~domid ~uuid: id
1447- with e ->
1448- error " Caught %s: while updating VM %s guest_agent" (Printexc. to_string e) id
1449- ) state.domids
1450- ) info in
14511467 if different (fun x -> x.hvm) then begin
14521468 Opt. iter
14531469 (fun (_ , state ) ->
@@ -1484,15 +1500,44 @@ let update_vm ~__context id =
14841500 let update_pv_drivers_detected () =
14851501 Opt. iter
14861502 (fun (_ , state ) ->
1487- let gm = Db.VM. get_guest_metrics ~__context ~self in
1488- debug " xenopsd event: Updating VM %s PV drivers detected %b" id state.pv_drivers_detected;
1489- Db.VM_guest_metrics. set_PV_drivers_detected ~__context ~self: gm ~value: state.pv_drivers_detected;
1490- Db.VM_guest_metrics. set_PV_drivers_up_to_date ~__context ~self: gm ~value: state.pv_drivers_detected
1503+ try
1504+ let gm = Db.VM. get_guest_metrics ~__context ~self in
1505+ debug " xenopsd event: Updating VM %s PV drivers detected %b" id state.pv_drivers_detected;
1506+ Db.VM_guest_metrics. set_PV_drivers_detected ~__context ~self: gm ~value: state.pv_drivers_detected;
1507+ Db.VM_guest_metrics. set_PV_drivers_up_to_date ~__context ~self: gm ~value: state.pv_drivers_detected
1508+ with e ->
1509+ debug " Caught %s: while updating VM %s PV drivers" (Printexc. to_string e) id
14911510 ) info in
1511+ (* Chack last_start_time before updating anything in the guest metrics *)
1512+ if different (fun x -> x.last_start_time) then begin
1513+ try
1514+ Opt. iter
1515+ (fun (_ , state ) ->
1516+ debug " xenopsd event: Updating VM %s last_start_time <- %s" id (Date. to_string (Date. of_float state.last_start_time));
1517+ let metrics = Db.VM. get_metrics ~__context ~self in
1518+ let start_time = Date. of_float state.last_start_time in
1519+ Db.VM_metrics. set_start_time ~__context ~self: metrics ~value: start_time;
1520+
1521+ create_guest_metrics_if_needed () ;
1522+ let gm = Db.VM. get_guest_metrics ~__context ~self in
1523+ let update_time = Db.VM_guest_metrics. get_last_updated ~__context ~self: gm in
1524+ if update_time < start_time then begin
1525+ debug " VM %s guest metrics update time (%s) < VM start time (%s): deleting"
1526+ id (Date. to_string update_time) (Date. to_string start_time);
1527+ Xapi_vm_helpers. delete_guest_metrics ~__context ~self ;
1528+ check_guest_agent () ;
1529+ end
1530+ ) info
1531+ with e ->
1532+ error " Caught %s: while updating VM %s last_start_time" (Printexc. to_string e) id
1533+ end ;
14921534 Opt. iter
14931535 (fun (_ , state ) ->
14941536 List. iter
14951537 (fun domid ->
1538+ (* Guest metrics could have been destroyed during the last_start_time check
1539+ by recreating them, we avoid CA-223387 *)
1540+ create_guest_metrics_if_needed () ;
14961541 if different (fun x -> x.uncooperative_balloon_driver) then begin
14971542 debug " xenopsd event: VM %s domid %d uncooperative_balloon_driver = %b" id domid state.uncooperative_balloon_driver;
14981543 end ;
@@ -1526,30 +1571,6 @@ let update_vm ~__context id =
15261571 error " Caught %s: while updating VM %s VCPUs_number" (Printexc. to_string e) id
15271572 ) info
15281573 end ;
1529- if different (fun x -> x.last_start_time) then begin
1530- try
1531- Opt. iter
1532- (fun (_ , state ) ->
1533- debug " xenopsd event: Updating VM %s last_start_time <- %s" id (Date. to_string (Date. of_float state.last_start_time));
1534- let metrics = Db.VM. get_metrics ~__context ~self in
1535- let start_time = Date. of_float state.last_start_time in
1536- Db.VM_metrics. set_start_time ~__context ~self: metrics ~value: start_time;
1537- begin
1538- try
1539- let gm = Db.VM. get_guest_metrics ~__context ~self in
1540- let update_time = Db.VM_guest_metrics. get_last_updated ~__context ~self: gm in
1541- if update_time < start_time then begin
1542- debug " VM %s guest metrics update time (%s) < VM start time (%s): deleting"
1543- id (Date. to_string update_time) (Date. to_string start_time);
1544- Xapi_vm_helpers. delete_guest_metrics ~__context ~self ;
1545- check_guest_agent () ;
1546- end
1547- with _ -> () (* The guest metrics didn't exist *)
1548- end
1549- ) info
1550- with e ->
1551- error " Caught %s: while updating VM %s last_start_time" (Printexc. to_string e) id
1552- end ;
15531574 if different (fun x -> x.shadow_multiplier_target) then begin
15541575 try
15551576 Opt. iter
0 commit comments