Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
74 changes: 74 additions & 0 deletions src/otlp/gauges/gpu.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
// Copyright (c) 2024-2025 Optimatist Technology Co., Ltd. All rights reserved.
// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
//
// This file is part of PSH.
//
// PSH is free software: you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License
// as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version.
//
// PSH is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even
// the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License along with Performance Savior Home (PSH). If not,
// see <https://www.gnu.org/licenses/>.

use opentelemetry::{KeyValue, metrics::ObservableGauge};
use psh_system::gpu::NvidiaHandle;

impl super::super::Otlp {
pub fn gpu_gauges(&self) -> anyhow::Result<ObservableGauge<u64>> {
let host = self.host.clone();
let interval = self.interval;
let nvgpu = NvidiaHandle::new();

let gauge = self
.meter
.u64_observable_gauge("NvGpuStat")
.with_description("System profile nvgpu statistics.")
.with_callback(move |gauge| {
let Ok(gpustats) = nvgpu.stat(Some(interval)) else {
return;
};

for stat in gpustats {
let vals = [
(stat.irq_num.into(), KeyValue::new("stat", "irq_num")),
(
stat.temperature.into(),
KeyValue::new("stat", "temperature"),
),
(
stat.max_pcie_link_gen.into(),
KeyValue::new("stat", "max_pcie_link_gen"),
),
(
stat.memory_info.total,
KeyValue::new("stat", "memory_total"),
),
(stat.memory_info.used, KeyValue::new("stat", "memory_used")),
(
stat.utilization_rates.memory.into(),
KeyValue::new("stat", "utilization_rates_memory"),
),
(
stat.utilization_rates.gpu.into(),
KeyValue::new("stat", "utilization_rates_gpu"),
),
];
for val in vals.into_iter() {
gauge.observe(
val.0,
&[
KeyValue::new("host", host.clone()),
KeyValue::new("uuid", stat.uuid.clone()),
KeyValue::new("name", stat.name.clone()),
val.1,
],
);
}
}
})
.build();
Ok(gauge)
}
}
1 change: 1 addition & 0 deletions src/otlp/gauges/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@

pub mod cpu;
pub mod disk;
pub mod gpu;
pub mod interrupt;
pub mod memory;
pub mod network;
Expand Down
3 changes: 3 additions & 0 deletions src/otlp/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,9 @@ impl Otlp {
if let Err(e) = self.vmstat_gauges() {
tracing::error!("Otlp vmstat: {e}")
}
if let Err(e) = self.gpu_gauges() {
tracing::error!("Otlp gpu: {e}")
}

loop {
tokio::time::sleep(interval).await;
Expand Down
Loading