Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
85 changes: 85 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 2 additions & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@ crossbeam = { workspace = true }
influxdb-line-protocol = { workspace = true }
psh-proto = { workspace = true }
mimalloc = { workspace = true }
nvml-wrapper = { workspace = true }

[lints]
workspace = true
Expand Down Expand Up @@ -88,6 +89,7 @@ crossbeam = "0.8"
influxdb-line-protocol = "2"
psh-proto = { git = "https://github.com/OptimatistOpenSource/psh-proto.git", rev = "ca2919053029cb584b478611f8bf8496bf3cf7f7" }
mimalloc = "0.1"
nvml-wrapper = "0.10.0"

[workspace.lints.rust]

Expand Down
2 changes: 2 additions & 0 deletions crates/psh-system/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@ procfs = { workspace = true }
thiserror = { workspace = true }
uname = { workspace = true }
which = { workspace = true }
nvml-wrapper = { workspace = true }
tracing = { workspace = true }

[dev-dependencies]
num_cpus = { workspace = true }
Expand Down
3 changes: 3 additions & 0 deletions crates/psh-system/src/error.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@

use std::{io, str::Utf8Error};

use nvml_wrapper::error::NvmlError;
use thiserror::Error;

#[derive(Debug, Error)]
Expand All @@ -32,6 +33,8 @@ pub enum Error {
InvalidCpuMask(String),
#[error("Value is empty")]
EmptyValue,
#[error("Failed to init nvml: {0}.")]
Nvml(#[from] NvmlError),
}

pub type Result<T> = std::result::Result<T, Error>;
43 changes: 43 additions & 0 deletions crates/psh-system/src/gpu/handle.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
use std::{sync::LazyLock, time::Duration};

use crate::{error::Result, utils::Handle};

use super::{
GpuInfo, GpuStats,
raw::{gpu_info, gpu_stats},
};

/// Shared handle wrapping the `gpu_info` query; built on first access and cloned into every `NvidiaHandle`.
// NOTE(review): `Handle` lives in `crate::utils` and is not visible here — presumably it caches
// the last result of the wrapped function; confirm before relying on that.
static GPU_INFO_HANDLE: LazyLock<Handle<GpuInfo>> = LazyLock::new(|| Handle::new(gpu_info));
/// Shared handle wrapping the `gpu_stats` query; built on first access and cloned into every `NvidiaHandle`.
static GPU_STATS_HANDLE: LazyLock<Handle<Vec<GpuStats>>> = LazyLock::new(|| Handle::new(gpu_stats));

/// Entry point for querying NVIDIA GPU data gathered through NVML.
#[derive(Clone, Debug)]
pub struct NvidiaHandle {
// Source of driver-level information (`GpuInfo`).
info: Handle<GpuInfo>,
// Source of per-device statistics (one `GpuStats` entry per enumerated GPU).
stat: Handle<Vec<GpuStats>>,
}

impl NvidiaHandle {
    /// Creates a handle backed by clones of the module-level shared GPU handles.
    pub fn new() -> Self {
        let info = GPU_INFO_HANDLE.clone();
        let stat = GPU_STATS_HANDLE.clone();
        Self { info, stat }
    }

    /// Returns driver-level GPU information.
    ///
    /// The underlying handle is always queried with no interval (`None`).
    ///
    /// # Errors
    ///
    /// Propagates whatever error the underlying handle reports.
    pub fn info(&self) -> Result<GpuInfo> {
        let no_interval: Option<Duration> = None;
        self.info.get(no_interval)
    }

    /// Returns statistics for every GPU, one entry per device.
    ///
    /// `interval` accepts either a `Duration` or an `Option<Duration>` and is
    /// forwarded to the underlying handle unchanged.
    // NOTE(review): presumably the interval bounds how stale a cached value may be —
    // confirm against `crate::utils::Handle`.
    ///
    /// # Errors
    ///
    /// Propagates whatever error the underlying handle reports.
    pub fn stat<D>(&self, interval: D) -> Result<Vec<GpuStats>>
    where
        D: Into<Option<Duration>>,
    {
        let interval: Option<Duration> = interval.into();
        self.stat.get(interval)
    }
}

impl Default for NvidiaHandle {
fn default() -> Self {
Self::new()
}
}
28 changes: 28 additions & 0 deletions crates/psh-system/src/gpu/mod.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
mod handle;
mod raw;

use nvml_wrapper::struct_wrappers::device::{MemoryInfo, PciInfo, Utilization};

pub use handle::NvidiaHandle;

/// Driver-level information reported by NVML; not tied to any single device.
// Field order matters: the derived `PartialOrd`/`Ord` compare fields in declaration order.
#[derive(Clone, Debug, Default, PartialEq, Eq, PartialOrd, Ord)]
pub struct GpuInfo {
/// Driver version string, as returned by NVML's system driver version query.
pub driver_version: String,
/// CUDA driver version, stored as the raw integer NVML returns.
// NOTE(review): NVML presumably encodes this as `1000 * major + 10 * minor` — confirm.
pub cuda_driver_version: i32,
}

/// Snapshot of the values queried from NVML for a single GPU device.
#[derive(Clone, Debug)]
pub struct GpuStats {
/// Device UUID string as reported by NVML.
pub uuid: String,
/// Speed of each fan; the vec index is the fan index.
// NOTE(review): NVML presumably reports fan speed as a percentage of maximum — confirm.
pub fan_speeds: Vec<u32>,
/// VBIOS version string of the device.
pub vbios_version: String,
/// Reading of the on-die GPU temperature sensor (`TemperatureSensor::Gpu`).
// NOTE(review): presumably degrees Celsius per the NVML docs — confirm.
pub temperature: u32,
/// Device name as reported by NVML.
pub name: String,
/// PCI attributes of the device.
pub pci_info: PciInfo,
/// Interrupt number reported by NVML for the device.
pub irq_num: u32,
/// Maximum PCIe link generation reported by NVML for the device.
pub max_pcie_link_gen: u32,
/// PCIe link generation in use when the snapshot was taken.
pub current_pcie_link_gen: u32,
/// Utilization rates as returned by NVML.
pub utilization_rates: Utilization,
/// Device memory information as returned by NVML.
pub memory_info: MemoryInfo,
}
55 changes: 55 additions & 0 deletions crates/psh-system/src/gpu/raw.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
use std::sync::LazyLock;

use nvml_wrapper::{Nvml, enum_wrappers::device::TemperatureSensor, error::NvmlError};

use crate::error::Result;

use super::{GpuInfo, GpuStats};

/// NVML library instance shared by every query in this module.
///
/// Initialization is attempted once, on first access. If it fails, the error is
/// logged as a warning and `None` is stored, so later queries can report that
/// NVML is unavailable instead of retrying.
static NVML: LazyLock<Option<Nvml>> = LazyLock::new(|| {
    Nvml::init()
        .inspect_err(|e| tracing::warn!("{e}"))
        .ok()
});

/// Queries NVML for driver-level information.
///
/// # Errors
///
/// Returns `NvmlError::Uninitialized` (converted into the crate error) when NVML
/// could not be initialized, or the error of whichever version query fails first.
pub fn gpu_info() -> Result<GpuInfo> {
    let Some(nvml) = NVML.as_ref() else {
        return Err(NvmlError::Uninitialized.into());
    };

    // Keep the query order: driver version first, then CUDA driver version.
    let driver_version = nvml.sys_driver_version()?;
    let cuda_driver_version = nvml.sys_cuda_driver_version()?;

    Ok(GpuInfo {
        driver_version,
        cuda_driver_version,
    })
}

pub fn gpu_stats() -> Result<Vec<GpuStats>> {
let nvml = NVML.as_ref().ok_or(NvmlError::Uninitialized)?;

let count = nvml.device_count()?;

let mut var = Vec::with_capacity(count as usize);
for i in 0..count {
let device = nvml.device_by_index(i)?;
let num_fans = device.num_fans()?;
let mut fan_speeds = Vec::with_capacity(num_fans as usize);
for i in 0..num_fans {
fan_speeds.push(device.fan_speed(i)?);
}

var.push(GpuStats {
uuid: device.uuid()?,
fan_speeds,
vbios_version: device.vbios_version()?,
temperature: device.temperature(TemperatureSensor::Gpu)?,
name: device.name()?,
pci_info: device.pci_info()?,
irq_num: device.irq_num()?,
max_pcie_link_gen: device.max_pcie_link_gen()?,
current_pcie_link_gen: device.current_pcie_link_gen()?,
utilization_rates: device.utilization_rates()?,
memory_info: device.memory_info()?,
});
}

Ok(var)
}
1 change: 1 addition & 0 deletions crates/psh-system/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
pub mod cpu;
pub mod disk;
pub mod error;
pub mod gpu;
pub mod interrupt;
pub mod memory;
pub mod network;
Expand Down
Loading