Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 21 additions & 10 deletions build/docker/intel-gpu-levelzero.Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -44,14 +44,15 @@ RUN if [ $ROCKYLINUX -eq 0 ]; then \
LATEST_GO=$(curl --no-progress-meter https://go.dev/dl/?mode=json | jq ".[] | select(.version | startswith(\"go${CGO_VERSION}\")).version" | tr -d "\"") && \
wget -q https://go.dev/dl/$LATEST_GO.linux-amd64.tar.gz -O - | tar -xz -C /usr/local && \
cd /runtime && \
wget -q https://github.com/intel/compute-runtime/releases/download/25.09.32961.7/intel-level-zero-gpu_1.6.32961.7_amd64.deb && \
wget -q https://github.com/intel/compute-runtime/releases/download/25.09.32961.7/intel-opencl-icd_25.09.32961.7_amd64.deb && \
wget -q https://github.com/intel/compute-runtime/releases/download/25.09.32961.7/libigdgmm12_22.6.0_amd64.deb && \
wget -q https://github.com/oneapi-src/level-zero/releases/download/v1.20.2/level-zero-devel_1.20.2+u22.04_amd64.deb && \
wget -q https://github.com/oneapi-src/level-zero/releases/download/v1.20.2/level-zero_1.20.2+u22.04_amd64.deb && \
wget -q https://github.com/intel/intel-graphics-compiler/releases/download/v2.8.3/intel-igc-core-2_2.8.3+18762_amd64.deb && \
wget -q https://github.com/intel/intel-graphics-compiler/releases/download/v2.8.3/intel-igc-opencl-2_2.8.3+18762_amd64.deb && \
wget -q https://github.com/intel/intel-graphics-compiler/releases/download/v2.20.3/intel-igc-core-2_2.20.3+19972_amd64.deb && \
wget -q https://github.com/intel/intel-graphics-compiler/releases/download/v2.20.3/intel-igc-opencl-2_2.20.3+19972_amd64.deb && \
wget -q https://github.com/intel/compute-runtime/releases/download/25.40.35563.4/intel-opencl-icd_25.40.35563.4-0_amd64.deb && \
wget -q https://github.com/intel/compute-runtime/releases/download/25.40.35563.4/libigdgmm12_22.8.2_amd64.deb && \
wget -q https://github.com/intel/compute-runtime/releases/download/25.40.35563.4/libze-intel-gpu1_25.40.35563.4-0_amd64.deb && \
wget -q https://github.com/oneapi-src/level-zero/releases/download/v1.24.3/level-zero_1.24.3+u22.04_amd64.deb && \
wget -q https://github.com/oneapi-src/level-zero/releases/download/v1.24.3/level-zero-devel_1.24.3+u22.04_amd64.deb && \
dpkg -i *.deb && \
rm -f *.deb && \
rm -rf /var/lib/apt/lists/\*; \
else \
source /etc/os-release && dnf install -y gcc jq wget 'dnf-command(config-manager)' && \
Expand Down Expand Up @@ -83,9 +84,19 @@ ARG CMD
ARG ROCKYLINUX
COPY --from=builder /runtime /runtime
RUN if [ $ROCKYLINUX -eq 0 ]; then \
apt-get update && apt-get install --no-install-recommends -y ocl-icd-libopencl1 && \
rm /runtime/level-zero-devel_*.deb && \
cd /runtime && dpkg -i *.deb && rm -rf /runtime && \
apt-get update && apt-get install --no-install-recommends -y ocl-icd-libopencl1 wget ca-certificates && \
cd /runtime && \
wget https://github.com/intel/intel-graphics-compiler/releases/download/v2.20.3/intel-igc-core-2_2.20.3+19972_amd64.deb && \
wget https://github.com/intel/intel-graphics-compiler/releases/download/v2.20.3/intel-igc-opencl-2_2.20.3+19972_amd64.deb && \
wget https://github.com/intel/compute-runtime/releases/download/25.40.35563.4/intel-opencl-icd_25.40.35563.4-0_amd64.deb && \
wget https://github.com/intel/compute-runtime/releases/download/25.40.35563.4/libigdgmm12_22.8.2_amd64.deb && \
wget https://github.com/intel/compute-runtime/releases/download/25.40.35563.4/libze-intel-gpu1_25.40.35563.4-0_amd64.deb && \
wget https://github.com/oneapi-src/level-zero/releases/download/v1.24.3/level-zero_1.24.3+u22.04_amd64.deb && \
dpkg -i *.deb && \
apt-get -y remove wget ca-certificates && \
apt-get -y autoremove && \
rm -f *.deb && \
rm -rf /var/lib/apt/lists/\* && \
rm "/lib/x86_64-linux-gnu/libze_validation"* && rm "/lib/x86_64-linux-gnu/libze_tracing_layer"*; \
else \
cp -a /runtime//*.so* /usr/lib64/ && cp -a /runtime/OpenCL /etc/ && cp -a /runtime/licenses/* /usr/share/licenses/; \
Expand Down
31 changes: 21 additions & 10 deletions build/docker/templates/intel-gpu-levelzero.Dockerfile.in
Original file line number Diff line number Diff line change
Expand Up @@ -37,14 +37,15 @@ RUN if [ $ROCKYLINUX -eq 0 ]; then \N
LATEST_GO=$(curl --no-progress-meter https://go.dev/dl/?mode=json | jq ".[] | select(.version | startswith(\"go${CGO_VERSION}\")).version" | tr -d "\"") && \N
wget -q https://go.dev/dl/$LATEST_GO.linux-amd64.tar.gz -O - | tar -xz -C /usr/local && \N
cd /runtime && \N
wget -q https://github.com/intel/compute-runtime/releases/download/25.09.32961.7/intel-level-zero-gpu_1.6.32961.7_amd64.deb && \N
wget -q https://github.com/intel/compute-runtime/releases/download/25.09.32961.7/intel-opencl-icd_25.09.32961.7_amd64.deb && \N
wget -q https://github.com/intel/compute-runtime/releases/download/25.09.32961.7/libigdgmm12_22.6.0_amd64.deb && \N
wget -q https://github.com/oneapi-src/level-zero/releases/download/v1.20.2/level-zero-devel_1.20.2+u22.04_amd64.deb && \N
wget -q https://github.com/oneapi-src/level-zero/releases/download/v1.20.2/level-zero_1.20.2+u22.04_amd64.deb && \N
wget -q https://github.com/intel/intel-graphics-compiler/releases/download/v2.8.3/intel-igc-core-2_2.8.3+18762_amd64.deb && \N
wget -q https://github.com/intel/intel-graphics-compiler/releases/download/v2.8.3/intel-igc-opencl-2_2.8.3+18762_amd64.deb && \N
wget -q https://github.com/intel/intel-graphics-compiler/releases/download/v2.20.3/intel-igc-core-2_2.20.3+19972_amd64.deb && \N
wget -q https://github.com/intel/intel-graphics-compiler/releases/download/v2.20.3/intel-igc-opencl-2_2.20.3+19972_amd64.deb && \N
wget -q https://github.com/intel/compute-runtime/releases/download/25.40.35563.4/intel-opencl-icd_25.40.35563.4-0_amd64.deb && \N
wget -q https://github.com/intel/compute-runtime/releases/download/25.40.35563.4/libigdgmm12_22.8.2_amd64.deb && \N
wget -q https://github.com/intel/compute-runtime/releases/download/25.40.35563.4/libze-intel-gpu1_25.40.35563.4-0_amd64.deb && \N
wget -q https://github.com/oneapi-src/level-zero/releases/download/v1.24.3/level-zero_1.24.3+u22.04_amd64.deb && \N
wget -q https://github.com/oneapi-src/level-zero/releases/download/v1.24.3/level-zero-devel_1.24.3+u22.04_amd64.deb && \N
dpkg -i *.deb && \N
rm -f *.deb && \N
rm -rf /var/lib/apt/lists/\*; \N
else \N
source /etc/os-release && dnf install -y gcc jq wget 'dnf-command(config-manager)' && \N
Expand Down Expand Up @@ -80,9 +81,19 @@ ARG ROCKYLINUX
COPY --from=builder /runtime /runtime

RUN if [ $ROCKYLINUX -eq 0 ]; then \N
apt-get update && apt-get install --no-install-recommends -y ocl-icd-libopencl1 && \N
rm /runtime/level-zero-devel_*.deb && \N
cd /runtime && dpkg -i *.deb && rm -rf /runtime && \N
apt-get update && apt-get install --no-install-recommends -y ocl-icd-libopencl1 wget ca-certificates && \N
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I suspect the main size reduction from this duplication comes actually from removing the accidentally left (large) downloaded deb files, not from dropping l0-dev, wget, certs & their deps. Did you check that?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It mainly comes from this:
COPY --from=builder /runtime /runtime
The runtime deb packages are copied from the build to the final phase, and while they are removed after the install the copy creates a large unnecessary layer. This is evident if you open the container in dive.

Would be nice if one could install packages directly from the build phase.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

docker does not support host volumes for builds (needs extension), but podman does. What if packages were on a host tmp volume (-v $(mktemp -d):/temporary:rw), I don't think those go to the final image?

cd /runtime && \N
wget https://github.com/intel/intel-graphics-compiler/releases/download/v2.20.3/intel-igc-core-2_2.20.3+19972_amd64.deb && \N
wget https://github.com/intel/intel-graphics-compiler/releases/download/v2.20.3/intel-igc-opencl-2_2.20.3+19972_amd64.deb && \N
wget https://github.com/intel/compute-runtime/releases/download/25.40.35563.4/intel-opencl-icd_25.40.35563.4-0_amd64.deb && \N
wget https://github.com/intel/compute-runtime/releases/download/25.40.35563.4/libigdgmm12_22.8.2_amd64.deb && \N
wget https://github.com/intel/compute-runtime/releases/download/25.40.35563.4/libze-intel-gpu1_25.40.35563.4-0_amd64.deb && \N
wget https://github.com/oneapi-src/level-zero/releases/download/v1.24.3/level-zero_1.24.3+u22.04_amd64.deb && \N
dpkg -i *.deb && \N
apt-get -y remove wget ca-certificates && \N
apt-get -y autoremove && \N
rm -f *.deb && \N
rm -rf /var/lib/apt/lists/\* && \N
rm "/lib/x86_64-linux-gnu/libze_validation"* && rm "/lib/x86_64-linux-gnu/libze_tracing_layer"*; \N
else \N
cp -a /runtime//*.so* /usr/lib64/ && cp -a /runtime/OpenCL /etc/ && cp -a /runtime/licenses/* /usr/share/licenses/; \N
Expand Down
15 changes: 11 additions & 4 deletions cmd/gpu_levelzero/zes.c
Original file line number Diff line number Diff line change
Expand Up @@ -137,8 +137,12 @@ static ze_result_t enumerate_zes_devices(void)
for (uint32_t i = 0; i < count; ++i) {
zes_device_handle_t dev_h = zes_handles[i];

zes_pci_properties_t pci_props;
zes_pci_properties_t pci_props = {
.pNext = NULL,
};

if (zesDevicePciGetProperties(dev_h, &pci_props) != ZE_RESULT_SUCCESS) {
print_log(LOG_WARNING, "Failed to get PCI properties for device %d: %X\n", i, res);
continue;
}

Expand Down Expand Up @@ -332,8 +336,9 @@ bool zes_device_bus_is_healthy(char* bdf_address, uint32_t* error)
return true;
}

zes_pci_state_t pci_state;
memset(&pci_state, 0, sizeof(pci_state));
zes_pci_state_t pci_state = {
.pNext = NULL,
};

ze_result_t res = zesDevicePciGetState(handle, &pci_state);
if (res == ZE_RESULT_SUCCESS) {
Expand Down Expand Up @@ -409,7 +414,9 @@ double zes_device_temp_max(char* bdf_address, char* sensor, uint32_t* error)
}

for (uint32_t i = 0; i < count; ++i) {
zes_temp_properties_t props;
zes_temp_properties_t props = {
.pNext = NULL,
};

res = zesTemperatureGetProperties(tempHandles[i], &props);
if (res != ZE_RESULT_SUCCESS) {
Expand Down
21 changes: 21 additions & 0 deletions cmd/gpu_plugin/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ Table of Contents
* [CDI support](#cdi-support)
* [KMD and UMD](#kmd-and-umd)
* [Health management](#health-management)
* [By-path mounting](#by-path-mounting)
* [Issues with media workloads on multi-GPU setups](#issues-with-media-workloads-on-multi-gpu-setups)
* [Workaround for QSV and VA-API](#workaround-for-qsv-and-va-api)

Expand Down Expand Up @@ -60,6 +61,7 @@ For workloads on different KMDs, see [KMD and UMD](#kmd-and-umd).
| -allow-ids | string | "" | A list of PCI Device IDs that are allowed to be registered as resources. Default is empty (=all registered). Cannot be used together with `deny-ids`. |
| -deny-ids | string | "" | A list of PCI Device IDs that are denied to be registered as resources. Default is empty (=all registered). Cannot be used together with `allow-ids`. |
| -allocation-policy | string | none | 3 possible values: balanced, packed, none. For shared-dev-num > 1: _balanced_ mode spreads workloads among GPU devices, _packed_ mode fills one GPU fully before moving to next, and _none_ selects first available device from kubelet. Default is _none_. |
| -bypath | string | single | 3 possible values: single, none, all. Default is single. Changes how the by-path symlinks are handled by the plugin. More [info](#by-path-mounting). |

The plugin also accepts a number of other arguments (common to all plugins) related to logging.
Please use the -h option to see the complete list of logging related options.
Expand Down Expand Up @@ -258,6 +260,25 @@ Kubernetes Device Plugin API allows passing device's healthiness to Kubelet. By

Temperature limit can be provided via the command line argument, default is 100C.

### By-path mounting

The DRM devices for the Intel GPUs register `by-path` symlinks under `/dev/dri/by-path`. For each GPU character device, there is a corresponding symlink in the by-path directory:
```
$ ls -l /dev/dri/by-path/
lrwxrwxrwx 1 root root 8 oct x 13:09 pci-0000:00:02.0-card -> ../card1
lrwxrwxrwx 1 root root 13 oct x 13:09 pci-0000:00:02.0-render -> ../renderD128
```

The Intel GPU UMD uses these symlinks to detect hardware properties in some cases. Mounting the by-path symlinks as __symlinks__ with the Device plugin API (DP API) is not possible. When the symlinks are mounted via the DP API, they are mounted as the actual devices, and the information carried by the symlink (the PCI address) is lost.

To support all possible use cases, the GPU plugin allows changing the by-path mounting method. The options are:
* `single` - Symlinks are individually mounted per device. Default.
* Mostly works, but is known to have issues with some PyTorch workloads. See [issue](https://github.com/intel/intel-device-plugins-for-kubernetes/issues/2158).
* `none` - No symlinks are mounted.
* Aligned with Docker `privileged` mode devices usage.
* `all` - Mounts whole DRM `by-path` directory. Pro: symlink file types are preserved. Con: symlinks are present for all devices.
* Optimal for scale-up workloads where all the GPUs are used by the workload.

### Issues with media workloads on multi-GPU setups

OneVPL media API, 3D and compute APIs provide device discovery
Expand Down
50 changes: 40 additions & 10 deletions cmd/gpu_plugin/gpu_plugin.go
Original file line number Diff line number Diff line change
Expand Up @@ -43,8 +43,6 @@ const (
devfsDriDirectory = "/dev/dri"
wslDxgPath = "/dev/dxg"
wslLibPath = "/usr/lib/wsl"
nfdFeatureDir = "/etc/kubernetes/node-feature-discovery/features.d"
resourceFilename = "intel-gpu-resources.txt"
gpuDeviceRE = `^card[0-9]+$`
controlDeviceRE = `^controlD[0-9]+$`
pciAddressRE = "^[0-9a-f]{4}:[0-9a-f]{2}:[0-9a-f]{2}\\.[0-9a-f]{1}$"
Expand All @@ -61,6 +59,10 @@ const (
monitorSuffix = "_monitoring"
monitorID = "all"

bypathOptionNone = "none"
bypathOptionAll = "all"
bypathOptionSingle = "single"

levelzeroAffinityMaskEnvVar = "ZE_AFFINITY_MASK"

// Period of device scans.
Expand All @@ -71,8 +73,11 @@ type cliOptions struct {
preferredAllocationPolicy string
allowIDs string
denyIDs string
bypathMount string
sharedDevNum int
temperatureLimit int
globalTempLimit int
memoryTempLimit int
gpuTempLimit int
enableMonitoring bool
wslScan bool
healthManagement bool
Expand Down Expand Up @@ -289,6 +294,16 @@ func (dp *devicePlugin) bypathMountsForPci(pciAddress, bypathDir string) []plugi
return mounts
}

// bypathMountForAll builds the mount list used when the whole by-path
// directory is exposed to the container. It returns exactly one read-only
// mount whose host and container paths are both dp.bypathDir.
func (dp *devicePlugin) bypathMountForAll() []pluginapi.Mount {
	dirMount := pluginapi.Mount{
		ContainerPath: dp.bypathDir,
		HostPath:      dp.bypathDir,
		ReadOnly:      true,
	}

	return []pluginapi.Mount{dirMount}
}

type devicePlugin struct {
gpuDeviceReg *regexp.Regexp
controlDeviceReg *regexp.Regexp
Expand Down Expand Up @@ -404,13 +419,13 @@ func (dp *devicePlugin) healthStatusForCard(cardPath string) string {
return health
}

limit := float64(dp.options.temperatureLimit)

// Temperatures for different areas
klog.V(4).Infof("Temperatures: Memory=%.1fC, GPU=%.1fC, Global=%.1fC",
klog.V(4).Infof("Temperatures: Memory=%dC, GPU=%dC, Global=%dC",
deviceTemps.Memory, deviceTemps.GPU, deviceTemps.Global)

if deviceTemps.GPU > limit || deviceTemps.Global > limit || deviceTemps.Memory > limit {
if deviceTemps.GPU > dp.options.gpuTempLimit ||
deviceTemps.Global > dp.options.globalTempLimit ||
deviceTemps.Memory > dp.options.memoryTempLimit {
health = pluginapi.Unhealthy
}

Expand Down Expand Up @@ -660,8 +675,20 @@ func (dp *devicePlugin) createMountsAndCDIDevices(cardPath, name string, devSpec
mounts := []pluginapi.Mount{}

if dp.bypathFound {
if pciAddr, pciErr := dp.pciAddressForCard(cardPath, name); pciErr == nil {
mounts = dp.bypathMountsForPci(pciAddr, dp.bypathDir)
switch dp.options.bypathMount {
case bypathOptionAll:
klog.V(4).Info("Using by-path mount option: all")
mounts = dp.bypathMountForAll()
case bypathOptionNone:
klog.V(4).Info("Using by-path mount option: none")
// no mounts
case bypathOptionSingle:
fallthrough
default:
klog.V(4).Info("Using by-path mount option: single/default")
if pciAddr, pciErr := dp.pciAddressForCard(cardPath, name); pciErr == nil {
mounts = dp.bypathMountsForPci(pciAddr, dp.bypathDir)
}
}
}

Expand Down Expand Up @@ -784,9 +811,12 @@ func main() {
flag.StringVar(&prefix, "prefix", "", "Prefix for devfs & sysfs paths")
flag.BoolVar(&opts.enableMonitoring, "enable-monitoring", false, "whether to enable '*_monitoring' (= all GPUs) resource")
flag.BoolVar(&opts.healthManagement, "health-management", false, "enable GPU health management")
flag.StringVar(&opts.bypathMount, "bypath", bypathOptionSingle, "DRI device 'by-path/' directory mounting options: single, none, all. Default: single")
flag.BoolVar(&opts.wslScan, "wsl", false, "scan for / use WSL devices")
flag.IntVar(&opts.sharedDevNum, "shared-dev-num", 1, "number of containers sharing the same GPU device")
flag.IntVar(&opts.temperatureLimit, "temp-limit", 100, "temperature limit at which device is marked unhealthy")
flag.IntVar(&opts.globalTempLimit, "temp-limit", 100, "Global temperature limit at which device is marked unhealthy")
flag.IntVar(&opts.gpuTempLimit, "gpu-temp-limit", 100, "GPU temperature limit at which device is marked unhealthy")
flag.IntVar(&opts.memoryTempLimit, "memory-temp-limit", 100, "Memory temperature limit at which device is marked unhealthy")
flag.StringVar(&opts.preferredAllocationPolicy, "allocation-policy", "none", "modes of allocating GPU devices: balanced, packed and none")
flag.StringVar(&opts.allowIDs, "allow-ids", "", "comma-separated list of device IDs to allow (e.g. 0x49c5,0x49c6)")
flag.StringVar(&opts.denyIDs, "deny-ids", "", "comma-separated list of device IDs to deny (e.g. 0x49c5,0x49c6)")
Expand Down
Loading