Skip to content

Commit

Permalink
Make NVDRIVER configurable from env file
Browse files Browse the repository at this point in the history
Signed-off-by: Carlos Eduardo Arango Gutierrez <[email protected]>
  • Loading branch information
ArangoGutierrez committed Feb 12, 2025
1 parent b1e030a commit 8c347ce
Show file tree
Hide file tree
Showing 4 changed files with 67 additions and 8 deletions.
57 changes: 56 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -113,4 +113,59 @@ Dryrun environment holodeck 🔍
✔ Checking if image ami-0fe8bec493a81c7da is supported in region eu-north-1
✔ Resolving dependencies 📦
Dryrun succeeded 🎉
```
```
## Supported CUDA Drivers
To install a specific NVIDIA driver version, set the `nvidiaDriver` section of the environment file:
```yaml
nvidiaDriver:
  install: true
  version: <version>
```
Here, `<version>` can be a prefix of any `cuda-drivers` package version. The following are example package versions:
- 570.86.15-0ubuntu1
- 570.86.10-0ubuntu1
- 565.57.01-0ubuntu1
- 560.35.05-0ubuntu1
- 560.35.03-1
- 560.28.03-1
- 555.42.06-1
- 555.42.02-1
- 550.144.03-0ubuntu1
- 550.127.08-0ubuntu1
- 550.127.05-0ubuntu1
- 550.90.12-0ubuntu1
- 550.90.07-1
- 550.54.15-1
- 550.54.14-1
- 545.23.08-1
- 545.23.06-1
- 535.230.02-0ubuntu1
- 535.216.03-0ubuntu1
- 535.216.01-0ubuntu1
- 535.183.06-1
- 535.183.01-1
- 535.161.08-1
- 535.161.07-1
- 535.154.05-1
- 535.129.03-1
- 535.104.12-1
- 535.104.05-1
- 535.86.10-1
- 535.54.03-1
- 530.30.02-1
- 525.147.05-1
- 525.125.06-1
- 525.105.17-1
- 525.85.12-1
- 525.60.13-1
- 520.61.05-1
- 515.105.01-1
- 515.86.01-1
- 515.65.07-1
- 515.65.01-1
- 515.48.07-1
- 515.43.04-1
2 changes: 1 addition & 1 deletion pkg/provisioner/dependency.go
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ var (
type ProvisionFunc func(tpl *bytes.Buffer, env v1alpha1.Environment) error

// nvdriver is a ProvisionFunc-shaped helper: it builds the NvDriver template
// helper and renders it into tpl for the given environment.
// NOTE(review): the two consecutive assignments below are the removed and the
// added line of this diff hunk; the added one passes env to the constructor.
func nvdriver(tpl *bytes.Buffer, env v1alpha1.Environment) error {
nvdriver := templates.NewNvDriver()
nvdriver := templates.NewNvDriver(env)
return nvdriver.Execute(tpl, env)
}

Expand Down
2 changes: 1 addition & 1 deletion pkg/provisioner/templates/common.go
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ install_packages_with_retry() {
echo "Attempt $i to install packages: ${packages[@]}"
# Attempt to install packages
sudo apt-get install -y "${packages[@]}"
sudo apt-get install -y --no-install-recommends "${packages[@]}"
# Check if the last command failed and the error is related to unsigned repository
if [ $? -ne 0 ] && grep -q 'NO_PUBKEY' <<< "$(tail -n 1 /var/lib/dpkg/status 2>/dev/null)"; then
Expand Down
14 changes: 9 additions & 5 deletions pkg/provisioner/templates/nv-driver.go
Original file line number Diff line number Diff line change
Expand Up @@ -34,21 +34,25 @@ wget https://developer.download.nvidia.com/compute/cuda/repos/$distribution/x86_
sudo dpkg -i cuda-keyring_1.1-1_all.deb
with_retry 3 10s sudo apt-get update
install_packages_with_retry cuda-drivers
install_packages_with_retry cuda-drivers{{if .Version}}={{.Version}}{{end}}
nvidia-smi -L
nvidia-smi
`

// NvDriver holds the data rendered into NvDriverTemplate.
type NvDriver struct {
// Version -- if specified -- indicates the version of the `cuda-drivers` package to install.
// When empty, the template emits plain `cuda-drivers` with no `=<version>` suffix.
Version string
}

// NewNvDriver returns an NvDriver whose Version is taken from
// env.Spec.NVIDIADriver.Version.
// NOTE(review): the first signature and its `return &NvDriver{}` are the removed
// lines of this diff hunk; the env-taking definition that follows replaces them.
func NewNvDriver() *NvDriver {
return &NvDriver{}
func NewNvDriver(env v1alpha1.Environment) *NvDriver {
return &NvDriver{
Version: env.Spec.NVIDIADriver.Version,
}
}

func (t *NvDriver) Execute(tpl *bytes.Buffer, env v1alpha1.Environment) error {
nvDriverTemplate := template.Must(template.New("nv-driver").Parse(NvDriverTemplate))
err := nvDriverTemplate.Execute(tpl, &NvDriver{})
err := nvDriverTemplate.Execute(tpl, t)
if err != nil {
return fmt.Errorf("failed to execute nv-driver template: %v", err)
}
Expand Down

0 comments on commit 8c347ce

Please sign in to comment.