Skip to content

Commit 4433e4c

Browse files
authored
Merge branch 'stackhpc/2025.1' into claudia-baremetaluser
2 parents 51a7a9b + 4deb6f0 commit 4433e4c

File tree

10 files changed

+217
-115
lines changed

10 files changed

+217
-115
lines changed

.github/workflows/multinode-inputs.py

Lines changed: 18 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ class OSRelease:
1717
@dataclass
1818
class OpenStackRelease:
1919
version: str
20+
previous_version: str
2021
os_releases: t.List[OSRelease]
2122

2223

@@ -33,17 +34,27 @@ class Scenario:
3334
UBUNTU_NOBLE = OSRelease("ubuntu", "noble", "ubuntu")
3435
# NOTE(upgrade): Add supported releases here.
3536
OPENSTACK_RELEASES = [
36-
OpenStackRelease("2023.1", [ROCKY_9, UBUNTU_JAMMY]),
37-
OpenStackRelease("2024.1", [ROCKY_9, UBUNTU_JAMMY]),
38-
OpenStackRelease("2025.1", [ROCKY_9, UBUNTU_NOBLE]),
37+
OpenStackRelease("2023.1", "zed", [ROCKY_9, UBUNTU_JAMMY]),
38+
OpenStackRelease("2024.1", "2023.1", [ROCKY_9, UBUNTU_JAMMY]),
39+
OpenStackRelease("2025.1", "2024.1", [ROCKY_9, UBUNTU_NOBLE]),
3940
]
4041
NEUTRON_PLUGINS = ["ovs", "ovn"]
4142
VERSION_HIERARCHY = ["zed", "2023.1", "2024.1", "2025.1"]
4243

4344

4445
def main() -> None:
4546
scenario = random_scenario()
46-
inputs = generate_inputs(scenario)
47+
inputs = {
48+
"os_distribution": scenario.os_release.distribution,
49+
"os_release": scenario.os_release.release,
50+
"ssh_username": scenario.os_release.ssh_username,
51+
"neutron_plugin": scenario.neutron_plugin,
52+
"upgrade": scenario.upgrade,
53+
"stackhpc_kayobe_config_version": get_branch(scenario.openstack_release.version),
54+
"stackhpc_kayobe_config_previous_version": get_branch(scenario.openstack_release.previous_version),
55+
"terraform_kayobe_multinode_version": get_tkm_version(scenario.openstack_release.version),
56+
"terraform_kayobe_multinode_previous_version": get_tkm_version(scenario.openstack_release.previous_version),
57+
}
4758
for name, value in inputs.items():
4859
write_output(name, value)
4960

@@ -56,36 +67,15 @@ def random_scenario() -> Scenario:
5667
return Scenario(openstack_release, os_release, neutron_plugin, upgrade)
5768

5869

59-
def generate_inputs(scenario: Scenario) -> t.Dict[str, str]:
60-
branch = get_branch(scenario.openstack_release.version)
61-
previous_branch = get_branch(
62-
VERSION_HIERARCHY[
63-
VERSION_HIERARCHY.index(scenario.openstack_release.version) - 1
64-
]
65-
)
66-
terraform_kayobe_multinode_version = get_tkm_version(
67-
scenario.openstack_release.version
68-
)
69-
inputs = {
70-
"os_distribution": scenario.os_release.distribution,
71-
"os_release": scenario.os_release.release,
72-
"ssh_username": scenario.os_release.ssh_username,
73-
"neutron_plugin": scenario.neutron_plugin,
74-
"upgrade": scenario.upgrade,
75-
"stackhpc_kayobe_config_version": branch,
76-
"stackhpc_kayobe_config_previous_version": previous_branch,
77-
"terraform_kayobe_multinode_version": terraform_kayobe_multinode_version,
78-
}
79-
return inputs
80-
81-
8270
def get_branch(version: str) -> str:
8371
return f"stackhpc/{version}"
8472

8573

8674
def get_tkm_version(version: str) -> str:
87-
if version in ["zed", "2023.1"]:
75+
if version == "zed":
8876
return "ea61ea1730e179e05e8f0e58b759267664c555e7"
77+
elif version in ["2023.1", "2024.1"]:
78+
return "stackhpc/2024.1" # This version is targeted to support Caracal or older releases
8979
else:
9080
return "main"
9181

.github/workflows/stackhpc-multinode-periodic.yml

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@ jobs:
2020
stackhpc_kayobe_config_version: ${{ steps.generate-inputs.outputs.stackhpc_kayobe_config_version }}
2121
stackhpc_kayobe_config_previous_version: ${{ steps.generate-inputs.outputs.stackhpc_kayobe_config_previous_version }}
2222
terraform_kayobe_multinode_version: ${{ steps.generate-inputs.outputs.terraform_kayobe_multinode_version }}
23+
terraform_kayobe_multinode_previous_version: ${{ steps.generate-inputs.outputs.terraform_kayobe_multinode_previous_version }}
2324
steps:
2425
- name: Checkout
2526
uses: actions/checkout@v4
@@ -36,7 +37,7 @@ jobs:
3637
name: Multinode periodic
3738
needs:
3839
- generate-inputs
39-
uses: stackhpc/stackhpc-openstack-gh-workflows/.github/workflows/multinode.yml@1.4.0
40+
uses: stackhpc/stackhpc-openstack-gh-workflows/.github/workflows/multinode.yml@1.5.0
4041
with:
4142
multinode_name: mn-periodic-${{ github.run_id }}
4243
os_distribution: ${{ needs.generate-inputs.outputs.os_distribution }}
@@ -47,6 +48,7 @@ jobs:
4748
stackhpc_kayobe_config_version: ${{ needs.generate-inputs.outputs.stackhpc_kayobe_config_version }}
4849
stackhpc_kayobe_config_previous_version: ${{ needs.generate-inputs.outputs.stackhpc_kayobe_config_previous_version }}
4950
terraform_kayobe_multinode_version: ${{ needs.generate-inputs.outputs.terraform_kayobe_multinode_version }}
51+
terraform_kayobe_multinode_previous_version: ${{ needs.generate-inputs.outputs.terraform_kayobe_multinode_previous_version }}
5052
enable_slack_alert: true
5153
secrets: inherit
5254
if: github.repository == 'stackhpc/stackhpc-kayobe-config'

.github/workflows/stackhpc-multinode.yml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -56,7 +56,7 @@ name: Multinode
5656
jobs:
5757
multinode:
5858
name: Multinode
59-
uses: stackhpc/stackhpc-openstack-gh-workflows/.github/workflows/multinode.yml@1.4.1
59+
uses: stackhpc/stackhpc-openstack-gh-workflows/.github/workflows/multinode.yml@1.5.0
6060
with:
6161
multinode_name: ${{ inputs.multinode_name }}
6262
os_distribution: ${{ inputs.os_distribution }}
@@ -72,4 +72,5 @@ jobs:
7272
# NOTE(upgrade): Reference the PREVIOUS and CURRENT releases here.
7373
stackhpc_kayobe_config_previous_version: ${{ inputs.upgrade == 'major' && 'stackhpc/2024.1' || 'stackhpc/2025.1' }}
7474
terraform_kayobe_multinode_version: ${{ inputs.terraform_kayobe_multinode_version }}
75+
terraform_kayobe_multinode_previous_version: ${{ inputs.upgrade == 'major' && 'stackhpc/2024.1' || 'main' }}
7576
secrets: inherit

doc/source/operations/gpu-in-openstack.rst

Lines changed: 33 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -186,52 +186,6 @@ path using ``file`` as the url scheme e.g:
186186
187187
See :ref:`NVIDIA Role Configuration`.
188188

189-
.. _NVIDIA OS Configuration:
190-
191-
OS Configuration
192-
----------------
193-
194-
Host OS configuration is done by using roles in the `stackhpc.linux <https://github.com/stackhpc/ansible-collection-linux>`_ ansible collection.
195-
196-
Create a new playbook or update an existing one to apply the roles:
197-
198-
.. code-block:: yaml
199-
:caption: $KAYOBE_CONFIG_PATH/ansible/host-configure.yml
200-
201-
---
202-
- hosts: iommu
203-
tags:
204-
- iommu
205-
tasks:
206-
- import_role:
207-
name: stackhpc.linux.iommu
208-
handlers:
209-
- name: reboot
210-
set_fact:
211-
kayobe_needs_reboot: true
212-
213-
- hosts: vgpu
214-
tags:
215-
- vgpu
216-
tasks:
217-
- import_role:
218-
name: stackhpc.linux.vgpu
219-
handlers:
220-
- name: reboot
221-
set_fact:
222-
kayobe_needs_reboot: true
223-
224-
- name: Reboot when required
225-
hosts: iommu:vgpu
226-
tags:
227-
- reboot
228-
tasks:
229-
- name: Reboot
230-
reboot:
231-
reboot_timeout: 3600
232-
become: true
233-
when: kayobe_needs_reboot | default(false) | bool
234-
235189
Ansible Inventory Configuration
236190
-------------------------------
237191

@@ -276,7 +230,39 @@ hosts can automatically be mapped to these groups by configuring
276230
Role Configuration
277231
------------------
278232

279-
Configure the VGPU devices:
233+
Look up the supported VGPU devices (here we use an H100 as an example).
234+
``0000:06:00.0`` is the PCI address of the GPU itself. You can find this with
235+
``lspci | grep NVIDIA``.
236+
237+
.. code-block:: bash
238+
239+
# Find the supported mdev types
240+
ls /sys/class/mdev_bus/0000\:06\:00.0/mdev_supported_types/
241+
nvidia-1130 nvidia-1131 nvidia-1132 nvidia-1133 nvidia-1134 nvidia-1135 nvidia-1136 nvidia-1137 nvidia-1138 nvidia-1139 nvidia-1140 nvidia-1141 nvidia-1142 nvidia-1143 nvidia-1144
242+
243+
# Find the names of these types.
244+
cat /sys/class/mdev_bus/0000\:06\:00.0/mdev_supported_types/*/name
245+
NVIDIA H100XM-1-10CME
246+
NVIDIA H100XM-1-10C
247+
NVIDIA H100XM-1-20C
248+
NVIDIA H100XM-2-20C
249+
NVIDIA H100XM-3-40C
250+
NVIDIA H100XM-4-40C
251+
NVIDIA H100XM-7-80C
252+
NVIDIA H100XM-4C
253+
NVIDIA H100XM-5C
254+
NVIDIA H100XM-8C
255+
NVIDIA H100XM-10C
256+
NVIDIA H100XM-16C
257+
NVIDIA H100XM-20C
258+
NVIDIA H100XM-40C
259+
NVIDIA H100XM-80C
260+
261+
See
262+
`the NVIDIA VGPU user guide <https://docs.nvidia.com/vgpu/19.0/grid-vgpu-user-guide/index.html>`__
263+
for details on device types.
264+
265+
Configure the VGPU devices (here we use an A100 as a different example).
280266

281267
.. code-block:: yaml
282268
:caption: $KAYOBE_CONFIG_PATH/inventory/group_vars/compute_vgpu/vgpu

etc/kayobe/ansible.cfg

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,8 @@ inject_facts_as_vars = False
1010
callbacks_enabled = ansible.posix.profile_tasks
1111
# Silence warning about invalid characters found in group names
1212
force_valid_group_names = ignore
13+
# Default value plus custom filter plugins path
14+
filter_plugins = $ANSIBLE_HOME/plugins/filter:/usr/share/ansible/plugins/filter:$KAYOBE_CONFIG_PATH/ansible/filter_plugins/
1315

1416
[inventory]
1517
# Fail when any inventory source cannot be parsed.

etc/kayobe/ansible/deployment/wazuh-manager.yml

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -134,8 +134,15 @@
134134
ansible.builtin.file:
135135
path: "/usr/share/filebeat/module/wazuh/alerts/manifest.yml"
136136
mode: "go-w"
137+
notify:
138+
- Restart filebeat
137139

138140
handlers:
141+
- name: Restart filebeat
142+
ansible.builtin.service:
143+
name: filebeat
144+
state: restarted
145+
139146
- name: Restart wazuh
140147
ansible.builtin.service:
141148
name: wazuh-manager
Lines changed: 87 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,87 @@
1+
#!/usr/bin/env python3
2+
3+
from ansible.errors import AnsibleFilterError
4+
5+
class FilterModule(object):
    """Custom Ansible filters that reshape ``hostvars`` for reporting.

    Exposes two filters: one that groups hosts by the value of a variable,
    and one that maps each host to its value of a variable.
    """

    def filters(self):
        """Return the mapping of filter names to their implementations."""
        return {
            'group_hostvars_by_var':
                self.group_hostvars_by_var,
            'get_hostvars_by_host':
                self.get_hostvars_by_host
        }

    def group_hostvars_by_var(self, hostvars, var, subkey=None):
        """
        Returns a dictionary where the keys are values for the
        specified var in hostvars, and the values are the hosts
        that match that value.

        For example, a grouping of hosts by OS release might look like:
        distribution_release:
            noble:
                - node1
                - node2
            jammy:
                - node3
                - node4
                - node5

        Some Ansible modules, such as ansible.builtin.command, return a
        dict rather than a single value. So 'subkey' is used for these cases
        to access the desired value.

        :param hostvars: mapping of host name to that host's variables.
        :param var: name of the variable to group by.
        :param subkey: optional key to look up inside the variable's value.
        :returns: dict mapping each distinct value to a list of host names.
        :raises AnsibleFilterError: if ``var`` (or ``subkey``) is missing
            for any host.
        """
        result = {}

        for host in hostvars:
            try:
                indiv_host_var = hostvars[host][var]
                if subkey is not None:
                    indiv_host_var = indiv_host_var[subkey]
            except KeyError as e:
                raise AnsibleFilterError(f"Variable {var} not found for host {host} in hostvars: {e}") from e
            result.setdefault(indiv_host_var, []).append(host)

        return result

    def get_hostvars_by_host(self, hostvars, var, subkey=None):
        """
        Returns a dictionary where the keys are hosts and the values
        are the values for the specified var in hostvars.

        For example, the deployed containers by host might look like:
        deployed_containers:
            node1:
                - grafana
                - glance
                - nova
                - prometheus
            node2:
                - designate
                - neutron
                - nova

        Some Ansible modules, such as ansible.builtin.command, return a
        dict rather than a single value. So 'subkey' is used for these cases
        to access the desired value.

        A host whose value is a dict containing a 'skipped' key (the task
        that should have assigned the value was skipped) is reported as
        "No data". Any other empty/falsy value is reported as an empty list.

        :param hostvars: mapping of host name to that host's variables.
        :param var: name of the variable to extract.
        :param subkey: optional key to look up inside the variable's value.
        :returns: dict mapping each host name to its value.
        :raises AnsibleFilterError: if ``var`` (or ``subkey``) is missing
            for any host that was not skipped.
        """
        result = {}

        for host in hostvars:
            try:
                indiv_host_var = hostvars[host][var]

                # Check if task to assign value was skipped. This must move
                # on to the next *host*: looking up 'subkey' in a skipped
                # result would fail, and falling through would overwrite
                # the "No data" marker with the raw result dict.
                if isinstance(indiv_host_var, dict) and "skipped" in indiv_host_var:
                    result[host] = "No data"
                    continue

                if subkey is not None:
                    indiv_host_var = indiv_host_var[subkey]
            except KeyError as e:
                raise AnsibleFilterError(f"Variable {var} not found for host {host} in hostvars: {e}") from e

            # Normalise empty values (None, "", [], {}) to an empty list.
            result[host] = indiv_host_var if indiv_host_var else []

        return result

0 commit comments

Comments
 (0)