Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
134 commits
Select commit Hold shift + click to select a range
db0d7a6
create telemetry group
Aditya-DP Jul 7, 2025
9314fff
enable idrac telemetry k8s
Aditya-DP Jul 7, 2025
b8da3aa
telemetry host update
Aditya-DP Jul 7, 2025
9a89eeb
telemetry host update
Aditya-DP Jul 7, 2025
3bf496b
Merge branch 'pub/service_k8s_telemetry' of https://github.com/Aditya…
Aditya-DP Jul 7, 2025
077b45e
update k8s pod check
Aditya-DP Jul 7, 2025
41c70d5
mysql db pod node ip and nodeport
Aditya-DP Jul 7, 2025
635521b
Mysql pod svc nodeport
Aditya-DP Jul 7, 2025
047eec3
mysql svc update
Aditya-DP Jul 7, 2025
7d16056
Merge branch 'dell:pub/service_k8s_telemetry' into pub/service_k8s_te…
Aditya-DP Jul 7, 2025
6e5fce6
git repo dir path update
Aditya-DP Jul 8, 2025
216443f
Merge branch 'pub/service_k8s_telemetry' of https://github.com/Aditya…
Aditya-DP Jul 8, 2025
5f9cde1
lint fix
Aditya-DP Jul 8, 2025
7e89f75
telemetry update
Aditya-DP Jul 8, 2025
42b2f47
ansible host update
Aditya-DP Jul 8, 2025
143782d
update addhost
Aditya-DP Jul 8, 2025
f9d4389
host update
Aditya-DP Jul 8, 2025
844e3bc
Merge remote-tracking branch 'upstream/pub/service_k8s_telemetry' int…
Aditya-DP Jul 9, 2025
27a04c3
update telemetry restart
abhishek-sa1 Jul 10, 2025
384c2f8
update rescue
abhishek-sa1 Jul 10, 2025
c9a20b1
idrac telemetry stateful set
Aditya-DP Jul 10, 2025
b805ecd
adding the base code for powerscale plugins
sakshi-singla-1735 Jul 10, 2025
fdf7737
initial changes
sakshi-singla-1735 Jul 10, 2025
ed6b397
Merge pull request #3268 from dell/staging
abhishek-sa1 Jul 11, 2025
9da5df6
Merge branch 'dell:pub/service_cluster_telemetry' into pub/service_cl…
abhishek-sa1 Jul 11, 2025
914972a
updated code for plugins
Katakam-Rakesh Jul 10, 2025
763dec1
adding the calling method for csi in scheduler
sakshi-singla-1735 Jul 14, 2025
0f8c476
update telemetry
abhishek-sa1 Jul 14, 2025
fc44297
Update main.yml
abhishek-sa1 Jul 14, 2025
c115d3d
update commit id
abhishek-sa1 Jul 15, 2025
3ecb28f
Update main.yml
abhishek-sa1 Jul 15, 2025
565816a
telemetry pod update
Aditya-DP Jul 15, 2025
0f2555d
Merge branch 'dell:pub/service_cluster_telemetry' into pub/service_cl…
Aditya-DP Jul 15, 2025
ae915e1
Merge pull request #3285 from dell/staging
abhishek-sa1 Jul 15, 2025
0b7ae11
Merge branch 'dell:pub/service_cluster_telemetry' into pub/service_cl…
abhishek-sa1 Jul 15, 2025
6266545
Update main.yml
abhishek-sa1 Jul 15, 2025
2dbd598
Update main.yml
abhishek-sa1 Jul 15, 2025
aa4b43a
Update ansible.cfg
Aditya-DP Jul 15, 2025
59318d5
Update ansible.cfg
Aditya-DP Jul 15, 2025
46eabf9
Update ansible.cfg
Aditya-DP Jul 15, 2025
5e69bdf
Merge pull request #3286 from Aditya-DP/defect_fix
abhishek-sa1 Jul 15, 2025
ef27313
Merge branch 'dell:pub/service_cluster_telemetry' into pub/service_cl…
abhishek-sa1 Jul 15, 2025
ff212a9
restore enable telemetry changes
Aditya-DP Jul 15, 2025
3441988
Update idrac_telemetry_deployment.yml
Aditya-DP Jul 15, 2025
4f34cca
Update idrac_telemetry_receiver_init.sh.j2
abhishek-sa1 Jul 15, 2025
63c0368
Update idrac_telemetry_receiver_init.sh.j2
abhishek-sa1 Jul 15, 2025
b769a38
Merge branch 'dell:pub/service_cluster_telemetry' into pub/service_cl…
Aditya-DP Jul 16, 2025
5180200
Dynamically updating replica count using node count
Aditya-DP Jul 16, 2025
69c2111
lint fix
Aditya-DP Jul 16, 2025
dee4087
Added the validation code for the secret and value yaml file for csi
sakshi-singla-1735 Jul 16, 2025
8bf8736
Update main.yml
Aditya-DP Jul 16, 2025
ce39826
Update idrac_telemetry_deployment.yml
Aditya-DP Jul 16, 2025
53bef19
removing unused vars
Aditya-DP Jul 16, 2025
560514b
Modified the input validation for secret file as it will be encrypted
sakshi-singla-1735 Jul 16, 2025
b48c42b
Merge pull request #3266 from abhishek-sa1/pub/service_cluster_telemetry
priti-parate Jul 16, 2025
0e84050
Merge pull request #3295 from dell/staging
abhishek-sa1 Jul 16, 2025
4117abe
updated code
Katakam-Rakesh Jul 11, 2025
d977c57
Merge pull request #3287 from Aditya-DP/pub/service_cluster_telemetry
abhishek-sa1 Jul 17, 2025
a778ef8
updating kube prometheus stack code in telemetry
Katakam-Rakesh Jul 11, 2025
74d8fae
Merge branch 'pub/k8s_plugins' of github.com:Katakam-Rakesh/omnia int…
Katakam-Rakesh Jul 17, 2025
a086154
updating github workflow
Katakam-Rakesh Jul 17, 2025
be6f21f
Merge branch 'pub/k8s_plugins' of github.com:Katakam-Rakesh/omnia int…
Katakam-Rakesh Jul 17, 2025
c9dfdeb
updating condition
Katakam-Rakesh Jul 17, 2025
38b2b3d
Merge branch 'pub/k8s_plugins' of github.com:Katakam-Rakesh/omnia int…
Katakam-Rakesh Jul 18, 2025
ee71021
fixed review comments
Katakam-Rakesh Jul 18, 2025
32551de
Merge branch 'pub/k8s_plugins' of github.com:Katakam-Rakesh/omnia int…
Katakam-Rakesh Jul 18, 2025
7b27401
Merge pull request #3304 from dell/pub/service_cluster_telemetry
abhishek-sa1 Jul 18, 2025
99801be
Merge branch 'pub/k8s_plugins' into pub/k8s_plugins
abhishek-sa1 Jul 18, 2025
65e1b0d
Merge pull request #3297 from Katakam-Rakesh/pub/k8s_plugins
snarthan Jul 18, 2025
b047456
updated compute k8s changes in local_repo
Katakam-Rakesh Jul 18, 2025
6a70a70
Merge branch 'pub/compute_k8s' of https://github.com/dell/omnia into …
Katakam-Rakesh Jul 18, 2025
1a5aa21
updated conditions for kube prometheus
Katakam-Rakesh Jul 18, 2025
90f4111
Merge branch 'pub/k8s_plugins' of github.com:Katakam-Rakesh/omnia int…
Katakam-Rakesh Jul 21, 2025
6e9243e
Update telemetry.yml
Katakam-Rakesh Jul 21, 2025
0507e57
Merge pull request #3310 from Katakam-Rakesh/pub/k8s_plugins
abhishek-sa1 Jul 21, 2025
bb6c50f
changed code to use ansible standard k8s libraries
sakshi-singla-1735 Jul 21, 2025
6e04316
Merge branch 'pub/k8s_plugins' into pub/k8s_plugins
sakshi-singla-1735 Jul 21, 2025
84c4cbc
updated compute k8s code in inventory validation
Katakam-Rakesh Jul 18, 2025
66da2a0
Merge branch 'pub/compute_k8s' of https://github.com/Katakam-Rakesh/o…
Katakam-Rakesh Jul 21, 2025
69c21b0
updated code
Katakam-Rakesh Jul 21, 2025
2e8e7fc
Merge branch 'pub/compute_k8s' of github.com:Katakam-Rakesh/omnia int…
Katakam-Rakesh Jul 21, 2025
f169479
changes wrt powerscale service_k8s_deployment
sakshi-singla-1735 Jul 22, 2025
a11e524
ansible lint fixes
sakshi-singla-1735 Jul 22, 2025
b03e0dc
changing vault path
sakshi-singla-1735 Jul 22, 2025
ae3dd2c
separate file for csi validation
sakshi-singla-1735 Jul 22, 2025
04554cc
separating csi validation
sakshi-singla-1735 Jul 22, 2025
e26b832
updated code
Katakam-Rakesh Jul 21, 2025
9da62e4
Merge branch 'pub/compute_k8s' of github.com:Katakam-Rakesh/omnia int…
Katakam-Rakesh Jul 22, 2025
75644bc
added code for storage class
sakshi-singla-1735 Jul 23, 2025
475acab
adding the csi in examples file
sakshi-singla-1735 Jul 24, 2025
7637f91
updating github workflow
Katakam-Rakesh Jul 22, 2025
de52954
Merge branch 'pub/compute_k8s' of github.com:Katakam-Rakesh/omnia int…
Katakam-Rakesh Jul 25, 2025
cb54d3d
Update scheduler.yml
sakshi-singla-1735 Jul 28, 2025
7add8e1
Update service_k8s_cluster.yml
sakshi-singla-1735 Jul 28, 2025
61e6cbd
fix: fixed ROCm package installation
balajikumaran-c-s Jul 17, 2025
a406bfa
lint fix
balajikumaran-c-s Jul 18, 2025
3c7239a
Defect fixes
jagadeeshnv Jul 20, 2025
384395e
input config handling ha case
jagadeeshnv Jul 21, 2025
6a85aef
ansible lint fix
Aditya-DP Jul 22, 2025
dd314f4
Update configure_postscripts_additional_softwares.yml
Aditya-DP Jul 22, 2025
4a9a774
lint fix
Aditya-DP Jul 22, 2025
0a3ddc6
ignore lint issue
Aditya-DP Jul 22, 2025
6d0f01e
fix: fixed ROCm package installation
balajikumaran-c-s Jul 17, 2025
774fb9d
lint fix
balajikumaran-c-s Jul 18, 2025
514e59a
Defect fixes
jagadeeshnv Jul 20, 2025
f1067cf
input config handling ha case
jagadeeshnv Jul 21, 2025
77daabf
ansible lint fix
Aditya-DP Jul 22, 2025
00344e7
Update configure_postscripts_additional_softwares.yml
Aditya-DP Jul 22, 2025
addcb33
lint fix
Aditya-DP Jul 22, 2025
4624848
ignore lint issue
Aditya-DP Jul 22, 2025
d65bb6a
Merge branch 'pub/compute_k8s' into pub/compute_k8s
Katakam-Rakesh Jul 29, 2025
419b556
Merge pull request #3273 from sakshi-singla-1735/pub/k8s_plugins
snarthan Jul 29, 2025
38d53b9
updated code to make csi as default storage class
Katakam-Rakesh Jul 29, 2025
baad4e4
Merge branch 'pub/k8s_plugins' of github.com:Katakam-Rakesh/omnia int…
Katakam-Rakesh Jul 29, 2025
869f9e1
updating csi code
Katakam-Rakesh Jul 29, 2025
3a1a24e
Merge branch 'pub/k8s_plugins' of github.com:Katakam-Rakesh/omnia int…
Katakam-Rakesh Jul 29, 2025
50f450d
Merge pull request #3340 from Katakam-Rakesh/pub/k8s_plugins
snarthan Jul 29, 2025
e1d3de6
pulp with docker creds
Nagachandan-P Jul 29, 2025
4d96d84
Merge pull request #3328 from Katakam-Rakesh/pub/compute_k8s
snarthan Jul 30, 2025
b2fbe7c
Merge branch 'pub/k8s_plugins' into pub/compute_k8s
snarthan Jul 30, 2025
7e4a9b9
Merge pull request #3348 from dell/pub/compute_k8s
snarthan Jul 30, 2025
4d81976
initial creds commit and conflicts
sakshi-singla-1735 Jul 31, 2025
47aabfa
Update csi_driver_validation.py
sakshi-singla-1735 Jul 31, 2025
3c6f619
Merge branch 'dell:pub/k8s_plugins' into pub/k8s_plugins
Nagachandan-P Aug 5, 2025
05ae829
Merge pull request #3346 from Nagachandan-P/pub/k8s_plugins
snarthan Aug 5, 2025
c4302b9
csi patch fix
sakshi-singla-1735 Aug 5, 2025
6be21d5
Delete scheduler/inv
sakshi-singla-1735 Aug 5, 2025
94492c5
input validation with updated compute_k8s for HA
jagadeeshnv Aug 5, 2025
6c62422
csi creds update
sakshi-singla-1735 Aug 5, 2025
b7f2221
Merge pull request #3368 from jagadeeshnv/pub/k8s_plugins
jagadeeshnv Aug 5, 2025
d31417e
ansible lint and pylint fixes
sakshi-singla-1735 Aug 6, 2025
2c1a7ce
review comment
sakshi-singla-1735 Aug 6, 2025
a7dff8d
Merge pull request #3372 from sakshi-singla-1735/pub/k8s_plugins
snarthan Aug 6, 2025
f50ee4f
Merge branch 'pub/slurm_multi_arch' into pub/k8s_plugins
jagadeeshnv Aug 7, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .github/workflows/ansible-lint.yml
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ on:
- pub/multiple_login_node
- pub/local_repo_arch
- pub/k8s_plugins
- pub/slurm_multi_arch

jobs:
build:
Expand Down
1 change: 1 addition & 0 deletions .github/workflows/pylint.yml
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ on:
- pub/multiple_login_node
- pub/local_repo_arch
- pub/k8s_plugins
- pub/slurm_multi_arch

jobs:
build:
Expand Down
2 changes: 1 addition & 1 deletion accelerator/ansible.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -14,4 +14,4 @@ connect_timeout = 180

[ssh_connection]
retries = 3
ssh_args = -o ControlMaster=auto -o ControlPersist=60 -o ConnectTimeout=60
ssh_args = -o ControlMaster=auto -o ControlPersist=60 -o ConnectTimeout=60
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,7 @@
files["omnia_config"],
files["high_availability_config"]
],
"k8s": [
"compute_k8s": [
files['roles_config'],
files["omnia_config"],
files["high_availability_config"]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -130,5 +130,17 @@
"maxLength": 128,
"pattern": "^(?!admin$)[^\\\\\\-'\"]+$",
"description": "Password for grafana UI. Should not be kept 'admin. Length must be at least 5 characters and must not contain backslashes (\\), hyphens (-), single quotes ('), or double quotes (\\\")."
},
"csi_username": {
"minLength": 4,
"maxLength": 64,
"description": "Username for Powerscale UI. Must not contain semicolons (;), square brackets ([]), or backticks (`).",
"pattern": "^[^;\\[\\]`]+$"
},
"csi_password": {
"description": "Password for Powerscale UI. Must not contain hyphens (-), single quotes ('), double quotes (\"), at symbols (@), or backslashes (\\).",
"minLength": 5,
"maxLength": 32,
"pattern": "^[^\\-\\'\\\"@\\\\]*$"
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,16 @@
"k8s_offline_install": {
"type": "boolean",
"description": "Whether to pull packages/images from local repo."
},
"csi_powerscale_driver_secret_file_path": {
"description": "Absolute file path for the secret.yaml file.",
"type": "string",
"pattern": "^(|/?([a-zA-Z0-9._-]+/)*[a-zA-Z0-9._-]+\\.yaml)$"
},
"csi_powerscale_driver_values_file_path": {
"description": "File path for the values.yaml file.",
"type": "string",
"pattern": "^(|/?([a-zA-Z0-9._-]+/)*[a-zA-Z0-9._-]+\\.yaml)$"
}
},
"required": [
Expand All @@ -74,6 +84,20 @@
"then": {
"required": ["topology_manager_scope"]
}
},
{
"if": {
"properties": {
"csi_powerscale_driver_secret_file_path": {
"type": "string",
"minLength": 1
}
},
"required": ["csi_powerscale_driver_secret_file_path"]
},
"then": {
"required": ["csi_powerscale_driver_values_file_path"]
}
}
]
}
Expand Down Expand Up @@ -122,6 +146,16 @@
"k8s_offline_install": {
"type": "boolean",
"description": "Whether to pull packages/images from local repo."
},
"csi_powerscale_driver_secret_file_path": {
"description": "Absolute file path for the secret.yaml file.",
"type": "string",
"pattern": "^(|/?([a-zA-Z0-9._-]+/)*[a-zA-Z0-9._-]+\\.yaml)$"
},
"csi_powerscale_driver_values_file_path": {
"description": "File path for the values.yaml file.",
"type": "string",
"pattern": "^(|/?([a-zA-Z0-9._-]+/)*[a-zA-Z0-9._-]+\\.yaml)$"
}
},
"required": [
Expand All @@ -137,6 +171,20 @@
"then": {
"required": ["topology_manager_scope"]
}
},
{
"if": {
"properties": {
"csi_powerscale_driver_secret_file_path": {
"type": "string",
"minLength": 1
}
},
"required": ["csi_powerscale_driver_secret_file_path"]
},
"then": {
"required": ["csi_powerscale_driver_values_file_path"]
}
}
]
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,15 @@
"title": "Telemetry Configuration",
"type": "object",
"properties": {
"kube_prometheus_support": {
"type": "boolean"
},
"prometheus_scrape_interval": {
"type": "integer",
"minimum": 1,
"default": 15
},

"idrac_telemetry_support": {
"type": "boolean"
},
Expand All @@ -17,6 +26,8 @@
}
},
"required": [
"kube_prometheus_support",
"prometheus_scrape_interval",
"idrac_telemetry_support",
"visualization_support",
"federated_idrac_telemetry_collection"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,13 +17,13 @@
"""
import json
import os
import ipaddress
import yaml
import ipaddress
import subprocess
from ast import literal_eval
import ansible.module_utils.input_validation.common_utils.data_fetch as get
from ansible.module_utils.input_validation.validation_flows import csi_driver_validation
import ansible.module_utils.input_validation.common_utils.data_validation as validate
from ansible.modules.validate_input import generate_log_failure_message

from ansible.module_utils.input_validation.common_utils import (
validation_utils,
config,
Expand All @@ -32,7 +32,6 @@
)

from ansible.module_utils.input_validation.validation_flows import scheduler_validation

from ansible.module_utils.local_repo.software_utils import (
load_json,
set_version_variables,
Expand Down Expand Up @@ -947,7 +946,11 @@ def is_ip_in_range(ip_str, ip_range_str):
except ValueError:
return False

def validate_k8s(data, admin_bmc_networks, softwares, ha_config, tag_names, errors):



def validate_k8s(data, admin_bmc_networks, softwares, ha_config, tag_names, errors,
omnia_base_dir, project_name, logger, module, input_file_path):
"""
Validates Kubernetes cluster configurations.

Expand All @@ -962,10 +965,10 @@ def validate_k8s(data, admin_bmc_networks, softwares, ha_config, tag_names, erro
bmc_static_range = admin_bmc_networks["bmc_network"]["static_range"]
bmc_dynamic_range = admin_bmc_networks["bmc_network"]["dynamic_range"]
primary_oim_admin_ip = admin_bmc_networks["admin_network"]["primary_oim_admin_ip"]

# service_k8s_cluster = data["service_k8s_cluster"]
cluster_set = {}
if "k8s" in softwares and "k8s" in tag_names:
if "compute_k8s" in softwares and "compute_k8s" in tag_names:
cluster_set["compute_k8s_cluster"] = data.get(
"compute_k8s_cluster", [])
if "service_k8s" in softwares and "service_k8s" in tag_names:
Expand Down Expand Up @@ -997,7 +1000,7 @@ def validate_k8s(data, admin_bmc_networks, softwares, ha_config, tag_names, erro
f"{cluster_name} not found in high_availability_config.yml"
))
pod_external_ip_range = kluster.get("pod_external_ip_range")
if not pod_external_ip_range:
if not pod_external_ip_range or str(pod_external_ip_range).strip() == "":
errors.append(
create_error_msg(
"Pod External IP Range -",
Expand Down Expand Up @@ -1031,7 +1034,41 @@ def validate_k8s(data, admin_bmc_networks, softwares, ha_config, tag_names, erro
create_error_msg(
"IP overlap -",
None,
en_us_validation_msg.IP_OVERLAP_FAIL_MSG))
en_us_validation_msg.IP_OVERLAP_FAIL_MSG))

#csi validation
if (
"csi_driver_powerscale" in softwares
and ("k8s" in softwares or "service_k8s" in softwares)
):

csi_secret_file_path = kluster.get("csi_powerscale_driver_secret_file_path")
csi_values_file_path = kluster.get("csi_powerscale_driver_values_file_path")

# Validate secret file path
if not csi_secret_file_path or \
not csi_secret_file_path.strip() or \
not os.path.exists(csi_secret_file_path.strip()):
errors.append(
create_error_msg(
"csi_powerscale_driver_secret_file_path",
csi_secret_file_path,
en_us_validation_msg.CSI_DRIVER_SECRET_FAIL_MSG,
)
)
else:
# If secret path is valid, ensure values path is also valid
if not csi_values_file_path or \
not csi_values_file_path.strip() or \
not os.path.exists(csi_values_file_path.strip()):
errors.append(
create_error_msg(
"csi_powerscale_driver_values_file_path",
csi_values_file_path,
en_us_validation_msg.CSI_DRIVER_VALUES_FAIL_MSG,
)
)
csi_driver_validation.validate_powerscale_secret_and_values_file(csi_secret_file_path,csi_values_file_path, errors, input_file_path)

def validate_omnia_config(
input_file_path,
Expand Down Expand Up @@ -1077,8 +1114,8 @@ def validate_omnia_config(
)
)

if ("k8s" in sw_list or "service_k8s" in sw_list) and \
("k8s" in tag_names or "service_k8s" in tag_names):
if ("compute_k8s" in sw_list or "service_k8s" in sw_list) and \
("compute_k8s" in tag_names or "service_k8s" in tag_names):
admin_bmc_networks = get_admin_bmc_networks(
input_file_path, logger, module, omnia_base_dir, module_utils_base, project_name)
ha_config_path = create_file_path(
Expand All @@ -1087,7 +1124,8 @@ def validate_omnia_config(
ha_config = yaml.safe_load(f)
for k in ["service_k8s_cluster_ha", "compute_k8s_cluster_ha"]:
ha_config[k] = [xha["cluster_name"] for xha in ha_config.get(k, [])]
validate_k8s(data, admin_bmc_networks, sw_list, ha_config, tag_names, errors)
validate_k8s(data, admin_bmc_networks, sw_list, ha_config, tag_names,
errors, omnia_base_dir, project_name, logger, module, input_file_path)
return errors

def validate_telemetry_config(
Expand Down Expand Up @@ -1233,3 +1271,4 @@ def validate_additional_software(
)
)
return errors

Loading