From 90d9fd77cf4fb448404fc4e24be8da657b0b4cda Mon Sep 17 00:00:00 2001 From: Robert Clark Date: Thu, 16 Jan 2025 14:52:07 -0600 Subject: [PATCH] Set k8s imagePullPolicy to Always To handle corner cases where an updated image is pushed to a container registry under an existing tag, the imagePullPolicy for k8s jobs should be "Always" so that the container registry is always checked for a newer version of the image, even if one is already cached locally. This shouldn't negatively impact performance, as the image is not re-pulled if it already exists locally and there isn't a newer one available on the registry. Signed-off-by: Robert Clark --- launcher_scripts/nemo_launcher/core/v2/step_k8s.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/launcher_scripts/nemo_launcher/core/v2/step_k8s.py b/launcher_scripts/nemo_launcher/core/v2/step_k8s.py index f21b26b15..35fa8f629 100644 --- a/launcher_scripts/nemo_launcher/core/v2/step_k8s.py +++ b/launcher_scripts/nemo_launcher/core/v2/step_k8s.py @@ -142,6 +142,7 @@ def create_pytorchjob_resource( container = V1Container( name="pytorch", image=image, + image_pull_policy="Always", command=command, args=args, env=to_env_list(env), @@ -256,6 +257,7 @@ def create_mpijob_resource( launch_container = V1Container( name="mpi-launcher", image=image, + image_pull_policy="Always", command=command, args=args, env=to_env_list(env), @@ -265,6 +267,7 @@ def create_mpijob_resource( worker_container = V1Container( name="mpi-worker", image=image, + image_pull_policy="Always", command=["/usr/sbin/sshd"], args=["-De"], env=to_env_list(env),