diff --git a/tools/pytorchjob-generator/chart/README.md b/tools/pytorchjob-generator/chart/README.md index 887b95e..08b5c9c 100644 --- a/tools/pytorchjob-generator/chart/README.md +++ b/tools/pytorchjob-generator/chart/README.md @@ -41,6 +41,7 @@ customize the Jobs generated by the tool. | Key | Type | Default | Description | |-----|------|---------|-------------| | environmentVariables | array | `nil` | List of variables/values to be defined for all the ranks. Values can be literals or references to Kuberetes secrets or configmaps. See [values.yaml](values.yaml) for examples of supported syntaxes. NOTE: The following standard [PyTorch Distributed environment variables](https://pytorch.org/docs/stable/distributed.html#environment-variable-initialization) are set automatically and can be referenced in the commands without being set manually: WORLD_SIZE, RANK, MASTER_ADDR, MASTER_PORT. | +| envFrom | array | `nil` | List of ConfigMaps or Secrets specifying environment variables. See [values.yaml](values.yaml) for examples of supported syntaxes. NOTE: the environmentVariables field takes precedence over envFrom. mlbatch also performs some automatic checks on the environmentVariables passed by the user, such as checking that the user does not specify NCCL_TOPO_FILE when topologyFileConfigMap is also provided. These checks are *not* performed on any environment variables inherited from envFrom. | | sshGitCloneConfig | object | `nil` | Private GitHub clone support. See [values.yaml](values.yaml) for additional instructions. | | setupCommands | array | no custom commands are executed | List of custom commands to be ran at the beginning of the execution. Use `setupCommand` to clone code, download data, and change directories. | | mainProgram | string | `nil` | Name of the PyTorch program to be executed by `torchrun`. Please provide your program name here and NOT in "setupCommands" as this helm template provides the necessary "torchrun" arguments for the parallel execution. 
WARNING: this program is relative to the current path set by change-of-directory commands in "setupCommands". If no value is provided; then only `setupCommands` are executed and torchrun is elided. | diff --git a/tools/pytorchjob-generator/chart/templates/appwrapper.yaml b/tools/pytorchjob-generator/chart/templates/appwrapper.yaml index 7702e3e..aef07e9 100644 --- a/tools/pytorchjob-generator/chart/templates/appwrapper.yaml +++ b/tools/pytorchjob-generator/chart/templates/appwrapper.yaml @@ -116,6 +116,10 @@ spec: {{- include "mlbatch.volumes" . | indent 38 }} containers: - name: pytorch + {{- if .Values.envFrom }} + envFrom: + {{- toYaml .Values.envFrom | nindent 46 }} + {{- end }} image: {{ required "Please specify a 'containerImage' in the user file" .Values.containerImage }} imagePullPolicy: {{ .Values.imagePullPolicy | default "IfNotPresent" }} {{- include "mlbatch.securityContext" . | indent 44 }} @@ -139,6 +143,10 @@ spec: {{- include "mlbatch.volumes" . | indent 38 }} containers: - name: pytorch + {{- if .Values.envFrom }} + envFrom: + {{- toYaml .Values.envFrom | nindent 46 }} + {{- end }} image: {{ required "Please specify a 'containerImage' in the user file" .Values.containerImage }} imagePullPolicy: {{ .Values.imagePullPolicy | default "IfNotPresent" }} {{- include "mlbatch.securityContext" . 
| indent 44 }} diff --git a/tools/pytorchjob-generator/chart/values.schema.json b/tools/pytorchjob-generator/chart/values.schema.json index 9bebe8f..9e60cdf 100644 --- a/tools/pytorchjob-generator/chart/values.schema.json +++ b/tools/pytorchjob-generator/chart/values.schema.json @@ -42,6 +42,10 @@ { "type": "null" }, { "type": "array" } ]}, + "envFrom": { "oneOf": [ + { "type": "null" }, + { "type": "array" } + ]}, "sshGitCloneConfig": { "oneOf": [ { "type": "null" }, { diff --git a/tools/pytorchjob-generator/chart/values.yaml b/tools/pytorchjob-generator/chart/values.yaml index 0b60656..586df0e 100644 --- a/tools/pytorchjob-generator/chart/values.yaml +++ b/tools/pytorchjob-generator/chart/values.yaml @@ -101,6 +101,23 @@ environmentVariables: # name: configmap-name # key: configmap-key + +# -- (array) List of ConfigMaps or Secrets specifying environment variables. See +# [values.yaml](values.yaml) for examples of supported syntaxes. +# +# NOTE: the environmentVariables field takes precedence over envFrom. mlbatch also performs some +# automatic checks on the environmentVariables passed by the user, such as checking that the user +# does not specify NCCL_TOPO_FILE when topologyFileConfigMap is also provided. These checks are +# *not* performed on any environment variables inherited from envFrom. +# @section -- Workload Specification +envFrom: +# - secretRef: +# name: my-secrets +# - secretRef: +# name: my-other-secrets +# - configMapRef: +# name: my-config-map + # Private GitHub clone support. # # 0) Create a secret and configMap to enable Private GitHub cloning as documented for your organization.