Skip to content

Commit

Permalink
etl/runtime: update supported ETL python runtime versions
Browse files Browse the repository at this point in the history
* add python 3.9, 3.11, 3.12, 3.13
* default to python 3.13
* remove python 3.8

Signed-off-by: Tony Chen <[email protected]>
  • Loading branch information
Nahemah1022 committed Feb 7, 2025
1 parent c9f8b9d commit da2fc6d
Show file tree
Hide file tree
Showing 11 changed files with 39 additions and 25 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/test-python-etl.yml
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ jobs:
test:
strategy:
matrix:
python-version: ['3.8','3.9','3.10','3.11','3.12','3.13']
python-version: ['3.9','3.10','3.11','3.12','3.13']
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
Expand Down
6 changes: 3 additions & 3 deletions ais/test/etl_stress_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ def transform(input_bytes):
msg := etl.InitCodeMsg{
InitMsgBase: etl.InitMsgBase{IDX: "etl-build-conn-err", Timeout: etlBucketTimeout},
Code: []byte(timeoutFunc),
Runtime: runtime.Py38,
Runtime: runtime.Py39,
ChunkSize: 0,
}
msg.Funcs.Transform = "transform"
Expand Down Expand Up @@ -177,11 +177,11 @@ def transform(input_bytes):
{name: "spec-echo-golang", ty: etl.Spec, etlSpecName: tetl.EchoGolang},

{
name: "code-echo-py38",
name: "code-echo-py313",
ty: etl.Code,
etlCodeMsg: etl.InitCodeMsg{
Code: []byte(echoPythonTransform),
Runtime: runtime.Py38,
Runtime: runtime.Py313,
ChunkSize: 0,
},
},
Expand Down
6 changes: 3 additions & 3 deletions ais/test/etl_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -611,9 +611,9 @@ def transform(input_bytes: bytes) -> bytes:
chunkSize int64
onlyLong bool
}{
{etlName: "simple-py38", code: md5, deps: "", runtime: runtime.Py38, onlyLong: false},
{etlName: "simple-py38-stream", code: echo, deps: "", runtime: runtime.Py38, onlyLong: false, chunkSize: 64},
{etlName: "with-deps-py38", code: numpy, deps: numpyDeps, runtime: runtime.Py38, onlyLong: false},
{etlName: "simple-py39", code: md5, deps: "", runtime: runtime.Py39, onlyLong: false},
{etlName: "simple-py39-stream", code: echo, deps: "", runtime: runtime.Py39, onlyLong: false, chunkSize: 64},
{etlName: "with-deps-py39", code: numpy, deps: numpyDeps, runtime: runtime.Py39, onlyLong: false},
{etlName: "simple-py310-io", code: md5IO, deps: "", runtime: runtime.Py310, commType: etl.HpushStdin, onlyLong: false},
}
)
Expand Down
2 changes: 1 addition & 1 deletion cmd/cli/cli/const.go
Original file line number Diff line number Diff line change
Expand Up @@ -1111,7 +1111,7 @@ var (
}
runtimeFlag = cli.StringFlag{
Name: "runtime",
Usage: "Python version used to run the provided code (currently supported: python3.8v2, python3.10v2, python3.11v2)",
Usage: "Python version used to run the provided code (currently supported: python3.9v2, python3.10v2, python3.11v2, python3.12v2, python3.13v2)",
Required: true,
}
commTypeFlag = cli.StringFlag{
Expand Down
4 changes: 1 addition & 3 deletions docs/_posts/2021-10-22-ais-etl-2.md
Original file line number Diff line number Diff line change
Expand Up @@ -92,9 +92,7 @@ if __name__ == "__main__":

## Initializing

We will use `python3` (`python:3.8.5`) *runtime* to install `torch` and `torchvision` packages.

> [runtime](https://github.com/NVIDIA/ais-etl/tree/master/runtime) contains a predefined work environment in which the provided code/script will be run. We do support `python2` (`python:2.7.18`) and more runtimes are planned in the future.
We will use the `python3` (`python:3.9`) *runtime* to install the `torch` and `torchvision` packages.

To make sure that the imports in `code.py` (above) can be resolved, the following dependencies (`deps.txt`) must be installed:

Expand Down
4 changes: 3 additions & 1 deletion docs/etl.md
Original file line number Diff line number Diff line change
Expand Up @@ -240,9 +240,11 @@ Currently, the following runtimes are supported:

| Name | Description |
| --- | --- |
| `python3.8v2` | `python:3.8` is used to run the code. |
| `python3.9v2` | `python:3.9` is used to run the code. |
| `python3.10v2` | `python:3.10` is used to run the code. |
| `python3.11v2` | `python:3.11` is used to run the code. |
| `python3.12v2` | `python:3.12` is used to run the code. |
| `python3.13v2` | `python:3.13` is used to run the code. |

More *runtimes* will be added in the future, with plans to support the most popular ETL toolchains.
Still, since the number of supported *runtimes* will always remain somewhat limited, there is always a second option: build your own ETL container and deploy it via an [*init spec* request](#init-spec-request).
Expand Down
22 changes: 16 additions & 6 deletions ext/etl/runtime/all.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,11 @@ import (
)

const (
Py38 = "python3.8v2"
Py39 = "python3.9v2"
Py310 = "python3.10v2"
Py311 = "python3.11v2"
Py312 = "python3.12v2"
Py313 = "python3.13v2"
)

type (
Expand All @@ -25,9 +27,11 @@ type (
DepsEnvName() string
}
runbase struct{}
py38 struct{ runbase }
py39 struct{ runbase }
py310 struct{ runbase }
py311 struct{ runbase }
py312 struct{ runbase }
py313 struct{ runbase }
)

var (
Expand All @@ -51,8 +55,8 @@ func GetNames() (names []string) {
}

func init() {
all = make(map[string]runtime, 3)
for _, r := range []runtime{py38{}, py310{}, py311{}} {
all = make(map[string]runtime, 5)
for _, r := range []runtime{py39{}, py310{}, py311{}, py312{}, py313{}} {
if _, ok := all[r.Name()]; ok {
debug.Assert(false, "duplicate type "+r.Name())
} else {
Expand All @@ -65,11 +69,17 @@ func (runbase) CodeEnvName() string { return "AISTORE_CODE" }
func (runbase) DepsEnvName() string { return "AISTORE_DEPS" }

// container images: "aistorage/runtime_python:<TAG>"
func (py38) Name() string { return Py38 }
func (py38) PodSpec() string { return strings.ReplaceAll(pyPodSpec, "<TAG>", "3.8v2") }
func (py39) Name() string { return Py39 }
func (py39) PodSpec() string { return strings.ReplaceAll(pyPodSpec, "<TAG>", "3.9v2") }

func (py310) Name() string { return Py310 }
func (py310) PodSpec() string { return strings.ReplaceAll(pyPodSpec, "<TAG>", "3.10v2") }

func (py311) Name() string { return Py311 }
func (py311) PodSpec() string { return strings.ReplaceAll(pyPodSpec, "<TAG>", "3.11v2") }

func (py312) Name() string { return Py312 }
func (py312) PodSpec() string { return strings.ReplaceAll(pyPodSpec, "<TAG>", "3.12v2") }

func (py313) Name() string { return Py313 }
func (py313) PodSpec() string { return strings.ReplaceAll(pyPodSpec, "<TAG>", "3.13v2") }
2 changes: 2 additions & 0 deletions python/CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@ We structure this changelog in accordance with [Keep a Changelog](https://keepac

- Update project metadata and improve descriptions in `pyproject.toml`.

- Update supported ETL Python runtime versions, and set the default runtime version to Python 3.13.

## [1.11.0] - 2025-02-06

### Added
Expand Down
4 changes: 2 additions & 2 deletions python/aistore/sdk/etl/etl.py
Original file line number Diff line number Diff line change
Expand Up @@ -121,8 +121,8 @@ def init_code(
preimported_modules (list[str]): Modules to import before running the transform function. This can
be necessary in cases where the modules used both attempt to import each other circularly
runtime (str): [optional, default= V2 implementation of the current python version if supported, else
python3.8v2] Runtime environment of the ETL [choose from: python3.8v2, python3.10v2, python3.11v2]
(see ext/etl/runtime/all.go)
python3.13v2] Runtime environment of the ETL [choose from: python3.9v2, python3.10v2, python3.11v2,
python3.12v2, python3.13v2] (see ext/etl/runtime/all.go)
communication_type (str): [optional, default="hpush"] Communication type of the ETL (options: hpull, hrev,
hpush, io)
timeout (str): [optional, default="5m"] Timeout of the ETL job (e.g. 5m for 5 minutes)
Expand Down
4 changes: 2 additions & 2 deletions python/aistore/sdk/etl/etl_const.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# Defaults
DEFAULT_ETL_COMM = "hpush"
DEFAULT_ETL_TIMEOUT = "5m"
DEFAULT_ETL_RUNTIME = "python3.8v2"
DEFAULT_ETL_RUNTIME = "python3.13v2"

# ETL comm types
# ext/etl/api.go Hpush
Expand All @@ -16,7 +16,7 @@
ETL_COMM_CODE = [ETL_COMM_IO, ETL_COMM_HPUSH, ETL_COMM_HREV, ETL_COMM_HPULL]
ETL_COMM_SPEC = [ETL_COMM_HPUSH, ETL_COMM_HREV, ETL_COMM_HPULL]

ETL_SUPPORTED_PYTHON_VERSIONS = ["3.10", "3.11"]
ETL_SUPPORTED_PYTHON_VERSIONS = ["3.9", "3.10", "3.11", "3.12", "3.13"]

# templates for ETL

Expand Down
8 changes: 5 additions & 3 deletions python/tests/unit/sdk/test_etl.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,11 +76,13 @@ def init_spec_exec_assert(self, expected_action, **kwargs):

def test_init_code_default_runtime(self):
version_to_runtime = {
(3, 7): "python3.8v2",
(3, 1234): "python3.8v2",
(3, 8): "python3.8v2",
(3, 7): "python3.13v2",
(3, 1234): "python3.13v2",
(3, 8): "python3.13v2",
(3, 10): "python3.10v2",
(3, 11): "python3.11v2",
(3, 12): "python3.12v2",
(3, 13): "python3.13v2",
}
for version, runtime in version_to_runtime.items():
with patch.object(aistore.sdk.etl.etl.sys, "version_info") as version_info:
Expand Down

0 comments on commit da2fc6d

Please sign in to comment.