From 6b1d059eda21c1bd421f3d352786fca2cab61954 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20G=C3=B3rny?= Date: Sat, 18 Jan 2025 05:18:37 +0100 Subject: [PATCH] Support ROCM builds from source distribution, and improve error handling (#1446) * Always update both submodules to include them in sdist Always update both submodules, irrespective of whether a CUDA or a ROCM build is being done, to ensure that the necessary files from both are present in sdist. Otherwise, an attempt to perform a ROCM build from sdist fails because of missing `composable_kernel` sources. * Include `*.py` files from composable_kernel in sdist Include the `*.py` files from `csrc` in sdist, to ensure that the `generate.py` script is present. * Replace the `os.system()` calls in `setup.py` with `subprocess.run()` * Add error checking to `subprocess.run()` calls in `setup.py` Add error checking to ensure that `setup.py` fails immediately if one of the commands fails. Otherwise, the failures result only in messages to stderr that could be missed, and could lead to more confusing errors later in the build process. * Call git in `setup.py` only when working in a git repository Call git commands in `setup.py` only when the `.git` directory is present, indicating that we are working in a git checkout. Otherwise, just assert that the needed files are there. With this, building from a source distribution no longer attempts to call git in an incorrect directory. 
--- MANIFEST.in | 1 + setup.py | 24 ++++++++++++++++-------- 2 files changed, 17 insertions(+), 8 deletions(-) diff --git a/MANIFEST.in b/MANIFEST.in index 021b4d0f7..d3c4b4eda 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -3,6 +3,7 @@ recursive-include csrc *.h recursive-include csrc *.cuh recursive-include csrc *.cpp recursive-include csrc *.hpp +recursive-include csrc *.py recursive-include flash_attn *.cu recursive-include flash_attn *.h diff --git a/setup.py b/setup.py index a802a7e65..264b0eed5 100644 --- a/setup.py +++ b/setup.py @@ -145,11 +145,19 @@ def validate_and_update_archs(archs): # We want this even if SKIP_CUDA_BUILD because when we run python setup.py sdist we want the .hpp # files included in the source distribution, in case the user compiles from source. -if IS_ROCM: - if not USE_TRITON_ROCM: - subprocess.run(["git", "submodule", "update", "--init", "csrc/composable_kernel"]) +if os.path.isdir(".git"): + subprocess.run(["git", "submodule", "update", "--init", "csrc/composable_kernel"], check=True) + subprocess.run(["git", "submodule", "update", "--init", "csrc/cutlass"], check=True) else: - subprocess.run(["git", "submodule", "update", "--init", "csrc/cutlass"]) + if IS_ROCM: + if not USE_TRITON_ROCM: + assert ( + os.path.exists("csrc/composable_kernel/example/ck_tile/01_fmha/generate.py") + ), "csrc/composable_kernel is missing, please use source distribution or git clone" + else: + assert ( + os.path.exists("csrc/cutlass/include/cutlass/cutlass.h") + ), "csrc/cutlass is missing, please use source distribution or git clone" if not SKIP_CUDA_BUILD and not IS_ROCM: print("\n\ntorch.__version__ = {}\n\n".format(torch.__version__)) @@ -324,10 +332,10 @@ def validate_and_update_archs(archs): if not os.path.exists("./build"): os.makedirs("build") - os.system(f"{sys.executable} {ck_dir}/example/ck_tile/01_fmha/generate.py -d fwd --output_dir build --receipt 2") - os.system(f"{sys.executable} {ck_dir}/example/ck_tile/01_fmha/generate.py -d fwd_appendkv 
--output_dir build --receipt 2") - os.system(f"{sys.executable} {ck_dir}/example/ck_tile/01_fmha/generate.py -d fwd_splitkv --output_dir build --receipt 2") - os.system(f"{sys.executable} {ck_dir}/example/ck_tile/01_fmha/generate.py -d bwd --output_dir build --receipt 2") + subprocess.run([sys.executable, f"{ck_dir}/example/ck_tile/01_fmha/generate.py", "-d", "fwd", "--output_dir", "build", "--receipt", "2"], check=True) + subprocess.run([sys.executable, f"{ck_dir}/example/ck_tile/01_fmha/generate.py", "-d", "fwd_appendkv", "--output_dir", "build", "--receipt", "2"], check=True) + subprocess.run([sys.executable, f"{ck_dir}/example/ck_tile/01_fmha/generate.py", "-d", "fwd_splitkv", "--output_dir", "build", "--receipt", "2"], check=True) + subprocess.run([sys.executable, f"{ck_dir}/example/ck_tile/01_fmha/generate.py", "-d", "bwd", "--output_dir", "build", "--receipt", "2"], check=True) # Check, if ATen/CUDAGeneratorImpl.h is found, otherwise use ATen/cuda/CUDAGeneratorImpl.h # See https://github.com/pytorch/pytorch/pull/70650