Skip to content

Commit ac36647

Browse files
colin2328 authored and meta-codesync[bot] committed
fix ImportError: libpython3.10.so.1.0: cannot open shared object file: No such file or directory (#1616)
Summary: ImportError: libpython3.10.so.1.0: cannot open shared object file: No such file or directory to fix, we embeds RPATH entries into the compiled Rust shared libraries, telling the dynamic linker where to search for libpython3.10.so.1.0 at runtime (shown by D84991141 by shayne-fletcher) in order to built torchsys e2e (and handle both cuda 12.6 and 12.9), improve cuda path discovery also fix bug in device_mesh Pull Request resolved: #1616 Test Plan: pip install torch pip install "torchmonarch[examples]==0.1.0rc7" (monarch) [[email protected] ~/monarch (test-import)]$ python docs/source/examples/distributed_tensors.py Imported from GitHub, without a `Test Plan:` line. Reviewed By: shayne-fletcher Differential Revision: D85020138 Pulled By: colin2328 fbshipit-source-id: c701c762ea071cba513caf8d24f8f76b899ddbe8
1 parent feaa364 commit ac36647

File tree

5 files changed

+26
-18
lines changed

5 files changed

+26
-18
lines changed

build_utils/src/lib.rs

Lines changed: 18 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -103,14 +103,14 @@ pub fn get_env_var_with_rerun(name: &str) -> Result<String, std::env::VarError>
103103
///
104104
/// This function attempts to locate CUDA installation through:
105105
/// 1. CUDA_HOME environment variable
106-
/// 2. CUDA_PATH environment variable
106+
/// 2. CUDA_PATH environment variable
107107
/// 3. Finding nvcc in PATH and deriving cuda home
108108
/// 4. Platform-specific default locations
109109
pub fn find_cuda_home() -> Option<String> {
110110
// Guess #1: Environment variables
111-
let mut cuda_home = env::var("CUDA_HOME")
111+
let mut cuda_home = get_env_var_with_rerun("CUDA_HOME")
112112
.ok()
113-
.or_else(|| env::var("CUDA_PATH").ok());
113+
.or_else(|| get_env_var_with_rerun("CUDA_PATH").ok());
114114

115115
if cuda_home.is_none() {
116116
// Guess #2: Find nvcc in PATH
@@ -151,14 +151,18 @@ pub fn discover_cuda_config() -> Result<CudaConfig, BuildError> {
151151
lib_dirs: Vec::new(),
152152
};
153153

154-
// Add standard include directory
155-
let include_dir = cuda_home_path.join("include");
156-
if include_dir.exists() {
157-
config.include_dirs.push(include_dir);
154+
// Add standard include directories
155+
// Check both old-style (include) and new-style (targets/x86_64-linux/include) CUDA installations
156+
for include_subdir in &["include", "targets/x86_64-linux/include"] {
157+
let include_dir = cuda_home_path.join(include_subdir);
158+
if include_dir.exists() {
159+
config.include_dirs.push(include_dir);
160+
}
158161
}
159162

160163
// Add standard library directories
161-
for lib_subdir in &["lib64", "lib", "lib/x64"] {
164+
// Check both old-style (lib64, lib) and new-style (targets/x86_64-linux/lib) CUDA installations
165+
for lib_subdir in &["lib64", "lib", "lib/x64", "targets/x86_64-linux/lib"] {
162166
let lib_dir = cuda_home_path.join(lib_subdir);
163167
if lib_dir.exists() {
164168
config.lib_dirs.push(lib_dir);
@@ -197,13 +201,12 @@ pub fn get_cuda_lib_dir() -> Result<String, BuildError> {
197201
// Try to deduce from CUDA configuration
198202
let cuda_config = discover_cuda_config()?;
199203
if let Some(cuda_home) = cuda_config.cuda_home {
200-
let lib64_path = cuda_home.join("lib64");
201-
if lib64_path.exists() {
202-
return Ok(lib64_path.to_string_lossy().to_string());
203-
}
204-
let lib_path = cuda_home.join("lib");
205-
if lib_path.exists() {
206-
return Ok(lib_path.to_string_lossy().to_string());
204+
// Check both old-style and new-style CUDA library paths
205+
for lib_subdir in &["lib64", "lib", "targets/x86_64-linux/lib"] {
206+
let lib_path = cuda_home.join(lib_subdir);
207+
if lib_path.exists() {
208+
return Ok(lib_path.to_string_lossy().to_string());
209+
}
207210
}
208211
}
209212

python/monarch/_src/actor/v1/proc_mesh.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -292,7 +292,7 @@ def _spawn_nonblocking(
292292
return self._spawn_nonblocking_on(self._proc_mesh, name, Class, *args, **kwargs)
293293

294294
def to_table(self) -> str:
295-
return self._maybe_device_mesh.to_table()
295+
return self._device_mesh.to_table()
296296

297297
def _spawn_nonblocking_on(
298298
self,

setup.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -124,6 +124,7 @@ def run(self):
124124
# Always include the active env's lib (Conda-safe)
125125
conda_lib = os.path.join(sys.prefix, "lib")
126126

127+
# Only use LIBDIR if it actually contains the current libpython
127128
ldlib = sysconfig.get_config_var("LDLIBRARY") or ""
128129
libdir = sysconfig.get_config_var("LIBDIR") or ""
129130
py_lib = ""

torch-sys-cuda/build.rs

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,9 @@ fn main() {
2828
let mut libtorch_lib_dir: Option<PathBuf> = None;
2929
let mut cxx11_abi = None;
3030
let mut cuda_home: Option<PathBuf> = None;
31-
let python_interpreter = PathBuf::from("python");
31+
let python_interpreter = std::env::var("PYO3_PYTHON")
32+
.map(PathBuf::from)
33+
.unwrap_or_else(|_| PathBuf::from("python"));
3234

3335
let use_pytorch_apis = build_utils::get_env_var_with_rerun("TORCH_SYS_USE_PYTORCH_APIS")
3436
.unwrap_or_else(|_| "1".to_owned());

torch-sys/build.rs

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,9 @@ fn main() {
2626
let mut libtorch_include_dirs: Vec<PathBuf> = vec![];
2727
let mut libtorch_lib_dir: Option<PathBuf> = None;
2828
let mut cxx11_abi = None;
29-
let python_interpreter = PathBuf::from("python");
29+
let python_interpreter = std::env::var("PYO3_PYTHON")
30+
.map(PathBuf::from)
31+
.unwrap_or_else(|_| PathBuf::from("python"));
3032

3133
let use_pytorch_apis = build_utils::get_env_var_with_rerun("TORCH_SYS_USE_PYTORCH_APIS")
3234
.unwrap_or_else(|_| "1".to_owned());

0 commit comments

Comments (0)