@@ -5790,6 +5790,11 @@ static void ggml_cuda_pool_free(void * ptr, size_t size) {
     CUDA_CHECK(cudaFree(ptr));
 }
 
+static bool g_cublas_loaded = false;
+
+bool ggml_cublas_loaded(void) {
+    return g_cublas_loaded;
+}
 
 void ggml_init_cublas() {
     static bool initialized = false;
@@ -5803,7 +5808,12 @@ void ggml_init_cublas() {
         CUDA_CHECK(cudaDeviceSynchronize());
 #endif
 
-        CUDA_CHECK(cudaGetDeviceCount(&g_device_count));
+        if (cudaGetDeviceCount(&g_device_count) != cudaSuccess) {
+            initialized = true;
+            g_cublas_loaded = false;
+            return;
+        }
+
         GGML_ASSERT(g_device_count <= GGML_CUDA_MAX_DEVICES);
         int64_t total_vram = 0;
 #if defined(GGML_CUDA_FORCE_MMQ)
@@ -5851,6 +5861,7 @@ void ggml_init_cublas() {
         // CUBLAS_CHECK(cublasLoggerConfigure(1, 1, 0, nullptr));
 
         initialized = true;
+        g_cublas_loaded = true;
     }
 }
 
@@ -7158,6 +7169,8 @@ static void ggml_cuda_rms_norm(const ggml_tensor * src0, const ggml_tensor * src
 }
 
 bool ggml_cuda_can_mul_mat(const struct ggml_tensor * src0, const struct ggml_tensor * src1, struct ggml_tensor * dst) {
+    if (!g_cublas_loaded) return false;
+
     const int64_t ne10 = src1->ne[0];
 
     const int64_t ne0 = dst->ne[0];
@@ -7843,6 +7856,8 @@ void ggml_cuda_free_scratch() {
 }
 
 bool ggml_cuda_compute_forward(struct ggml_compute_params * params, struct ggml_tensor * tensor) {
+    if (!g_cublas_loaded) return false;
+
     ggml_cuda_func_t func;
     const bool any_on_device = tensor->backend == GGML_BACKEND_GPU
         || (tensor->src[0] != nullptr && (tensor->src[0]->backend == GGML_BACKEND_GPU || tensor->src[0]->backend == GGML_BACKEND_GPU_SPLIT))
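For context, a minimal caller-side sketch of how the new ggml_cublas_loaded() accessor can be used together with ggml_init_cublas(). The main() wrapper, the manual extern "C" declarations, and the use_gpu flag are hypothetical and only illustrate the fallback decision; they are not part of this diff.

// Hypothetical usage sketch (not from the patch): initialize the CUDA backend,
// then query whether cuBLAS actually loaded before relying on GPU offload.
#include <cstdio>

extern "C" {
    void ggml_init_cublas(void);   // existing initializer, now fails gracefully
    bool ggml_cublas_loaded(void); // new accessor introduced by this diff
}

int main() {
    ggml_init_cublas();            // no longer hard-fails when device enumeration fails
    const bool use_gpu = ggml_cublas_loaded();
    std::printf("cuBLAS %s; %s\n",
                use_gpu ? "loaded" : "not loaded",
                use_gpu ? "GPU offload available" : "falling back to CPU-only paths");
    return 0;
}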