diff --git a/ggml/src/ggml-alloc.c b/ggml/src/ggml-alloc.c
index a3d3f690133b0..82da0bd47841c 100644
--- a/ggml/src/ggml-alloc.c
+++ b/ggml/src/ggml-alloc.c
@@ -982,6 +982,11 @@ static bool alloc_tensor_range(struct ggml_context * ctx,
 }
 
 ggml_backend_buffer_t ggml_backend_alloc_ctx_tensors_from_buft(struct ggml_context * ctx, ggml_backend_buffer_type_t buft) {
+
+    if (buft == NULL) {
+        // Fall back to CPU buffer type
+        return ggml_backend_alloc_ctx_tensors_from_buft(ctx, ggml_backend_cpu_buffer_type());
+    }
     GGML_ASSERT(ggml_get_no_alloc(ctx) == true);
 
     size_t alignment = ggml_backend_buft_get_alignment(buft);
diff --git a/ggml/src/ggml-backend.cpp b/ggml/src/ggml-backend.cpp
index 273075f4e5455..5b26aec7fca36 100644
--- a/ggml/src/ggml-backend.cpp
+++ b/ggml/src/ggml-backend.cpp
@@ -36,6 +36,11 @@ const char * ggml_backend_buft_name(ggml_backend_buffer_type_t buft) {
 }
 
 ggml_backend_buffer_t ggml_backend_buft_alloc_buffer(ggml_backend_buffer_type_t buft, size_t size) {
+    if (buft == NULL) {
+        // Fall back to CPU buffer type
+        return ggml_backend_buft_alloc_buffer(ggml_backend_cpu_buffer_type(), size);
+    }
+
     if (size == 0) {
         // return a dummy buffer for zero-sized allocations
         return ggml_backend_buffer_init(buft, {}, NULL, 0);
@@ -45,11 +50,20 @@ ggml_backend_buffer_t ggml_backend_buft_alloc_buffer(ggml_backend_buffer_type_t
 }
 
 size_t ggml_backend_buft_get_alignment(ggml_backend_buffer_type_t buft) {
+    if (buft == NULL) {
+        // Return a safe default alignment or use CPU buffer type's alignment
+        return ggml_backend_buft_get_alignment(ggml_backend_cpu_buffer_type());
+    }
     return buft->iface.get_alignment(buft);
 }
 
 size_t ggml_backend_buft_get_max_size(ggml_backend_buffer_type_t buft) {
     // get_max_size is optional, defaults to SIZE_MAX
+    if (buft == NULL) {
+        // Return a safe default (CPU buffer type's max size)
+        return ggml_backend_buft_get_max_size(ggml_backend_cpu_buffer_type());
+    }
+
     if (buft->iface.get_max_size) {
         return buft->iface.get_max_size(buft);
     }
@@ -58,6 +72,11 @@ size_t ggml_backend_buft_get_max_size(ggml_backend_buffer_type_t buft) {
 
 size_t ggml_backend_buft_get_alloc_size(ggml_backend_buffer_type_t buft, struct ggml_tensor * tensor) {
     // get_alloc_size is optional, defaults to ggml_nbytes
+    if (buft == NULL) {
+        // Return ggml_nbytes as fallback
+        return ggml_nbytes(tensor);
+    }
+
     if (buft->iface.get_alloc_size) {
         size_t size = buft->iface.get_alloc_size(buft, tensor);
         assert(size >= ggml_nbytes(tensor));
@@ -67,6 +86,10 @@ size_t ggml_backend_buft_get_alloc_size(ggml_backend_buffer_type_t buft, struct
 }
 
 bool ggml_backend_buft_is_host(ggml_backend_buffer_type_t buft) {
+    if (buft == NULL) {
+        return true; // CPU is host, so assume true for NULL
+    }
+
     if (buft->iface.is_host) {
         return buft->iface.is_host(buft);
     }
@@ -74,6 +97,9 @@ bool ggml_backend_buft_is_host(ggml_backend_buffer_type_t buft) {
 }
 
 ggml_backend_dev_t ggml_backend_buft_get_device(ggml_backend_buffer_type_t buft) {
+    if (buft == NULL) {
+        return NULL;
+    }
     return buft->device;
 }
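Every `ggml_backend_buft_*` accessor above gets the same guard: a NULL buffer type is redirected to `ggml_backend_cpu_buffer_type()` instead of being dereferenced. A minimal sketch of that idiom, outside the patch; the `resolve_buft` helper is hypothetical and only for illustration:

```cpp
#include "ggml-backend.h"   // declares ggml_backend_cpu_buffer_type() and the buft accessors

// Hypothetical helper: map a possibly-NULL buffer type to the CPU buffer type once,
// so the fallback is visible at the call site rather than hidden in each accessor.
static ggml_backend_buffer_type_t resolve_buft(ggml_backend_buffer_type_t buft) {
    return buft != NULL ? buft : ggml_backend_cpu_buffer_type();
}

static ggml_backend_buffer_t alloc_with_fallback(ggml_backend_buffer_type_t buft, size_t size) {
    // With the guards in this patch a NULL buft is already safe to pass through;
    // resolving it explicitly documents the intent.
    return ggml_backend_buft_alloc_buffer(resolve_buft(buft), size);
}
```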
diff --git a/ggml/src/ggml-vulkan/ggml-vulkan.cpp b/ggml/src/ggml-vulkan/ggml-vulkan.cpp
index ff53bdfbe171c..7ca4af0742deb 100644
--- a/ggml/src/ggml-vulkan/ggml-vulkan.cpp
+++ b/ggml/src/ggml-vulkan/ggml-vulkan.cpp
@@ -293,30 +293,62 @@ struct vk_device_struct {
     std::unique_ptr<vk_perf_logger> perf_logger;
 #endif
 
-    ~vk_device_struct() {
-        VK_LOG_DEBUG("destroy device " << name);
-
-        device.destroyFence(fence);
-
-        ggml_vk_destroy_buffer(sync_staging);
-
-        device.destroyCommandPool(compute_queue.pool);
-        if (!single_queue) {
-            device.destroyCommandPool(transfer_queue.pool);
-        }
-
-        for (auto& pipeline : pipelines) {
-            if (pipeline.second.expired()) {
-                continue;
-            }
-
-            vk_pipeline pl = pipeline.second.lock();
-            ggml_vk_destroy_pipeline(device, pl);
-        }
-        pipelines.clear();
-
-        device.destroy();
-    }
+    ~vk_device_struct() {
+        VK_LOG_DEBUG("destroy device " << name);
+
+        if (device != VK_NULL_HANDLE) {
+            try {
+                // Only destroy fence if it's valid
+                if (fence != VK_NULL_HANDLE) {
+                    device.destroyFence(fence);
+                    fence = VK_NULL_HANDLE;
+                }
+
+                // Only destroy buffer if it exists
+                if (sync_staging != VK_NULL_HANDLE) {
+                    ggml_vk_destroy_buffer(sync_staging);
+                    sync_staging = VK_NULL_HANDLE;
+                }
+
+                // Check if command pool is valid before destroying
+                if (compute_queue.pool != VK_NULL_HANDLE) {
+                    device.destroyCommandPool(compute_queue.pool);
+                    compute_queue.pool = VK_NULL_HANDLE;
+                }
+
+                // Only destroy transfer queue if using separate queues and it's valid
+                if (!single_queue && transfer_queue.pool != VK_NULL_HANDLE) {
+                    device.destroyCommandPool(transfer_queue.pool);
+                    transfer_queue.pool = VK_NULL_HANDLE;
+                }
+
+                // Clean up pipelines safely
+                for (auto& pipeline : pipelines) {
+                    if (pipeline.second.expired()) {
+                        continue;
+                    }
+
+                    vk_pipeline pl = pipeline.second.lock();
+                    if (pl != nullptr) {
+                        ggml_vk_destroy_pipeline(device, pl);
+                    }
+                }
+                pipelines.clear();
+
+                // Finally destroy the device
+                device.destroy();
+                device = VK_NULL_HANDLE;
+            }
+            catch (const std::exception& e) {
+                std::cerr << "Warning: Exception during Vulkan device cleanup: " << e.what() << std::endl;
+                // Continue with destruction despite errors
+            }
+            catch (...) {
+                std::cerr << "Warning: Unknown exception during Vulkan device cleanup" << std::endl;
+                // Continue with destruction despite errors
+            }
+        }
+    }
 };
 
 struct vk_buffer_struct {
@@ -771,6 +803,8 @@ struct vk_instance_t {
 };
 
 static bool vk_instance_initialized = false;
+// Global flag to track if Vulkan initialization has failed
+static bool g_vulkan_init_failed = false;
 static vk_instance_t vk_instance;
 
 #ifdef GGML_VULKAN_CHECK_RESULTS
@@ -2252,32 +2286,37 @@ static bool ggml_vk_khr_cooperative_matrix_support(const vk::PhysicalDevicePrope
 static vk_device ggml_vk_get_device(size_t idx) {
     VK_LOG_DEBUG("ggml_vk_get_device(" << idx << ")");
 
-    if (vk_instance.devices[idx] == nullptr) {
-        VK_LOG_DEBUG("Initializing new vk_device");
-        vk_device device = std::make_shared<vk_device_struct>();
-        vk_instance.devices[idx] = device;
+    if (g_vulkan_init_failed) {
+        return nullptr;
+    }
+
+    try {
+        if (vk_instance.devices[idx] == nullptr) {
+            VK_LOG_DEBUG("Initializing new vk_device");
+            vk_device device = std::make_shared<vk_device_struct>();
+            vk_instance.devices[idx] = device;
 #ifdef GGML_VULKAN_MEMORY_DEBUG
-        device->memory_logger = std::unique_ptr<vk_memory_logger>(new vk_memory_logger());
+            device->memory_logger = std::unique_ptr<vk_memory_logger>(new vk_memory_logger());
 #endif
 #ifdef GGML_VULKAN_PERF
-        device->perf_logger = std::unique_ptr<vk_perf_logger>(new vk_perf_logger());
+            device->perf_logger = std::unique_ptr<vk_perf_logger>(new vk_perf_logger());
 #endif
 
         size_t dev_num = vk_instance.device_indices[idx];
 
-        std::vector<vk::PhysicalDevice> physical_devices = vk_instance.instance.enumeratePhysicalDevices();
+            std::vector<vk::PhysicalDevice> physical_devices = vk_instance.instance.enumeratePhysicalDevices();
 
-        if (dev_num >= physical_devices.size()) {
-            std::cerr << "ggml_vulkan: Device with index " << dev_num << " does not exist." << std::endl;
-            throw std::runtime_error("Device not found");
-        }
+            if (dev_num >= physical_devices.size()) {
+                std::cerr << "ggml_vulkan: Device with index " << dev_num << " does not exist." << std::endl;
+                throw std::runtime_error("Device not found");
+            }
 
-        device->physical_device = physical_devices[dev_num];
-        const std::vector<vk::ExtensionProperties> ext_props = device->physical_device.enumerateDeviceExtensionProperties();
+            device->physical_device = physical_devices[dev_num];
+            const std::vector<vk::ExtensionProperties> ext_props = device->physical_device.enumerateDeviceExtensionProperties();
 
-        const char* GGML_VK_PREFER_HOST_MEMORY = getenv("GGML_VK_PREFER_HOST_MEMORY");
-        device->prefer_host_memory = GGML_VK_PREFER_HOST_MEMORY != nullptr;
+            const char* GGML_VK_PREFER_HOST_MEMORY = getenv("GGML_VK_PREFER_HOST_MEMORY");
+            device->prefer_host_memory = GGML_VK_PREFER_HOST_MEMORY != nullptr;
 
         bool fp16_storage = false;
         bool fp16_compute = false;
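The rewritten destructor follows a conventional guarded-teardown shape: only touch handles that were actually created, null them after destruction, and never let an exception escape a destructor. A reduced sketch of that shape, illustrative only; `guarded_device` is not a type from the patch:

```cpp
#include <vulkan/vulkan.hpp>

struct guarded_device {
    vk::Device device;
    vk::Fence  fence;

    ~guarded_device() {
        if (!device) {
            return;                   // device was never created: nothing to tear down
        }
        try {
            if (fence) {
                device.destroyFence(fence);
                fence = vk::Fence{};  // prevent a double destroy
            }
            device.destroy();
            device = vk::Device{};
        } catch (...) {
            // a destructor must not throw; swallow and continue
        }
    }
};
```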
@@ -2689,7 +2728,29 @@ static vk_device ggml_vk_get_device(size_t idx) {
             device_extensions
         };
         device_create_info.setPNext(&device_features2);
-        device->device = device->physical_device.createDevice(device_create_info);
+        try {
+            // Attempt to create device
+            device->device = device->physical_device.createDevice(device_create_info);
+        }
+        catch (const vk::ExtensionNotPresentError& ext_error) {
+            // Specific handling for extension not supported
+            std::cerr << "Vulkan Extension Error: " << ext_error.what() << std::endl;
+            std::cerr << "Critical extension not supported. Falling back to CPU backend." << std::endl;
+
+            return nullptr;
+        }
+        catch (const vk::SystemError& sys_error) {
+            // Catch any other Vulkan system errors
+            std::cerr << "Vulkan Device Creation Error: " << sys_error.what() << std::endl;
+            std::cerr << "Failed to create Vulkan device. Falling back to CPU backend." << std::endl;
+            return nullptr;
+        }
+        catch (const std::exception& general_error) {
+            // Catch any standard exceptions
+            std::cerr << "Unexpected error during Vulkan device creation: " << general_error.what() << std::endl;
+            std::cerr << "Falling back to CPU backend." << std::endl;
+            return nullptr;
+        }
 
         // Queues
         ggml_vk_create_queue(device, device->compute_queue, compute_queue_family_index, 0, { vk::PipelineStageFlagBits::eComputeShader | vk::PipelineStageFlagBits::eTransfer }, false);
@@ -2752,6 +2813,14 @@ static vk_device ggml_vk_get_device(size_t idx) {
     }
 
     return vk_instance.devices[idx];
+    }
+    catch (const std::exception& e) {
+        // Set global flag on error
+        g_vulkan_init_failed = true;
+        return nullptr;
+    }
+
+
 }
 
 static void ggml_vk_print_gpu_info(size_t idx) {
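The catch order around `createDevice` relies on the Vulkan-Hpp exception hierarchy: specific errors such as `vk::ExtensionNotPresentError` derive from `vk::SystemError`, which in turn derives from `std::exception`, so handlers must go from most to least specific. A standalone sketch of the same structure; the function and variable names here are placeholders, not code from the patch:

```cpp
#include <iostream>
#include <vulkan/vulkan.hpp>

// Returns a null vk::Device on failure; the caller treats that as "fall back to CPU".
static vk::Device try_create_device(vk::PhysicalDevice physical_device,
                                    const vk::DeviceCreateInfo & create_info) {
    try {
        return physical_device.createDevice(create_info);
    } catch (const vk::ExtensionNotPresentError & e) {
        std::cerr << "missing required extension: " << e.what() << std::endl;
    } catch (const vk::SystemError & e) {
        std::cerr << "Vulkan error: " << e.what() << std::endl;
    } catch (const std::exception & e) {
        std::cerr << "unexpected error: " << e.what() << std::endl;
    }
    return {};
}
```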
@@ -3043,22 +3112,74 @@ static void ggml_vk_instance_init() {
 }
 
 static void ggml_vk_init(ggml_backend_vk_context * ctx, size_t idx) {
-    VK_LOG_DEBUG("ggml_vk_init(" << ctx->name << ", " << idx << ")");
-    ggml_vk_instance_init();
-    GGML_ASSERT(idx < vk_instance.device_indices.size());
-    ctx->name = GGML_VK_NAME + std::to_string(idx);
+    if (g_vulkan_init_failed) {
+        ctx->device = nullptr;
+        return;
+    }
+    try {
+        VK_LOG_DEBUG("ggml_vk_init(" << ctx->name << ", " << idx << ")");
+        ctx->device = nullptr;
+        // Wrap instance initialization in a try-catch block
+        try {
+            ggml_vk_instance_init();
+        }
+        catch (const std::exception& instance_init_error) {
+            std::cerr << "Vulkan instance initialization failed: "
+                      << instance_init_error.what() << std::endl;
+
+            // Set device to nullptr to indicate initialization failure
+            ctx->device = nullptr;
+            return;
+        }
 
-    ctx->device = ggml_vk_get_device(idx);
+        // Check device index validity
+        if (idx >= vk_instance.device_indices.size()) {
+            std::cerr << "Invalid Vulkan device index: " << idx << std::endl;
+            ctx->device = nullptr;
+            return;
+        }
 
-    ctx->semaphore_idx = 0;
-    ctx->event_idx = 0;
+        ctx->name = GGML_VK_NAME + std::to_string(idx);
 
-    ctx->prealloc_size_x = 0;
-    ctx->prealloc_size_y = 0;
-    ctx->prealloc_size_split_k = 0;
+        // Attempt to get device with error handling
+        vk_device device = ggml_vk_get_device(idx);
+
+        // Check if device initialization failed
+        if (nullptr == device) {
+            std::cerr << "Failed to initialize Vulkan device at index " << idx << std::endl;
+            ctx->device = nullptr;
+            return;
+        }
+
+        ctx->device = device;
+        ctx->semaphore_idx = 0;
+        ctx->event_idx = 0;
+        ctx->prealloc_size_x = 0;
+        ctx->prealloc_size_y = 0;
+        ctx->prealloc_size_split_k = 0;
 
-    ctx->fence = ctx->device->device.createFence({});
+        // CRITICAL: very explicit check before trying to create a fence
+        if (ctx->device == nullptr || ctx->device->device == VK_NULL_HANDLE) {
+            std::cerr << "WARNING: Device is null or invalid, skipping fence creation" << std::endl;
+            return;
+        }
+
+        if (ctx->device) {
+
+            // Wrap fence creation in try-catch to handle potential Vulkan errors
+            try {
+                ctx->fence = ctx->device->device.createFence({});
+            }
+            catch (const vk::SystemError& fence_error) {
+                std::cerr << "Failed to create Vulkan fence: " << fence_error.what() << std::endl;
+                // Optionally, you might want to reset the device or handle this differently
+                ctx->device = nullptr;
+                // Set global flag
+                g_vulkan_init_failed = true;
+                return;
+            }
+        }
 
 #ifdef GGML_VULKAN_CHECK_RESULTS
     const char* skip_checks = getenv("GGML_VULKAN_SKIP_CHECKS");
@@ -3066,6 +3187,15 @@ static void ggml_vk_init(ggml_backend_vk_context * ctx, size_t idx) {
     const char* output_tensor = getenv("GGML_VULKAN_OUTPUT_TENSOR");
     vk_output_tensor = (output_tensor == NULL ? 0 : atoi(output_tensor));
 #endif
+    }
+    catch (const std::exception& unexpected_error) {
+        // Catch-all for any unexpected errors
+        std::cerr << "Unexpected error during Vulkan initialization: "
+                  << unexpected_error.what() << std::endl;
+
+        // Ensure device is set to nullptr to indicate initialization failure
+        ctx->device = nullptr;
+    }
 }
 
 static vk_pipeline ggml_vk_get_to_fp16(ggml_backend_vk_context * ctx, ggml_type type) {
@@ -8032,11 +8162,28 @@ static size_t ggml_backend_vk_buffer_type_get_alloc_size(ggml_backend_buffer_typ
 }
 
 ggml_backend_buffer_type_t ggml_backend_vk_buffer_type(size_t dev_num) {
-    ggml_vk_instance_init();
-
-    VK_LOG_DEBUG("ggml_backend_vk_buffer_type(" << dev_num << ")");
-
-    vk_device dev = ggml_vk_get_device(dev_num);
+    // Check if Vulkan initialization previously failed
+    if (g_vulkan_init_failed) {
+        return nullptr;
+    }
+
+    try {
+        ggml_vk_instance_init();
+    } catch (const std::exception& e) {
+        VK_LOG_DEBUG("ggml_backend_vk_buffer_type: Vulkan instance init failed: " << e.what());
+        g_vulkan_init_failed = true;
+        return nullptr;
+    }
+
+    VK_LOG_DEBUG("ggml_backend_vk_buffer_type(" << dev_num << ")");
+
+    vk_device dev = ggml_vk_get_device(dev_num);
+    if (!dev) {
+        VK_LOG_DEBUG("ggml_backend_vk_buffer_type: Failed to get device " << dev_num);
+        g_vulkan_init_failed = true;
+        return nullptr;
+    }
 
     return &dev->buffer_type;
 }
@@ -8092,6 +8239,13 @@ static size_t ggml_backend_vk_host_buffer_type_get_alignment(ggml_backend_buffer
 // Should be changed to return device-specific host buffer type
 // but that probably requires changes in llama.cpp
 ggml_backend_buffer_type_t ggml_backend_vk_host_buffer_type() {
+
+    // Check if Vulkan initialization previously failed
+    if (g_vulkan_init_failed) {
+        // Return CPU buffer type as fallback when Vulkan fails
+        return ggml_backend_cpu_buffer_type();
+    }
+
     static struct ggml_backend_buffer_type ggml_backend_vk_buffer_type_host = {
         /* .iface = */ {
             /* .get_name = */ ggml_backend_vk_host_buffer_type_name,
@@ -8105,11 +8259,24 @@ ggml_backend_buffer_type_t ggml_backend_vk_host_buffer_type() {
         /* .context = */ nullptr,
     };
 
-    // Make sure device 0 is initialized
-    ggml_vk_instance_init();
-    ggml_vk_get_device(0);
+    // Make sure device 0 is initialized
+    try {
+        ggml_vk_instance_init();
+        vk_device dev = ggml_vk_get_device(0);
 
-    return &ggml_backend_vk_buffer_type_host;
+        if (!dev) {
+            g_vulkan_init_failed = true;
+            return ggml_backend_cpu_buffer_type();
+        }
+
+        // Only set the device if initialization succeeded
+        ggml_backend_vk_buffer_type_host.device = ggml_backend_reg_dev_get(ggml_backend_vk_reg(), 0);
+
+        return &ggml_backend_vk_buffer_type_host;
+    } catch (const std::exception& e) {
+        g_vulkan_init_failed = true;
+        return ggml_backend_cpu_buffer_type();
+    }
 }
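Worth noting how the two halves of the patch are meant to meet: `ggml_backend_vk_buffer_type()` can now return `nullptr`, and the NULL-tolerant `ggml_backend_buft_*` helpers from ggml-backend.cpp then quietly resolve that to the CPU buffer type. A sketch of that flow under those assumptions; the selection function itself is hypothetical:

```cpp
#include "ggml-backend.h"
#include "ggml-vulkan.h"   // ggml_backend_vk_buffer_type()

static ggml_backend_buffer_type_t pick_weight_buft(size_t dev_num) {
    // May be nullptr when Vulkan initialization failed.
    ggml_backend_buffer_type_t buft = ggml_backend_vk_buffer_type(dev_num);

    // With this patch, the accessors below no longer crash on a NULL buft:
    // they report the CPU buffer type's properties instead.
    size_t alignment = ggml_backend_buft_get_alignment(buft);
    size_t max_size  = ggml_backend_buft_get_max_size(buft);
    (void) alignment;
    (void) max_size;

    return buft;   // callers must still be prepared for nullptr
}
```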
@@ -8342,17 +8509,60 @@ static ggml_guid_t ggml_backend_vk_guid() {
 ggml_backend_t ggml_backend_vk_init(size_t dev_num) {
     VK_LOG_DEBUG("ggml_backend_vk_init(" << dev_num << ")");
 
-    ggml_backend_vk_context * ctx = new ggml_backend_vk_context;
-    ggml_vk_init(ctx, dev_num);
-
-    ggml_backend_t vk_backend = new ggml_backend {
-        /* .guid = */ ggml_backend_vk_guid(),
-        /* .interface = */ ggml_backend_vk_interface,
-        /* .device = */ ggml_backend_reg_dev_get(ggml_backend_vk_reg(), dev_num),
-        /* .context = */ ctx,
-    };
-
-    return vk_backend;
+    // First check if Vulkan initialization previously failed
+    if (g_vulkan_init_failed) {
+        std::cerr << "Vulkan initialization previously failed, skipping.\n";
+        return nullptr;
+    }
+
+    VK_LOG_DEBUG("ggml_backend_vk_init(" << dev_num << ")");
+
+    ggml_backend_vk_context* ctx = new ggml_backend_vk_context;
+
+    try {
+        // Initialize Vulkan context
+        ggml_vk_init(ctx, dev_num);
+
+        // Check if device initialization failed
+        if (!ctx->device) {
+            std::cerr << "Vulkan device initialization failed. Falling back to CPU backend." << std::endl;
+
+            // Cleanup Vulkan context
+            delete ctx;
+            return nullptr;
+        }
+
+        // Create Vulkan backend
+        ggml_backend_t vk_backend = new ggml_backend {
+            /* .guid = */ ggml_backend_vk_guid(),
+            /* .interface = */ ggml_backend_vk_interface,
+            /* .device = */ ggml_backend_reg_dev_get(ggml_backend_vk_reg(), dev_num),
+            /* .context = */ ctx,
+        };
+
+        return vk_backend;
+    }
+    catch (const std::exception& e) {
+
+        g_vulkan_init_failed = true;
+        // Catch any unexpected errors during initialization
+        std::cerr << "Critical error in Vulkan backend initialization: "
+                  << e.what() << ". Falling back to CPU backend." << std::endl;
+
+        // Cleanup Vulkan context
+        delete ctx;
+        return nullptr;
+    }
+    catch (...) {
+        g_vulkan_init_failed = true;
+        // Catch any unknown errors
+        std::cerr << "Unknown error during Vulkan backend initialization. Falling back to CPU backend." << std::endl;
+
+        // Cleanup Vulkan context
+        delete ctx;
+        return nullptr;
+    }
 }
 
 bool ggml_backend_is_vk(ggml_backend_t backend) {
@@ -8739,6 +8949,12 @@ static size_t ggml_backend_vk_reg_get_device_count(ggml_backend_reg_t reg) {
 }
 
 static ggml_backend_dev_t ggml_backend_vk_reg_get_device(ggml_backend_reg_t reg, size_t device) {
+
+    // Check global flag first
+    if (g_vulkan_init_failed) {
+        return nullptr;
+    }
+
     static std::vector<ggml_backend_dev_t> devices;
 
     static bool initialized = false;
@@ -8763,6 +8979,9 @@ static ggml_backend_dev_t ggml_backend_vk_reg_get_device(ggml_backend_reg_t reg,
             initialized = true;
         }
     }
+    if (devices.empty() || device >= devices.size()) {
+        return nullptr;
+    }
 
     GGML_ASSERT(device < devices.size());
     return devices[device];
@@ -8776,6 +8995,10 @@ static const struct ggml_backend_reg_i ggml_backend_vk_reg_i = {
 };
 
 ggml_backend_reg_t ggml_backend_vk_reg() {
+
+    if (g_vulkan_init_failed) {
+        return nullptr;
+    }
     static ggml_backend_reg reg = {
         /* .api_version = */ GGML_BACKEND_API_VERSION,
         /* .iface = */ ggml_backend_vk_reg_i,
@@ -8785,6 +9008,7 @@ ggml_backend_reg_t ggml_backend_vk_reg() {
         ggml_vk_instance_init();
         return &reg;
     } catch (const vk::SystemError& e) {
+        g_vulkan_init_failed = true;
        VK_LOG_DEBUG("ggml_backend_vk_reg() -> Error: System error: " << e.what());
         return nullptr;
     }
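With `ggml_backend_vk_init()` returning `nullptr` instead of crashing, an application can fall back to the CPU backend explicitly. A sketch of that usage; header names as in current ggml trees, and the selection logic is illustrative:

```cpp
#include "ggml-backend.h"
#include "ggml-cpu.h"      // ggml_backend_cpu_init()
#include "ggml-vulkan.h"   // ggml_backend_vk_init()

static ggml_backend_t init_best_backend(void) {
    ggml_backend_t backend = ggml_backend_vk_init(0);  // nullptr when Vulkan is unusable
    if (backend == nullptr) {
        backend = ggml_backend_cpu_init();             // CPU backend is always available
    }
    return backend;
}
```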
diff --git a/src/llama-model.cpp b/src/llama-model.cpp
index 1da4eae7e63e2..14f91ee4c39e2 100644
--- a/src/llama-model.cpp
+++ b/src/llama-model.cpp
@@ -235,15 +235,28 @@ using buft_list_t = std::vector<std::pair<ggml_backend_dev_t, ggml_backend_buffer_type_t>>;
 // CPU: ACCEL -> CPU extra -> GPU host -> CPU
@@ -304,30 +317,57 @@ static buft_list_t make_cpu_buft_list(const std::vector<ggml_backend_dev_t> & de
 static buft_list_t make_gpu_buft_list(ggml_backend_dev_t dev, enum llama_split_mode split_mode, const float * tensor_split) {
     buft_list_t buft_list;
 
-    // add the device split buffer type if requested and available
-    if (split_mode == LLAMA_SPLIT_MODE_ROW) {
-        ggml_backend_reg_t reg = ggml_backend_dev_backend_reg(dev);
-        auto ggml_backend_split_buffer_type_fn = (ggml_backend_split_buffer_type_t)
-            ggml_backend_reg_get_proc_address(reg, "ggml_backend_split_buffer_type");
-        if (ggml_backend_split_buffer_type_fn) {
-            size_t dev_index = [&]() {
-                auto * reg = ggml_backend_dev_backend_reg(dev);
-                for (size_t i = 0; i < ggml_backend_reg_dev_count(reg); ++i) {
-                    if (ggml_backend_reg_dev_get(reg, i) == dev) {
-                        return i;
-                    }
-                }
-                throw std::runtime_error(format("device %s not found in its backend reg", ggml_backend_dev_name(dev)));
-            }();
-            auto * buft = ggml_backend_split_buffer_type_fn(dev_index, tensor_split);
-            if (buft != nullptr) {
-                buft_list.emplace_back(dev, buft);
-            }
-        }
-    }
-
-    // add the device default buffer type
-    buft_list.emplace_back(dev, ggml_backend_dev_buffer_type(dev));
+    // Try to add device buffer types, but be prepared for failures
+    try {
+        // Check if the device is valid/available
+        if (dev == nullptr) {
+            return buft_list; // Return empty list if device is null
+        }
+
+        // add the device split buffer type if requested and available
+        if (split_mode == LLAMA_SPLIT_MODE_ROW) {
+            ggml_backend_reg_t reg = ggml_backend_dev_backend_reg(dev);
+            if (reg != nullptr) {
+                auto ggml_backend_split_buffer_type_fn = (ggml_backend_split_buffer_type_t)
+                    ggml_backend_reg_get_proc_address(reg, "ggml_backend_split_buffer_type");
+                if (ggml_backend_split_buffer_type_fn) {
+                    size_t dev_index = 0;
+                    bool found = false;
+
+                    // Find device index more safely
+                    for (size_t i = 0; i < ggml_backend_reg_dev_count(reg); ++i) {
+                        if (ggml_backend_reg_dev_get(reg, i) == dev) {
+                            dev_index = i;
+                            found = true;
+                            break;
+                        }
+                    }
+
+                    if (found) {
+                        auto * buft = ggml_backend_split_buffer_type_fn(dev_index, tensor_split);
+                        if (buft != nullptr) {
+                            buft_list.emplace_back(dev, buft);
+                        }
+                    }
+                }
+            }
+        }
+
+        // add the device default buffer type if it's available
+        ggml_backend_buffer_type_t dev_buft = ggml_backend_dev_buffer_type(dev);
+        if (dev_buft != nullptr) {
+            buft_list.emplace_back(dev, dev_buft);
+        }
+    }
+    catch (const std::exception& e) {
+        // Log the error but continue
+        const char* dev_name = dev ? ggml_backend_dev_name(dev) : "unknown";
+        //std::cerr << "Error adding buffer types for device " << dev_name << ": " << e.what() << std::endl;
+        //std::cerr << "Will fall back to other available buffer types" << std::endl;
+
+        // Return an empty list which will be filled with other buffer types later
+        buft_list.clear();
+    }
 
     return buft_list;
 }
diff --git a/src/llama.cpp b/src/llama.cpp
index 607f278615969..ef4463f095e65 100644
--- a/src/llama.cpp
+++ b/src/llama.cpp
@@ -9701,10 +9701,12 @@ struct llama_context * llama_init_from_model(
         ggml_backend_t backend = ggml_backend_dev_init(dev, nullptr);
         if (backend == nullptr) {
             LLAMA_LOG_ERROR("%s: failed to initialize %s backend\n", __func__, ggml_backend_dev_name(dev));
-            llama_free(ctx);
-            return nullptr;
+            //llama_free(ctx);
+            //return nullptr;
+        }
+        else {
+            ctx->backends.emplace_back(backend);
         }
-        ctx->backends.emplace_back(backend);
     }
 
     // add ACCEL backends (such as BLAS)
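The llama.cpp hunk switches llama_init_from_model() from aborting on the first backend that fails to initialize to logging and skipping it. The loop below sketches that tolerant shape in isolation; the function and its arguments are illustrative, not llama.cpp code:

```cpp
#include <vector>
#include "ggml-backend.h"

static std::vector<ggml_backend_t> init_device_backends(const std::vector<ggml_backend_dev_t> & devices) {
    std::vector<ggml_backend_t> backends;
    for (ggml_backend_dev_t dev : devices) {
        ggml_backend_t backend = ggml_backend_dev_init(dev, nullptr);
        if (backend == nullptr) {
            // skip this device; a CPU backend added elsewhere keeps things usable
            continue;
        }
        backends.push_back(backend);
    }
    return backends;
}
```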