diff --git a/ggml/src/ggml-alloc.c b/ggml/src/ggml-alloc.c
index a3d3f690133b0..82da0bd47841c 100644
--- a/ggml/src/ggml-alloc.c
+++ b/ggml/src/ggml-alloc.c
@@ -982,6 +982,11 @@ static bool alloc_tensor_range(struct ggml_context * ctx,
 }
 
 ggml_backend_buffer_t ggml_backend_alloc_ctx_tensors_from_buft(struct ggml_context * ctx, ggml_backend_buffer_type_t buft) {
+
+    if (buft == NULL) {
+        // Fall back to CPU buffer type
+        return ggml_backend_alloc_ctx_tensors_from_buft(ctx, ggml_backend_cpu_buffer_type());
+    }
     GGML_ASSERT(ggml_get_no_alloc(ctx) == true);
 
     size_t alignment = ggml_backend_buft_get_alignment(buft);
diff --git a/ggml/src/ggml-backend.cpp b/ggml/src/ggml-backend.cpp
index 273075f4e5455..5b26aec7fca36 100644
--- a/ggml/src/ggml-backend.cpp
+++ b/ggml/src/ggml-backend.cpp
@@ -36,6 +36,11 @@ const char * ggml_backend_buft_name(ggml_backend_buffer_type_t buft) {
 }
 
 ggml_backend_buffer_t ggml_backend_buft_alloc_buffer(ggml_backend_buffer_type_t buft, size_t size) {
+    if (buft == NULL) {
+        // Fall back to CPU buffer type
+        return ggml_backend_buft_alloc_buffer(ggml_backend_cpu_buffer_type(), size);
+    }
+
     if (size == 0) {
         // return a dummy buffer for zero-sized allocations
         return ggml_backend_buffer_init(buft, {}, NULL, 0);
@@ -45,11 +50,20 @@ ggml_backend_buffer_t ggml_backend_buft_alloc_buffer(ggml_backend_buffer_type_t
 }
 
 size_t ggml_backend_buft_get_alignment(ggml_backend_buffer_type_t buft) {
+    if (buft == NULL) {
+        // Return a safe default alignment or use CPU buffer type's alignment
+        return ggml_backend_buft_get_alignment(ggml_backend_cpu_buffer_type());
+    }
     return buft->iface.get_alignment(buft);
 }
 
 size_t ggml_backend_buft_get_max_size(ggml_backend_buffer_type_t buft) {
     // get_max_size is optional, defaults to SIZE_MAX
+    if (buft == NULL) {
+        // Return a safe default (CPU buffer type's max size)
+        return ggml_backend_buft_get_max_size(ggml_backend_cpu_buffer_type());
+    }
+
     if (buft->iface.get_max_size) {
         return buft->iface.get_max_size(buft);
     }
@@ -58,6 +72,11 @@ size_t ggml_backend_buft_get_max_size(ggml_backend_buffer_type_t buft) {
 
 size_t ggml_backend_buft_get_alloc_size(ggml_backend_buffer_type_t buft, struct ggml_tensor * tensor) {
     // get_alloc_size is optional, defaults to ggml_nbytes
+    if (buft == NULL) {
+        // Return ggml_nbytes as fallback
+        return ggml_nbytes(tensor);
+    }
+
     if (buft->iface.get_alloc_size) {
         size_t size = buft->iface.get_alloc_size(buft, tensor);
         assert(size >= ggml_nbytes(tensor));
@@ -67,6 +86,10 @@ size_t ggml_backend_buft_get_alloc_size(ggml_backend_buffer_type_t buft, struct
 }
 
 bool ggml_backend_buft_is_host(ggml_backend_buffer_type_t buft) {
+    if (buft == NULL) {
+        return true; // CPU is host, so assume true for NULL
+    }
+
     if (buft->iface.is_host) {
         return buft->iface.is_host(buft);
     }
@@ -74,6 +97,9 @@ bool ggml_backend_buft_is_host(ggml_backend_buffer_type_t buft) {
 }
 
 ggml_backend_dev_t ggml_backend_buft_get_device(ggml_backend_buffer_type_t buft) {
+    if (buft == NULL) {
+        return NULL;
+    }
     return buft->device;
 }
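Every `ggml_backend_buft_*` accessor above gets the same guard: a NULL buffer type is redirected to `ggml_backend_cpu_buffer_type()` instead of being dereferenced. A minimal sketch of that idiom, outside the patch; the `resolve_buft` helper is hypothetical and only for illustration:

```cpp
#include "ggml-backend.h"   // declares ggml_backend_cpu_buffer_type() and the buft accessors

// Hypothetical helper: map a possibly-NULL buffer type to the CPU buffer type once,
// so the fallback is visible at the call site rather than hidden in each accessor.
static ggml_backend_buffer_type_t resolve_buft(ggml_backend_buffer_type_t buft) {
    return buft != NULL ? buft : ggml_backend_cpu_buffer_type();
}

static ggml_backend_buffer_t alloc_with_fallback(ggml_backend_buffer_type_t buft, size_t size) {
    // With the guards in this patch a NULL buft is already safe to pass through;
    // resolving it explicitly documents the intent.
    return ggml_backend_buft_alloc_buffer(resolve_buft(buft), size);
}
```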
diff --git a/ggml/src/ggml-vulkan/ggml-vulkan.cpp b/ggml/src/ggml-vulkan/ggml-vulkan.cpp
index ff53bdfbe171c..7ca4af0742deb 100644
--- a/ggml/src/ggml-vulkan/ggml-vulkan.cpp
+++ b/ggml/src/ggml-vulkan/ggml-vulkan.cpp
@@ -293,30 +293,62 @@ struct vk_device_struct {
     std::unique_ptr<vk_perf_logger> perf_logger;
 #endif
 
-    ~vk_device_struct() {
-        VK_LOG_DEBUG("destroy device " << name);
-
-        device.destroyFence(fence);
-
-        ggml_vk_destroy_buffer(sync_staging);
-
-        device.destroyCommandPool(compute_queue.pool);
-        if (!single_queue) {
-            device.destroyCommandPool(transfer_queue.pool);
-        }
-
-        for (auto& pipeline : pipelines) {
-            if (pipeline.second.expired()) {
-                continue;
-            }
-
-            vk_pipeline pl = pipeline.second.lock();
-            ggml_vk_destroy_pipeline(device, pl);
-        }
-        pipelines.clear();
-
-        device.destroy();
-    }
+    ~vk_device_struct() {
+        VK_LOG_DEBUG("destroy device " << name);
+
+        if (device != VK_NULL_HANDLE) {
+            try {
+                // Only destroy fence if it's valid
+                if (fence != VK_NULL_HANDLE) {
+                    device.destroyFence(fence);
+                    fence = VK_NULL_HANDLE;
+                }
+
+                // Only destroy buffer if it exists
+                if (sync_staging != VK_NULL_HANDLE) {
+                    ggml_vk_destroy_buffer(sync_staging);
+                    sync_staging = VK_NULL_HANDLE;
+                }
+
+                // Check if command pool is valid before destroying
+                if (compute_queue.pool != VK_NULL_HANDLE) {
+                    device.destroyCommandPool(compute_queue.pool);
+                    compute_queue.pool = VK_NULL_HANDLE;
+                }
+
+                // Only destroy transfer queue if using separate queues and it's valid
+                if (!single_queue && transfer_queue.pool != VK_NULL_HANDLE) {
+                    device.destroyCommandPool(transfer_queue.pool);
+                    transfer_queue.pool = VK_NULL_HANDLE;
+                }
+
+                // Clean up pipelines safely
+                for (auto& pipeline : pipelines) {
+                    if (pipeline.second.expired()) {
+                        continue;
+                    }
+
+                    vk_pipeline pl = pipeline.second.lock();
+                    if (pl != nullptr) {
+                        ggml_vk_destroy_pipeline(device, pl);
+                    }
+                }
+                pipelines.clear();
+
+                // Finally destroy the device
+                device.destroy();
+                device = VK_NULL_HANDLE;
+            }
+            catch (const std::exception& e) {
+                std::cerr << "Warning: Exception during Vulkan device cleanup: " << e.what() << std::endl;
+                // Continue with destruction despite errors
+            }
+            catch (...) {
+                std::cerr << "Warning: Unknown exception during Vulkan device cleanup" << std::endl;
+                // Continue with destruction despite errors
+            }
+        }
+    }
 };
 
 struct vk_buffer_struct {
@@ -771,6 +803,8 @@ struct vk_instance_t {
 };
 
 static bool vk_instance_initialized = false;
+// Global flag to track if Vulkan initialization has failed
+static bool g_vulkan_init_failed = false;
 static vk_instance_t vk_instance;
 
 #ifdef GGML_VULKAN_CHECK_RESULTS
@@ -2252,32 +2286,37 @@ static bool ggml_vk_khr_cooperative_matrix_support(const vk::PhysicalDevicePrope
 static vk_device ggml_vk_get_device(size_t idx) {
     VK_LOG_DEBUG("ggml_vk_get_device(" << idx << ")");
 
-    if (vk_instance.devices[idx] == nullptr) {
-        VK_LOG_DEBUG("Initializing new vk_device");
-        vk_device device = std::make_shared<vk_device_struct>();
-        vk_instance.devices[idx] = device;
+    if (g_vulkan_init_failed) {
+        return nullptr;
+    }
+
+    try {
+        if (vk_instance.devices[idx] == nullptr) {
+            VK_LOG_DEBUG("Initializing new vk_device");
+            vk_device device = std::make_shared<vk_device_struct>();
+            vk_instance.devices[idx] = device;
 #ifdef GGML_VULKAN_MEMORY_DEBUG
-        device->memory_logger = std::unique_ptr<vk_memory_logger>(new vk_memory_logger());
+            device->memory_logger = std::unique_ptr<vk_memory_logger>(new vk_memory_logger());
 #endif
 #ifdef GGML_VULKAN_PERF
-        device->perf_logger = std::unique_ptr<vk_perf_logger>(new vk_perf_logger());
+            device->perf_logger = std::unique_ptr<vk_perf_logger>(new vk_perf_logger());
 #endif
 
         size_t dev_num = vk_instance.device_indices[idx];
 
-        std::vector<vk::PhysicalDevice> physical_devices = vk_instance.instance.enumeratePhysicalDevices();
+            std::vector<vk::PhysicalDevice> physical_devices = vk_instance.instance.enumeratePhysicalDevices();
 
-        if (dev_num >= physical_devices.size()) {
-            std::cerr << "ggml_vulkan: Device with index " << dev_num << " does not exist." << std::endl;
-            throw std::runtime_error("Device not found");
-        }
+            if (dev_num >= physical_devices.size()) {
+                std::cerr << "ggml_vulkan: Device with index " << dev_num << " does not exist." << std::endl;
+                throw std::runtime_error("Device not found");
+            }
 
-        device->physical_device = physical_devices[dev_num];
-        const std::vector<vk::ExtensionProperties> ext_props = device->physical_device.enumerateDeviceExtensionProperties();
+            device->physical_device = physical_devices[dev_num];
+            const std::vector<vk::ExtensionProperties> ext_props = device->physical_device.enumerateDeviceExtensionProperties();
 
-        const char* GGML_VK_PREFER_HOST_MEMORY = getenv("GGML_VK_PREFER_HOST_MEMORY");
-        device->prefer_host_memory = GGML_VK_PREFER_HOST_MEMORY != nullptr;
+            const char* GGML_VK_PREFER_HOST_MEMORY = getenv("GGML_VK_PREFER_HOST_MEMORY");
+            device->prefer_host_memory = GGML_VK_PREFER_HOST_MEMORY != nullptr;
 
         bool fp16_storage = false;
         bool fp16_compute = false;
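The rewritten destructor follows a conventional guarded-teardown shape: only touch handles that were actually created, null them after destruction, and never let an exception escape a destructor. A reduced sketch of that shape, illustrative only; `guarded_device` is not a type from the patch:

```cpp
#include <vulkan/vulkan.hpp>

struct guarded_device {
    vk::Device device;
    vk::Fence  fence;

    ~guarded_device() {
        if (!device) {
            return;                   // device was never created: nothing to tear down
        }
        try {
            if (fence) {
                device.destroyFence(fence);
                fence = vk::Fence{};  // prevent a double destroy
            }
            device.destroy();
            device = vk::Device{};
        } catch (...) {
            // a destructor must not throw; swallow and continue
        }
    }
};
```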
@@ -2689,7 +2728,29 @@ static vk_device ggml_vk_get_device(size_t idx) {
             device_extensions
         };
         device_create_info.setPNext(&device_features2);
-        device->device = device->physical_device.createDevice(device_create_info);
+        try {
+            // Attempt to create device
+            device->device = device->physical_device.createDevice(device_create_info);
+        }
+        catch (const vk::ExtensionNotPresentError& ext_error) {
+            // Specific handling for extension not supported
+            std::cerr << "Vulkan Extension Error: " << ext_error.what() << std::endl;
+            std::cerr << "Critical extension not supported. Falling back to CPU backend." << std::endl;
+
+            return nullptr;
+        }
+        catch (const vk::SystemError& sys_error) {
+            // Catch any other Vulkan system errors
+            std::cerr << "Vulkan Device Creation Error: " << sys_error.what() << std::endl;
+            std::cerr << "Failed to create Vulkan device. Falling back to CPU backend." << std::endl;
+            return nullptr;
+        }
+        catch (const std::exception& general_error) {
+            // Catch any standard exceptions
+            std::cerr << "Unexpected error during Vulkan device creation: " << general_error.what() << std::endl;
+            std::cerr << "Falling back to CPU backend." << std::endl;
+            return nullptr;
+        }
 
         // Queues
         ggml_vk_create_queue(device, device->compute_queue, compute_queue_family_index, 0, { vk::PipelineStageFlagBits::eComputeShader | vk::PipelineStageFlagBits::eTransfer }, false);
@@ -2752,6 +2813,14 @@ static vk_device ggml_vk_get_device(size_t idx) {
     }
 
     return vk_instance.devices[idx];
+    }
+    catch (const std::exception& e) {
+        // Set global flag on error
+        g_vulkan_init_failed = true;
+        return nullptr;
+    }
+
+
 }
 
 static void ggml_vk_print_gpu_info(size_t idx) {
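The catch order around `createDevice` relies on the Vulkan-Hpp exception hierarchy: specific errors such as `vk::ExtensionNotPresentError` derive from `vk::SystemError`, which in turn derives from `std::exception`, so handlers must go from most to least specific. A standalone sketch of the same structure; the function and variable names here are placeholders, not code from the patch:

```cpp
#include <iostream>
#include <vulkan/vulkan.hpp>

// Returns a null vk::Device on failure; the caller treats that as "fall back to CPU".
static vk::Device try_create_device(vk::PhysicalDevice physical_device,
                                    const vk::DeviceCreateInfo & create_info) {
    try {
        return physical_device.createDevice(create_info);
    } catch (const vk::ExtensionNotPresentError & e) {
        std::cerr << "missing required extension: " << e.what() << std::endl;
    } catch (const vk::SystemError & e) {
        std::cerr << "Vulkan error: " << e.what() << std::endl;
    } catch (const std::exception & e) {
        std::cerr << "unexpected error: " << e.what() << std::endl;
    }
    return {};
}
```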
@@ -3043,22 +3112,74 @@ static void ggml_vk_instance_init() {
 }
 
 static void ggml_vk_init(ggml_backend_vk_context * ctx, size_t idx) {
-    VK_LOG_DEBUG("ggml_vk_init(" << ctx->name << ", " << idx << ")");
-    ggml_vk_instance_init();
-    GGML_ASSERT(idx < vk_instance.device_indices.size());
-    ctx->name = GGML_VK_NAME + std::to_string(idx);
+    if (g_vulkan_init_failed) {
+        ctx->device = nullptr;
+        return;
+    }
+    try {
+        VK_LOG_DEBUG("ggml_vk_init(" << ctx->name << ", " << idx << ")");
+        ctx->device = nullptr;
+        // Wrap instance initialization in a try-catch block
+        try {
+            ggml_vk_instance_init();
+        }
+        catch (const std::exception& instance_init_error) {
+            std::cerr << "Vulkan instance initialization failed: "
+                      << instance_init_error.what() << std::endl;
+
+            // Set device to nullptr to indicate initialization failure
+            ctx->device = nullptr;
+            return;
+        }
 
-    ctx->device = ggml_vk_get_device(idx);
+        // Check device index validity
+        if (idx >= vk_instance.device_indices.size()) {
+            std::cerr << "Invalid Vulkan device index: " << idx << std::endl;
+            ctx->device = nullptr;
+            return;
+        }
 
-    ctx->semaphore_idx = 0;
-    ctx->event_idx = 0;
+        ctx->name = GGML_VK_NAME + std::to_string(idx);
 
-    ctx->prealloc_size_x = 0;
-    ctx->prealloc_size_y = 0;
-    ctx->prealloc_size_split_k = 0;
+        // Attempt to get device with error handling
+        vk_device device = ggml_vk_get_device(idx);
+
+        // Check if device initialization failed
+        if (nullptr == device) {
+            std::cerr << "Failed to initialize Vulkan device at index " << idx << std::endl;
+            ctx->device = nullptr;
+            return;
+        }
+
+        ctx->device = device;
+        ctx->semaphore_idx = 0;
+        ctx->event_idx = 0;
+        ctx->prealloc_size_x = 0;
+        ctx->prealloc_size_y = 0;
+        ctx->prealloc_size_split_k = 0;
 
-    ctx->fence = ctx->device->device.createFence({});
+        // CRITICAL: very explicit check before trying to create a fence
+        if (ctx->device == nullptr || ctx->device->device == VK_NULL_HANDLE) {
+            std::cerr << "WARNING: Device is null or invalid, skipping fence creation" << std::endl;
+            return;
+        }
+
+        if (ctx->device) {
+
+            // Wrap fence creation in try-catch to handle potential Vulkan errors
+            try {
+                ctx->fence = ctx->device->device.createFence({});
+            }
+            catch (const vk::SystemError& fence_error) {
+                std::cerr << "Failed to create Vulkan fence: " << fence_error.what() << std::endl;
+                // Optionally, you might want to reset the device or handle this differently
+                ctx->device = nullptr;
+                // Set global flag
+                g_vulkan_init_failed = true;
+                return;
+            }
+        }
 
 #ifdef GGML_VULKAN_CHECK_RESULTS
     const char* skip_checks = getenv("GGML_VULKAN_SKIP_CHECKS");
@@ -3066,6 +3187,15 @@ static void ggml_vk_init(ggml_backend_vk_context * ctx, size_t idx) {
     const char* output_tensor = getenv("GGML_VULKAN_OUTPUT_TENSOR");
     vk_output_tensor = (output_tensor == NULL ? 0 : atoi(output_tensor));
 #endif
+    }
+    catch (const std::exception& unexpected_error) {
+        // Catch-all for any unexpected errors
+        std::cerr << "Unexpected error during Vulkan initialization: "
+                  << unexpected_error.what() << std::endl;
+
+        // Ensure device is set to nullptr to indicate initialization failure
+        ctx->device = nullptr;
+    }
 }
 
 static vk_pipeline ggml_vk_get_to_fp16(ggml_backend_vk_context * ctx, ggml_type type) {
@@ -8032,11 +8162,28 @@ static size_t ggml_backend_vk_buffer_type_get_alloc_size(ggml_backend_buffer_typ
 }
 
 ggml_backend_buffer_type_t ggml_backend_vk_buffer_type(size_t dev_num) {
-    ggml_vk_instance_init();
-
-    VK_LOG_DEBUG("ggml_backend_vk_buffer_type(" << dev_num << ")");
-
-    vk_device dev = ggml_vk_get_device(dev_num);
+    // Check if Vulkan initialization previously failed
+    if (g_vulkan_init_failed) {
+        return nullptr;
+    }
+
+    try {
+        ggml_vk_instance_init();
+    } catch (const std::exception& e) {
+        VK_LOG_DEBUG("ggml_backend_vk_buffer_type: Vulkan instance init failed: " << e.what());
+        g_vulkan_init_failed = true;
+        return nullptr;
+    }
+
+    VK_LOG_DEBUG("ggml_backend_vk_buffer_type(" << dev_num << ")");
+
+    vk_device dev = ggml_vk_get_device(dev_num);
+    if (!dev) {
+        VK_LOG_DEBUG("ggml_backend_vk_buffer_type: Failed to get device " << dev_num);
+        g_vulkan_init_failed = true;
+        return nullptr;
+    }
 
     return &dev->buffer_type;
 }
@@ -8092,6 +8239,13 @@ static size_t ggml_backend_vk_host_buffer_type_get_alignment(ggml_backend_buffer
 // Should be changed to return device-specific host buffer type
 // but that probably requires changes in llama.cpp
 ggml_backend_buffer_type_t ggml_backend_vk_host_buffer_type() {
+
+    // Check if Vulkan initialization previously failed
+    if (g_vulkan_init_failed) {
+        // Return CPU buffer type as fallback when Vulkan fails
+        return ggml_backend_cpu_buffer_type();
+    }
+
     static struct ggml_backend_buffer_type ggml_backend_vk_buffer_type_host = {
         /* .iface = */ {
             /* .get_name = */ ggml_backend_vk_host_buffer_type_name,
@@ -8105,11 +8259,24 @@ ggml_backend_buffer_type_t ggml_backend_vk_host_buffer_type() {
         /* .context = */ nullptr,
     };
 
-    // Make sure device 0 is initialized
-    ggml_vk_instance_init();
-    ggml_vk_get_device(0);
+    // Make sure device 0 is initialized
+    try {
+        ggml_vk_instance_init();
+        vk_device dev = ggml_vk_get_device(0);
 
-    return &ggml_backend_vk_buffer_type_host;
+        if (!dev) {
+            g_vulkan_init_failed = true;
+            return ggml_backend_cpu_buffer_type();
+        }
+
+        // Only set the device if initialization succeeded
+        ggml_backend_vk_buffer_type_host.device = ggml_backend_reg_dev_get(ggml_backend_vk_reg(), 0);
+
+        return &ggml_backend_vk_buffer_type_host;
+    } catch (const std::exception& e) {
+        g_vulkan_init_failed = true;
+        return ggml_backend_cpu_buffer_type();
+    }
 }
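Worth noting how the two halves of the patch are meant to meet: `ggml_backend_vk_buffer_type()` can now return `nullptr`, and the NULL-tolerant `ggml_backend_buft_*` helpers from ggml-backend.cpp then quietly resolve that to the CPU buffer type. A sketch of that flow under those assumptions; the selection function itself is hypothetical:

```cpp
#include "ggml-backend.h"
#include "ggml-vulkan.h"   // ggml_backend_vk_buffer_type()

static ggml_backend_buffer_type_t pick_weight_buft(size_t dev_num) {
    // May be nullptr when Vulkan initialization failed.
    ggml_backend_buffer_type_t buft = ggml_backend_vk_buffer_type(dev_num);

    // With this patch, the accessors below no longer crash on a NULL buft:
    // they report the CPU buffer type's properties instead.
    size_t alignment = ggml_backend_buft_get_alignment(buft);
    size_t max_size  = ggml_backend_buft_get_max_size(buft);
    (void) alignment;
    (void) max_size;

    return buft;   // callers must still be prepared for nullptr
}
```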
@@ -8342,17 +8509,60 @@ static ggml_guid_t ggml_backend_vk_guid() {
 ggml_backend_t ggml_backend_vk_init(size_t dev_num) {
     VK_LOG_DEBUG("ggml_backend_vk_init(" << dev_num << ")");
 
-    ggml_backend_vk_context * ctx = new ggml_backend_vk_context;
-    ggml_vk_init(ctx, dev_num);
-
-    ggml_backend_t vk_backend = new ggml_backend {
-        /* .guid = */ ggml_backend_vk_guid(),
-        /* .interface = */ ggml_backend_vk_interface,
-        /* .device = */ ggml_backend_reg_dev_get(ggml_backend_vk_reg(), dev_num),
-        /* .context = */ ctx,
-    };
-
-    return vk_backend;
+    // First check if Vulkan initialization previously failed
+    if (g_vulkan_init_failed) {
+        std::cerr << "Vulkan initialization previously failed, skipping.\n";
+        return nullptr;
+    }
+
+    VK_LOG_DEBUG("ggml_backend_vk_init(" << dev_num << ")");
+
+    ggml_backend_vk_context* ctx = new ggml_backend_vk_context;
+
+    try {
+        // Initialize Vulkan context
+        ggml_vk_init(ctx, dev_num);
+
+        // Check if device initialization failed
+        if (!ctx->device) {
+            std::cerr << "Vulkan device initialization failed. Falling back to CPU backend." << std::endl;
+
+            // Cleanup Vulkan context
+            delete ctx;
+            return nullptr;
+        }
+
+        // Create Vulkan backend
+        ggml_backend_t vk_backend = new ggml_backend {
+            /* .guid = */ ggml_backend_vk_guid(),
+            /* .interface = */ ggml_backend_vk_interface,
+            /* .device = */ ggml_backend_reg_dev_get(ggml_backend_vk_reg(), dev_num),
+            /* .context = */ ctx,
+        };
+
+        return vk_backend;
+    }
+    catch (const std::exception& e) {
+
+        g_vulkan_init_failed = true;
+        // Catch any unexpected errors during initialization
+        std::cerr << "Critical error in Vulkan backend initialization: "
+                  << e.what() << ". Falling back to CPU backend." << std::endl;
+
+        // Cleanup Vulkan context
+        delete ctx;
+        return nullptr;
+    }
+    catch (...) {
+        g_vulkan_init_failed = true;
+        // Catch any unknown errors
+        std::cerr << "Unknown error during Vulkan backend initialization. Falling back to CPU backend." << std::endl;
+
+        // Cleanup Vulkan context
+        delete ctx;
+        return nullptr;
+    }
 }
 
 bool ggml_backend_is_vk(ggml_backend_t backend) {
@@ -8739,6 +8949,12 @@ static size_t ggml_backend_vk_reg_get_device_count(ggml_backend_reg_t reg) {
 }
 
 static ggml_backend_dev_t ggml_backend_vk_reg_get_device(ggml_backend_reg_t reg, size_t device) {
+
+    // Check global flag first
+    if (g_vulkan_init_failed) {
+        return nullptr;
+    }
+
     static std::vector<ggml_backend_dev_t> devices;
 
     static bool initialized = false;
@@ -8763,6 +8979,9 @@ static ggml_backend_dev_t ggml_backend_vk_reg_get_device(ggml_backend_reg_t reg,
             initialized = true;
         }
     }
+    if (devices.empty() || device >= devices.size()) {
+        return nullptr;
+    }
 
     GGML_ASSERT(device < devices.size());
     return devices[device];
@@ -8776,6 +8995,10 @@ static const struct ggml_backend_reg_i ggml_backend_vk_reg_i = {
 };
 
 ggml_backend_reg_t ggml_backend_vk_reg() {
+
+    if (g_vulkan_init_failed) {
+        return nullptr;
+    }
     static ggml_backend_reg reg = {
         /* .api_version = */ GGML_BACKEND_API_VERSION,
         /* .iface = */ ggml_backend_vk_reg_i,
@@ -8785,6 +9008,7 @@ ggml_backend_reg_t ggml_backend_vk_reg() {
         ggml_vk_instance_init();
         return &reg;
     } catch (const vk::SystemError& e) {
+        g_vulkan_init_failed = true;
        VK_LOG_DEBUG("ggml_backend_vk_reg() -> Error: System error: " << e.what());
         return nullptr;
     }
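With `ggml_backend_vk_init()` returning `nullptr` instead of crashing, an application can fall back to the CPU backend explicitly. A sketch of that usage; header names as in current ggml trees, and the selection logic is illustrative:

```cpp
#include "ggml-backend.h"
#include "ggml-cpu.h"      // ggml_backend_cpu_init()
#include "ggml-vulkan.h"   // ggml_backend_vk_init()

static ggml_backend_t init_best_backend(void) {
    ggml_backend_t backend = ggml_backend_vk_init(0);  // nullptr when Vulkan is unusable
    if (backend == nullptr) {
        backend = ggml_backend_cpu_init();             // CPU backend is always available
    }
    return backend;
}
```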
diff --git a/src/llama-model.cpp b/src/llama-model.cpp
index 1da4eae7e63e2..14f91ee4c39e2 100644
--- a/src/llama-model.cpp
+++ b/src/llama-model.cpp
@@ -235,15 +235,28 @@ using buft_list_t = std::vector<std::pair<ggml_backend_dev_t, ggml_backend_buffer_type_t>>;
 // CPU: ACCEL -> CPU extra -> GPU host -> CPU
@@ -304,30 +317,57 @@ static buft_list_t make_cpu_buft_list(const std::vector<ggml_backend_dev_t> & de
 static buft_list_t make_gpu_buft_list(ggml_backend_dev_t dev, enum llama_split_mode split_mode, const float * tensor_split) {
     buft_list_t buft_list;
 
-    // add the device split buffer type if requested and available
-    if (split_mode == LLAMA_SPLIT_MODE_ROW) {
-        ggml_backend_reg_t reg = ggml_backend_dev_backend_reg(dev);
-        auto ggml_backend_split_buffer_type_fn = (ggml_backend_split_buffer_type_t)
-            ggml_backend_reg_get_proc_address(reg, "ggml_backend_split_buffer_type");
-        if (ggml_backend_split_buffer_type_fn) {
-            size_t dev_index = [&]() {
-                auto * reg = ggml_backend_dev_backend_reg(dev);
-                for (size_t i = 0; i < ggml_backend_reg_dev_count(reg); ++i) {
-                    if (ggml_backend_reg_dev_get(reg, i) == dev) {
-                        return i;
-                    }
-                }
-                throw std::runtime_error(format("device %s not found in its backend reg", ggml_backend_dev_name(dev)));
-            }();
-            auto * buft = ggml_backend_split_buffer_type_fn(dev_index, tensor_split);
-            if (buft != nullptr) {
-                buft_list.emplace_back(dev, buft);
-            }
-        }
-    }
-
-    // add the device default buffer type
-    buft_list.emplace_back(dev, ggml_backend_dev_buffer_type(dev));
+    // Try to add device buffer types, but be prepared for failures
+    try {
+        // Check if the device is valid/available
+        if (dev == nullptr) {
+            return buft_list; // Return empty list if device is null
+        }
+
+        // add the device split buffer type if requested and available
+        if (split_mode == LLAMA_SPLIT_MODE_ROW) {
+            ggml_backend_reg_t reg = ggml_backend_dev_backend_reg(dev);
+            if (reg != nullptr) {
+                auto ggml_backend_split_buffer_type_fn = (ggml_backend_split_buffer_type_t)
+                    ggml_backend_reg_get_proc_address(reg, "ggml_backend_split_buffer_type");
+                if (ggml_backend_split_buffer_type_fn) {
+                    size_t dev_index = 0;
+                    bool found = false;
+
+                    // Find device index more safely
+                    for (size_t i = 0; i < ggml_backend_reg_dev_count(reg); ++i) {
+                        if (ggml_backend_reg_dev_get(reg, i) == dev) {
+                            dev_index = i;
+                            found = true;
+                            break;
+                        }
+                    }
+
+                    if (found) {
+                        auto * buft = ggml_backend_split_buffer_type_fn(dev_index, tensor_split);
+                        if (buft != nullptr) {
+                            buft_list.emplace_back(dev, buft);
+                        }
+                    }
+                }
+            }
+        }
+
+        // add the device default buffer type if it's available
+        ggml_backend_buffer_type_t dev_buft = ggml_backend_dev_buffer_type(dev);
+        if (dev_buft != nullptr) {
+            buft_list.emplace_back(dev, dev_buft);
+        }
+    }
+    catch (const std::exception& e) {
+        // Log the error but continue
+        const char* dev_name = dev ? ggml_backend_dev_name(dev) : "unknown";
+        //std::cerr << "Error adding buffer types for device " << dev_name << ": " << e.what() << std::endl;
+        //std::cerr << "Will fall back to other available buffer types" << std::endl;
+
+        // Return an empty list which will be filled with other buffer types later
+        buft_list.clear();
+    }
 
     return buft_list;
 }
diff --git a/src/llama.cpp b/src/llama.cpp
index 607f278615969..ef4463f095e65 100644
--- a/src/llama.cpp
+++ b/src/llama.cpp
@@ -9701,10 +9701,12 @@ struct llama_context * llama_init_from_model(
         ggml_backend_t backend = ggml_backend_dev_init(dev, nullptr);
         if (backend == nullptr) {
             LLAMA_LOG_ERROR("%s: failed to initialize %s backend\n", __func__, ggml_backend_dev_name(dev));
-            llama_free(ctx);
-            return nullptr;
+            //llama_free(ctx);
+            //return nullptr;
+        }
+        else {
+            ctx->backends.emplace_back(backend);
         }
-        ctx->backends.emplace_back(backend);
     }
 
     // add ACCEL backends (such as BLAS)
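The llama.cpp hunk switches llama_init_from_model() from aborting on the first backend that fails to initialize to logging and skipping it. The loop below sketches that tolerant shape in isolation; the function and its arguments are illustrative, not llama.cpp code:

```cpp
#include <vector>
#include "ggml-backend.h"

static std::vector<ggml_backend_t> init_device_backends(const std::vector<ggml_backend_dev_t> & devices) {
    std::vector<ggml_backend_t> backends;
    for (ggml_backend_dev_t dev : devices) {
        ggml_backend_t backend = ggml_backend_dev_init(dev, nullptr);
        if (backend == nullptr) {
            // skip this device; a CPU backend added elsewhere keeps things usable
            continue;
        }
        backends.push_back(backend);
    }
    return backends;
}
```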