diff --git a/load_models.py b/load_models.py
index b7fa5a37..6822515a 100644
--- a/load_models.py
+++ b/load_models.py
@@ -54,10 +54,11 @@ def load_quantized_model_gguf_ggml(model_id, model_basename, device_type, loggin
         kwargs["n_gpu_layers"] = N_GPU_LAYERS  # set this based on your GPU
 
         return LlamaCpp(**kwargs)
-    except:
+    except Exception as e:
         if "ggml" in model_basename:
             logging.INFO("If you were using GGML model, LLAMA-CPP Dropped Support, Use GGUF Instead")
-        return None
+        logging.info("LlamaCpp conversion error: %s", e)
+        raise e
 
 
 def load_quantized_model_qptq(model_id, model_basename, device_type, logging):
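
With this change the loader no longer swallows failures and returns `None`; the underlying exception is logged and re-raised. A minimal caller-side sketch under that assumption (the model id, basename, and device below are hypothetical placeholders, not values from this PR):

```python
import logging

from load_models import load_quantized_model_gguf_ggml

logging.basicConfig(level=logging.INFO)

# Hypothetical placeholders; substitute your own model.
MODEL_ID = "TheBloke/Llama-2-7B-Chat-GGUF"
MODEL_BASENAME = "llama-2-7b-chat.Q4_K_M.gguf"

try:
    llm = load_quantized_model_gguf_ggml(MODEL_ID, MODEL_BASENAME, "cuda", logging)
except Exception as exc:
    # Before this change the function returned None on failure, so callers
    # had to guard with `if llm is None`; now the real error propagates
    # and can be handled or re-raised with its full traceback.
    logging.error("Failed to load quantized model: %s", exc)
    raise
```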