feat: Support DBRX model in Llama

reneleonhardt · reneleonhardt · commit 3bc8480fe7d9 · 2024-04-15T13:54:47.000+02:00
diff --git a/src/main/java/ee/carlrobert/codegpt/completions/HuggingFaceModel.java b/src/main/java/ee/carlrobert/codegpt/completions/HuggingFaceModel.java
@@ -43,16 +43,29 @@ public enum HuggingFaceModel {
   WIZARD_CODER_PYTHON_13B_Q5(13, 5, "WizardCoder-Python-13B-V1.0-GGUF"),
   WIZARD_CODER_PYTHON_34B_Q3(34, 3, "WizardCoder-Python-34B-V1.0-GGUF"),
   WIZARD_CODER_PYTHON_34B_Q4(34, 4, "WizardCoder-Python-34B-V1.0-GGUF"),
-  WIZARD_CODER_PYTHON_34B_Q5(34, 5, "WizardCoder-Python-34B-V1.0-GGUF");
+  WIZARD_CODER_PYTHON_34B_Q5(34, 5, "WizardCoder-Python-34B-V1.0-GGUF"),
+
+  DBRX_12B_Q3_K_M(12, 3, "dbrx-16x12b-instruct-q3_k_m-gguf", "phymbert"),
+  DBRX_12B_Q4_0(12, 4, "dbrx-16x12b-instruct-q4_0-gguf", "phymbert"),
+  DBRX_12B_Q6_K(12, 6, "dbrx-16x12b-instruct-q6_k-gguf", "phymbert"),
+  DBRX_12B_Q8_0(12, 8, "dbrx-16x12b-instruct-q8_0-gguf", "phymbert"),
+  DBRX_12B_Q3_S(12, 3, "dbrx-16x12b-instruct-iq3_s-gguf", "phymbert"),
+  DBRX_12B_Q3_XXS(12, 3, "dbrx-16x12b-instruct-iq3_xxs-gguf", "phymbert");
 
   private final int parameterSize;
   private final int quantization;
   private final String modelName;
+  private final String user;
 
   HuggingFaceModel(int parameterSize, int quantization, String modelName) {
+    this(parameterSize, quantization, modelName, "TheBloke");
+  }
+
+  HuggingFaceModel(int parameterSize, int quantization, String modelName, String user) {
     this.parameterSize = parameterSize;
     this.quantization = quantization;
     this.modelName = modelName;
+    this.user = user;
   }
 
   public int getParameterSize() {
@@ -68,21 +81,25 @@ public String getCode() {
   }
 
   public String getFileName() {
-    return modelName.toLowerCase().replace("-gguf", format(".Q%d_K_M.gguf", quantization));
+    if ("TheBloke".equals(user)) {
+      return modelName.toLowerCase().replace("-gguf", format(".Q%d_K_M.gguf", quantization));
+    }
+    // TODO: Only the first of the 10 numbered files is referenced here; download all 10.
+    return modelName.toLowerCase().replace("-gguf", "-00001-of-00010.gguf");
   }
 
   public URL getFileURL() {
     try {
       return new URL(
-          format("https://huggingface.co/TheBloke/%s/resolve/main/%s", modelName, getFileName()));
+          "https://huggingface.co/%s/%s/resolve/main/%s".formatted(user, modelName, getFileName()));
     } catch (MalformedURLException ex) {
       throw new RuntimeException(ex);
     }
   }
 
   public URL getHuggingFaceURL() {
     try {
-      return new URL("https://huggingface.co/TheBloke/" + modelName);
+      return new URL("https://huggingface.co/%s/%s".formatted(user, modelName));
     } catch (MalformedURLException ex) {
       throw new RuntimeException(ex);
     }
diff --git a/src/main/java/ee/carlrobert/codegpt/completions/llama/LlamaModel.java b/src/main/java/ee/carlrobert/codegpt/completions/llama/LlamaModel.java
@@ -82,7 +82,21 @@ public enum LlamaModel {
           HuggingFaceModel.WIZARD_CODER_PYTHON_13B_Q5,
           HuggingFaceModel.WIZARD_CODER_PYTHON_34B_Q3,
           HuggingFaceModel.WIZARD_CODER_PYTHON_34B_Q4,
-          HuggingFaceModel.WIZARD_CODER_PYTHON_34B_Q5));
+          HuggingFaceModel.WIZARD_CODER_PYTHON_34B_Q5)),
+  DBRX(
+      "DBRX",
+      "DBRX is a Mixture-of-Experts (MoE) model with 132B total parameters and 36B live parameters."
+          + " Generation speed is significantly faster than LLaMA2-70B, while at the same time "
+          + "beating other open source models, such as LLaMA2-70B, Mixtral, and Grok-1, on "
+          + "language understanding, programming, math, and logic.",
+      PromptTemplate.CHAT_ML,
+      List.of(
+          HuggingFaceModel.DBRX_12B_Q3_K_M,
+          HuggingFaceModel.DBRX_12B_Q4_0,
+          HuggingFaceModel.DBRX_12B_Q6_K,
+          HuggingFaceModel.DBRX_12B_Q8_0,
+          HuggingFaceModel.DBRX_12B_Q3_S,
+          HuggingFaceModel.DBRX_12B_Q3_XXS));
 
   private final String label;
   private final String description;