Skip to content

Commit 3bc8480

Browse files
committed
feat: Support DBRX model in Llama
1 parent 5f16213 commit 3bc8480

File tree

2 files changed

+36
-5
lines changed

2 files changed

+36
-5
lines changed

src/main/java/ee/carlrobert/codegpt/completions/HuggingFaceModel.java

Lines changed: 21 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -43,16 +43,29 @@ public enum HuggingFaceModel {
4343
WIZARD_CODER_PYTHON_13B_Q5(13, 5, "WizardCoder-Python-13B-V1.0-GGUF"),
4444
WIZARD_CODER_PYTHON_34B_Q3(34, 3, "WizardCoder-Python-34B-V1.0-GGUF"),
4545
WIZARD_CODER_PYTHON_34B_Q4(34, 4, "WizardCoder-Python-34B-V1.0-GGUF"),
46-
WIZARD_CODER_PYTHON_34B_Q5(34, 5, "WizardCoder-Python-34B-V1.0-GGUF");
46+
WIZARD_CODER_PYTHON_34B_Q5(34, 5, "WizardCoder-Python-34B-V1.0-GGUF"),
47+
48+
DBRX_12B_Q3_K_M(12, 3, "dbrx-16x12b-instruct-q3_k_m-gguf", "phymbert"),
49+
DBRX_12B_Q4_0(12, 4, "dbrx-16x12b-instruct-q4_0-gguf", "phymbert"),
50+
DBRX_12B_Q6_K(12, 6, "dbrx-16x12b-instruct-q6_k-gguf", "phymbert"),
51+
DBRX_12B_Q8_0(12, 8, "dbrx-16x12b-instruct-q8_0-gguf", "phymbert"),
52+
DBRX_12B_Q3_S(12, 3, "dbrx-16x12b-instruct-iq3_s-gguf", "phymbert"),
53+
DBRX_12B_Q3_XXS(12, 3, "dbrx-16x12b-instruct-iq3_xxs-gguf", "phymbert");
4754

4855
private final int parameterSize;
4956
private final int quantization;
5057
private final String modelName;
58+
private final String user;
5159

5260
HuggingFaceModel(int parameterSize, int quantization, String modelName) {
61+
this(parameterSize, quantization, modelName, "TheBloke");
62+
}
63+
64+
HuggingFaceModel(int parameterSize, int quantization, String modelName, String user) {
5365
this.parameterSize = parameterSize;
5466
this.quantization = quantization;
5567
this.modelName = modelName;
68+
this.user = user;
5669
}
5770

5871
public int getParameterSize() {
@@ -68,21 +81,25 @@ public String getCode() {
6881
}
6982

7083
public String getFileName() {
71-
return modelName.toLowerCase().replace("-gguf", format(".Q%d_K_M.gguf", quantization));
84+
if ("TheBloke".equals(user)) {
85+
return modelName.toLowerCase().replace("-gguf", format(".Q%d_K_M.gguf", quantization));
86+
}
87+
// TODO: These GGUFs are split into 10 parts; only the first part is resolved here. Download all 10 files ;(
88+
return modelName.toLowerCase().replace("-gguf", "-00001-of-00010.gguf");
7289
}
7390

7491
public URL getFileURL() {
7592
try {
7693
return new URL(
77-
format("https://huggingface.co/TheBloke/%s/resolve/main/%s", modelName, getFileName()));
94+
"https://huggingface.co/%s/%s/resolve/main/%s".formatted(user, modelName, getFileName()));
7895
} catch (MalformedURLException ex) {
7996
throw new RuntimeException(ex);
8097
}
8198
}
8299

83100
public URL getHuggingFaceURL() {
84101
try {
85-
return new URL("https://huggingface.co/TheBloke/" + modelName);
102+
return new URL("https://huggingface.co/%s/%s".formatted(user, modelName));
86103
} catch (MalformedURLException ex) {
87104
throw new RuntimeException(ex);
88105
}

src/main/java/ee/carlrobert/codegpt/completions/llama/LlamaModel.java

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -82,7 +82,21 @@ public enum LlamaModel {
8282
HuggingFaceModel.WIZARD_CODER_PYTHON_13B_Q5,
8383
HuggingFaceModel.WIZARD_CODER_PYTHON_34B_Q3,
8484
HuggingFaceModel.WIZARD_CODER_PYTHON_34B_Q4,
85-
HuggingFaceModel.WIZARD_CODER_PYTHON_34B_Q5));
85+
HuggingFaceModel.WIZARD_CODER_PYTHON_34B_Q5)),
86+
DBRX(
87+
"DBRX",
88+
"DBRX is a Mixture-of-Experts (MoE) model with 132B total parameters and 36B live parameters. "
89+
+ "Generation speed is significantly faster than LLaMA2-70B, while at the same time "
90+
+ "beating other open source models, such as LLaMA2-70B, Mixtral, and Grok-1 on "
91+
+ "language understanding, programming, math, and logic.",
92+
PromptTemplate.CHAT_ML,
93+
List.of(
94+
HuggingFaceModel.DBRX_12B_Q3_K_M,
95+
HuggingFaceModel.DBRX_12B_Q4_0,
96+
HuggingFaceModel.DBRX_12B_Q6_K,
97+
HuggingFaceModel.DBRX_12B_Q8_0,
98+
HuggingFaceModel.DBRX_12B_Q3_S,
99+
HuggingFaceModel.DBRX_12B_Q3_XXS));
86100

87101
private final String label;
88102
private final String description;

0 commit comments

Comments
 (0)