Skip to content

Commit 9e28be6

Browse files
authored
feat: add chroma radiance support (#910)
* add chroma radiance support * fix ci * simplify generate_init_latent * workaround: avoid ggml cuda error * format code * add chroma radiance doc
1 parent 062490a commit 9e28be6

File tree

10 files changed

+630
-225
lines changed

10 files changed

+630
-225
lines changed

README.md

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -35,10 +35,11 @@ API and command-line option may change frequently.***
3535
- Image Models
3636
- SD1.x, SD2.x, [SD-Turbo](https://huggingface.co/stabilityai/sd-turbo)
3737
- SDXL, [SDXL-Turbo](https://huggingface.co/stabilityai/sdxl-turbo)
38-
- [some SD1.x and SDXL distilled models](./docs/distilled_sd.md)
38+
- [Some SD1.x and SDXL distilled models](./docs/distilled_sd.md)
3939
- [SD3/SD3.5](./docs/sd3.md)
4040
- [Flux-dev/Flux-schnell](./docs/flux.md)
4141
- [Chroma](./docs/chroma.md)
42+
- [Chroma1-Radiance](./docs/chroma_radiance.md)
4243
- [Qwen Image](./docs/qwen_image.md)
4344
- Image Edit Models
4445
- [FLUX.1-Kontext-dev](./docs/kontext.md)

assets/flux/chroma1-radiance.png

477 KB
Loading

docs/chroma_radiance.md

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
# How to Use
2+
3+
## Download weights
4+
5+
- Download Chroma1-Radiance
6+
- safetensors: https://huggingface.co/lodestones/Chroma1-Radiance/tree/main
7+
- gguf: https://huggingface.co/silveroxides/Chroma1-Radiance-GGUF/tree/main
8+
9+
- Download t5xxl
10+
- safetensors: https://huggingface.co/comfyanonymous/flux_text_encoders/blob/main/t5xxl_fp16.safetensors
11+
12+
## Examples
13+
14+
```
15+
.\bin\Release\sd.exe --diffusion-model ..\..\ComfyUI\models\diffusion_models\Chroma1-Radiance-v0.4-Q8_0.gguf --t5xxl ..\..\ComfyUI\models\clip\t5xxl_fp16.safetensors -p "a lovely cat holding a sign says 'chroma radiance cpp'" --cfg-scale 4.0 --sampling-method euler -v
16+
```
17+
18+
<img alt="Chroma1-Radiance" src="../assets/flux/chroma1-radiance.png" />
19+
20+
21+

flux.hpp

Lines changed: 459 additions & 111 deletions
Large diffs are not rendered by default.

ggml_extend.hpp

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -954,7 +954,16 @@ __STATIC_INLINE__ struct ggml_tensor* ggml_nn_linear(struct ggml_context* ctx,
954954
if (scale != 1.f) {
955955
x = ggml_scale(ctx, x, scale);
956956
}
957-
x = ggml_mul_mat(ctx, w, x);
957+
if (x->ne[2] * x->ne[3] > 1024) {
958+
// workaround: avoid ggml cuda error
959+
int64_t ne2 = x->ne[2];
960+
int64_t ne3 = x->ne[3];
961+
x = ggml_reshape_2d(ctx, x, x->ne[0], x->ne[1] * x->ne[2] * x->ne[3]);
962+
x = ggml_mul_mat(ctx, w, x);
963+
x = ggml_reshape_4d(ctx, x, x->ne[0], x->ne[1] / ne2 / ne3, ne2, ne3);
964+
} else {
965+
x = ggml_mul_mat(ctx, w, x);
966+
}
958967
if (force_prec_f32) {
959968
ggml_mul_mat_set_prec(x, GGML_PREC_F32);
960969
}

model.cpp

Lines changed: 13 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -1778,7 +1778,6 @@ bool ModelLoader::model_is_unet() {
17781778

17791779
SDVersion ModelLoader::get_sd_version() {
17801780
TensorStorage token_embedding_weight, input_block_weight;
1781-
bool input_block_checked = false;
17821781

17831782
bool has_multiple_encoders = false;
17841783
bool is_unet = false;
@@ -1791,12 +1790,12 @@ SDVersion ModelLoader::get_sd_version() {
17911790
bool has_middle_block_1 = false;
17921791

17931792
for (auto& tensor_storage : tensor_storages) {
1794-
if (!(is_xl || is_flux)) {
1793+
if (!(is_xl)) {
17951794
if (tensor_storage.name.find("model.diffusion_model.double_blocks.") != std::string::npos) {
17961795
is_flux = true;
1797-
if (input_block_checked) {
1798-
break;
1799-
}
1796+
}
1797+
if (tensor_storage.name.find("model.diffusion_model.nerf_final_layer_conv.") != std::string::npos) {
1798+
return VERSION_CHROMA_RADIANCE;
18001799
}
18011800
if (tensor_storage.name.find("model.diffusion_model.joint_blocks.") != std::string::npos) {
18021801
return VERSION_SD3;
@@ -1813,22 +1812,19 @@ SDVersion ModelLoader::get_sd_version() {
18131812
if (tensor_storage.name.find("model.diffusion_model.img_emb") != std::string::npos) {
18141813
has_img_emb = true;
18151814
}
1816-
if (tensor_storage.name.find("model.diffusion_model.input_blocks.") != std::string::npos || tensor_storage.name.find("unet.down_blocks.") != std::string::npos) {
1815+
if (tensor_storage.name.find("model.diffusion_model.input_blocks.") != std::string::npos ||
1816+
tensor_storage.name.find("unet.down_blocks.") != std::string::npos) {
18171817
is_unet = true;
18181818
if (has_multiple_encoders) {
18191819
is_xl = true;
1820-
if (input_block_checked) {
1821-
break;
1822-
}
18231820
}
18241821
}
1825-
if (tensor_storage.name.find("conditioner.embedders.1") != std::string::npos || tensor_storage.name.find("cond_stage_model.1") != std::string::npos || tensor_storage.name.find("te.1") != std::string::npos) {
1822+
if (tensor_storage.name.find("conditioner.embedders.1") != std::string::npos ||
1823+
tensor_storage.name.find("cond_stage_model.1") != std::string::npos ||
1824+
tensor_storage.name.find("te.1") != std::string::npos) {
18261825
has_multiple_encoders = true;
18271826
if (is_unet) {
18281827
is_xl = true;
1829-
if (input_block_checked) {
1830-
break;
1831-
}
18321828
}
18331829
}
18341830
if (tensor_storage.name.find("model.diffusion_model.input_blocks.8.0.time_mixer.mix_factor") != std::string::npos) {
@@ -1848,12 +1844,10 @@ SDVersion ModelLoader::get_sd_version() {
18481844
token_embedding_weight = tensor_storage;
18491845
// break;
18501846
}
1851-
if (tensor_storage.name == "model.diffusion_model.input_blocks.0.0.weight" || tensor_storage.name == "model.diffusion_model.img_in.weight" || tensor_storage.name == "unet.conv_in.weight") {
1852-
input_block_weight = tensor_storage;
1853-
input_block_checked = true;
1854-
if (is_flux) {
1855-
break;
1856-
}
1847+
if (tensor_storage.name == "model.diffusion_model.input_blocks.0.0.weight" ||
1848+
tensor_storage.name == "model.diffusion_model.img_in.weight" ||
1849+
tensor_storage.name == "unet.conv_in.weight") {
1850+
input_block_weight = tensor_storage;
18571851
}
18581852
}
18591853
if (is_wan) {

model.h

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@ enum SDVersion {
3636
VERSION_FLUX_FILL,
3737
VERSION_FLUX_CONTROLS,
3838
VERSION_FLEX_2,
39+
VERSION_CHROMA_RADIANCE,
3940
VERSION_WAN2,
4041
VERSION_WAN2_2_I2V,
4142
VERSION_WAN2_2_TI2V,
@@ -72,7 +73,11 @@ static inline bool sd_version_is_sd3(SDVersion version) {
7273
}
7374

7475
static inline bool sd_version_is_flux(SDVersion version) {
75-
if (version == VERSION_FLUX || version == VERSION_FLUX_FILL || version == VERSION_FLUX_CONTROLS || version == VERSION_FLEX_2) {
76+
if (version == VERSION_FLUX ||
77+
version == VERSION_FLUX_FILL ||
78+
version == VERSION_FLUX_CONTROLS ||
79+
version == VERSION_FLEX_2 ||
80+
version == VERSION_CHROMA_RADIANCE) {
7681
return true;
7782
}
7883
return false;

qwen_image.hpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -649,7 +649,7 @@ namespace Qwen {
649649

650650
static void load_from_file_and_test(const std::string& file_path) {
651651
// cuda q8: pass
652-
// cuda q8 fa: nan
652+
// cuda q8 fa: pass
653653
// ggml_backend_t backend = ggml_backend_cuda_init(0);
654654
ggml_backend_t backend = ggml_backend_cpu_init();
655655
ggml_type model_data_type = GGML_TYPE_Q8_0;

0 commit comments

Comments
 (0)