@@ -97,6 +97,8 @@ struct SDParams {
9797 bool clip_on_cpu = false ;
9898 bool vae_on_cpu = false ;
9999 bool diffusion_flash_attn = false ;
100+ bool diffusion_conv_direct = false ;
101+ bool vae_conv_direct = false ;
100102 bool canny_preprocess = false ;
101103 bool color = false ;
102104 int upscale_repeats = 1 ;
@@ -142,6 +144,8 @@ void print_params(SDParams params) {
142144 printf (" controlnet cpu: %s\n " , params.control_net_cpu ? " true" : " false" );
143145 printf (" vae decoder on cpu:%s\n " , params.vae_on_cpu ? " true" : " false" );
144146 printf (" diffusion flash attention:%s\n " , params.diffusion_flash_attn ? " true" : " false" );
147+ printf (" diffusion Conv2d direct:%s\n " , params.diffusion_conv_direct ? " true" : " false" );
148+ printf (" vae Conv2d direct:%s\n " , params.vae_conv_direct ? " true" : " false" );
145149 printf (" strength(control): %.2f\n " , params.control_strength );
146150 printf (" prompt: %s\n " , params.prompt .c_str ());
147151 printf (" negative_prompt: %s\n " , params.negative_prompt .c_str ());
@@ -232,6 +236,10 @@ void print_usage(int argc, const char* argv[]) {
232236 printf (" --diffusion-fa use flash attention in the diffusion model (for low vram)\n " );
233237 printf (" Might lower quality, since it implies converting k and v to f16.\n " );
234238 printf (" This might crash if it is not supported by the backend.\n " );
239+ printf (" --diffusion-conv-direct use Conv2d direct in the diffusion model" );
240+ printf (" This might crash if it is not supported by the backend.\n " );
241+ printf (" --vae-conv-direct use Conv2d direct in the vae model (should improve the performance)" );
242+ printf (" This might crash if it is not supported by the backend.\n " );
235243 printf (" --control-net-cpu keep controlnet in cpu (for low vram)\n " );
236244 printf (" --canny apply canny preprocessor (edge detection)\n " );
237245 printf (" --color colors the logging tags according to level\n " );
@@ -422,6 +430,8 @@ void parse_args(int argc, const char** argv, SDParams& params) {
422430 {" " , " --clip-on-cpu" , " " , true , &params.clip_on_cpu },
423431 {" " , " --vae-on-cpu" , " " , true , &params.vae_on_cpu },
424432 {" " , " --diffusion-fa" , " " , true , &params.diffusion_flash_attn },
433+ {" " , " --diffusion-conv-direct" , " " , true , &params.diffusion_conv_direct },
434+ {" " , " --vae-conv-direct" , " " , true , &params.vae_conv_direct },
425435 {" " , " --canny" , " " , true , &params.canny_preprocess },
426436 {" -v" , " --verbos" , " " , true , &params.verbose },
427437 {" " , " --color" , " " , true , &params.color },
@@ -901,6 +911,8 @@ int main(int argc, const char* argv[]) {
901911 params.control_net_cpu ,
902912 params.vae_on_cpu ,
903913 params.diffusion_flash_attn ,
914+ params.diffusion_conv_direct ,
915+ params.vae_conv_direct ,
904916 params.chroma_use_dit_mask ,
905917 params.chroma_use_t5_mask ,
906918 params.chroma_t5_mask_pad ,
@@ -1012,7 +1024,8 @@ int main(int argc, const char* argv[]) {
10121024 int upscale_factor = 4 ; // unused for RealESRGAN_x4plus_anime_6B.pth
10131025 if (params.esrgan_path .size () > 0 && params.upscale_repeats > 0 ) {
10141026 upscaler_ctx_t * upscaler_ctx = new_upscaler_ctx (params.esrgan_path .c_str (),
1015- params.n_threads );
1027+ params.n_threads ,
1028+ params.diffusion_conv_direct );
10161029
10171030 if (upscaler_ctx == NULL ) {
10181031 printf (" new_upscaler_ctx failed\n " );
0 commit comments