diff --git a/.gitignore b/.gitignore index 8084e7b..f4104ec 100644 --- a/.gitignore +++ b/.gitignore @@ -201,4 +201,6 @@ cython_debug/ data/ ddata/ migrators/ -data.*.tar \ No newline at end of file +data.*.tar +sd-v1-5.safetensors +out/ diff --git a/README.md b/README.md index 5abcb59..add3be1 100644 --- a/README.md +++ b/README.md @@ -95,6 +95,24 @@ prompts = ["a photograph of an astronaut riding a horse"] images = pipeline.generate(prompts, models=models, device='cuda', idle_device='cpu') ``` +Load an original stable diffusion model in safetensors format: +```py +import safetensors.torch +from stable_diffusion_pytorch import pipeline, convert_from_sdmodel, model_loader + +# load the checkpoint file +state_dict = safetensors.torch.load_file('/PATH/TO/sd-v1-5.safetensors') + +# split the state_dict into the 4 state_dicts for our models +state_dicts = convert_from_sdmodel.split_state_dict(state_dict) + +# create the model objects, and apply the weights in state_dicts +models = model_loader.load_models(state_dicts, 'cpu') + +prompts = ["a photograph of an astronaut riding a horse"] +images = pipeline.generate(prompts, models=models, device='cuda', idle_device='cpu') +``` + Image-to-image generation: ```py from PIL import Image diff --git a/convert_model.py b/convert_model.py new file mode 100644 index 0000000..c72460f --- /dev/null +++ b/convert_model.py @@ -0,0 +1,28 @@ +import argparse +import safetensors.torch +from stable_diffusion_pytorch import convert_from_sdmodel + +parser = argparse.ArgumentParser(description='Convert an original stable diffusion checkpoint file in safetensor format to 4 separate safetensors for this library.') +parser.add_argument( + "--sd_model", type=str, required=True, + help="Stable Diffusion model to load in safetensors format" +) +parser.add_argument( + "--save_to", type=str, required=True, + help="The prefix of the path to save to, for example \"./mymodel_\" " \ + "will save 4 files, mymodel_clip.safetensors, 
mymodel_decoder.safetensors, " \ + "mymodel_encoder.safetensors, and mymodel_diffusion.safetensors" +) + +args = parser.parse_args() + +# load the checkpoint file +state_dict = safetensors.torch.load_file(args.sd_model) + +# convert to the state_dicts format that our library expects +state_dicts = convert_from_sdmodel.split_state_dict(state_dict) + +for key in state_dicts: + outPath = args.save_to + key + '.safetensors' + print('Writing', outPath) + safetensors.torch.save_file(state_dicts[key], outPath) diff --git a/demo_sdmodel_generate_image.py b/demo_sdmodel_generate_image.py new file mode 100644 index 0000000..efaf0a2 --- /dev/null +++ b/demo_sdmodel_generate_image.py @@ -0,0 +1,46 @@ +# python3 +# A demo of loading a third party model (everythingV5) meant for the original stable diffusion code, and generating images. +import os +import safetensors.torch +import torch +from stable_diffusion_pytorch import pipeline, convert_from_sdmodel, model_loader + +# Runs on intel gpu by default, to run on cuda remove the following import and change 'xpu' to 'cuda'. 
+import intel_extension_for_pytorch as ipex +device = 'xpu' + + +# where to store generated images +outDir = './out' + +# path to SD 1.4/1.5 based model safetensors file +modelPath = './sd-v1-5.safetensors' # download from https://huggingface.co/tabtap/sd-v1-5.safetensor/tree/main + +# either float16 or float32 +dtype = torch.float16 + + + +os.makedirs(outDir, exist_ok=True) + +# load the checkpoint file +state_dict = safetensors.torch.load_file(modelPath) + +# convert to the state_dicts format that our library expects +state_dicts = convert_from_sdmodel.split_state_dict(state_dict) + +# create the model objects, and apply the weights in state_dicts +models = model_loader.load_models(state_dicts, device, dtype) + +steps = 40 +seed = 12345 +prompt = '1girl,cirno,anime style' +negativePrompt = 'bad anatomy,bad hands,missing fingers,extra fingers' + +fileName = prompt.replace(' ', '_').replace('\\', '＼').replace(':', '⦂').replace(',', '-') + '.' + str(seed) + '.png' + +images = pipeline.generate([prompt], uncond_prompts=[negativePrompt], + models=models, n_inference_steps=steps, seed=seed, device=device, + height=768, width=512) + +images[0].save(outDir + '/' + fileName) diff --git a/stable_diffusion_pytorch/convert_from_sdmodel.py b/stable_diffusion_pytorch/convert_from_sdmodel.py new file mode 100644 index 0000000..3f09b1b --- /dev/null +++ b/stable_diffusion_pytorch/convert_from_sdmodel.py @@ -0,0 +1,1069 @@ +import torch +import safetensors.torch +from . import Tokenizer, CLIP, Encoder, Decoder, Diffusion + +r""" + Our library model implementation is laid out differently from the original stable diffusion, so + original SD state_dict can not be directly loaded. This function converts an original SD + state_dict to the 4 state_dicts (clip, encoder, decoder, diffusion) that can be passed to + model_loader.load_models() + Args: + state_dict (`Dict[str, str]`): + The original state_dict to convert. + Returns: + `Dict[str, Dict[str, str]]`: + The converted state dicts. 
+ """ +def split_state_dict(state_dict): + new = {} + new['diffusion'] = {} + new['encoder'] = {} + new['decoder'] = {} + new['clip'] = {} + + s = state_dict + new['diffusion']['time_embedding.linear_1.weight'] = s['model.diffusion_model.time_embed.0.weight'] + new['diffusion']['time_embedding.linear_1.bias'] = s['model.diffusion_model.time_embed.0.bias'] + new['diffusion']['time_embedding.linear_2.weight'] = s['model.diffusion_model.time_embed.2.weight'] + new['diffusion']['time_embedding.linear_2.bias'] = s['model.diffusion_model.time_embed.2.bias'] + new['diffusion']['unet.encoders.0.0.weight'] = s['model.diffusion_model.input_blocks.0.0.weight'] + new['diffusion']['unet.encoders.0.0.bias'] = s['model.diffusion_model.input_blocks.0.0.bias'] + new['diffusion']['unet.encoders.1.0.groupnorm_feature.weight'] = s['model.diffusion_model.input_blocks.1.0.in_layers.0.weight'] + new['diffusion']['unet.encoders.1.0.groupnorm_feature.bias'] = s['model.diffusion_model.input_blocks.1.0.in_layers.0.bias'] + new['diffusion']['unet.encoders.1.0.conv_feature.weight'] = s['model.diffusion_model.input_blocks.1.0.in_layers.2.weight'] + new['diffusion']['unet.encoders.1.0.conv_feature.bias'] = s['model.diffusion_model.input_blocks.1.0.in_layers.2.bias'] + new['diffusion']['unet.encoders.1.0.linear_time.weight'] = s['model.diffusion_model.input_blocks.1.0.emb_layers.1.weight'] + new['diffusion']['unet.encoders.1.0.linear_time.bias'] = s['model.diffusion_model.input_blocks.1.0.emb_layers.1.bias'] + new['diffusion']['unet.encoders.1.0.groupnorm_merged.weight'] = s['model.diffusion_model.input_blocks.1.0.out_layers.0.weight'] + new['diffusion']['unet.encoders.1.0.groupnorm_merged.bias'] = s['model.diffusion_model.input_blocks.1.0.out_layers.0.bias'] + new['diffusion']['unet.encoders.1.0.conv_merged.weight'] = s['model.diffusion_model.input_blocks.1.0.out_layers.3.weight'] + new['diffusion']['unet.encoders.1.0.conv_merged.bias'] = 
s['model.diffusion_model.input_blocks.1.0.out_layers.3.bias'] + new['diffusion']['unet.encoders.1.1.groupnorm.weight'] = s['model.diffusion_model.input_blocks.1.1.norm.weight'] + new['diffusion']['unet.encoders.1.1.groupnorm.bias'] = s['model.diffusion_model.input_blocks.1.1.norm.bias'] + new['diffusion']['unet.encoders.1.1.conv_input.weight'] = s['model.diffusion_model.input_blocks.1.1.proj_in.weight'] + new['diffusion']['unet.encoders.1.1.conv_input.bias'] = s['model.diffusion_model.input_blocks.1.1.proj_in.bias'] + new['diffusion']['unet.encoders.1.1.attention_1.out_proj.weight'] = s['model.diffusion_model.input_blocks.1.1.transformer_blocks.0.attn1.to_out.0.weight'] + new['diffusion']['unet.encoders.1.1.attention_1.out_proj.bias'] = s['model.diffusion_model.input_blocks.1.1.transformer_blocks.0.attn1.to_out.0.bias'] + new['diffusion']['unet.encoders.1.1.linear_geglu_1.weight'] = s['model.diffusion_model.input_blocks.1.1.transformer_blocks.0.ff.net.0.proj.weight'] + new['diffusion']['unet.encoders.1.1.linear_geglu_1.bias'] = s['model.diffusion_model.input_blocks.1.1.transformer_blocks.0.ff.net.0.proj.bias'] + new['diffusion']['unet.encoders.1.1.linear_geglu_2.weight'] = s['model.diffusion_model.input_blocks.1.1.transformer_blocks.0.ff.net.2.weight'] + new['diffusion']['unet.encoders.1.1.linear_geglu_2.bias'] = s['model.diffusion_model.input_blocks.1.1.transformer_blocks.0.ff.net.2.bias'] + new['diffusion']['unet.encoders.1.1.attention_2.q_proj.weight'] = s['model.diffusion_model.input_blocks.1.1.transformer_blocks.0.attn2.to_q.weight'] + new['diffusion']['unet.encoders.1.1.attention_2.k_proj.weight'] = s['model.diffusion_model.input_blocks.1.1.transformer_blocks.0.attn2.to_k.weight'] + new['diffusion']['unet.encoders.1.1.attention_2.v_proj.weight'] = s['model.diffusion_model.input_blocks.1.1.transformer_blocks.0.attn2.to_v.weight'] + new['diffusion']['unet.encoders.1.1.attention_2.out_proj.weight'] = 
s['model.diffusion_model.input_blocks.1.1.transformer_blocks.0.attn2.to_out.0.weight'] + new['diffusion']['unet.encoders.1.1.attention_2.out_proj.bias'] = s['model.diffusion_model.input_blocks.1.1.transformer_blocks.0.attn2.to_out.0.bias'] + new['diffusion']['unet.encoders.1.1.layernorm_1.weight'] = s['model.diffusion_model.input_blocks.1.1.transformer_blocks.0.norm1.weight'] + new['diffusion']['unet.encoders.1.1.layernorm_1.bias'] = s['model.diffusion_model.input_blocks.1.1.transformer_blocks.0.norm1.bias'] + new['diffusion']['unet.encoders.1.1.layernorm_2.weight'] = s['model.diffusion_model.input_blocks.1.1.transformer_blocks.0.norm2.weight'] + new['diffusion']['unet.encoders.1.1.layernorm_2.bias'] = s['model.diffusion_model.input_blocks.1.1.transformer_blocks.0.norm2.bias'] + new['diffusion']['unet.encoders.1.1.layernorm_3.weight'] = s['model.diffusion_model.input_blocks.1.1.transformer_blocks.0.norm3.weight'] + new['diffusion']['unet.encoders.1.1.layernorm_3.bias'] = s['model.diffusion_model.input_blocks.1.1.transformer_blocks.0.norm3.bias'] + new['diffusion']['unet.encoders.1.1.conv_output.weight'] = s['model.diffusion_model.input_blocks.1.1.proj_out.weight'] + new['diffusion']['unet.encoders.1.1.conv_output.bias'] = s['model.diffusion_model.input_blocks.1.1.proj_out.bias'] + new['diffusion']['unet.encoders.2.0.groupnorm_feature.weight'] = s['model.diffusion_model.input_blocks.2.0.in_layers.0.weight'] + new['diffusion']['unet.encoders.2.0.groupnorm_feature.bias'] = s['model.diffusion_model.input_blocks.2.0.in_layers.0.bias'] + new['diffusion']['unet.encoders.2.0.conv_feature.weight'] = s['model.diffusion_model.input_blocks.2.0.in_layers.2.weight'] + new['diffusion']['unet.encoders.2.0.conv_feature.bias'] = s['model.diffusion_model.input_blocks.2.0.in_layers.2.bias'] + new['diffusion']['unet.encoders.2.0.linear_time.weight'] = s['model.diffusion_model.input_blocks.2.0.emb_layers.1.weight'] + new['diffusion']['unet.encoders.2.0.linear_time.bias'] = 
s['model.diffusion_model.input_blocks.2.0.emb_layers.1.bias'] + new['diffusion']['unet.encoders.2.0.groupnorm_merged.weight'] = s['model.diffusion_model.input_blocks.2.0.out_layers.0.weight'] + new['diffusion']['unet.encoders.2.0.groupnorm_merged.bias'] = s['model.diffusion_model.input_blocks.2.0.out_layers.0.bias'] + new['diffusion']['unet.encoders.2.0.conv_merged.weight'] = s['model.diffusion_model.input_blocks.2.0.out_layers.3.weight'] + new['diffusion']['unet.encoders.2.0.conv_merged.bias'] = s['model.diffusion_model.input_blocks.2.0.out_layers.3.bias'] + new['diffusion']['unet.encoders.2.1.groupnorm.weight'] = s['model.diffusion_model.input_blocks.2.1.norm.weight'] + new['diffusion']['unet.encoders.2.1.groupnorm.bias'] = s['model.diffusion_model.input_blocks.2.1.norm.bias'] + new['diffusion']['unet.encoders.2.1.conv_input.weight'] = s['model.diffusion_model.input_blocks.2.1.proj_in.weight'] + new['diffusion']['unet.encoders.2.1.conv_input.bias'] = s['model.diffusion_model.input_blocks.2.1.proj_in.bias'] + new['diffusion']['unet.encoders.2.1.attention_1.out_proj.weight'] = s['model.diffusion_model.input_blocks.2.1.transformer_blocks.0.attn1.to_out.0.weight'] + new['diffusion']['unet.encoders.2.1.attention_1.out_proj.bias'] = s['model.diffusion_model.input_blocks.2.1.transformer_blocks.0.attn1.to_out.0.bias'] + new['diffusion']['unet.encoders.2.1.linear_geglu_1.weight'] = s['model.diffusion_model.input_blocks.2.1.transformer_blocks.0.ff.net.0.proj.weight'] + new['diffusion']['unet.encoders.2.1.linear_geglu_1.bias'] = s['model.diffusion_model.input_blocks.2.1.transformer_blocks.0.ff.net.0.proj.bias'] + new['diffusion']['unet.encoders.2.1.linear_geglu_2.weight'] = s['model.diffusion_model.input_blocks.2.1.transformer_blocks.0.ff.net.2.weight'] + new['diffusion']['unet.encoders.2.1.linear_geglu_2.bias'] = s['model.diffusion_model.input_blocks.2.1.transformer_blocks.0.ff.net.2.bias'] + new['diffusion']['unet.encoders.2.1.attention_2.q_proj.weight'] = 
s['model.diffusion_model.input_blocks.2.1.transformer_blocks.0.attn2.to_q.weight'] + new['diffusion']['unet.encoders.2.1.attention_2.k_proj.weight'] = s['model.diffusion_model.input_blocks.2.1.transformer_blocks.0.attn2.to_k.weight'] + new['diffusion']['unet.encoders.2.1.attention_2.v_proj.weight'] = s['model.diffusion_model.input_blocks.2.1.transformer_blocks.0.attn2.to_v.weight'] + new['diffusion']['unet.encoders.2.1.attention_2.out_proj.weight'] = s['model.diffusion_model.input_blocks.2.1.transformer_blocks.0.attn2.to_out.0.weight'] + new['diffusion']['unet.encoders.2.1.attention_2.out_proj.bias'] = s['model.diffusion_model.input_blocks.2.1.transformer_blocks.0.attn2.to_out.0.bias'] + new['diffusion']['unet.encoders.2.1.layernorm_1.weight'] = s['model.diffusion_model.input_blocks.2.1.transformer_blocks.0.norm1.weight'] + new['diffusion']['unet.encoders.2.1.layernorm_1.bias'] = s['model.diffusion_model.input_blocks.2.1.transformer_blocks.0.norm1.bias'] + new['diffusion']['unet.encoders.2.1.layernorm_2.weight'] = s['model.diffusion_model.input_blocks.2.1.transformer_blocks.0.norm2.weight'] + new['diffusion']['unet.encoders.2.1.layernorm_2.bias'] = s['model.diffusion_model.input_blocks.2.1.transformer_blocks.0.norm2.bias'] + new['diffusion']['unet.encoders.2.1.layernorm_3.weight'] = s['model.diffusion_model.input_blocks.2.1.transformer_blocks.0.norm3.weight'] + new['diffusion']['unet.encoders.2.1.layernorm_3.bias'] = s['model.diffusion_model.input_blocks.2.1.transformer_blocks.0.norm3.bias'] + new['diffusion']['unet.encoders.2.1.conv_output.weight'] = s['model.diffusion_model.input_blocks.2.1.proj_out.weight'] + new['diffusion']['unet.encoders.2.1.conv_output.bias'] = s['model.diffusion_model.input_blocks.2.1.proj_out.bias'] + new['diffusion']['unet.encoders.3.0.weight'] = s['model.diffusion_model.input_blocks.3.0.op.weight'] + new['diffusion']['unet.encoders.3.0.bias'] = s['model.diffusion_model.input_blocks.3.0.op.bias'] + 
new['diffusion']['unet.encoders.4.0.groupnorm_feature.weight'] = s['model.diffusion_model.input_blocks.4.0.in_layers.0.weight'] + new['diffusion']['unet.encoders.4.0.groupnorm_feature.bias'] = s['model.diffusion_model.input_blocks.4.0.in_layers.0.bias'] + new['diffusion']['unet.encoders.4.0.conv_feature.weight'] = s['model.diffusion_model.input_blocks.4.0.in_layers.2.weight'] + new['diffusion']['unet.encoders.4.0.conv_feature.bias'] = s['model.diffusion_model.input_blocks.4.0.in_layers.2.bias'] + new['diffusion']['unet.encoders.4.0.linear_time.weight'] = s['model.diffusion_model.input_blocks.4.0.emb_layers.1.weight'] + new['diffusion']['unet.encoders.4.0.linear_time.bias'] = s['model.diffusion_model.input_blocks.4.0.emb_layers.1.bias'] + new['diffusion']['unet.encoders.4.0.groupnorm_merged.weight'] = s['model.diffusion_model.input_blocks.4.0.out_layers.0.weight'] + new['diffusion']['unet.encoders.4.0.groupnorm_merged.bias'] = s['model.diffusion_model.input_blocks.4.0.out_layers.0.bias'] + new['diffusion']['unet.encoders.4.0.conv_merged.weight'] = s['model.diffusion_model.input_blocks.4.0.out_layers.3.weight'] + new['diffusion']['unet.encoders.4.0.conv_merged.bias'] = s['model.diffusion_model.input_blocks.4.0.out_layers.3.bias'] + new['diffusion']['unet.encoders.4.0.residual_layer.weight'] = s['model.diffusion_model.input_blocks.4.0.skip_connection.weight'] + new['diffusion']['unet.encoders.4.0.residual_layer.bias'] = s['model.diffusion_model.input_blocks.4.0.skip_connection.bias'] + new['diffusion']['unet.encoders.4.1.groupnorm.weight'] = s['model.diffusion_model.input_blocks.4.1.norm.weight'] + new['diffusion']['unet.encoders.4.1.groupnorm.bias'] = s['model.diffusion_model.input_blocks.4.1.norm.bias'] + new['diffusion']['unet.encoders.4.1.conv_input.weight'] = s['model.diffusion_model.input_blocks.4.1.proj_in.weight'] + new['diffusion']['unet.encoders.4.1.conv_input.bias'] = s['model.diffusion_model.input_blocks.4.1.proj_in.bias'] + 
new['diffusion']['unet.encoders.4.1.attention_1.out_proj.weight'] = s['model.diffusion_model.input_blocks.4.1.transformer_blocks.0.attn1.to_out.0.weight'] + new['diffusion']['unet.encoders.4.1.attention_1.out_proj.bias'] = s['model.diffusion_model.input_blocks.4.1.transformer_blocks.0.attn1.to_out.0.bias'] + new['diffusion']['unet.encoders.4.1.linear_geglu_1.weight'] = s['model.diffusion_model.input_blocks.4.1.transformer_blocks.0.ff.net.0.proj.weight'] + new['diffusion']['unet.encoders.4.1.linear_geglu_1.bias'] = s['model.diffusion_model.input_blocks.4.1.transformer_blocks.0.ff.net.0.proj.bias'] + new['diffusion']['unet.encoders.4.1.linear_geglu_2.weight'] = s['model.diffusion_model.input_blocks.4.1.transformer_blocks.0.ff.net.2.weight'] + new['diffusion']['unet.encoders.4.1.linear_geglu_2.bias'] = s['model.diffusion_model.input_blocks.4.1.transformer_blocks.0.ff.net.2.bias'] + new['diffusion']['unet.encoders.4.1.attention_2.q_proj.weight'] = s['model.diffusion_model.input_blocks.4.1.transformer_blocks.0.attn2.to_q.weight'] + new['diffusion']['unet.encoders.4.1.attention_2.k_proj.weight'] = s['model.diffusion_model.input_blocks.4.1.transformer_blocks.0.attn2.to_k.weight'] + new['diffusion']['unet.encoders.4.1.attention_2.v_proj.weight'] = s['model.diffusion_model.input_blocks.4.1.transformer_blocks.0.attn2.to_v.weight'] + new['diffusion']['unet.encoders.4.1.attention_2.out_proj.weight'] = s['model.diffusion_model.input_blocks.4.1.transformer_blocks.0.attn2.to_out.0.weight'] + new['diffusion']['unet.encoders.4.1.attention_2.out_proj.bias'] = s['model.diffusion_model.input_blocks.4.1.transformer_blocks.0.attn2.to_out.0.bias'] + new['diffusion']['unet.encoders.4.1.layernorm_1.weight'] = s['model.diffusion_model.input_blocks.4.1.transformer_blocks.0.norm1.weight'] + new['diffusion']['unet.encoders.4.1.layernorm_1.bias'] = s['model.diffusion_model.input_blocks.4.1.transformer_blocks.0.norm1.bias'] + new['diffusion']['unet.encoders.4.1.layernorm_2.weight'] = 
s['model.diffusion_model.input_blocks.4.1.transformer_blocks.0.norm2.weight'] + new['diffusion']['unet.encoders.4.1.layernorm_2.bias'] = s['model.diffusion_model.input_blocks.4.1.transformer_blocks.0.norm2.bias'] + new['diffusion']['unet.encoders.4.1.layernorm_3.weight'] = s['model.diffusion_model.input_blocks.4.1.transformer_blocks.0.norm3.weight'] + new['diffusion']['unet.encoders.4.1.layernorm_3.bias'] = s['model.diffusion_model.input_blocks.4.1.transformer_blocks.0.norm3.bias'] + new['diffusion']['unet.encoders.4.1.conv_output.weight'] = s['model.diffusion_model.input_blocks.4.1.proj_out.weight'] + new['diffusion']['unet.encoders.4.1.conv_output.bias'] = s['model.diffusion_model.input_blocks.4.1.proj_out.bias'] + new['diffusion']['unet.encoders.5.0.groupnorm_feature.weight'] = s['model.diffusion_model.input_blocks.5.0.in_layers.0.weight'] + new['diffusion']['unet.encoders.5.0.groupnorm_feature.bias'] = s['model.diffusion_model.input_blocks.5.0.in_layers.0.bias'] + new['diffusion']['unet.encoders.5.0.conv_feature.weight'] = s['model.diffusion_model.input_blocks.5.0.in_layers.2.weight'] + new['diffusion']['unet.encoders.5.0.conv_feature.bias'] = s['model.diffusion_model.input_blocks.5.0.in_layers.2.bias'] + new['diffusion']['unet.encoders.5.0.linear_time.weight'] = s['model.diffusion_model.input_blocks.5.0.emb_layers.1.weight'] + new['diffusion']['unet.encoders.5.0.linear_time.bias'] = s['model.diffusion_model.input_blocks.5.0.emb_layers.1.bias'] + new['diffusion']['unet.encoders.5.0.groupnorm_merged.weight'] = s['model.diffusion_model.input_blocks.5.0.out_layers.0.weight'] + new['diffusion']['unet.encoders.5.0.groupnorm_merged.bias'] = s['model.diffusion_model.input_blocks.5.0.out_layers.0.bias'] + new['diffusion']['unet.encoders.5.0.conv_merged.weight'] = s['model.diffusion_model.input_blocks.5.0.out_layers.3.weight'] + new['diffusion']['unet.encoders.5.0.conv_merged.bias'] = s['model.diffusion_model.input_blocks.5.0.out_layers.3.bias'] + 
new['diffusion']['unet.encoders.5.1.groupnorm.weight'] = s['model.diffusion_model.input_blocks.5.1.norm.weight'] + new['diffusion']['unet.encoders.5.1.groupnorm.bias'] = s['model.diffusion_model.input_blocks.5.1.norm.bias'] + new['diffusion']['unet.encoders.5.1.conv_input.weight'] = s['model.diffusion_model.input_blocks.5.1.proj_in.weight'] + new['diffusion']['unet.encoders.5.1.conv_input.bias'] = s['model.diffusion_model.input_blocks.5.1.proj_in.bias'] + new['diffusion']['unet.encoders.5.1.attention_1.out_proj.weight'] = s['model.diffusion_model.input_blocks.5.1.transformer_blocks.0.attn1.to_out.0.weight'] + new['diffusion']['unet.encoders.5.1.attention_1.out_proj.bias'] = s['model.diffusion_model.input_blocks.5.1.transformer_blocks.0.attn1.to_out.0.bias'] + new['diffusion']['unet.encoders.5.1.linear_geglu_1.weight'] = s['model.diffusion_model.input_blocks.5.1.transformer_blocks.0.ff.net.0.proj.weight'] + new['diffusion']['unet.encoders.5.1.linear_geglu_1.bias'] = s['model.diffusion_model.input_blocks.5.1.transformer_blocks.0.ff.net.0.proj.bias'] + new['diffusion']['unet.encoders.5.1.linear_geglu_2.weight'] = s['model.diffusion_model.input_blocks.5.1.transformer_blocks.0.ff.net.2.weight'] + new['diffusion']['unet.encoders.5.1.linear_geglu_2.bias'] = s['model.diffusion_model.input_blocks.5.1.transformer_blocks.0.ff.net.2.bias'] + new['diffusion']['unet.encoders.5.1.attention_2.q_proj.weight'] = s['model.diffusion_model.input_blocks.5.1.transformer_blocks.0.attn2.to_q.weight'] + new['diffusion']['unet.encoders.5.1.attention_2.k_proj.weight'] = s['model.diffusion_model.input_blocks.5.1.transformer_blocks.0.attn2.to_k.weight'] + new['diffusion']['unet.encoders.5.1.attention_2.v_proj.weight'] = s['model.diffusion_model.input_blocks.5.1.transformer_blocks.0.attn2.to_v.weight'] + new['diffusion']['unet.encoders.5.1.attention_2.out_proj.weight'] = s['model.diffusion_model.input_blocks.5.1.transformer_blocks.0.attn2.to_out.0.weight'] + 
new['diffusion']['unet.encoders.5.1.attention_2.out_proj.bias'] = s['model.diffusion_model.input_blocks.5.1.transformer_blocks.0.attn2.to_out.0.bias'] + new['diffusion']['unet.encoders.5.1.layernorm_1.weight'] = s['model.diffusion_model.input_blocks.5.1.transformer_blocks.0.norm1.weight'] + new['diffusion']['unet.encoders.5.1.layernorm_1.bias'] = s['model.diffusion_model.input_blocks.5.1.transformer_blocks.0.norm1.bias'] + new['diffusion']['unet.encoders.5.1.layernorm_2.weight'] = s['model.diffusion_model.input_blocks.5.1.transformer_blocks.0.norm2.weight'] + new['diffusion']['unet.encoders.5.1.layernorm_2.bias'] = s['model.diffusion_model.input_blocks.5.1.transformer_blocks.0.norm2.bias'] + new['diffusion']['unet.encoders.5.1.layernorm_3.weight'] = s['model.diffusion_model.input_blocks.5.1.transformer_blocks.0.norm3.weight'] + new['diffusion']['unet.encoders.5.1.layernorm_3.bias'] = s['model.diffusion_model.input_blocks.5.1.transformer_blocks.0.norm3.bias'] + new['diffusion']['unet.encoders.5.1.conv_output.weight'] = s['model.diffusion_model.input_blocks.5.1.proj_out.weight'] + new['diffusion']['unet.encoders.5.1.conv_output.bias'] = s['model.diffusion_model.input_blocks.5.1.proj_out.bias'] + new['diffusion']['unet.encoders.6.0.weight'] = s['model.diffusion_model.input_blocks.6.0.op.weight'] + new['diffusion']['unet.encoders.6.0.bias'] = s['model.diffusion_model.input_blocks.6.0.op.bias'] + new['diffusion']['unet.encoders.7.0.groupnorm_feature.weight'] = s['model.diffusion_model.input_blocks.7.0.in_layers.0.weight'] + new['diffusion']['unet.encoders.7.0.groupnorm_feature.bias'] = s['model.diffusion_model.input_blocks.7.0.in_layers.0.bias'] + new['diffusion']['unet.encoders.7.0.conv_feature.weight'] = s['model.diffusion_model.input_blocks.7.0.in_layers.2.weight'] + new['diffusion']['unet.encoders.7.0.conv_feature.bias'] = s['model.diffusion_model.input_blocks.7.0.in_layers.2.bias'] + new['diffusion']['unet.encoders.7.0.linear_time.weight'] = 
s['model.diffusion_model.input_blocks.7.0.emb_layers.1.weight'] + new['diffusion']['unet.encoders.7.0.linear_time.bias'] = s['model.diffusion_model.input_blocks.7.0.emb_layers.1.bias'] + new['diffusion']['unet.encoders.7.0.groupnorm_merged.weight'] = s['model.diffusion_model.input_blocks.7.0.out_layers.0.weight'] + new['diffusion']['unet.encoders.7.0.groupnorm_merged.bias'] = s['model.diffusion_model.input_blocks.7.0.out_layers.0.bias'] + new['diffusion']['unet.encoders.7.0.conv_merged.weight'] = s['model.diffusion_model.input_blocks.7.0.out_layers.3.weight'] + new['diffusion']['unet.encoders.7.0.conv_merged.bias'] = s['model.diffusion_model.input_blocks.7.0.out_layers.3.bias'] + new['diffusion']['unet.encoders.7.0.residual_layer.weight'] = s['model.diffusion_model.input_blocks.7.0.skip_connection.weight'] + new['diffusion']['unet.encoders.7.0.residual_layer.bias'] = s['model.diffusion_model.input_blocks.7.0.skip_connection.bias'] + new['diffusion']['unet.encoders.7.1.groupnorm.weight'] = s['model.diffusion_model.input_blocks.7.1.norm.weight'] + new['diffusion']['unet.encoders.7.1.groupnorm.bias'] = s['model.diffusion_model.input_blocks.7.1.norm.bias'] + new['diffusion']['unet.encoders.7.1.conv_input.weight'] = s['model.diffusion_model.input_blocks.7.1.proj_in.weight'] + new['diffusion']['unet.encoders.7.1.conv_input.bias'] = s['model.diffusion_model.input_blocks.7.1.proj_in.bias'] + new['diffusion']['unet.encoders.7.1.attention_1.out_proj.weight'] = s['model.diffusion_model.input_blocks.7.1.transformer_blocks.0.attn1.to_out.0.weight'] + new['diffusion']['unet.encoders.7.1.attention_1.out_proj.bias'] = s['model.diffusion_model.input_blocks.7.1.transformer_blocks.0.attn1.to_out.0.bias'] + new['diffusion']['unet.encoders.7.1.linear_geglu_1.weight'] = s['model.diffusion_model.input_blocks.7.1.transformer_blocks.0.ff.net.0.proj.weight'] + new['diffusion']['unet.encoders.7.1.linear_geglu_1.bias'] = 
s['model.diffusion_model.input_blocks.7.1.transformer_blocks.0.ff.net.0.proj.bias'] + new['diffusion']['unet.encoders.7.1.linear_geglu_2.weight'] = s['model.diffusion_model.input_blocks.7.1.transformer_blocks.0.ff.net.2.weight'] + new['diffusion']['unet.encoders.7.1.linear_geglu_2.bias'] = s['model.diffusion_model.input_blocks.7.1.transformer_blocks.0.ff.net.2.bias'] + new['diffusion']['unet.encoders.7.1.attention_2.q_proj.weight'] = s['model.diffusion_model.input_blocks.7.1.transformer_blocks.0.attn2.to_q.weight'] + new['diffusion']['unet.encoders.7.1.attention_2.k_proj.weight'] = s['model.diffusion_model.input_blocks.7.1.transformer_blocks.0.attn2.to_k.weight'] + new['diffusion']['unet.encoders.7.1.attention_2.v_proj.weight'] = s['model.diffusion_model.input_blocks.7.1.transformer_blocks.0.attn2.to_v.weight'] + new['diffusion']['unet.encoders.7.1.attention_2.out_proj.weight'] = s['model.diffusion_model.input_blocks.7.1.transformer_blocks.0.attn2.to_out.0.weight'] + new['diffusion']['unet.encoders.7.1.attention_2.out_proj.bias'] = s['model.diffusion_model.input_blocks.7.1.transformer_blocks.0.attn2.to_out.0.bias'] + new['diffusion']['unet.encoders.7.1.layernorm_1.weight'] = s['model.diffusion_model.input_blocks.7.1.transformer_blocks.0.norm1.weight'] + new['diffusion']['unet.encoders.7.1.layernorm_1.bias'] = s['model.diffusion_model.input_blocks.7.1.transformer_blocks.0.norm1.bias'] + new['diffusion']['unet.encoders.7.1.layernorm_2.weight'] = s['model.diffusion_model.input_blocks.7.1.transformer_blocks.0.norm2.weight'] + new['diffusion']['unet.encoders.7.1.layernorm_2.bias'] = s['model.diffusion_model.input_blocks.7.1.transformer_blocks.0.norm2.bias'] + new['diffusion']['unet.encoders.7.1.layernorm_3.weight'] = s['model.diffusion_model.input_blocks.7.1.transformer_blocks.0.norm3.weight'] + new['diffusion']['unet.encoders.7.1.layernorm_3.bias'] = s['model.diffusion_model.input_blocks.7.1.transformer_blocks.0.norm3.bias'] + 
new['diffusion']['unet.encoders.7.1.conv_output.weight'] = s['model.diffusion_model.input_blocks.7.1.proj_out.weight'] + new['diffusion']['unet.encoders.7.1.conv_output.bias'] = s['model.diffusion_model.input_blocks.7.1.proj_out.bias'] + new['diffusion']['unet.encoders.8.0.groupnorm_feature.weight'] = s['model.diffusion_model.input_blocks.8.0.in_layers.0.weight'] + new['diffusion']['unet.encoders.8.0.groupnorm_feature.bias'] = s['model.diffusion_model.input_blocks.8.0.in_layers.0.bias'] + new['diffusion']['unet.encoders.8.0.conv_feature.weight'] = s['model.diffusion_model.input_blocks.8.0.in_layers.2.weight'] + new['diffusion']['unet.encoders.8.0.conv_feature.bias'] = s['model.diffusion_model.input_blocks.8.0.in_layers.2.bias'] + new['diffusion']['unet.encoders.8.0.linear_time.weight'] = s['model.diffusion_model.input_blocks.8.0.emb_layers.1.weight'] + new['diffusion']['unet.encoders.8.0.linear_time.bias'] = s['model.diffusion_model.input_blocks.8.0.emb_layers.1.bias'] + new['diffusion']['unet.encoders.8.0.groupnorm_merged.weight'] = s['model.diffusion_model.input_blocks.8.0.out_layers.0.weight'] + new['diffusion']['unet.encoders.8.0.groupnorm_merged.bias'] = s['model.diffusion_model.input_blocks.8.0.out_layers.0.bias'] + new['diffusion']['unet.encoders.8.0.conv_merged.weight'] = s['model.diffusion_model.input_blocks.8.0.out_layers.3.weight'] + new['diffusion']['unet.encoders.8.0.conv_merged.bias'] = s['model.diffusion_model.input_blocks.8.0.out_layers.3.bias'] + new['diffusion']['unet.encoders.8.1.groupnorm.weight'] = s['model.diffusion_model.input_blocks.8.1.norm.weight'] + new['diffusion']['unet.encoders.8.1.groupnorm.bias'] = s['model.diffusion_model.input_blocks.8.1.norm.bias'] + new['diffusion']['unet.encoders.8.1.conv_input.weight'] = s['model.diffusion_model.input_blocks.8.1.proj_in.weight'] + new['diffusion']['unet.encoders.8.1.conv_input.bias'] = s['model.diffusion_model.input_blocks.8.1.proj_in.bias'] + 
new['diffusion']['unet.encoders.8.1.attention_1.out_proj.weight'] = s['model.diffusion_model.input_blocks.8.1.transformer_blocks.0.attn1.to_out.0.weight'] + new['diffusion']['unet.encoders.8.1.attention_1.out_proj.bias'] = s['model.diffusion_model.input_blocks.8.1.transformer_blocks.0.attn1.to_out.0.bias'] + new['diffusion']['unet.encoders.8.1.linear_geglu_1.weight'] = s['model.diffusion_model.input_blocks.8.1.transformer_blocks.0.ff.net.0.proj.weight'] + new['diffusion']['unet.encoders.8.1.linear_geglu_1.bias'] = s['model.diffusion_model.input_blocks.8.1.transformer_blocks.0.ff.net.0.proj.bias'] + new['diffusion']['unet.encoders.8.1.linear_geglu_2.weight'] = s['model.diffusion_model.input_blocks.8.1.transformer_blocks.0.ff.net.2.weight'] + new['diffusion']['unet.encoders.8.1.linear_geglu_2.bias'] = s['model.diffusion_model.input_blocks.8.1.transformer_blocks.0.ff.net.2.bias'] + new['diffusion']['unet.encoders.8.1.attention_2.q_proj.weight'] = s['model.diffusion_model.input_blocks.8.1.transformer_blocks.0.attn2.to_q.weight'] + new['diffusion']['unet.encoders.8.1.attention_2.k_proj.weight'] = s['model.diffusion_model.input_blocks.8.1.transformer_blocks.0.attn2.to_k.weight'] + new['diffusion']['unet.encoders.8.1.attention_2.v_proj.weight'] = s['model.diffusion_model.input_blocks.8.1.transformer_blocks.0.attn2.to_v.weight'] + new['diffusion']['unet.encoders.8.1.attention_2.out_proj.weight'] = s['model.diffusion_model.input_blocks.8.1.transformer_blocks.0.attn2.to_out.0.weight'] + new['diffusion']['unet.encoders.8.1.attention_2.out_proj.bias'] = s['model.diffusion_model.input_blocks.8.1.transformer_blocks.0.attn2.to_out.0.bias'] + new['diffusion']['unet.encoders.8.1.layernorm_1.weight'] = s['model.diffusion_model.input_blocks.8.1.transformer_blocks.0.norm1.weight'] + new['diffusion']['unet.encoders.8.1.layernorm_1.bias'] = s['model.diffusion_model.input_blocks.8.1.transformer_blocks.0.norm1.bias'] + new['diffusion']['unet.encoders.8.1.layernorm_2.weight'] = 
s['model.diffusion_model.input_blocks.8.1.transformer_blocks.0.norm2.weight'] + new['diffusion']['unet.encoders.8.1.layernorm_2.bias'] = s['model.diffusion_model.input_blocks.8.1.transformer_blocks.0.norm2.bias'] + new['diffusion']['unet.encoders.8.1.layernorm_3.weight'] = s['model.diffusion_model.input_blocks.8.1.transformer_blocks.0.norm3.weight'] + new['diffusion']['unet.encoders.8.1.layernorm_3.bias'] = s['model.diffusion_model.input_blocks.8.1.transformer_blocks.0.norm3.bias'] + new['diffusion']['unet.encoders.8.1.conv_output.weight'] = s['model.diffusion_model.input_blocks.8.1.proj_out.weight'] + new['diffusion']['unet.encoders.8.1.conv_output.bias'] = s['model.diffusion_model.input_blocks.8.1.proj_out.bias'] + new['diffusion']['unet.encoders.9.0.weight'] = s['model.diffusion_model.input_blocks.9.0.op.weight'] + new['diffusion']['unet.encoders.9.0.bias'] = s['model.diffusion_model.input_blocks.9.0.op.bias'] + new['diffusion']['unet.encoders.10.0.groupnorm_feature.weight'] = s['model.diffusion_model.input_blocks.10.0.in_layers.0.weight'] + new['diffusion']['unet.encoders.10.0.groupnorm_feature.bias'] = s['model.diffusion_model.input_blocks.10.0.in_layers.0.bias'] + new['diffusion']['unet.encoders.10.0.conv_feature.weight'] = s['model.diffusion_model.input_blocks.10.0.in_layers.2.weight'] + new['diffusion']['unet.encoders.10.0.conv_feature.bias'] = s['model.diffusion_model.input_blocks.10.0.in_layers.2.bias'] + new['diffusion']['unet.encoders.10.0.linear_time.weight'] = s['model.diffusion_model.input_blocks.10.0.emb_layers.1.weight'] + new['diffusion']['unet.encoders.10.0.linear_time.bias'] = s['model.diffusion_model.input_blocks.10.0.emb_layers.1.bias'] + new['diffusion']['unet.encoders.10.0.groupnorm_merged.weight'] = s['model.diffusion_model.input_blocks.10.0.out_layers.0.weight'] + new['diffusion']['unet.encoders.10.0.groupnorm_merged.bias'] = s['model.diffusion_model.input_blocks.10.0.out_layers.0.bias'] + 
new['diffusion']['unet.encoders.10.0.conv_merged.weight'] = s['model.diffusion_model.input_blocks.10.0.out_layers.3.weight'] + new['diffusion']['unet.encoders.10.0.conv_merged.bias'] = s['model.diffusion_model.input_blocks.10.0.out_layers.3.bias'] + new['diffusion']['unet.encoders.11.0.groupnorm_feature.weight'] = s['model.diffusion_model.input_blocks.11.0.in_layers.0.weight'] + new['diffusion']['unet.encoders.11.0.groupnorm_feature.bias'] = s['model.diffusion_model.input_blocks.11.0.in_layers.0.bias'] + new['diffusion']['unet.encoders.11.0.conv_feature.weight'] = s['model.diffusion_model.input_blocks.11.0.in_layers.2.weight'] + new['diffusion']['unet.encoders.11.0.conv_feature.bias'] = s['model.diffusion_model.input_blocks.11.0.in_layers.2.bias'] + new['diffusion']['unet.encoders.11.0.linear_time.weight'] = s['model.diffusion_model.input_blocks.11.0.emb_layers.1.weight'] + new['diffusion']['unet.encoders.11.0.linear_time.bias'] = s['model.diffusion_model.input_blocks.11.0.emb_layers.1.bias'] + new['diffusion']['unet.encoders.11.0.groupnorm_merged.weight'] = s['model.diffusion_model.input_blocks.11.0.out_layers.0.weight'] + new['diffusion']['unet.encoders.11.0.groupnorm_merged.bias'] = s['model.diffusion_model.input_blocks.11.0.out_layers.0.bias'] + new['diffusion']['unet.encoders.11.0.conv_merged.weight'] = s['model.diffusion_model.input_blocks.11.0.out_layers.3.weight'] + new['diffusion']['unet.encoders.11.0.conv_merged.bias'] = s['model.diffusion_model.input_blocks.11.0.out_layers.3.bias'] + new['diffusion']['unet.bottleneck.0.groupnorm_feature.weight'] = s['model.diffusion_model.middle_block.0.in_layers.0.weight'] + new['diffusion']['unet.bottleneck.0.groupnorm_feature.bias'] = s['model.diffusion_model.middle_block.0.in_layers.0.bias'] + new['diffusion']['unet.bottleneck.0.conv_feature.weight'] = s['model.diffusion_model.middle_block.0.in_layers.2.weight'] + new['diffusion']['unet.bottleneck.0.conv_feature.bias'] = 
s['model.diffusion_model.middle_block.0.in_layers.2.bias'] + new['diffusion']['unet.bottleneck.0.linear_time.weight'] = s['model.diffusion_model.middle_block.0.emb_layers.1.weight'] + new['diffusion']['unet.bottleneck.0.linear_time.bias'] = s['model.diffusion_model.middle_block.0.emb_layers.1.bias'] + new['diffusion']['unet.bottleneck.0.groupnorm_merged.weight'] = s['model.diffusion_model.middle_block.0.out_layers.0.weight'] + new['diffusion']['unet.bottleneck.0.groupnorm_merged.bias'] = s['model.diffusion_model.middle_block.0.out_layers.0.bias'] + new['diffusion']['unet.bottleneck.0.conv_merged.weight'] = s['model.diffusion_model.middle_block.0.out_layers.3.weight'] + new['diffusion']['unet.bottleneck.0.conv_merged.bias'] = s['model.diffusion_model.middle_block.0.out_layers.3.bias'] + new['diffusion']['unet.bottleneck.1.groupnorm.weight'] = s['model.diffusion_model.middle_block.1.norm.weight'] + new['diffusion']['unet.bottleneck.1.groupnorm.bias'] = s['model.diffusion_model.middle_block.1.norm.bias'] + new['diffusion']['unet.bottleneck.1.conv_input.weight'] = s['model.diffusion_model.middle_block.1.proj_in.weight'] + new['diffusion']['unet.bottleneck.1.conv_input.bias'] = s['model.diffusion_model.middle_block.1.proj_in.bias'] + new['diffusion']['unet.bottleneck.1.attention_1.out_proj.weight'] = s['model.diffusion_model.middle_block.1.transformer_blocks.0.attn1.to_out.0.weight'] + new['diffusion']['unet.bottleneck.1.attention_1.out_proj.bias'] = s['model.diffusion_model.middle_block.1.transformer_blocks.0.attn1.to_out.0.bias'] + new['diffusion']['unet.bottleneck.1.linear_geglu_1.weight'] = s['model.diffusion_model.middle_block.1.transformer_blocks.0.ff.net.0.proj.weight'] + new['diffusion']['unet.bottleneck.1.linear_geglu_1.bias'] = s['model.diffusion_model.middle_block.1.transformer_blocks.0.ff.net.0.proj.bias'] + new['diffusion']['unet.bottleneck.1.linear_geglu_2.weight'] = s['model.diffusion_model.middle_block.1.transformer_blocks.0.ff.net.2.weight'] + 
new['diffusion']['unet.bottleneck.1.linear_geglu_2.bias'] = s['model.diffusion_model.middle_block.1.transformer_blocks.0.ff.net.2.bias'] + new['diffusion']['unet.bottleneck.1.attention_2.q_proj.weight'] = s['model.diffusion_model.middle_block.1.transformer_blocks.0.attn2.to_q.weight'] + new['diffusion']['unet.bottleneck.1.attention_2.k_proj.weight'] = s['model.diffusion_model.middle_block.1.transformer_blocks.0.attn2.to_k.weight'] + new['diffusion']['unet.bottleneck.1.attention_2.v_proj.weight'] = s['model.diffusion_model.middle_block.1.transformer_blocks.0.attn2.to_v.weight'] + new['diffusion']['unet.bottleneck.1.attention_2.out_proj.weight'] = s['model.diffusion_model.middle_block.1.transformer_blocks.0.attn2.to_out.0.weight'] + new['diffusion']['unet.bottleneck.1.attention_2.out_proj.bias'] = s['model.diffusion_model.middle_block.1.transformer_blocks.0.attn2.to_out.0.bias'] + new['diffusion']['unet.bottleneck.1.layernorm_1.weight'] = s['model.diffusion_model.middle_block.1.transformer_blocks.0.norm1.weight'] + new['diffusion']['unet.bottleneck.1.layernorm_1.bias'] = s['model.diffusion_model.middle_block.1.transformer_blocks.0.norm1.bias'] + new['diffusion']['unet.bottleneck.1.layernorm_2.weight'] = s['model.diffusion_model.middle_block.1.transformer_blocks.0.norm2.weight'] + new['diffusion']['unet.bottleneck.1.layernorm_2.bias'] = s['model.diffusion_model.middle_block.1.transformer_blocks.0.norm2.bias'] + new['diffusion']['unet.bottleneck.1.layernorm_3.weight'] = s['model.diffusion_model.middle_block.1.transformer_blocks.0.norm3.weight'] + new['diffusion']['unet.bottleneck.1.layernorm_3.bias'] = s['model.diffusion_model.middle_block.1.transformer_blocks.0.norm3.bias'] + new['diffusion']['unet.bottleneck.1.conv_output.weight'] = s['model.diffusion_model.middle_block.1.proj_out.weight'] + new['diffusion']['unet.bottleneck.1.conv_output.bias'] = s['model.diffusion_model.middle_block.1.proj_out.bias'] + new['diffusion']['unet.bottleneck.2.groupnorm_feature.weight'] 
= s['model.diffusion_model.middle_block.2.in_layers.0.weight'] + new['diffusion']['unet.bottleneck.2.groupnorm_feature.bias'] = s['model.diffusion_model.middle_block.2.in_layers.0.bias'] + new['diffusion']['unet.bottleneck.2.conv_feature.weight'] = s['model.diffusion_model.middle_block.2.in_layers.2.weight'] + new['diffusion']['unet.bottleneck.2.conv_feature.bias'] = s['model.diffusion_model.middle_block.2.in_layers.2.bias'] + new['diffusion']['unet.bottleneck.2.linear_time.weight'] = s['model.diffusion_model.middle_block.2.emb_layers.1.weight'] + new['diffusion']['unet.bottleneck.2.linear_time.bias'] = s['model.diffusion_model.middle_block.2.emb_layers.1.bias'] + new['diffusion']['unet.bottleneck.2.groupnorm_merged.weight'] = s['model.diffusion_model.middle_block.2.out_layers.0.weight'] + new['diffusion']['unet.bottleneck.2.groupnorm_merged.bias'] = s['model.diffusion_model.middle_block.2.out_layers.0.bias'] + new['diffusion']['unet.bottleneck.2.conv_merged.weight'] = s['model.diffusion_model.middle_block.2.out_layers.3.weight'] + new['diffusion']['unet.bottleneck.2.conv_merged.bias'] = s['model.diffusion_model.middle_block.2.out_layers.3.bias'] + new['diffusion']['unet.decoders.0.0.groupnorm_feature.weight'] = s['model.diffusion_model.output_blocks.0.0.in_layers.0.weight'] + new['diffusion']['unet.decoders.0.0.groupnorm_feature.bias'] = s['model.diffusion_model.output_blocks.0.0.in_layers.0.bias'] + new['diffusion']['unet.decoders.0.0.conv_feature.weight'] = s['model.diffusion_model.output_blocks.0.0.in_layers.2.weight'] + new['diffusion']['unet.decoders.0.0.conv_feature.bias'] = s['model.diffusion_model.output_blocks.0.0.in_layers.2.bias'] + new['diffusion']['unet.decoders.0.0.linear_time.weight'] = s['model.diffusion_model.output_blocks.0.0.emb_layers.1.weight'] + new['diffusion']['unet.decoders.0.0.linear_time.bias'] = s['model.diffusion_model.output_blocks.0.0.emb_layers.1.bias'] + new['diffusion']['unet.decoders.0.0.groupnorm_merged.weight'] = 
s['model.diffusion_model.output_blocks.0.0.out_layers.0.weight'] + new['diffusion']['unet.decoders.0.0.groupnorm_merged.bias'] = s['model.diffusion_model.output_blocks.0.0.out_layers.0.bias'] + new['diffusion']['unet.decoders.0.0.conv_merged.weight'] = s['model.diffusion_model.output_blocks.0.0.out_layers.3.weight'] + new['diffusion']['unet.decoders.0.0.conv_merged.bias'] = s['model.diffusion_model.output_blocks.0.0.out_layers.3.bias'] + new['diffusion']['unet.decoders.0.0.residual_layer.weight'] = s['model.diffusion_model.output_blocks.0.0.skip_connection.weight'] + new['diffusion']['unet.decoders.0.0.residual_layer.bias'] = s['model.diffusion_model.output_blocks.0.0.skip_connection.bias'] + new['diffusion']['unet.decoders.1.0.groupnorm_feature.weight'] = s['model.diffusion_model.output_blocks.1.0.in_layers.0.weight'] + new['diffusion']['unet.decoders.1.0.groupnorm_feature.bias'] = s['model.diffusion_model.output_blocks.1.0.in_layers.0.bias'] + new['diffusion']['unet.decoders.1.0.conv_feature.weight'] = s['model.diffusion_model.output_blocks.1.0.in_layers.2.weight'] + new['diffusion']['unet.decoders.1.0.conv_feature.bias'] = s['model.diffusion_model.output_blocks.1.0.in_layers.2.bias'] + new['diffusion']['unet.decoders.1.0.linear_time.weight'] = s['model.diffusion_model.output_blocks.1.0.emb_layers.1.weight'] + new['diffusion']['unet.decoders.1.0.linear_time.bias'] = s['model.diffusion_model.output_blocks.1.0.emb_layers.1.bias'] + new['diffusion']['unet.decoders.1.0.groupnorm_merged.weight'] = s['model.diffusion_model.output_blocks.1.0.out_layers.0.weight'] + new['diffusion']['unet.decoders.1.0.groupnorm_merged.bias'] = s['model.diffusion_model.output_blocks.1.0.out_layers.0.bias'] + new['diffusion']['unet.decoders.1.0.conv_merged.weight'] = s['model.diffusion_model.output_blocks.1.0.out_layers.3.weight'] + new['diffusion']['unet.decoders.1.0.conv_merged.bias'] = s['model.diffusion_model.output_blocks.1.0.out_layers.3.bias'] + 
new['diffusion']['unet.decoders.1.0.residual_layer.weight'] = s['model.diffusion_model.output_blocks.1.0.skip_connection.weight'] + new['diffusion']['unet.decoders.1.0.residual_layer.bias'] = s['model.diffusion_model.output_blocks.1.0.skip_connection.bias'] + new['diffusion']['unet.decoders.2.0.groupnorm_feature.weight'] = s['model.diffusion_model.output_blocks.2.0.in_layers.0.weight'] + new['diffusion']['unet.decoders.2.0.groupnorm_feature.bias'] = s['model.diffusion_model.output_blocks.2.0.in_layers.0.bias'] + new['diffusion']['unet.decoders.2.0.conv_feature.weight'] = s['model.diffusion_model.output_blocks.2.0.in_layers.2.weight'] + new['diffusion']['unet.decoders.2.0.conv_feature.bias'] = s['model.diffusion_model.output_blocks.2.0.in_layers.2.bias'] + new['diffusion']['unet.decoders.2.0.linear_time.weight'] = s['model.diffusion_model.output_blocks.2.0.emb_layers.1.weight'] + new['diffusion']['unet.decoders.2.0.linear_time.bias'] = s['model.diffusion_model.output_blocks.2.0.emb_layers.1.bias'] + new['diffusion']['unet.decoders.2.0.groupnorm_merged.weight'] = s['model.diffusion_model.output_blocks.2.0.out_layers.0.weight'] + new['diffusion']['unet.decoders.2.0.groupnorm_merged.bias'] = s['model.diffusion_model.output_blocks.2.0.out_layers.0.bias'] + new['diffusion']['unet.decoders.2.0.conv_merged.weight'] = s['model.diffusion_model.output_blocks.2.0.out_layers.3.weight'] + new['diffusion']['unet.decoders.2.0.conv_merged.bias'] = s['model.diffusion_model.output_blocks.2.0.out_layers.3.bias'] + new['diffusion']['unet.decoders.2.0.residual_layer.weight'] = s['model.diffusion_model.output_blocks.2.0.skip_connection.weight'] + new['diffusion']['unet.decoders.2.0.residual_layer.bias'] = s['model.diffusion_model.output_blocks.2.0.skip_connection.bias'] + new['diffusion']['unet.decoders.2.1.conv.weight'] = s['model.diffusion_model.output_blocks.2.1.conv.weight'] + new['diffusion']['unet.decoders.2.1.conv.bias'] = s['model.diffusion_model.output_blocks.2.1.conv.bias'] + 
new['diffusion']['unet.decoders.3.0.groupnorm_feature.weight'] = s['model.diffusion_model.output_blocks.3.0.in_layers.0.weight'] + new['diffusion']['unet.decoders.3.0.groupnorm_feature.bias'] = s['model.diffusion_model.output_blocks.3.0.in_layers.0.bias'] + new['diffusion']['unet.decoders.3.0.conv_feature.weight'] = s['model.diffusion_model.output_blocks.3.0.in_layers.2.weight'] + new['diffusion']['unet.decoders.3.0.conv_feature.bias'] = s['model.diffusion_model.output_blocks.3.0.in_layers.2.bias'] + new['diffusion']['unet.decoders.3.0.linear_time.weight'] = s['model.diffusion_model.output_blocks.3.0.emb_layers.1.weight'] + new['diffusion']['unet.decoders.3.0.linear_time.bias'] = s['model.diffusion_model.output_blocks.3.0.emb_layers.1.bias'] + new['diffusion']['unet.decoders.3.0.groupnorm_merged.weight'] = s['model.diffusion_model.output_blocks.3.0.out_layers.0.weight'] + new['diffusion']['unet.decoders.3.0.groupnorm_merged.bias'] = s['model.diffusion_model.output_blocks.3.0.out_layers.0.bias'] + new['diffusion']['unet.decoders.3.0.conv_merged.weight'] = s['model.diffusion_model.output_blocks.3.0.out_layers.3.weight'] + new['diffusion']['unet.decoders.3.0.conv_merged.bias'] = s['model.diffusion_model.output_blocks.3.0.out_layers.3.bias'] + new['diffusion']['unet.decoders.3.0.residual_layer.weight'] = s['model.diffusion_model.output_blocks.3.0.skip_connection.weight'] + new['diffusion']['unet.decoders.3.0.residual_layer.bias'] = s['model.diffusion_model.output_blocks.3.0.skip_connection.bias'] + new['diffusion']['unet.decoders.3.1.groupnorm.weight'] = s['model.diffusion_model.output_blocks.3.1.norm.weight'] + new['diffusion']['unet.decoders.3.1.groupnorm.bias'] = s['model.diffusion_model.output_blocks.3.1.norm.bias'] + new['diffusion']['unet.decoders.3.1.conv_input.weight'] = s['model.diffusion_model.output_blocks.3.1.proj_in.weight'] + new['diffusion']['unet.decoders.3.1.conv_input.bias'] = s['model.diffusion_model.output_blocks.3.1.proj_in.bias'] + 
new['diffusion']['unet.decoders.3.1.attention_1.out_proj.weight'] = s['model.diffusion_model.output_blocks.3.1.transformer_blocks.0.attn1.to_out.0.weight'] + new['diffusion']['unet.decoders.3.1.attention_1.out_proj.bias'] = s['model.diffusion_model.output_blocks.3.1.transformer_blocks.0.attn1.to_out.0.bias'] + new['diffusion']['unet.decoders.3.1.linear_geglu_1.weight'] = s['model.diffusion_model.output_blocks.3.1.transformer_blocks.0.ff.net.0.proj.weight'] + new['diffusion']['unet.decoders.3.1.linear_geglu_1.bias'] = s['model.diffusion_model.output_blocks.3.1.transformer_blocks.0.ff.net.0.proj.bias'] + new['diffusion']['unet.decoders.3.1.linear_geglu_2.weight'] = s['model.diffusion_model.output_blocks.3.1.transformer_blocks.0.ff.net.2.weight'] + new['diffusion']['unet.decoders.3.1.linear_geglu_2.bias'] = s['model.diffusion_model.output_blocks.3.1.transformer_blocks.0.ff.net.2.bias'] + new['diffusion']['unet.decoders.3.1.attention_2.q_proj.weight'] = s['model.diffusion_model.output_blocks.3.1.transformer_blocks.0.attn2.to_q.weight'] + new['diffusion']['unet.decoders.3.1.attention_2.k_proj.weight'] = s['model.diffusion_model.output_blocks.3.1.transformer_blocks.0.attn2.to_k.weight'] + new['diffusion']['unet.decoders.3.1.attention_2.v_proj.weight'] = s['model.diffusion_model.output_blocks.3.1.transformer_blocks.0.attn2.to_v.weight'] + new['diffusion']['unet.decoders.3.1.attention_2.out_proj.weight'] = s['model.diffusion_model.output_blocks.3.1.transformer_blocks.0.attn2.to_out.0.weight'] + new['diffusion']['unet.decoders.3.1.attention_2.out_proj.bias'] = s['model.diffusion_model.output_blocks.3.1.transformer_blocks.0.attn2.to_out.0.bias'] + new['diffusion']['unet.decoders.3.1.layernorm_1.weight'] = s['model.diffusion_model.output_blocks.3.1.transformer_blocks.0.norm1.weight'] + new['diffusion']['unet.decoders.3.1.layernorm_1.bias'] = s['model.diffusion_model.output_blocks.3.1.transformer_blocks.0.norm1.bias'] + new['diffusion']['unet.decoders.3.1.layernorm_2.weight'] 
= s['model.diffusion_model.output_blocks.3.1.transformer_blocks.0.norm2.weight'] + new['diffusion']['unet.decoders.3.1.layernorm_2.bias'] = s['model.diffusion_model.output_blocks.3.1.transformer_blocks.0.norm2.bias'] + new['diffusion']['unet.decoders.3.1.layernorm_3.weight'] = s['model.diffusion_model.output_blocks.3.1.transformer_blocks.0.norm3.weight'] + new['diffusion']['unet.decoders.3.1.layernorm_3.bias'] = s['model.diffusion_model.output_blocks.3.1.transformer_blocks.0.norm3.bias'] + new['diffusion']['unet.decoders.3.1.conv_output.weight'] = s['model.diffusion_model.output_blocks.3.1.proj_out.weight'] + new['diffusion']['unet.decoders.3.1.conv_output.bias'] = s['model.diffusion_model.output_blocks.3.1.proj_out.bias'] + new['diffusion']['unet.decoders.4.0.groupnorm_feature.weight'] = s['model.diffusion_model.output_blocks.4.0.in_layers.0.weight'] + new['diffusion']['unet.decoders.4.0.groupnorm_feature.bias'] = s['model.diffusion_model.output_blocks.4.0.in_layers.0.bias'] + new['diffusion']['unet.decoders.4.0.conv_feature.weight'] = s['model.diffusion_model.output_blocks.4.0.in_layers.2.weight'] + new['diffusion']['unet.decoders.4.0.conv_feature.bias'] = s['model.diffusion_model.output_blocks.4.0.in_layers.2.bias'] + new['diffusion']['unet.decoders.4.0.linear_time.weight'] = s['model.diffusion_model.output_blocks.4.0.emb_layers.1.weight'] + new['diffusion']['unet.decoders.4.0.linear_time.bias'] = s['model.diffusion_model.output_blocks.4.0.emb_layers.1.bias'] + new['diffusion']['unet.decoders.4.0.groupnorm_merged.weight'] = s['model.diffusion_model.output_blocks.4.0.out_layers.0.weight'] + new['diffusion']['unet.decoders.4.0.groupnorm_merged.bias'] = s['model.diffusion_model.output_blocks.4.0.out_layers.0.bias'] + new['diffusion']['unet.decoders.4.0.conv_merged.weight'] = s['model.diffusion_model.output_blocks.4.0.out_layers.3.weight'] + new['diffusion']['unet.decoders.4.0.conv_merged.bias'] = s['model.diffusion_model.output_blocks.4.0.out_layers.3.bias'] + 
new['diffusion']['unet.decoders.4.0.residual_layer.weight'] = s['model.diffusion_model.output_blocks.4.0.skip_connection.weight'] + new['diffusion']['unet.decoders.4.0.residual_layer.bias'] = s['model.diffusion_model.output_blocks.4.0.skip_connection.bias'] + new['diffusion']['unet.decoders.4.1.groupnorm.weight'] = s['model.diffusion_model.output_blocks.4.1.norm.weight'] + new['diffusion']['unet.decoders.4.1.groupnorm.bias'] = s['model.diffusion_model.output_blocks.4.1.norm.bias'] + new['diffusion']['unet.decoders.4.1.conv_input.weight'] = s['model.diffusion_model.output_blocks.4.1.proj_in.weight'] + new['diffusion']['unet.decoders.4.1.conv_input.bias'] = s['model.diffusion_model.output_blocks.4.1.proj_in.bias'] + new['diffusion']['unet.decoders.4.1.attention_1.out_proj.weight'] = s['model.diffusion_model.output_blocks.4.1.transformer_blocks.0.attn1.to_out.0.weight'] + new['diffusion']['unet.decoders.4.1.attention_1.out_proj.bias'] = s['model.diffusion_model.output_blocks.4.1.transformer_blocks.0.attn1.to_out.0.bias'] + new['diffusion']['unet.decoders.4.1.linear_geglu_1.weight'] = s['model.diffusion_model.output_blocks.4.1.transformer_blocks.0.ff.net.0.proj.weight'] + new['diffusion']['unet.decoders.4.1.linear_geglu_1.bias'] = s['model.diffusion_model.output_blocks.4.1.transformer_blocks.0.ff.net.0.proj.bias'] + new['diffusion']['unet.decoders.4.1.linear_geglu_2.weight'] = s['model.diffusion_model.output_blocks.4.1.transformer_blocks.0.ff.net.2.weight'] + new['diffusion']['unet.decoders.4.1.linear_geglu_2.bias'] = s['model.diffusion_model.output_blocks.4.1.transformer_blocks.0.ff.net.2.bias'] + new['diffusion']['unet.decoders.4.1.attention_2.q_proj.weight'] = s['model.diffusion_model.output_blocks.4.1.transformer_blocks.0.attn2.to_q.weight'] + new['diffusion']['unet.decoders.4.1.attention_2.k_proj.weight'] = s['model.diffusion_model.output_blocks.4.1.transformer_blocks.0.attn2.to_k.weight'] + new['diffusion']['unet.decoders.4.1.attention_2.v_proj.weight'] = 
s['model.diffusion_model.output_blocks.4.1.transformer_blocks.0.attn2.to_v.weight'] + new['diffusion']['unet.decoders.4.1.attention_2.out_proj.weight'] = s['model.diffusion_model.output_blocks.4.1.transformer_blocks.0.attn2.to_out.0.weight'] + new['diffusion']['unet.decoders.4.1.attention_2.out_proj.bias'] = s['model.diffusion_model.output_blocks.4.1.transformer_blocks.0.attn2.to_out.0.bias'] + new['diffusion']['unet.decoders.4.1.layernorm_1.weight'] = s['model.diffusion_model.output_blocks.4.1.transformer_blocks.0.norm1.weight'] + new['diffusion']['unet.decoders.4.1.layernorm_1.bias'] = s['model.diffusion_model.output_blocks.4.1.transformer_blocks.0.norm1.bias'] + new['diffusion']['unet.decoders.4.1.layernorm_2.weight'] = s['model.diffusion_model.output_blocks.4.1.transformer_blocks.0.norm2.weight'] + new['diffusion']['unet.decoders.4.1.layernorm_2.bias'] = s['model.diffusion_model.output_blocks.4.1.transformer_blocks.0.norm2.bias'] + new['diffusion']['unet.decoders.4.1.layernorm_3.weight'] = s['model.diffusion_model.output_blocks.4.1.transformer_blocks.0.norm3.weight'] + new['diffusion']['unet.decoders.4.1.layernorm_3.bias'] = s['model.diffusion_model.output_blocks.4.1.transformer_blocks.0.norm3.bias'] + new['diffusion']['unet.decoders.4.1.conv_output.weight'] = s['model.diffusion_model.output_blocks.4.1.proj_out.weight'] + new['diffusion']['unet.decoders.4.1.conv_output.bias'] = s['model.diffusion_model.output_blocks.4.1.proj_out.bias'] + new['diffusion']['unet.decoders.5.0.groupnorm_feature.weight'] = s['model.diffusion_model.output_blocks.5.0.in_layers.0.weight'] + new['diffusion']['unet.decoders.5.0.groupnorm_feature.bias'] = s['model.diffusion_model.output_blocks.5.0.in_layers.0.bias'] + new['diffusion']['unet.decoders.5.0.conv_feature.weight'] = s['model.diffusion_model.output_blocks.5.0.in_layers.2.weight'] + new['diffusion']['unet.decoders.5.0.conv_feature.bias'] = s['model.diffusion_model.output_blocks.5.0.in_layers.2.bias'] + 
new['diffusion']['unet.decoders.5.0.linear_time.weight'] = s['model.diffusion_model.output_blocks.5.0.emb_layers.1.weight'] + new['diffusion']['unet.decoders.5.0.linear_time.bias'] = s['model.diffusion_model.output_blocks.5.0.emb_layers.1.bias'] + new['diffusion']['unet.decoders.5.0.groupnorm_merged.weight'] = s['model.diffusion_model.output_blocks.5.0.out_layers.0.weight'] + new['diffusion']['unet.decoders.5.0.groupnorm_merged.bias'] = s['model.diffusion_model.output_blocks.5.0.out_layers.0.bias'] + new['diffusion']['unet.decoders.5.0.conv_merged.weight'] = s['model.diffusion_model.output_blocks.5.0.out_layers.3.weight'] + new['diffusion']['unet.decoders.5.0.conv_merged.bias'] = s['model.diffusion_model.output_blocks.5.0.out_layers.3.bias'] + new['diffusion']['unet.decoders.5.0.residual_layer.weight'] = s['model.diffusion_model.output_blocks.5.0.skip_connection.weight'] + new['diffusion']['unet.decoders.5.0.residual_layer.bias'] = s['model.diffusion_model.output_blocks.5.0.skip_connection.bias'] + new['diffusion']['unet.decoders.5.1.groupnorm.weight'] = s['model.diffusion_model.output_blocks.5.1.norm.weight'] + new['diffusion']['unet.decoders.5.1.groupnorm.bias'] = s['model.diffusion_model.output_blocks.5.1.norm.bias'] + new['diffusion']['unet.decoders.5.1.conv_input.weight'] = s['model.diffusion_model.output_blocks.5.1.proj_in.weight'] + new['diffusion']['unet.decoders.5.1.conv_input.bias'] = s['model.diffusion_model.output_blocks.5.1.proj_in.bias'] + new['diffusion']['unet.decoders.5.1.attention_1.out_proj.weight'] = s['model.diffusion_model.output_blocks.5.1.transformer_blocks.0.attn1.to_out.0.weight'] + new['diffusion']['unet.decoders.5.1.attention_1.out_proj.bias'] = s['model.diffusion_model.output_blocks.5.1.transformer_blocks.0.attn1.to_out.0.bias'] + new['diffusion']['unet.decoders.5.1.linear_geglu_1.weight'] = s['model.diffusion_model.output_blocks.5.1.transformer_blocks.0.ff.net.0.proj.weight'] + new['diffusion']['unet.decoders.5.1.linear_geglu_1.bias'] 
= s['model.diffusion_model.output_blocks.5.1.transformer_blocks.0.ff.net.0.proj.bias'] + new['diffusion']['unet.decoders.5.1.linear_geglu_2.weight'] = s['model.diffusion_model.output_blocks.5.1.transformer_blocks.0.ff.net.2.weight'] + new['diffusion']['unet.decoders.5.1.linear_geglu_2.bias'] = s['model.diffusion_model.output_blocks.5.1.transformer_blocks.0.ff.net.2.bias'] + new['diffusion']['unet.decoders.5.1.attention_2.q_proj.weight'] = s['model.diffusion_model.output_blocks.5.1.transformer_blocks.0.attn2.to_q.weight'] + new['diffusion']['unet.decoders.5.1.attention_2.k_proj.weight'] = s['model.diffusion_model.output_blocks.5.1.transformer_blocks.0.attn2.to_k.weight'] + new['diffusion']['unet.decoders.5.1.attention_2.v_proj.weight'] = s['model.diffusion_model.output_blocks.5.1.transformer_blocks.0.attn2.to_v.weight'] + new['diffusion']['unet.decoders.5.1.attention_2.out_proj.weight'] = s['model.diffusion_model.output_blocks.5.1.transformer_blocks.0.attn2.to_out.0.weight'] + new['diffusion']['unet.decoders.5.1.attention_2.out_proj.bias'] = s['model.diffusion_model.output_blocks.5.1.transformer_blocks.0.attn2.to_out.0.bias'] + new['diffusion']['unet.decoders.5.1.layernorm_1.weight'] = s['model.diffusion_model.output_blocks.5.1.transformer_blocks.0.norm1.weight'] + new['diffusion']['unet.decoders.5.1.layernorm_1.bias'] = s['model.diffusion_model.output_blocks.5.1.transformer_blocks.0.norm1.bias'] + new['diffusion']['unet.decoders.5.1.layernorm_2.weight'] = s['model.diffusion_model.output_blocks.5.1.transformer_blocks.0.norm2.weight'] + new['diffusion']['unet.decoders.5.1.layernorm_2.bias'] = s['model.diffusion_model.output_blocks.5.1.transformer_blocks.0.norm2.bias'] + new['diffusion']['unet.decoders.5.1.layernorm_3.weight'] = s['model.diffusion_model.output_blocks.5.1.transformer_blocks.0.norm3.weight'] + new['diffusion']['unet.decoders.5.1.layernorm_3.bias'] = s['model.diffusion_model.output_blocks.5.1.transformer_blocks.0.norm3.bias'] + 
new['diffusion']['unet.decoders.5.1.conv_output.weight'] = s['model.diffusion_model.output_blocks.5.1.proj_out.weight'] + new['diffusion']['unet.decoders.5.1.conv_output.bias'] = s['model.diffusion_model.output_blocks.5.1.proj_out.bias'] + new['diffusion']['unet.decoders.5.2.conv.weight'] = s['model.diffusion_model.output_blocks.5.2.conv.weight'] + new['diffusion']['unet.decoders.5.2.conv.bias'] = s['model.diffusion_model.output_blocks.5.2.conv.bias'] + new['diffusion']['unet.decoders.6.0.groupnorm_feature.weight'] = s['model.diffusion_model.output_blocks.6.0.in_layers.0.weight'] + new['diffusion']['unet.decoders.6.0.groupnorm_feature.bias'] = s['model.diffusion_model.output_blocks.6.0.in_layers.0.bias'] + new['diffusion']['unet.decoders.6.0.conv_feature.weight'] = s['model.diffusion_model.output_blocks.6.0.in_layers.2.weight'] + new['diffusion']['unet.decoders.6.0.conv_feature.bias'] = s['model.diffusion_model.output_blocks.6.0.in_layers.2.bias'] + new['diffusion']['unet.decoders.6.0.linear_time.weight'] = s['model.diffusion_model.output_blocks.6.0.emb_layers.1.weight'] + new['diffusion']['unet.decoders.6.0.linear_time.bias'] = s['model.diffusion_model.output_blocks.6.0.emb_layers.1.bias'] + new['diffusion']['unet.decoders.6.0.groupnorm_merged.weight'] = s['model.diffusion_model.output_blocks.6.0.out_layers.0.weight'] + new['diffusion']['unet.decoders.6.0.groupnorm_merged.bias'] = s['model.diffusion_model.output_blocks.6.0.out_layers.0.bias'] + new['diffusion']['unet.decoders.6.0.conv_merged.weight'] = s['model.diffusion_model.output_blocks.6.0.out_layers.3.weight'] + new['diffusion']['unet.decoders.6.0.conv_merged.bias'] = s['model.diffusion_model.output_blocks.6.0.out_layers.3.bias'] + new['diffusion']['unet.decoders.6.0.residual_layer.weight'] = s['model.diffusion_model.output_blocks.6.0.skip_connection.weight'] + new['diffusion']['unet.decoders.6.0.residual_layer.bias'] = s['model.diffusion_model.output_blocks.6.0.skip_connection.bias'] + 
new['diffusion']['unet.decoders.6.1.groupnorm.weight'] = s['model.diffusion_model.output_blocks.6.1.norm.weight'] + new['diffusion']['unet.decoders.6.1.groupnorm.bias'] = s['model.diffusion_model.output_blocks.6.1.norm.bias'] + new['diffusion']['unet.decoders.6.1.conv_input.weight'] = s['model.diffusion_model.output_blocks.6.1.proj_in.weight'] + new['diffusion']['unet.decoders.6.1.conv_input.bias'] = s['model.diffusion_model.output_blocks.6.1.proj_in.bias'] + new['diffusion']['unet.decoders.6.1.attention_1.out_proj.weight'] = s['model.diffusion_model.output_blocks.6.1.transformer_blocks.0.attn1.to_out.0.weight'] + new['diffusion']['unet.decoders.6.1.attention_1.out_proj.bias'] = s['model.diffusion_model.output_blocks.6.1.transformer_blocks.0.attn1.to_out.0.bias'] + new['diffusion']['unet.decoders.6.1.linear_geglu_1.weight'] = s['model.diffusion_model.output_blocks.6.1.transformer_blocks.0.ff.net.0.proj.weight'] + new['diffusion']['unet.decoders.6.1.linear_geglu_1.bias'] = s['model.diffusion_model.output_blocks.6.1.transformer_blocks.0.ff.net.0.proj.bias'] + new['diffusion']['unet.decoders.6.1.linear_geglu_2.weight'] = s['model.diffusion_model.output_blocks.6.1.transformer_blocks.0.ff.net.2.weight'] + new['diffusion']['unet.decoders.6.1.linear_geglu_2.bias'] = s['model.diffusion_model.output_blocks.6.1.transformer_blocks.0.ff.net.2.bias'] + new['diffusion']['unet.decoders.6.1.attention_2.q_proj.weight'] = s['model.diffusion_model.output_blocks.6.1.transformer_blocks.0.attn2.to_q.weight'] + new['diffusion']['unet.decoders.6.1.attention_2.k_proj.weight'] = s['model.diffusion_model.output_blocks.6.1.transformer_blocks.0.attn2.to_k.weight'] + new['diffusion']['unet.decoders.6.1.attention_2.v_proj.weight'] = s['model.diffusion_model.output_blocks.6.1.transformer_blocks.0.attn2.to_v.weight'] + new['diffusion']['unet.decoders.6.1.attention_2.out_proj.weight'] = s['model.diffusion_model.output_blocks.6.1.transformer_blocks.0.attn2.to_out.0.weight'] + 
new['diffusion']['unet.decoders.6.1.attention_2.out_proj.bias'] = s['model.diffusion_model.output_blocks.6.1.transformer_blocks.0.attn2.to_out.0.bias'] + new['diffusion']['unet.decoders.6.1.layernorm_1.weight'] = s['model.diffusion_model.output_blocks.6.1.transformer_blocks.0.norm1.weight'] + new['diffusion']['unet.decoders.6.1.layernorm_1.bias'] = s['model.diffusion_model.output_blocks.6.1.transformer_blocks.0.norm1.bias'] + new['diffusion']['unet.decoders.6.1.layernorm_2.weight'] = s['model.diffusion_model.output_blocks.6.1.transformer_blocks.0.norm2.weight'] + new['diffusion']['unet.decoders.6.1.layernorm_2.bias'] = s['model.diffusion_model.output_blocks.6.1.transformer_blocks.0.norm2.bias'] + new['diffusion']['unet.decoders.6.1.layernorm_3.weight'] = s['model.diffusion_model.output_blocks.6.1.transformer_blocks.0.norm3.weight'] + new['diffusion']['unet.decoders.6.1.layernorm_3.bias'] = s['model.diffusion_model.output_blocks.6.1.transformer_blocks.0.norm3.bias'] + new['diffusion']['unet.decoders.6.1.conv_output.weight'] = s['model.diffusion_model.output_blocks.6.1.proj_out.weight'] + new['diffusion']['unet.decoders.6.1.conv_output.bias'] = s['model.diffusion_model.output_blocks.6.1.proj_out.bias'] + new['diffusion']['unet.decoders.7.0.groupnorm_feature.weight'] = s['model.diffusion_model.output_blocks.7.0.in_layers.0.weight'] + new['diffusion']['unet.decoders.7.0.groupnorm_feature.bias'] = s['model.diffusion_model.output_blocks.7.0.in_layers.0.bias'] + new['diffusion']['unet.decoders.7.0.conv_feature.weight'] = s['model.diffusion_model.output_blocks.7.0.in_layers.2.weight'] + new['diffusion']['unet.decoders.7.0.conv_feature.bias'] = s['model.diffusion_model.output_blocks.7.0.in_layers.2.bias'] + new['diffusion']['unet.decoders.7.0.linear_time.weight'] = s['model.diffusion_model.output_blocks.7.0.emb_layers.1.weight'] + new['diffusion']['unet.decoders.7.0.linear_time.bias'] = s['model.diffusion_model.output_blocks.7.0.emb_layers.1.bias'] + 
new['diffusion']['unet.decoders.7.0.groupnorm_merged.weight'] = s['model.diffusion_model.output_blocks.7.0.out_layers.0.weight'] + new['diffusion']['unet.decoders.7.0.groupnorm_merged.bias'] = s['model.diffusion_model.output_blocks.7.0.out_layers.0.bias'] + new['diffusion']['unet.decoders.7.0.conv_merged.weight'] = s['model.diffusion_model.output_blocks.7.0.out_layers.3.weight'] + new['diffusion']['unet.decoders.7.0.conv_merged.bias'] = s['model.diffusion_model.output_blocks.7.0.out_layers.3.bias'] + new['diffusion']['unet.decoders.7.0.residual_layer.weight'] = s['model.diffusion_model.output_blocks.7.0.skip_connection.weight'] + new['diffusion']['unet.decoders.7.0.residual_layer.bias'] = s['model.diffusion_model.output_blocks.7.0.skip_connection.bias'] + new['diffusion']['unet.decoders.7.1.groupnorm.weight'] = s['model.diffusion_model.output_blocks.7.1.norm.weight'] + new['diffusion']['unet.decoders.7.1.groupnorm.bias'] = s['model.diffusion_model.output_blocks.7.1.norm.bias'] + new['diffusion']['unet.decoders.7.1.conv_input.weight'] = s['model.diffusion_model.output_blocks.7.1.proj_in.weight'] + new['diffusion']['unet.decoders.7.1.conv_input.bias'] = s['model.diffusion_model.output_blocks.7.1.proj_in.bias'] + new['diffusion']['unet.decoders.7.1.attention_1.out_proj.weight'] = s['model.diffusion_model.output_blocks.7.1.transformer_blocks.0.attn1.to_out.0.weight'] + new['diffusion']['unet.decoders.7.1.attention_1.out_proj.bias'] = s['model.diffusion_model.output_blocks.7.1.transformer_blocks.0.attn1.to_out.0.bias'] + new['diffusion']['unet.decoders.7.1.linear_geglu_1.weight'] = s['model.diffusion_model.output_blocks.7.1.transformer_blocks.0.ff.net.0.proj.weight'] + new['diffusion']['unet.decoders.7.1.linear_geglu_1.bias'] = s['model.diffusion_model.output_blocks.7.1.transformer_blocks.0.ff.net.0.proj.bias'] + new['diffusion']['unet.decoders.7.1.linear_geglu_2.weight'] = s['model.diffusion_model.output_blocks.7.1.transformer_blocks.0.ff.net.2.weight'] + 
new['diffusion']['unet.decoders.7.1.linear_geglu_2.bias'] = s['model.diffusion_model.output_blocks.7.1.transformer_blocks.0.ff.net.2.bias'] + new['diffusion']['unet.decoders.7.1.attention_2.q_proj.weight'] = s['model.diffusion_model.output_blocks.7.1.transformer_blocks.0.attn2.to_q.weight'] + new['diffusion']['unet.decoders.7.1.attention_2.k_proj.weight'] = s['model.diffusion_model.output_blocks.7.1.transformer_blocks.0.attn2.to_k.weight'] + new['diffusion']['unet.decoders.7.1.attention_2.v_proj.weight'] = s['model.diffusion_model.output_blocks.7.1.transformer_blocks.0.attn2.to_v.weight'] + new['diffusion']['unet.decoders.7.1.attention_2.out_proj.weight'] = s['model.diffusion_model.output_blocks.7.1.transformer_blocks.0.attn2.to_out.0.weight'] + new['diffusion']['unet.decoders.7.1.attention_2.out_proj.bias'] = s['model.diffusion_model.output_blocks.7.1.transformer_blocks.0.attn2.to_out.0.bias'] + new['diffusion']['unet.decoders.7.1.layernorm_1.weight'] = s['model.diffusion_model.output_blocks.7.1.transformer_blocks.0.norm1.weight'] + new['diffusion']['unet.decoders.7.1.layernorm_1.bias'] = s['model.diffusion_model.output_blocks.7.1.transformer_blocks.0.norm1.bias'] + new['diffusion']['unet.decoders.7.1.layernorm_2.weight'] = s['model.diffusion_model.output_blocks.7.1.transformer_blocks.0.norm2.weight'] + new['diffusion']['unet.decoders.7.1.layernorm_2.bias'] = s['model.diffusion_model.output_blocks.7.1.transformer_blocks.0.norm2.bias'] + new['diffusion']['unet.decoders.7.1.layernorm_3.weight'] = s['model.diffusion_model.output_blocks.7.1.transformer_blocks.0.norm3.weight'] + new['diffusion']['unet.decoders.7.1.layernorm_3.bias'] = s['model.diffusion_model.output_blocks.7.1.transformer_blocks.0.norm3.bias'] + new['diffusion']['unet.decoders.7.1.conv_output.weight'] = s['model.diffusion_model.output_blocks.7.1.proj_out.weight'] + new['diffusion']['unet.decoders.7.1.conv_output.bias'] = s['model.diffusion_model.output_blocks.7.1.proj_out.bias'] + 
new['diffusion']['unet.decoders.8.0.groupnorm_feature.weight'] = s['model.diffusion_model.output_blocks.8.0.in_layers.0.weight'] + new['diffusion']['unet.decoders.8.0.groupnorm_feature.bias'] = s['model.diffusion_model.output_blocks.8.0.in_layers.0.bias'] + new['diffusion']['unet.decoders.8.0.conv_feature.weight'] = s['model.diffusion_model.output_blocks.8.0.in_layers.2.weight'] + new['diffusion']['unet.decoders.8.0.conv_feature.bias'] = s['model.diffusion_model.output_blocks.8.0.in_layers.2.bias'] + new['diffusion']['unet.decoders.8.0.linear_time.weight'] = s['model.diffusion_model.output_blocks.8.0.emb_layers.1.weight'] + new['diffusion']['unet.decoders.8.0.linear_time.bias'] = s['model.diffusion_model.output_blocks.8.0.emb_layers.1.bias'] + new['diffusion']['unet.decoders.8.0.groupnorm_merged.weight'] = s['model.diffusion_model.output_blocks.8.0.out_layers.0.weight'] + new['diffusion']['unet.decoders.8.0.groupnorm_merged.bias'] = s['model.diffusion_model.output_blocks.8.0.out_layers.0.bias'] + new['diffusion']['unet.decoders.8.0.conv_merged.weight'] = s['model.diffusion_model.output_blocks.8.0.out_layers.3.weight'] + new['diffusion']['unet.decoders.8.0.conv_merged.bias'] = s['model.diffusion_model.output_blocks.8.0.out_layers.3.bias'] + new['diffusion']['unet.decoders.8.0.residual_layer.weight'] = s['model.diffusion_model.output_blocks.8.0.skip_connection.weight'] + new['diffusion']['unet.decoders.8.0.residual_layer.bias'] = s['model.diffusion_model.output_blocks.8.0.skip_connection.bias'] + new['diffusion']['unet.decoders.8.1.groupnorm.weight'] = s['model.diffusion_model.output_blocks.8.1.norm.weight'] + new['diffusion']['unet.decoders.8.1.groupnorm.bias'] = s['model.diffusion_model.output_blocks.8.1.norm.bias'] + new['diffusion']['unet.decoders.8.1.conv_input.weight'] = s['model.diffusion_model.output_blocks.8.1.proj_in.weight'] + new['diffusion']['unet.decoders.8.1.conv_input.bias'] = s['model.diffusion_model.output_blocks.8.1.proj_in.bias'] + 
new['diffusion']['unet.decoders.8.1.attention_1.out_proj.weight'] = s['model.diffusion_model.output_blocks.8.1.transformer_blocks.0.attn1.to_out.0.weight'] + new['diffusion']['unet.decoders.8.1.attention_1.out_proj.bias'] = s['model.diffusion_model.output_blocks.8.1.transformer_blocks.0.attn1.to_out.0.bias'] + new['diffusion']['unet.decoders.8.1.linear_geglu_1.weight'] = s['model.diffusion_model.output_blocks.8.1.transformer_blocks.0.ff.net.0.proj.weight'] + new['diffusion']['unet.decoders.8.1.linear_geglu_1.bias'] = s['model.diffusion_model.output_blocks.8.1.transformer_blocks.0.ff.net.0.proj.bias'] + new['diffusion']['unet.decoders.8.1.linear_geglu_2.weight'] = s['model.diffusion_model.output_blocks.8.1.transformer_blocks.0.ff.net.2.weight'] + new['diffusion']['unet.decoders.8.1.linear_geglu_2.bias'] = s['model.diffusion_model.output_blocks.8.1.transformer_blocks.0.ff.net.2.bias'] + new['diffusion']['unet.decoders.8.1.attention_2.q_proj.weight'] = s['model.diffusion_model.output_blocks.8.1.transformer_blocks.0.attn2.to_q.weight'] + new['diffusion']['unet.decoders.8.1.attention_2.k_proj.weight'] = s['model.diffusion_model.output_blocks.8.1.transformer_blocks.0.attn2.to_k.weight'] + new['diffusion']['unet.decoders.8.1.attention_2.v_proj.weight'] = s['model.diffusion_model.output_blocks.8.1.transformer_blocks.0.attn2.to_v.weight'] + new['diffusion']['unet.decoders.8.1.attention_2.out_proj.weight'] = s['model.diffusion_model.output_blocks.8.1.transformer_blocks.0.attn2.to_out.0.weight'] + new['diffusion']['unet.decoders.8.1.attention_2.out_proj.bias'] = s['model.diffusion_model.output_blocks.8.1.transformer_blocks.0.attn2.to_out.0.bias'] + new['diffusion']['unet.decoders.8.1.layernorm_1.weight'] = s['model.diffusion_model.output_blocks.8.1.transformer_blocks.0.norm1.weight'] + new['diffusion']['unet.decoders.8.1.layernorm_1.bias'] = s['model.diffusion_model.output_blocks.8.1.transformer_blocks.0.norm1.bias'] + new['diffusion']['unet.decoders.8.1.layernorm_2.weight'] 
= s['model.diffusion_model.output_blocks.8.1.transformer_blocks.0.norm2.weight'] + new['diffusion']['unet.decoders.8.1.layernorm_2.bias'] = s['model.diffusion_model.output_blocks.8.1.transformer_blocks.0.norm2.bias'] + new['diffusion']['unet.decoders.8.1.layernorm_3.weight'] = s['model.diffusion_model.output_blocks.8.1.transformer_blocks.0.norm3.weight'] + new['diffusion']['unet.decoders.8.1.layernorm_3.bias'] = s['model.diffusion_model.output_blocks.8.1.transformer_blocks.0.norm3.bias'] + new['diffusion']['unet.decoders.8.1.conv_output.weight'] = s['model.diffusion_model.output_blocks.8.1.proj_out.weight'] + new['diffusion']['unet.decoders.8.1.conv_output.bias'] = s['model.diffusion_model.output_blocks.8.1.proj_out.bias'] + new['diffusion']['unet.decoders.8.2.conv.weight'] = s['model.diffusion_model.output_blocks.8.2.conv.weight'] + new['diffusion']['unet.decoders.8.2.conv.bias'] = s['model.diffusion_model.output_blocks.8.2.conv.bias'] + new['diffusion']['unet.decoders.9.0.groupnorm_feature.weight'] = s['model.diffusion_model.output_blocks.9.0.in_layers.0.weight'] + new['diffusion']['unet.decoders.9.0.groupnorm_feature.bias'] = s['model.diffusion_model.output_blocks.9.0.in_layers.0.bias'] + new['diffusion']['unet.decoders.9.0.conv_feature.weight'] = s['model.diffusion_model.output_blocks.9.0.in_layers.2.weight'] + new['diffusion']['unet.decoders.9.0.conv_feature.bias'] = s['model.diffusion_model.output_blocks.9.0.in_layers.2.bias'] + new['diffusion']['unet.decoders.9.0.linear_time.weight'] = s['model.diffusion_model.output_blocks.9.0.emb_layers.1.weight'] + new['diffusion']['unet.decoders.9.0.linear_time.bias'] = s['model.diffusion_model.output_blocks.9.0.emb_layers.1.bias'] + new['diffusion']['unet.decoders.9.0.groupnorm_merged.weight'] = s['model.diffusion_model.output_blocks.9.0.out_layers.0.weight'] + new['diffusion']['unet.decoders.9.0.groupnorm_merged.bias'] = s['model.diffusion_model.output_blocks.9.0.out_layers.0.bias'] + 
new['diffusion']['unet.decoders.9.0.conv_merged.weight'] = s['model.diffusion_model.output_blocks.9.0.out_layers.3.weight'] + new['diffusion']['unet.decoders.9.0.conv_merged.bias'] = s['model.diffusion_model.output_blocks.9.0.out_layers.3.bias'] + new['diffusion']['unet.decoders.9.0.residual_layer.weight'] = s['model.diffusion_model.output_blocks.9.0.skip_connection.weight'] + new['diffusion']['unet.decoders.9.0.residual_layer.bias'] = s['model.diffusion_model.output_blocks.9.0.skip_connection.bias'] + new['diffusion']['unet.decoders.9.1.groupnorm.weight'] = s['model.diffusion_model.output_blocks.9.1.norm.weight'] + new['diffusion']['unet.decoders.9.1.groupnorm.bias'] = s['model.diffusion_model.output_blocks.9.1.norm.bias'] + new['diffusion']['unet.decoders.9.1.conv_input.weight'] = s['model.diffusion_model.output_blocks.9.1.proj_in.weight'] + new['diffusion']['unet.decoders.9.1.conv_input.bias'] = s['model.diffusion_model.output_blocks.9.1.proj_in.bias'] + new['diffusion']['unet.decoders.9.1.attention_1.out_proj.weight'] = s['model.diffusion_model.output_blocks.9.1.transformer_blocks.0.attn1.to_out.0.weight'] + new['diffusion']['unet.decoders.9.1.attention_1.out_proj.bias'] = s['model.diffusion_model.output_blocks.9.1.transformer_blocks.0.attn1.to_out.0.bias'] + new['diffusion']['unet.decoders.9.1.linear_geglu_1.weight'] = s['model.diffusion_model.output_blocks.9.1.transformer_blocks.0.ff.net.0.proj.weight'] + new['diffusion']['unet.decoders.9.1.linear_geglu_1.bias'] = s['model.diffusion_model.output_blocks.9.1.transformer_blocks.0.ff.net.0.proj.bias'] + new['diffusion']['unet.decoders.9.1.linear_geglu_2.weight'] = s['model.diffusion_model.output_blocks.9.1.transformer_blocks.0.ff.net.2.weight'] + new['diffusion']['unet.decoders.9.1.linear_geglu_2.bias'] = s['model.diffusion_model.output_blocks.9.1.transformer_blocks.0.ff.net.2.bias'] + new['diffusion']['unet.decoders.9.1.attention_2.q_proj.weight'] = 
s['model.diffusion_model.output_blocks.9.1.transformer_blocks.0.attn2.to_q.weight'] + new['diffusion']['unet.decoders.9.1.attention_2.k_proj.weight'] = s['model.diffusion_model.output_blocks.9.1.transformer_blocks.0.attn2.to_k.weight'] + new['diffusion']['unet.decoders.9.1.attention_2.v_proj.weight'] = s['model.diffusion_model.output_blocks.9.1.transformer_blocks.0.attn2.to_v.weight'] + new['diffusion']['unet.decoders.9.1.attention_2.out_proj.weight'] = s['model.diffusion_model.output_blocks.9.1.transformer_blocks.0.attn2.to_out.0.weight'] + new['diffusion']['unet.decoders.9.1.attention_2.out_proj.bias'] = s['model.diffusion_model.output_blocks.9.1.transformer_blocks.0.attn2.to_out.0.bias'] + new['diffusion']['unet.decoders.9.1.layernorm_1.weight'] = s['model.diffusion_model.output_blocks.9.1.transformer_blocks.0.norm1.weight'] + new['diffusion']['unet.decoders.9.1.layernorm_1.bias'] = s['model.diffusion_model.output_blocks.9.1.transformer_blocks.0.norm1.bias'] + new['diffusion']['unet.decoders.9.1.layernorm_2.weight'] = s['model.diffusion_model.output_blocks.9.1.transformer_blocks.0.norm2.weight'] + new['diffusion']['unet.decoders.9.1.layernorm_2.bias'] = s['model.diffusion_model.output_blocks.9.1.transformer_blocks.0.norm2.bias'] + new['diffusion']['unet.decoders.9.1.layernorm_3.weight'] = s['model.diffusion_model.output_blocks.9.1.transformer_blocks.0.norm3.weight'] + new['diffusion']['unet.decoders.9.1.layernorm_3.bias'] = s['model.diffusion_model.output_blocks.9.1.transformer_blocks.0.norm3.bias'] + new['diffusion']['unet.decoders.9.1.conv_output.weight'] = s['model.diffusion_model.output_blocks.9.1.proj_out.weight'] + new['diffusion']['unet.decoders.9.1.conv_output.bias'] = s['model.diffusion_model.output_blocks.9.1.proj_out.bias'] + new['diffusion']['unet.decoders.10.0.groupnorm_feature.weight'] = s['model.diffusion_model.output_blocks.10.0.in_layers.0.weight'] + new['diffusion']['unet.decoders.10.0.groupnorm_feature.bias'] = 
s['model.diffusion_model.output_blocks.10.0.in_layers.0.bias'] + new['diffusion']['unet.decoders.10.0.conv_feature.weight'] = s['model.diffusion_model.output_blocks.10.0.in_layers.2.weight'] + new['diffusion']['unet.decoders.10.0.conv_feature.bias'] = s['model.diffusion_model.output_blocks.10.0.in_layers.2.bias'] + new['diffusion']['unet.decoders.10.0.linear_time.weight'] = s['model.diffusion_model.output_blocks.10.0.emb_layers.1.weight'] + new['diffusion']['unet.decoders.10.0.linear_time.bias'] = s['model.diffusion_model.output_blocks.10.0.emb_layers.1.bias'] + new['diffusion']['unet.decoders.10.0.groupnorm_merged.weight'] = s['model.diffusion_model.output_blocks.10.0.out_layers.0.weight'] + new['diffusion']['unet.decoders.10.0.groupnorm_merged.bias'] = s['model.diffusion_model.output_blocks.10.0.out_layers.0.bias'] + new['diffusion']['unet.decoders.10.0.conv_merged.weight'] = s['model.diffusion_model.output_blocks.10.0.out_layers.3.weight'] + new['diffusion']['unet.decoders.10.0.conv_merged.bias'] = s['model.diffusion_model.output_blocks.10.0.out_layers.3.bias'] + new['diffusion']['unet.decoders.10.0.residual_layer.weight'] = s['model.diffusion_model.output_blocks.10.0.skip_connection.weight'] + new['diffusion']['unet.decoders.10.0.residual_layer.bias'] = s['model.diffusion_model.output_blocks.10.0.skip_connection.bias'] + new['diffusion']['unet.decoders.10.1.groupnorm.weight'] = s['model.diffusion_model.output_blocks.10.1.norm.weight'] + new['diffusion']['unet.decoders.10.1.groupnorm.bias'] = s['model.diffusion_model.output_blocks.10.1.norm.bias'] + new['diffusion']['unet.decoders.10.1.conv_input.weight'] = s['model.diffusion_model.output_blocks.10.1.proj_in.weight'] + new['diffusion']['unet.decoders.10.1.conv_input.bias'] = s['model.diffusion_model.output_blocks.10.1.proj_in.bias'] + new['diffusion']['unet.decoders.10.1.attention_1.out_proj.weight'] = s['model.diffusion_model.output_blocks.10.1.transformer_blocks.0.attn1.to_out.0.weight'] + 
new['diffusion']['unet.decoders.10.1.attention_1.out_proj.bias'] = s['model.diffusion_model.output_blocks.10.1.transformer_blocks.0.attn1.to_out.0.bias'] + new['diffusion']['unet.decoders.10.1.linear_geglu_1.weight'] = s['model.diffusion_model.output_blocks.10.1.transformer_blocks.0.ff.net.0.proj.weight'] + new['diffusion']['unet.decoders.10.1.linear_geglu_1.bias'] = s['model.diffusion_model.output_blocks.10.1.transformer_blocks.0.ff.net.0.proj.bias'] + new['diffusion']['unet.decoders.10.1.linear_geglu_2.weight'] = s['model.diffusion_model.output_blocks.10.1.transformer_blocks.0.ff.net.2.weight'] + new['diffusion']['unet.decoders.10.1.linear_geglu_2.bias'] = s['model.diffusion_model.output_blocks.10.1.transformer_blocks.0.ff.net.2.bias'] + new['diffusion']['unet.decoders.10.1.attention_2.q_proj.weight'] = s['model.diffusion_model.output_blocks.10.1.transformer_blocks.0.attn2.to_q.weight'] + new['diffusion']['unet.decoders.10.1.attention_2.k_proj.weight'] = s['model.diffusion_model.output_blocks.10.1.transformer_blocks.0.attn2.to_k.weight'] + new['diffusion']['unet.decoders.10.1.attention_2.v_proj.weight'] = s['model.diffusion_model.output_blocks.10.1.transformer_blocks.0.attn2.to_v.weight'] + new['diffusion']['unet.decoders.10.1.attention_2.out_proj.weight'] = s['model.diffusion_model.output_blocks.10.1.transformer_blocks.0.attn2.to_out.0.weight'] + new['diffusion']['unet.decoders.10.1.attention_2.out_proj.bias'] = s['model.diffusion_model.output_blocks.10.1.transformer_blocks.0.attn2.to_out.0.bias'] + new['diffusion']['unet.decoders.10.1.layernorm_1.weight'] = s['model.diffusion_model.output_blocks.10.1.transformer_blocks.0.norm1.weight'] + new['diffusion']['unet.decoders.10.1.layernorm_1.bias'] = s['model.diffusion_model.output_blocks.10.1.transformer_blocks.0.norm1.bias'] + new['diffusion']['unet.decoders.10.1.layernorm_2.weight'] = s['model.diffusion_model.output_blocks.10.1.transformer_blocks.0.norm2.weight'] + 
new['diffusion']['unet.decoders.10.1.layernorm_2.bias'] = s['model.diffusion_model.output_blocks.10.1.transformer_blocks.0.norm2.bias'] + new['diffusion']['unet.decoders.10.1.layernorm_3.weight'] = s['model.diffusion_model.output_blocks.10.1.transformer_blocks.0.norm3.weight'] + new['diffusion']['unet.decoders.10.1.layernorm_3.bias'] = s['model.diffusion_model.output_blocks.10.1.transformer_blocks.0.norm3.bias'] + new['diffusion']['unet.decoders.10.1.conv_output.weight'] = s['model.diffusion_model.output_blocks.10.1.proj_out.weight'] + new['diffusion']['unet.decoders.10.1.conv_output.bias'] = s['model.diffusion_model.output_blocks.10.1.proj_out.bias'] + new['diffusion']['unet.decoders.11.0.groupnorm_feature.weight'] = s['model.diffusion_model.output_blocks.11.0.in_layers.0.weight'] + new['diffusion']['unet.decoders.11.0.groupnorm_feature.bias'] = s['model.diffusion_model.output_blocks.11.0.in_layers.0.bias'] + new['diffusion']['unet.decoders.11.0.conv_feature.weight'] = s['model.diffusion_model.output_blocks.11.0.in_layers.2.weight'] + new['diffusion']['unet.decoders.11.0.conv_feature.bias'] = s['model.diffusion_model.output_blocks.11.0.in_layers.2.bias'] + new['diffusion']['unet.decoders.11.0.linear_time.weight'] = s['model.diffusion_model.output_blocks.11.0.emb_layers.1.weight'] + new['diffusion']['unet.decoders.11.0.linear_time.bias'] = s['model.diffusion_model.output_blocks.11.0.emb_layers.1.bias'] + new['diffusion']['unet.decoders.11.0.groupnorm_merged.weight'] = s['model.diffusion_model.output_blocks.11.0.out_layers.0.weight'] + new['diffusion']['unet.decoders.11.0.groupnorm_merged.bias'] = s['model.diffusion_model.output_blocks.11.0.out_layers.0.bias'] + new['diffusion']['unet.decoders.11.0.conv_merged.weight'] = s['model.diffusion_model.output_blocks.11.0.out_layers.3.weight'] + new['diffusion']['unet.decoders.11.0.conv_merged.bias'] = s['model.diffusion_model.output_blocks.11.0.out_layers.3.bias'] + 
new['diffusion']['unet.decoders.11.0.residual_layer.weight'] = s['model.diffusion_model.output_blocks.11.0.skip_connection.weight'] + new['diffusion']['unet.decoders.11.0.residual_layer.bias'] = s['model.diffusion_model.output_blocks.11.0.skip_connection.bias'] + new['diffusion']['unet.decoders.11.1.groupnorm.weight'] = s['model.diffusion_model.output_blocks.11.1.norm.weight'] + new['diffusion']['unet.decoders.11.1.groupnorm.bias'] = s['model.diffusion_model.output_blocks.11.1.norm.bias'] + new['diffusion']['unet.decoders.11.1.conv_input.weight'] = s['model.diffusion_model.output_blocks.11.1.proj_in.weight'] + new['diffusion']['unet.decoders.11.1.conv_input.bias'] = s['model.diffusion_model.output_blocks.11.1.proj_in.bias'] + new['diffusion']['unet.decoders.11.1.attention_1.out_proj.weight'] = s['model.diffusion_model.output_blocks.11.1.transformer_blocks.0.attn1.to_out.0.weight'] + new['diffusion']['unet.decoders.11.1.attention_1.out_proj.bias'] = s['model.diffusion_model.output_blocks.11.1.transformer_blocks.0.attn1.to_out.0.bias'] + new['diffusion']['unet.decoders.11.1.linear_geglu_1.weight'] = s['model.diffusion_model.output_blocks.11.1.transformer_blocks.0.ff.net.0.proj.weight'] + new['diffusion']['unet.decoders.11.1.linear_geglu_1.bias'] = s['model.diffusion_model.output_blocks.11.1.transformer_blocks.0.ff.net.0.proj.bias'] + new['diffusion']['unet.decoders.11.1.linear_geglu_2.weight'] = s['model.diffusion_model.output_blocks.11.1.transformer_blocks.0.ff.net.2.weight'] + new['diffusion']['unet.decoders.11.1.linear_geglu_2.bias'] = s['model.diffusion_model.output_blocks.11.1.transformer_blocks.0.ff.net.2.bias'] + new['diffusion']['unet.decoders.11.1.attention_2.q_proj.weight'] = s['model.diffusion_model.output_blocks.11.1.transformer_blocks.0.attn2.to_q.weight'] + new['diffusion']['unet.decoders.11.1.attention_2.k_proj.weight'] = s['model.diffusion_model.output_blocks.11.1.transformer_blocks.0.attn2.to_k.weight'] + 
new['diffusion']['unet.decoders.11.1.attention_2.v_proj.weight'] = s['model.diffusion_model.output_blocks.11.1.transformer_blocks.0.attn2.to_v.weight'] + new['diffusion']['unet.decoders.11.1.attention_2.out_proj.weight'] = s['model.diffusion_model.output_blocks.11.1.transformer_blocks.0.attn2.to_out.0.weight'] + new['diffusion']['unet.decoders.11.1.attention_2.out_proj.bias'] = s['model.diffusion_model.output_blocks.11.1.transformer_blocks.0.attn2.to_out.0.bias'] + new['diffusion']['unet.decoders.11.1.layernorm_1.weight'] = s['model.diffusion_model.output_blocks.11.1.transformer_blocks.0.norm1.weight'] + new['diffusion']['unet.decoders.11.1.layernorm_1.bias'] = s['model.diffusion_model.output_blocks.11.1.transformer_blocks.0.norm1.bias'] + new['diffusion']['unet.decoders.11.1.layernorm_2.weight'] = s['model.diffusion_model.output_blocks.11.1.transformer_blocks.0.norm2.weight'] + new['diffusion']['unet.decoders.11.1.layernorm_2.bias'] = s['model.diffusion_model.output_blocks.11.1.transformer_blocks.0.norm2.bias'] + new['diffusion']['unet.decoders.11.1.layernorm_3.weight'] = s['model.diffusion_model.output_blocks.11.1.transformer_blocks.0.norm3.weight'] + new['diffusion']['unet.decoders.11.1.layernorm_3.bias'] = s['model.diffusion_model.output_blocks.11.1.transformer_blocks.0.norm3.bias'] + new['diffusion']['unet.decoders.11.1.conv_output.weight'] = s['model.diffusion_model.output_blocks.11.1.proj_out.weight'] + new['diffusion']['unet.decoders.11.1.conv_output.bias'] = s['model.diffusion_model.output_blocks.11.1.proj_out.bias'] + new['diffusion']['final.groupnorm.weight'] = s['model.diffusion_model.out.0.weight'] + new['diffusion']['final.groupnorm.bias'] = s['model.diffusion_model.out.0.bias'] + new['diffusion']['final.conv.weight'] = s['model.diffusion_model.out.2.weight'] + new['diffusion']['final.conv.bias'] = s['model.diffusion_model.out.2.bias'] + new['encoder']['0.weight'] = s['first_stage_model.encoder.conv_in.weight'] + new['encoder']['0.bias'] = 
s['first_stage_model.encoder.conv_in.bias'] + new['encoder']['1.groupnorm_1.weight'] = s['first_stage_model.encoder.down.0.block.0.norm1.weight'] + new['encoder']['1.groupnorm_1.bias'] = s['first_stage_model.encoder.down.0.block.0.norm1.bias'] + new['encoder']['1.conv_1.weight'] = s['first_stage_model.encoder.down.0.block.0.conv1.weight'] + new['encoder']['1.conv_1.bias'] = s['first_stage_model.encoder.down.0.block.0.conv1.bias'] + new['encoder']['1.groupnorm_2.weight'] = s['first_stage_model.encoder.down.0.block.0.norm2.weight'] + new['encoder']['1.groupnorm_2.bias'] = s['first_stage_model.encoder.down.0.block.0.norm2.bias'] + new['encoder']['1.conv_2.weight'] = s['first_stage_model.encoder.down.0.block.0.conv2.weight'] + new['encoder']['1.conv_2.bias'] = s['first_stage_model.encoder.down.0.block.0.conv2.bias'] + new['encoder']['2.groupnorm_1.weight'] = s['first_stage_model.encoder.down.0.block.1.norm1.weight'] + new['encoder']['2.groupnorm_1.bias'] = s['first_stage_model.encoder.down.0.block.1.norm1.bias'] + new['encoder']['2.conv_1.weight'] = s['first_stage_model.encoder.down.0.block.1.conv1.weight'] + new['encoder']['2.conv_1.bias'] = s['first_stage_model.encoder.down.0.block.1.conv1.bias'] + new['encoder']['2.groupnorm_2.weight'] = s['first_stage_model.encoder.down.0.block.1.norm2.weight'] + new['encoder']['2.groupnorm_2.bias'] = s['first_stage_model.encoder.down.0.block.1.norm2.bias'] + new['encoder']['2.conv_2.weight'] = s['first_stage_model.encoder.down.0.block.1.conv2.weight'] + new['encoder']['2.conv_2.bias'] = s['first_stage_model.encoder.down.0.block.1.conv2.bias'] + new['encoder']['3.weight'] = s['first_stage_model.encoder.down.0.downsample.conv.weight'] + new['encoder']['3.bias'] = s['first_stage_model.encoder.down.0.downsample.conv.bias'] + new['encoder']['4.groupnorm_1.weight'] = s['first_stage_model.encoder.down.1.block.0.norm1.weight'] + new['encoder']['4.groupnorm_1.bias'] = s['first_stage_model.encoder.down.1.block.0.norm1.bias'] + 
new['encoder']['4.conv_1.weight'] = s['first_stage_model.encoder.down.1.block.0.conv1.weight'] + new['encoder']['4.conv_1.bias'] = s['first_stage_model.encoder.down.1.block.0.conv1.bias'] + new['encoder']['4.groupnorm_2.weight'] = s['first_stage_model.encoder.down.1.block.0.norm2.weight'] + new['encoder']['4.groupnorm_2.bias'] = s['first_stage_model.encoder.down.1.block.0.norm2.bias'] + new['encoder']['4.conv_2.weight'] = s['first_stage_model.encoder.down.1.block.0.conv2.weight'] + new['encoder']['4.conv_2.bias'] = s['first_stage_model.encoder.down.1.block.0.conv2.bias'] + new['encoder']['4.residual_layer.weight'] = s['first_stage_model.encoder.down.1.block.0.nin_shortcut.weight'] + new['encoder']['4.residual_layer.bias'] = s['first_stage_model.encoder.down.1.block.0.nin_shortcut.bias'] + new['encoder']['5.groupnorm_1.weight'] = s['first_stage_model.encoder.down.1.block.1.norm1.weight'] + new['encoder']['5.groupnorm_1.bias'] = s['first_stage_model.encoder.down.1.block.1.norm1.bias'] + new['encoder']['5.conv_1.weight'] = s['first_stage_model.encoder.down.1.block.1.conv1.weight'] + new['encoder']['5.conv_1.bias'] = s['first_stage_model.encoder.down.1.block.1.conv1.bias'] + new['encoder']['5.groupnorm_2.weight'] = s['first_stage_model.encoder.down.1.block.1.norm2.weight'] + new['encoder']['5.groupnorm_2.bias'] = s['first_stage_model.encoder.down.1.block.1.norm2.bias'] + new['encoder']['5.conv_2.weight'] = s['first_stage_model.encoder.down.1.block.1.conv2.weight'] + new['encoder']['5.conv_2.bias'] = s['first_stage_model.encoder.down.1.block.1.conv2.bias'] + new['encoder']['6.weight'] = s['first_stage_model.encoder.down.1.downsample.conv.weight'] + new['encoder']['6.bias'] = s['first_stage_model.encoder.down.1.downsample.conv.bias'] + new['encoder']['7.groupnorm_1.weight'] = s['first_stage_model.encoder.down.2.block.0.norm1.weight'] + new['encoder']['7.groupnorm_1.bias'] = s['first_stage_model.encoder.down.2.block.0.norm1.bias'] + new['encoder']['7.conv_1.weight'] = 
s['first_stage_model.encoder.down.2.block.0.conv1.weight'] + new['encoder']['7.conv_1.bias'] = s['first_stage_model.encoder.down.2.block.0.conv1.bias'] + new['encoder']['7.groupnorm_2.weight'] = s['first_stage_model.encoder.down.2.block.0.norm2.weight'] + new['encoder']['7.groupnorm_2.bias'] = s['first_stage_model.encoder.down.2.block.0.norm2.bias'] + new['encoder']['7.conv_2.weight'] = s['first_stage_model.encoder.down.2.block.0.conv2.weight'] + new['encoder']['7.conv_2.bias'] = s['first_stage_model.encoder.down.2.block.0.conv2.bias'] + new['encoder']['7.residual_layer.weight'] = s['first_stage_model.encoder.down.2.block.0.nin_shortcut.weight'] + new['encoder']['7.residual_layer.bias'] = s['first_stage_model.encoder.down.2.block.0.nin_shortcut.bias'] + new['encoder']['8.groupnorm_1.weight'] = s['first_stage_model.encoder.down.2.block.1.norm1.weight'] + new['encoder']['8.groupnorm_1.bias'] = s['first_stage_model.encoder.down.2.block.1.norm1.bias'] + new['encoder']['8.conv_1.weight'] = s['first_stage_model.encoder.down.2.block.1.conv1.weight'] + new['encoder']['8.conv_1.bias'] = s['first_stage_model.encoder.down.2.block.1.conv1.bias'] + new['encoder']['8.groupnorm_2.weight'] = s['first_stage_model.encoder.down.2.block.1.norm2.weight'] + new['encoder']['8.groupnorm_2.bias'] = s['first_stage_model.encoder.down.2.block.1.norm2.bias'] + new['encoder']['8.conv_2.weight'] = s['first_stage_model.encoder.down.2.block.1.conv2.weight'] + new['encoder']['8.conv_2.bias'] = s['first_stage_model.encoder.down.2.block.1.conv2.bias'] + new['encoder']['9.weight'] = s['first_stage_model.encoder.down.2.downsample.conv.weight'] + new['encoder']['9.bias'] = s['first_stage_model.encoder.down.2.downsample.conv.bias'] + new['encoder']['10.groupnorm_1.weight'] = s['first_stage_model.encoder.down.3.block.0.norm1.weight'] + new['encoder']['10.groupnorm_1.bias'] = s['first_stage_model.encoder.down.3.block.0.norm1.bias'] + new['encoder']['10.conv_1.weight'] = 
s['first_stage_model.encoder.down.3.block.0.conv1.weight'] + new['encoder']['10.conv_1.bias'] = s['first_stage_model.encoder.down.3.block.0.conv1.bias'] + new['encoder']['10.groupnorm_2.weight'] = s['first_stage_model.encoder.down.3.block.0.norm2.weight'] + new['encoder']['10.groupnorm_2.bias'] = s['first_stage_model.encoder.down.3.block.0.norm2.bias'] + new['encoder']['10.conv_2.weight'] = s['first_stage_model.encoder.down.3.block.0.conv2.weight'] + new['encoder']['10.conv_2.bias'] = s['first_stage_model.encoder.down.3.block.0.conv2.bias'] + new['encoder']['11.groupnorm_1.weight'] = s['first_stage_model.encoder.down.3.block.1.norm1.weight'] + new['encoder']['11.groupnorm_1.bias'] = s['first_stage_model.encoder.down.3.block.1.norm1.bias'] + new['encoder']['11.conv_1.weight'] = s['first_stage_model.encoder.down.3.block.1.conv1.weight'] + new['encoder']['11.conv_1.bias'] = s['first_stage_model.encoder.down.3.block.1.conv1.bias'] + new['encoder']['11.groupnorm_2.weight'] = s['first_stage_model.encoder.down.3.block.1.norm2.weight'] + new['encoder']['11.groupnorm_2.bias'] = s['first_stage_model.encoder.down.3.block.1.norm2.bias'] + new['encoder']['11.conv_2.weight'] = s['first_stage_model.encoder.down.3.block.1.conv2.weight'] + new['encoder']['11.conv_2.bias'] = s['first_stage_model.encoder.down.3.block.1.conv2.bias'] + new['encoder']['12.groupnorm_1.weight'] = s['first_stage_model.encoder.mid.block_1.norm1.weight'] + new['encoder']['12.groupnorm_1.bias'] = s['first_stage_model.encoder.mid.block_1.norm1.bias'] + new['encoder']['12.conv_1.weight'] = s['first_stage_model.encoder.mid.block_1.conv1.weight'] + new['encoder']['12.conv_1.bias'] = s['first_stage_model.encoder.mid.block_1.conv1.bias'] + new['encoder']['12.groupnorm_2.weight'] = s['first_stage_model.encoder.mid.block_1.norm2.weight'] + new['encoder']['12.groupnorm_2.bias'] = s['first_stage_model.encoder.mid.block_1.norm2.bias'] + new['encoder']['12.conv_2.weight'] = 
s['first_stage_model.encoder.mid.block_1.conv2.weight'] + new['encoder']['12.conv_2.bias'] = s['first_stage_model.encoder.mid.block_1.conv2.bias'] + new['encoder']['13.groupnorm.weight'] = s['first_stage_model.encoder.mid.attn_1.norm.weight'] + new['encoder']['13.groupnorm.bias'] = s['first_stage_model.encoder.mid.attn_1.norm.bias'] + new['encoder']['13.attention.out_proj.bias'] = s['first_stage_model.encoder.mid.attn_1.proj_out.bias'] + new['encoder']['14.groupnorm_1.weight'] = s['first_stage_model.encoder.mid.block_2.norm1.weight'] + new['encoder']['14.groupnorm_1.bias'] = s['first_stage_model.encoder.mid.block_2.norm1.bias'] + new['encoder']['14.conv_1.weight'] = s['first_stage_model.encoder.mid.block_2.conv1.weight'] + new['encoder']['14.conv_1.bias'] = s['first_stage_model.encoder.mid.block_2.conv1.bias'] + new['encoder']['14.groupnorm_2.weight'] = s['first_stage_model.encoder.mid.block_2.norm2.weight'] + new['encoder']['14.groupnorm_2.bias'] = s['first_stage_model.encoder.mid.block_2.norm2.bias'] + new['encoder']['14.conv_2.weight'] = s['first_stage_model.encoder.mid.block_2.conv2.weight'] + new['encoder']['14.conv_2.bias'] = s['first_stage_model.encoder.mid.block_2.conv2.bias'] + new['encoder']['15.weight'] = s['first_stage_model.encoder.norm_out.weight'] + new['encoder']['15.bias'] = s['first_stage_model.encoder.norm_out.bias'] + new['encoder']['17.weight'] = s['first_stage_model.encoder.conv_out.weight'] + new['encoder']['17.bias'] = s['first_stage_model.encoder.conv_out.bias'] + new['decoder']['1.weight'] = s['first_stage_model.decoder.conv_in.weight'] + new['decoder']['1.bias'] = s['first_stage_model.decoder.conv_in.bias'] + new['decoder']['2.groupnorm_1.weight'] = s['first_stage_model.decoder.mid.block_1.norm1.weight'] + new['decoder']['2.groupnorm_1.bias'] = s['first_stage_model.decoder.mid.block_1.norm1.bias'] + new['decoder']['2.conv_1.weight'] = s['first_stage_model.decoder.mid.block_1.conv1.weight'] + new['decoder']['2.conv_1.bias'] = 
s['first_stage_model.decoder.mid.block_1.conv1.bias'] + new['decoder']['2.groupnorm_2.weight'] = s['first_stage_model.decoder.mid.block_1.norm2.weight'] + new['decoder']['2.groupnorm_2.bias'] = s['first_stage_model.decoder.mid.block_1.norm2.bias'] + new['decoder']['2.conv_2.weight'] = s['first_stage_model.decoder.mid.block_1.conv2.weight'] + new['decoder']['2.conv_2.bias'] = s['first_stage_model.decoder.mid.block_1.conv2.bias'] + new['decoder']['3.groupnorm.weight'] = s['first_stage_model.decoder.mid.attn_1.norm.weight'] + new['decoder']['3.groupnorm.bias'] = s['first_stage_model.decoder.mid.attn_1.norm.bias'] + new['decoder']['3.attention.out_proj.bias'] = s['first_stage_model.decoder.mid.attn_1.proj_out.bias'] + new['decoder']['4.groupnorm_1.weight'] = s['first_stage_model.decoder.mid.block_2.norm1.weight'] + new['decoder']['4.groupnorm_1.bias'] = s['first_stage_model.decoder.mid.block_2.norm1.bias'] + new['decoder']['4.conv_1.weight'] = s['first_stage_model.decoder.mid.block_2.conv1.weight'] + new['decoder']['4.conv_1.bias'] = s['first_stage_model.decoder.mid.block_2.conv1.bias'] + new['decoder']['4.groupnorm_2.weight'] = s['first_stage_model.decoder.mid.block_2.norm2.weight'] + new['decoder']['4.groupnorm_2.bias'] = s['first_stage_model.decoder.mid.block_2.norm2.bias'] + new['decoder']['4.conv_2.weight'] = s['first_stage_model.decoder.mid.block_2.conv2.weight'] + new['decoder']['4.conv_2.bias'] = s['first_stage_model.decoder.mid.block_2.conv2.bias'] + new['decoder']['20.groupnorm_1.weight'] = s['first_stage_model.decoder.up.0.block.0.norm1.weight'] + new['decoder']['20.groupnorm_1.bias'] = s['first_stage_model.decoder.up.0.block.0.norm1.bias'] + new['decoder']['20.conv_1.weight'] = s['first_stage_model.decoder.up.0.block.0.conv1.weight'] + new['decoder']['20.conv_1.bias'] = s['first_stage_model.decoder.up.0.block.0.conv1.bias'] + new['decoder']['20.groupnorm_2.weight'] = s['first_stage_model.decoder.up.0.block.0.norm2.weight'] + 
new['decoder']['20.groupnorm_2.bias'] = s['first_stage_model.decoder.up.0.block.0.norm2.bias'] + new['decoder']['20.conv_2.weight'] = s['first_stage_model.decoder.up.0.block.0.conv2.weight'] + new['decoder']['20.conv_2.bias'] = s['first_stage_model.decoder.up.0.block.0.conv2.bias'] + new['decoder']['20.residual_layer.weight'] = s['first_stage_model.decoder.up.0.block.0.nin_shortcut.weight'] + new['decoder']['20.residual_layer.bias'] = s['first_stage_model.decoder.up.0.block.0.nin_shortcut.bias'] + new['decoder']['21.groupnorm_1.weight'] = s['first_stage_model.decoder.up.0.block.1.norm1.weight'] + new['decoder']['21.groupnorm_1.bias'] = s['first_stage_model.decoder.up.0.block.1.norm1.bias'] + new['decoder']['21.conv_1.weight'] = s['first_stage_model.decoder.up.0.block.1.conv1.weight'] + new['decoder']['21.conv_1.bias'] = s['first_stage_model.decoder.up.0.block.1.conv1.bias'] + new['decoder']['21.groupnorm_2.weight'] = s['first_stage_model.decoder.up.0.block.1.norm2.weight'] + new['decoder']['21.groupnorm_2.bias'] = s['first_stage_model.decoder.up.0.block.1.norm2.bias'] + new['decoder']['21.conv_2.weight'] = s['first_stage_model.decoder.up.0.block.1.conv2.weight'] + new['decoder']['21.conv_2.bias'] = s['first_stage_model.decoder.up.0.block.1.conv2.bias'] + new['decoder']['22.groupnorm_1.weight'] = s['first_stage_model.decoder.up.0.block.2.norm1.weight'] + new['decoder']['22.groupnorm_1.bias'] = s['first_stage_model.decoder.up.0.block.2.norm1.bias'] + new['decoder']['22.conv_1.weight'] = s['first_stage_model.decoder.up.0.block.2.conv1.weight'] + new['decoder']['22.conv_1.bias'] = s['first_stage_model.decoder.up.0.block.2.conv1.bias'] + new['decoder']['22.groupnorm_2.weight'] = s['first_stage_model.decoder.up.0.block.2.norm2.weight'] + new['decoder']['22.groupnorm_2.bias'] = s['first_stage_model.decoder.up.0.block.2.norm2.bias'] + new['decoder']['22.conv_2.weight'] = s['first_stage_model.decoder.up.0.block.2.conv2.weight'] + new['decoder']['22.conv_2.bias'] = 
s['first_stage_model.decoder.up.0.block.2.conv2.bias'] + new['decoder']['15.groupnorm_1.weight'] = s['first_stage_model.decoder.up.1.block.0.norm1.weight'] + new['decoder']['15.groupnorm_1.bias'] = s['first_stage_model.decoder.up.1.block.0.norm1.bias'] + new['decoder']['15.conv_1.weight'] = s['first_stage_model.decoder.up.1.block.0.conv1.weight'] + new['decoder']['15.conv_1.bias'] = s['first_stage_model.decoder.up.1.block.0.conv1.bias'] + new['decoder']['15.groupnorm_2.weight'] = s['first_stage_model.decoder.up.1.block.0.norm2.weight'] + new['decoder']['15.groupnorm_2.bias'] = s['first_stage_model.decoder.up.1.block.0.norm2.bias'] + new['decoder']['15.conv_2.weight'] = s['first_stage_model.decoder.up.1.block.0.conv2.weight'] + new['decoder']['15.conv_2.bias'] = s['first_stage_model.decoder.up.1.block.0.conv2.bias'] + new['decoder']['15.residual_layer.weight'] = s['first_stage_model.decoder.up.1.block.0.nin_shortcut.weight'] + new['decoder']['15.residual_layer.bias'] = s['first_stage_model.decoder.up.1.block.0.nin_shortcut.bias'] + new['decoder']['16.groupnorm_1.weight'] = s['first_stage_model.decoder.up.1.block.1.norm1.weight'] + new['decoder']['16.groupnorm_1.bias'] = s['first_stage_model.decoder.up.1.block.1.norm1.bias'] + new['decoder']['16.conv_1.weight'] = s['first_stage_model.decoder.up.1.block.1.conv1.weight'] + new['decoder']['16.conv_1.bias'] = s['first_stage_model.decoder.up.1.block.1.conv1.bias'] + new['decoder']['16.groupnorm_2.weight'] = s['first_stage_model.decoder.up.1.block.1.norm2.weight'] + new['decoder']['16.groupnorm_2.bias'] = s['first_stage_model.decoder.up.1.block.1.norm2.bias'] + new['decoder']['16.conv_2.weight'] = s['first_stage_model.decoder.up.1.block.1.conv2.weight'] + new['decoder']['16.conv_2.bias'] = s['first_stage_model.decoder.up.1.block.1.conv2.bias'] + new['decoder']['17.groupnorm_1.weight'] = s['first_stage_model.decoder.up.1.block.2.norm1.weight'] + new['decoder']['17.groupnorm_1.bias'] = 
s['first_stage_model.decoder.up.1.block.2.norm1.bias'] + new['decoder']['17.conv_1.weight'] = s['first_stage_model.decoder.up.1.block.2.conv1.weight'] + new['decoder']['17.conv_1.bias'] = s['first_stage_model.decoder.up.1.block.2.conv1.bias'] + new['decoder']['17.groupnorm_2.weight'] = s['first_stage_model.decoder.up.1.block.2.norm2.weight'] + new['decoder']['17.groupnorm_2.bias'] = s['first_stage_model.decoder.up.1.block.2.norm2.bias'] + new['decoder']['17.conv_2.weight'] = s['first_stage_model.decoder.up.1.block.2.conv2.weight'] + new['decoder']['17.conv_2.bias'] = s['first_stage_model.decoder.up.1.block.2.conv2.bias'] + new['decoder']['19.weight'] = s['first_stage_model.decoder.up.1.upsample.conv.weight'] + new['decoder']['19.bias'] = s['first_stage_model.decoder.up.1.upsample.conv.bias'] + new['decoder']['10.groupnorm_1.weight'] = s['first_stage_model.decoder.up.2.block.0.norm1.weight'] + new['decoder']['10.groupnorm_1.bias'] = s['first_stage_model.decoder.up.2.block.0.norm1.bias'] + new['decoder']['10.conv_1.weight'] = s['first_stage_model.decoder.up.2.block.0.conv1.weight'] + new['decoder']['10.conv_1.bias'] = s['first_stage_model.decoder.up.2.block.0.conv1.bias'] + new['decoder']['10.groupnorm_2.weight'] = s['first_stage_model.decoder.up.2.block.0.norm2.weight'] + new['decoder']['10.groupnorm_2.bias'] = s['first_stage_model.decoder.up.2.block.0.norm2.bias'] + new['decoder']['10.conv_2.weight'] = s['first_stage_model.decoder.up.2.block.0.conv2.weight'] + new['decoder']['10.conv_2.bias'] = s['first_stage_model.decoder.up.2.block.0.conv2.bias'] + new['decoder']['11.groupnorm_1.weight'] = s['first_stage_model.decoder.up.2.block.1.norm1.weight'] + new['decoder']['11.groupnorm_1.bias'] = s['first_stage_model.decoder.up.2.block.1.norm1.bias'] + new['decoder']['11.conv_1.weight'] = s['first_stage_model.decoder.up.2.block.1.conv1.weight'] + new['decoder']['11.conv_1.bias'] = s['first_stage_model.decoder.up.2.block.1.conv1.bias'] + 
new['decoder']['11.groupnorm_2.weight'] = s['first_stage_model.decoder.up.2.block.1.norm2.weight'] + new['decoder']['11.groupnorm_2.bias'] = s['first_stage_model.decoder.up.2.block.1.norm2.bias'] + new['decoder']['11.conv_2.weight'] = s['first_stage_model.decoder.up.2.block.1.conv2.weight'] + new['decoder']['11.conv_2.bias'] = s['first_stage_model.decoder.up.2.block.1.conv2.bias'] + new['decoder']['12.groupnorm_1.weight'] = s['first_stage_model.decoder.up.2.block.2.norm1.weight'] + new['decoder']['12.groupnorm_1.bias'] = s['first_stage_model.decoder.up.2.block.2.norm1.bias'] + new['decoder']['12.conv_1.weight'] = s['first_stage_model.decoder.up.2.block.2.conv1.weight'] + new['decoder']['12.conv_1.bias'] = s['first_stage_model.decoder.up.2.block.2.conv1.bias'] + new['decoder']['12.groupnorm_2.weight'] = s['first_stage_model.decoder.up.2.block.2.norm2.weight'] + new['decoder']['12.groupnorm_2.bias'] = s['first_stage_model.decoder.up.2.block.2.norm2.bias'] + new['decoder']['12.conv_2.weight'] = s['first_stage_model.decoder.up.2.block.2.conv2.weight'] + new['decoder']['12.conv_2.bias'] = s['first_stage_model.decoder.up.2.block.2.conv2.bias'] + new['decoder']['14.weight'] = s['first_stage_model.decoder.up.2.upsample.conv.weight'] + new['decoder']['14.bias'] = s['first_stage_model.decoder.up.2.upsample.conv.bias'] + new['decoder']['5.groupnorm_1.weight'] = s['first_stage_model.decoder.up.3.block.0.norm1.weight'] + new['decoder']['5.groupnorm_1.bias'] = s['first_stage_model.decoder.up.3.block.0.norm1.bias'] + new['decoder']['5.conv_1.weight'] = s['first_stage_model.decoder.up.3.block.0.conv1.weight'] + new['decoder']['5.conv_1.bias'] = s['first_stage_model.decoder.up.3.block.0.conv1.bias'] + new['decoder']['5.groupnorm_2.weight'] = s['first_stage_model.decoder.up.3.block.0.norm2.weight'] + new['decoder']['5.groupnorm_2.bias'] = s['first_stage_model.decoder.up.3.block.0.norm2.bias'] + new['decoder']['5.conv_2.weight'] = 
s['first_stage_model.decoder.up.3.block.0.conv2.weight'] + new['decoder']['5.conv_2.bias'] = s['first_stage_model.decoder.up.3.block.0.conv2.bias'] + new['decoder']['6.groupnorm_1.weight'] = s['first_stage_model.decoder.up.3.block.1.norm1.weight'] + new['decoder']['6.groupnorm_1.bias'] = s['first_stage_model.decoder.up.3.block.1.norm1.bias'] + new['decoder']['6.conv_1.weight'] = s['first_stage_model.decoder.up.3.block.1.conv1.weight'] + new['decoder']['6.conv_1.bias'] = s['first_stage_model.decoder.up.3.block.1.conv1.bias'] + new['decoder']['6.groupnorm_2.weight'] = s['first_stage_model.decoder.up.3.block.1.norm2.weight'] + new['decoder']['6.groupnorm_2.bias'] = s['first_stage_model.decoder.up.3.block.1.norm2.bias'] + new['decoder']['6.conv_2.weight'] = s['first_stage_model.decoder.up.3.block.1.conv2.weight'] + new['decoder']['6.conv_2.bias'] = s['first_stage_model.decoder.up.3.block.1.conv2.bias'] + new['decoder']['7.groupnorm_1.weight'] = s['first_stage_model.decoder.up.3.block.2.norm1.weight'] + new['decoder']['7.groupnorm_1.bias'] = s['first_stage_model.decoder.up.3.block.2.norm1.bias'] + new['decoder']['7.conv_1.weight'] = s['first_stage_model.decoder.up.3.block.2.conv1.weight'] + new['decoder']['7.conv_1.bias'] = s['first_stage_model.decoder.up.3.block.2.conv1.bias'] + new['decoder']['7.groupnorm_2.weight'] = s['first_stage_model.decoder.up.3.block.2.norm2.weight'] + new['decoder']['7.groupnorm_2.bias'] = s['first_stage_model.decoder.up.3.block.2.norm2.bias'] + new['decoder']['7.conv_2.weight'] = s['first_stage_model.decoder.up.3.block.2.conv2.weight'] + new['decoder']['7.conv_2.bias'] = s['first_stage_model.decoder.up.3.block.2.conv2.bias'] + new['decoder']['9.weight'] = s['first_stage_model.decoder.up.3.upsample.conv.weight'] + new['decoder']['9.bias'] = s['first_stage_model.decoder.up.3.upsample.conv.bias'] + new['decoder']['23.weight'] = s['first_stage_model.decoder.norm_out.weight'] + new['decoder']['23.bias'] = 
s['first_stage_model.decoder.norm_out.bias'] + new['decoder']['25.weight'] = s['first_stage_model.decoder.conv_out.weight'] + new['decoder']['25.bias'] = s['first_stage_model.decoder.conv_out.bias'] + new['encoder']['18.weight'] = s['first_stage_model.quant_conv.weight'] + new['encoder']['18.bias'] = s['first_stage_model.quant_conv.bias'] + new['decoder']['0.weight'] = s['first_stage_model.post_quant_conv.weight'] + new['decoder']['0.bias'] = s['first_stage_model.post_quant_conv.bias'] + new['clip']['embedding.token_embedding.weight'] = s['cond_stage_model.transformer.text_model.embeddings.token_embedding.weight'] + new['clip']['embedding.position_value'] = s['cond_stage_model.transformer.text_model.embeddings.position_embedding.weight'] + new['clip']['layers.0.attention.out_proj.weight'] = s['cond_stage_model.transformer.text_model.encoder.layers.0.self_attn.out_proj.weight'] + new['clip']['layers.0.attention.out_proj.bias'] = s['cond_stage_model.transformer.text_model.encoder.layers.0.self_attn.out_proj.bias'] + new['clip']['layers.0.layernorm_1.weight'] = s['cond_stage_model.transformer.text_model.encoder.layers.0.layer_norm1.weight'] + new['clip']['layers.0.layernorm_1.bias'] = s['cond_stage_model.transformer.text_model.encoder.layers.0.layer_norm1.bias'] + new['clip']['layers.0.linear_1.weight'] = s['cond_stage_model.transformer.text_model.encoder.layers.0.mlp.fc1.weight'] + new['clip']['layers.0.linear_1.bias'] = s['cond_stage_model.transformer.text_model.encoder.layers.0.mlp.fc1.bias'] + new['clip']['layers.0.linear_2.weight'] = s['cond_stage_model.transformer.text_model.encoder.layers.0.mlp.fc2.weight'] + new['clip']['layers.0.linear_2.bias'] = s['cond_stage_model.transformer.text_model.encoder.layers.0.mlp.fc2.bias'] + new['clip']['layers.0.layernorm_2.weight'] = s['cond_stage_model.transformer.text_model.encoder.layers.0.layer_norm2.weight'] + new['clip']['layers.0.layernorm_2.bias'] = 
s['cond_stage_model.transformer.text_model.encoder.layers.0.layer_norm2.bias'] + new['clip']['layers.1.attention.out_proj.weight'] = s['cond_stage_model.transformer.text_model.encoder.layers.1.self_attn.out_proj.weight'] + new['clip']['layers.1.attention.out_proj.bias'] = s['cond_stage_model.transformer.text_model.encoder.layers.1.self_attn.out_proj.bias'] + new['clip']['layers.1.layernorm_1.weight'] = s['cond_stage_model.transformer.text_model.encoder.layers.1.layer_norm1.weight'] + new['clip']['layers.1.layernorm_1.bias'] = s['cond_stage_model.transformer.text_model.encoder.layers.1.layer_norm1.bias'] + new['clip']['layers.1.linear_1.weight'] = s['cond_stage_model.transformer.text_model.encoder.layers.1.mlp.fc1.weight'] + new['clip']['layers.1.linear_1.bias'] = s['cond_stage_model.transformer.text_model.encoder.layers.1.mlp.fc1.bias'] + new['clip']['layers.1.linear_2.weight'] = s['cond_stage_model.transformer.text_model.encoder.layers.1.mlp.fc2.weight'] + new['clip']['layers.1.linear_2.bias'] = s['cond_stage_model.transformer.text_model.encoder.layers.1.mlp.fc2.bias'] + new['clip']['layers.1.layernorm_2.weight'] = s['cond_stage_model.transformer.text_model.encoder.layers.1.layer_norm2.weight'] + new['clip']['layers.1.layernorm_2.bias'] = s['cond_stage_model.transformer.text_model.encoder.layers.1.layer_norm2.bias'] + new['clip']['layers.2.attention.out_proj.weight'] = s['cond_stage_model.transformer.text_model.encoder.layers.2.self_attn.out_proj.weight'] + new['clip']['layers.2.attention.out_proj.bias'] = s['cond_stage_model.transformer.text_model.encoder.layers.2.self_attn.out_proj.bias'] + new['clip']['layers.2.layernorm_1.weight'] = s['cond_stage_model.transformer.text_model.encoder.layers.2.layer_norm1.weight'] + new['clip']['layers.2.layernorm_1.bias'] = s['cond_stage_model.transformer.text_model.encoder.layers.2.layer_norm1.bias'] + new['clip']['layers.2.linear_1.weight'] = s['cond_stage_model.transformer.text_model.encoder.layers.2.mlp.fc1.weight'] + 
new['clip']['layers.2.linear_1.bias'] = s['cond_stage_model.transformer.text_model.encoder.layers.2.mlp.fc1.bias'] + new['clip']['layers.2.linear_2.weight'] = s['cond_stage_model.transformer.text_model.encoder.layers.2.mlp.fc2.weight'] + new['clip']['layers.2.linear_2.bias'] = s['cond_stage_model.transformer.text_model.encoder.layers.2.mlp.fc2.bias'] + new['clip']['layers.2.layernorm_2.weight'] = s['cond_stage_model.transformer.text_model.encoder.layers.2.layer_norm2.weight'] + new['clip']['layers.2.layernorm_2.bias'] = s['cond_stage_model.transformer.text_model.encoder.layers.2.layer_norm2.bias'] + new['clip']['layers.3.attention.out_proj.weight'] = s['cond_stage_model.transformer.text_model.encoder.layers.3.self_attn.out_proj.weight'] + new['clip']['layers.3.attention.out_proj.bias'] = s['cond_stage_model.transformer.text_model.encoder.layers.3.self_attn.out_proj.bias'] + new['clip']['layers.3.layernorm_1.weight'] = s['cond_stage_model.transformer.text_model.encoder.layers.3.layer_norm1.weight'] + new['clip']['layers.3.layernorm_1.bias'] = s['cond_stage_model.transformer.text_model.encoder.layers.3.layer_norm1.bias'] + new['clip']['layers.3.linear_1.weight'] = s['cond_stage_model.transformer.text_model.encoder.layers.3.mlp.fc1.weight'] + new['clip']['layers.3.linear_1.bias'] = s['cond_stage_model.transformer.text_model.encoder.layers.3.mlp.fc1.bias'] + new['clip']['layers.3.linear_2.weight'] = s['cond_stage_model.transformer.text_model.encoder.layers.3.mlp.fc2.weight'] + new['clip']['layers.3.linear_2.bias'] = s['cond_stage_model.transformer.text_model.encoder.layers.3.mlp.fc2.bias'] + new['clip']['layers.3.layernorm_2.weight'] = s['cond_stage_model.transformer.text_model.encoder.layers.3.layer_norm2.weight'] + new['clip']['layers.3.layernorm_2.bias'] = s['cond_stage_model.transformer.text_model.encoder.layers.3.layer_norm2.bias'] + new['clip']['layers.4.attention.out_proj.weight'] = 
s['cond_stage_model.transformer.text_model.encoder.layers.4.self_attn.out_proj.weight'] + new['clip']['layers.4.attention.out_proj.bias'] = s['cond_stage_model.transformer.text_model.encoder.layers.4.self_attn.out_proj.bias'] + new['clip']['layers.4.layernorm_1.weight'] = s['cond_stage_model.transformer.text_model.encoder.layers.4.layer_norm1.weight'] + new['clip']['layers.4.layernorm_1.bias'] = s['cond_stage_model.transformer.text_model.encoder.layers.4.layer_norm1.bias'] + new['clip']['layers.4.linear_1.weight'] = s['cond_stage_model.transformer.text_model.encoder.layers.4.mlp.fc1.weight'] + new['clip']['layers.4.linear_1.bias'] = s['cond_stage_model.transformer.text_model.encoder.layers.4.mlp.fc1.bias'] + new['clip']['layers.4.linear_2.weight'] = s['cond_stage_model.transformer.text_model.encoder.layers.4.mlp.fc2.weight'] + new['clip']['layers.4.linear_2.bias'] = s['cond_stage_model.transformer.text_model.encoder.layers.4.mlp.fc2.bias'] + new['clip']['layers.4.layernorm_2.weight'] = s['cond_stage_model.transformer.text_model.encoder.layers.4.layer_norm2.weight'] + new['clip']['layers.4.layernorm_2.bias'] = s['cond_stage_model.transformer.text_model.encoder.layers.4.layer_norm2.bias'] + new['clip']['layers.5.attention.out_proj.weight'] = s['cond_stage_model.transformer.text_model.encoder.layers.5.self_attn.out_proj.weight'] + new['clip']['layers.5.attention.out_proj.bias'] = s['cond_stage_model.transformer.text_model.encoder.layers.5.self_attn.out_proj.bias'] + new['clip']['layers.5.layernorm_1.weight'] = s['cond_stage_model.transformer.text_model.encoder.layers.5.layer_norm1.weight'] + new['clip']['layers.5.layernorm_1.bias'] = s['cond_stage_model.transformer.text_model.encoder.layers.5.layer_norm1.bias'] + new['clip']['layers.5.linear_1.weight'] = s['cond_stage_model.transformer.text_model.encoder.layers.5.mlp.fc1.weight'] + new['clip']['layers.5.linear_1.bias'] = s['cond_stage_model.transformer.text_model.encoder.layers.5.mlp.fc1.bias'] + 
new['clip']['layers.5.linear_2.weight'] = s['cond_stage_model.transformer.text_model.encoder.layers.5.mlp.fc2.weight'] + new['clip']['layers.5.linear_2.bias'] = s['cond_stage_model.transformer.text_model.encoder.layers.5.mlp.fc2.bias'] + new['clip']['layers.5.layernorm_2.weight'] = s['cond_stage_model.transformer.text_model.encoder.layers.5.layer_norm2.weight'] + new['clip']['layers.5.layernorm_2.bias'] = s['cond_stage_model.transformer.text_model.encoder.layers.5.layer_norm2.bias'] + new['clip']['layers.6.attention.out_proj.weight'] = s['cond_stage_model.transformer.text_model.encoder.layers.6.self_attn.out_proj.weight'] + new['clip']['layers.6.attention.out_proj.bias'] = s['cond_stage_model.transformer.text_model.encoder.layers.6.self_attn.out_proj.bias'] + new['clip']['layers.6.layernorm_1.weight'] = s['cond_stage_model.transformer.text_model.encoder.layers.6.layer_norm1.weight'] + new['clip']['layers.6.layernorm_1.bias'] = s['cond_stage_model.transformer.text_model.encoder.layers.6.layer_norm1.bias'] + new['clip']['layers.6.linear_1.weight'] = s['cond_stage_model.transformer.text_model.encoder.layers.6.mlp.fc1.weight'] + new['clip']['layers.6.linear_1.bias'] = s['cond_stage_model.transformer.text_model.encoder.layers.6.mlp.fc1.bias'] + new['clip']['layers.6.linear_2.weight'] = s['cond_stage_model.transformer.text_model.encoder.layers.6.mlp.fc2.weight'] + new['clip']['layers.6.linear_2.bias'] = s['cond_stage_model.transformer.text_model.encoder.layers.6.mlp.fc2.bias'] + new['clip']['layers.6.layernorm_2.weight'] = s['cond_stage_model.transformer.text_model.encoder.layers.6.layer_norm2.weight'] + new['clip']['layers.6.layernorm_2.bias'] = s['cond_stage_model.transformer.text_model.encoder.layers.6.layer_norm2.bias'] + new['clip']['layers.7.attention.out_proj.weight'] = s['cond_stage_model.transformer.text_model.encoder.layers.7.self_attn.out_proj.weight'] + new['clip']['layers.7.attention.out_proj.bias'] = 
s['cond_stage_model.transformer.text_model.encoder.layers.7.self_attn.out_proj.bias'] + new['clip']['layers.7.layernorm_1.weight'] = s['cond_stage_model.transformer.text_model.encoder.layers.7.layer_norm1.weight'] + new['clip']['layers.7.layernorm_1.bias'] = s['cond_stage_model.transformer.text_model.encoder.layers.7.layer_norm1.bias'] + new['clip']['layers.7.linear_1.weight'] = s['cond_stage_model.transformer.text_model.encoder.layers.7.mlp.fc1.weight'] + new['clip']['layers.7.linear_1.bias'] = s['cond_stage_model.transformer.text_model.encoder.layers.7.mlp.fc1.bias'] + new['clip']['layers.7.linear_2.weight'] = s['cond_stage_model.transformer.text_model.encoder.layers.7.mlp.fc2.weight'] + new['clip']['layers.7.linear_2.bias'] = s['cond_stage_model.transformer.text_model.encoder.layers.7.mlp.fc2.bias'] + new['clip']['layers.7.layernorm_2.weight'] = s['cond_stage_model.transformer.text_model.encoder.layers.7.layer_norm2.weight'] + new['clip']['layers.7.layernorm_2.bias'] = s['cond_stage_model.transformer.text_model.encoder.layers.7.layer_norm2.bias'] + new['clip']['layers.8.attention.out_proj.weight'] = s['cond_stage_model.transformer.text_model.encoder.layers.8.self_attn.out_proj.weight'] + new['clip']['layers.8.attention.out_proj.bias'] = s['cond_stage_model.transformer.text_model.encoder.layers.8.self_attn.out_proj.bias'] + new['clip']['layers.8.layernorm_1.weight'] = s['cond_stage_model.transformer.text_model.encoder.layers.8.layer_norm1.weight'] + new['clip']['layers.8.layernorm_1.bias'] = s['cond_stage_model.transformer.text_model.encoder.layers.8.layer_norm1.bias'] + new['clip']['layers.8.linear_1.weight'] = s['cond_stage_model.transformer.text_model.encoder.layers.8.mlp.fc1.weight'] + new['clip']['layers.8.linear_1.bias'] = s['cond_stage_model.transformer.text_model.encoder.layers.8.mlp.fc1.bias'] + new['clip']['layers.8.linear_2.weight'] = s['cond_stage_model.transformer.text_model.encoder.layers.8.mlp.fc2.weight'] + new['clip']['layers.8.linear_2.bias'] = 
s['cond_stage_model.transformer.text_model.encoder.layers.8.mlp.fc2.bias'] + new['clip']['layers.8.layernorm_2.weight'] = s['cond_stage_model.transformer.text_model.encoder.layers.8.layer_norm2.weight'] + new['clip']['layers.8.layernorm_2.bias'] = s['cond_stage_model.transformer.text_model.encoder.layers.8.layer_norm2.bias'] + new['clip']['layers.9.attention.out_proj.weight'] = s['cond_stage_model.transformer.text_model.encoder.layers.9.self_attn.out_proj.weight'] + new['clip']['layers.9.attention.out_proj.bias'] = s['cond_stage_model.transformer.text_model.encoder.layers.9.self_attn.out_proj.bias'] + new['clip']['layers.9.layernorm_1.weight'] = s['cond_stage_model.transformer.text_model.encoder.layers.9.layer_norm1.weight'] + new['clip']['layers.9.layernorm_1.bias'] = s['cond_stage_model.transformer.text_model.encoder.layers.9.layer_norm1.bias'] + new['clip']['layers.9.linear_1.weight'] = s['cond_stage_model.transformer.text_model.encoder.layers.9.mlp.fc1.weight'] + new['clip']['layers.9.linear_1.bias'] = s['cond_stage_model.transformer.text_model.encoder.layers.9.mlp.fc1.bias'] + new['clip']['layers.9.linear_2.weight'] = s['cond_stage_model.transformer.text_model.encoder.layers.9.mlp.fc2.weight'] + new['clip']['layers.9.linear_2.bias'] = s['cond_stage_model.transformer.text_model.encoder.layers.9.mlp.fc2.bias'] + new['clip']['layers.9.layernorm_2.weight'] = s['cond_stage_model.transformer.text_model.encoder.layers.9.layer_norm2.weight'] + new['clip']['layers.9.layernorm_2.bias'] = s['cond_stage_model.transformer.text_model.encoder.layers.9.layer_norm2.bias'] + new['clip']['layers.10.attention.out_proj.weight'] = s['cond_stage_model.transformer.text_model.encoder.layers.10.self_attn.out_proj.weight'] + new['clip']['layers.10.attention.out_proj.bias'] = s['cond_stage_model.transformer.text_model.encoder.layers.10.self_attn.out_proj.bias'] + new['clip']['layers.10.layernorm_1.weight'] = 
s['cond_stage_model.transformer.text_model.encoder.layers.10.layer_norm1.weight'] + new['clip']['layers.10.layernorm_1.bias'] = s['cond_stage_model.transformer.text_model.encoder.layers.10.layer_norm1.bias'] + new['clip']['layers.10.linear_1.weight'] = s['cond_stage_model.transformer.text_model.encoder.layers.10.mlp.fc1.weight'] + new['clip']['layers.10.linear_1.bias'] = s['cond_stage_model.transformer.text_model.encoder.layers.10.mlp.fc1.bias'] + new['clip']['layers.10.linear_2.weight'] = s['cond_stage_model.transformer.text_model.encoder.layers.10.mlp.fc2.weight'] + new['clip']['layers.10.linear_2.bias'] = s['cond_stage_model.transformer.text_model.encoder.layers.10.mlp.fc2.bias'] + new['clip']['layers.10.layernorm_2.weight'] = s['cond_stage_model.transformer.text_model.encoder.layers.10.layer_norm2.weight'] + new['clip']['layers.10.layernorm_2.bias'] = s['cond_stage_model.transformer.text_model.encoder.layers.10.layer_norm2.bias'] + new['clip']['layers.11.attention.out_proj.weight'] = s['cond_stage_model.transformer.text_model.encoder.layers.11.self_attn.out_proj.weight'] + new['clip']['layers.11.attention.out_proj.bias'] = s['cond_stage_model.transformer.text_model.encoder.layers.11.self_attn.out_proj.bias'] + new['clip']['layers.11.layernorm_1.weight'] = s['cond_stage_model.transformer.text_model.encoder.layers.11.layer_norm1.weight'] + new['clip']['layers.11.layernorm_1.bias'] = s['cond_stage_model.transformer.text_model.encoder.layers.11.layer_norm1.bias'] + new['clip']['layers.11.linear_1.weight'] = s['cond_stage_model.transformer.text_model.encoder.layers.11.mlp.fc1.weight'] + new['clip']['layers.11.linear_1.bias'] = s['cond_stage_model.transformer.text_model.encoder.layers.11.mlp.fc1.bias'] + new['clip']['layers.11.linear_2.weight'] = s['cond_stage_model.transformer.text_model.encoder.layers.11.mlp.fc2.weight'] + new['clip']['layers.11.linear_2.bias'] = s['cond_stage_model.transformer.text_model.encoder.layers.11.mlp.fc2.bias'] + 
new['clip']['layers.11.layernorm_2.weight'] = s['cond_stage_model.transformer.text_model.encoder.layers.11.layer_norm2.weight'] + new['clip']['layers.11.layernorm_2.bias'] = s['cond_stage_model.transformer.text_model.encoder.layers.11.layer_norm2.bias'] + new['clip']['layernorm.weight'] = s['cond_stage_model.transformer.text_model.final_layer_norm.weight'] + new['clip']['layernorm.bias'] = s['cond_stage_model.transformer.text_model.final_layer_norm.bias'] + new['diffusion']['unet.encoders.1.1.attention_1.in_proj.weight'] = torch.cat((s['model.diffusion_model.input_blocks.1.1.transformer_blocks.0.attn1.to_q.weight'], s['model.diffusion_model.input_blocks.1.1.transformer_blocks.0.attn1.to_k.weight'], s['model.diffusion_model.input_blocks.1.1.transformer_blocks.0.attn1.to_v.weight']), 0) + new['diffusion']['unet.encoders.2.1.attention_1.in_proj.weight'] = torch.cat((s['model.diffusion_model.input_blocks.2.1.transformer_blocks.0.attn1.to_q.weight'], s['model.diffusion_model.input_blocks.2.1.transformer_blocks.0.attn1.to_k.weight'], s['model.diffusion_model.input_blocks.2.1.transformer_blocks.0.attn1.to_v.weight']), 0) + new['diffusion']['unet.encoders.4.1.attention_1.in_proj.weight'] = torch.cat((s['model.diffusion_model.input_blocks.4.1.transformer_blocks.0.attn1.to_q.weight'], s['model.diffusion_model.input_blocks.4.1.transformer_blocks.0.attn1.to_k.weight'], s['model.diffusion_model.input_blocks.4.1.transformer_blocks.0.attn1.to_v.weight']), 0) + new['diffusion']['unet.encoders.5.1.attention_1.in_proj.weight'] = torch.cat((s['model.diffusion_model.input_blocks.5.1.transformer_blocks.0.attn1.to_q.weight'], s['model.diffusion_model.input_blocks.5.1.transformer_blocks.0.attn1.to_k.weight'], s['model.diffusion_model.input_blocks.5.1.transformer_blocks.0.attn1.to_v.weight']), 0) + new['diffusion']['unet.encoders.7.1.attention_1.in_proj.weight'] = torch.cat((s['model.diffusion_model.input_blocks.7.1.transformer_blocks.0.attn1.to_q.weight'], 
s['model.diffusion_model.input_blocks.7.1.transformer_blocks.0.attn1.to_k.weight'], s['model.diffusion_model.input_blocks.7.1.transformer_blocks.0.attn1.to_v.weight']), 0) + new['diffusion']['unet.encoders.8.1.attention_1.in_proj.weight'] = torch.cat((s['model.diffusion_model.input_blocks.8.1.transformer_blocks.0.attn1.to_q.weight'], s['model.diffusion_model.input_blocks.8.1.transformer_blocks.0.attn1.to_k.weight'], s['model.diffusion_model.input_blocks.8.1.transformer_blocks.0.attn1.to_v.weight']), 0) + new['diffusion']['unet.bottleneck.1.attention_1.in_proj.weight'] = torch.cat((s['model.diffusion_model.middle_block.1.transformer_blocks.0.attn1.to_q.weight'], s['model.diffusion_model.middle_block.1.transformer_blocks.0.attn1.to_k.weight'], s['model.diffusion_model.middle_block.1.transformer_blocks.0.attn1.to_v.weight']), 0) + new['diffusion']['unet.decoders.3.1.attention_1.in_proj.weight'] = torch.cat((s['model.diffusion_model.output_blocks.3.1.transformer_blocks.0.attn1.to_q.weight'], s['model.diffusion_model.output_blocks.3.1.transformer_blocks.0.attn1.to_k.weight'], s['model.diffusion_model.output_blocks.3.1.transformer_blocks.0.attn1.to_v.weight']), 0) + new['diffusion']['unet.decoders.4.1.attention_1.in_proj.weight'] = torch.cat((s['model.diffusion_model.output_blocks.4.1.transformer_blocks.0.attn1.to_q.weight'], s['model.diffusion_model.output_blocks.4.1.transformer_blocks.0.attn1.to_k.weight'], s['model.diffusion_model.output_blocks.4.1.transformer_blocks.0.attn1.to_v.weight']), 0) + new['diffusion']['unet.decoders.5.1.attention_1.in_proj.weight'] = torch.cat((s['model.diffusion_model.output_blocks.5.1.transformer_blocks.0.attn1.to_q.weight'], s['model.diffusion_model.output_blocks.5.1.transformer_blocks.0.attn1.to_k.weight'], s['model.diffusion_model.output_blocks.5.1.transformer_blocks.0.attn1.to_v.weight']), 0) + new['diffusion']['unet.decoders.6.1.attention_1.in_proj.weight'] = 
torch.cat((s['model.diffusion_model.output_blocks.6.1.transformer_blocks.0.attn1.to_q.weight'], s['model.diffusion_model.output_blocks.6.1.transformer_blocks.0.attn1.to_k.weight'], s['model.diffusion_model.output_blocks.6.1.transformer_blocks.0.attn1.to_v.weight']), 0) + new['diffusion']['unet.decoders.7.1.attention_1.in_proj.weight'] = torch.cat((s['model.diffusion_model.output_blocks.7.1.transformer_blocks.0.attn1.to_q.weight'], s['model.diffusion_model.output_blocks.7.1.transformer_blocks.0.attn1.to_k.weight'], s['model.diffusion_model.output_blocks.7.1.transformer_blocks.0.attn1.to_v.weight']), 0) + new['diffusion']['unet.decoders.8.1.attention_1.in_proj.weight'] = torch.cat((s['model.diffusion_model.output_blocks.8.1.transformer_blocks.0.attn1.to_q.weight'], s['model.diffusion_model.output_blocks.8.1.transformer_blocks.0.attn1.to_k.weight'], s['model.diffusion_model.output_blocks.8.1.transformer_blocks.0.attn1.to_v.weight']), 0) + new['diffusion']['unet.decoders.9.1.attention_1.in_proj.weight'] = torch.cat((s['model.diffusion_model.output_blocks.9.1.transformer_blocks.0.attn1.to_q.weight'], s['model.diffusion_model.output_blocks.9.1.transformer_blocks.0.attn1.to_k.weight'], s['model.diffusion_model.output_blocks.9.1.transformer_blocks.0.attn1.to_v.weight']), 0) + new['diffusion']['unet.decoders.10.1.attention_1.in_proj.weight'] = torch.cat((s['model.diffusion_model.output_blocks.10.1.transformer_blocks.0.attn1.to_q.weight'], s['model.diffusion_model.output_blocks.10.1.transformer_blocks.0.attn1.to_k.weight'], s['model.diffusion_model.output_blocks.10.1.transformer_blocks.0.attn1.to_v.weight']), 0) + new['diffusion']['unet.decoders.11.1.attention_1.in_proj.weight'] = torch.cat((s['model.diffusion_model.output_blocks.11.1.transformer_blocks.0.attn1.to_q.weight'], s['model.diffusion_model.output_blocks.11.1.transformer_blocks.0.attn1.to_k.weight'], s['model.diffusion_model.output_blocks.11.1.transformer_blocks.0.attn1.to_v.weight']), 0) + 
new['encoder']['13.attention.in_proj.weight'] = torch.cat((s['first_stage_model.encoder.mid.attn_1.q.weight'], s['first_stage_model.encoder.mid.attn_1.k.weight'], s['first_stage_model.encoder.mid.attn_1.v.weight']), 0).reshape((1536, 512)) + new['encoder']['13.attention.in_proj.bias'] = torch.cat((s['first_stage_model.encoder.mid.attn_1.q.bias'], s['first_stage_model.encoder.mid.attn_1.k.bias'], s['first_stage_model.encoder.mid.attn_1.v.bias']), 0) + new['encoder']['13.attention.out_proj.weight'] = s['first_stage_model.encoder.mid.attn_1.proj_out.weight'].reshape((512, 512)) + new['decoder']['3.attention.in_proj.weight'] = torch.cat((s['first_stage_model.decoder.mid.attn_1.q.weight'], s['first_stage_model.decoder.mid.attn_1.k.weight'], s['first_stage_model.decoder.mid.attn_1.v.weight']), 0).reshape((1536, 512)) + new['decoder']['3.attention.in_proj.bias'] = torch.cat((s['first_stage_model.decoder.mid.attn_1.q.bias'], s['first_stage_model.decoder.mid.attn_1.k.bias'], s['first_stage_model.decoder.mid.attn_1.v.bias']), 0) + new['decoder']['3.attention.out_proj.weight'] = s['first_stage_model.decoder.mid.attn_1.proj_out.weight'].reshape((512, 512)) + new['clip']['layers.0.attention.in_proj.weight'] = torch.cat((s['cond_stage_model.transformer.text_model.encoder.layers.0.self_attn.q_proj.weight'], s['cond_stage_model.transformer.text_model.encoder.layers.0.self_attn.k_proj.weight'], s['cond_stage_model.transformer.text_model.encoder.layers.0.self_attn.v_proj.weight']), 0) + new['clip']['layers.0.attention.in_proj.bias'] = torch.cat((s['cond_stage_model.transformer.text_model.encoder.layers.0.self_attn.q_proj.bias'], s['cond_stage_model.transformer.text_model.encoder.layers.0.self_attn.k_proj.bias'], s['cond_stage_model.transformer.text_model.encoder.layers.0.self_attn.v_proj.bias']), 0) + new['clip']['layers.1.attention.in_proj.weight'] = torch.cat((s['cond_stage_model.transformer.text_model.encoder.layers.1.self_attn.q_proj.weight'], 
s['cond_stage_model.transformer.text_model.encoder.layers.1.self_attn.k_proj.weight'], s['cond_stage_model.transformer.text_model.encoder.layers.1.self_attn.v_proj.weight']), 0) + new['clip']['layers.1.attention.in_proj.bias'] = torch.cat((s['cond_stage_model.transformer.text_model.encoder.layers.1.self_attn.q_proj.bias'], s['cond_stage_model.transformer.text_model.encoder.layers.1.self_attn.k_proj.bias'], s['cond_stage_model.transformer.text_model.encoder.layers.1.self_attn.v_proj.bias']), 0) + new['clip']['layers.2.attention.in_proj.weight'] = torch.cat((s['cond_stage_model.transformer.text_model.encoder.layers.2.self_attn.q_proj.weight'], s['cond_stage_model.transformer.text_model.encoder.layers.2.self_attn.k_proj.weight'], s['cond_stage_model.transformer.text_model.encoder.layers.2.self_attn.v_proj.weight']), 0) + new['clip']['layers.2.attention.in_proj.bias'] = torch.cat((s['cond_stage_model.transformer.text_model.encoder.layers.2.self_attn.q_proj.bias'], s['cond_stage_model.transformer.text_model.encoder.layers.2.self_attn.k_proj.bias'], s['cond_stage_model.transformer.text_model.encoder.layers.2.self_attn.v_proj.bias']), 0) + new['clip']['layers.3.attention.in_proj.weight'] = torch.cat((s['cond_stage_model.transformer.text_model.encoder.layers.3.self_attn.q_proj.weight'], s['cond_stage_model.transformer.text_model.encoder.layers.3.self_attn.k_proj.weight'], s['cond_stage_model.transformer.text_model.encoder.layers.3.self_attn.v_proj.weight']), 0) + new['clip']['layers.3.attention.in_proj.bias'] = torch.cat((s['cond_stage_model.transformer.text_model.encoder.layers.3.self_attn.q_proj.bias'], s['cond_stage_model.transformer.text_model.encoder.layers.3.self_attn.k_proj.bias'], s['cond_stage_model.transformer.text_model.encoder.layers.3.self_attn.v_proj.bias']), 0) + new['clip']['layers.4.attention.in_proj.weight'] = torch.cat((s['cond_stage_model.transformer.text_model.encoder.layers.4.self_attn.q_proj.weight'], 
s['cond_stage_model.transformer.text_model.encoder.layers.4.self_attn.k_proj.weight'], s['cond_stage_model.transformer.text_model.encoder.layers.4.self_attn.v_proj.weight']), 0) + new['clip']['layers.4.attention.in_proj.bias'] = torch.cat((s['cond_stage_model.transformer.text_model.encoder.layers.4.self_attn.q_proj.bias'], s['cond_stage_model.transformer.text_model.encoder.layers.4.self_attn.k_proj.bias'], s['cond_stage_model.transformer.text_model.encoder.layers.4.self_attn.v_proj.bias']), 0) + new['clip']['layers.5.attention.in_proj.weight'] = torch.cat((s['cond_stage_model.transformer.text_model.encoder.layers.5.self_attn.q_proj.weight'], s['cond_stage_model.transformer.text_model.encoder.layers.5.self_attn.k_proj.weight'], s['cond_stage_model.transformer.text_model.encoder.layers.5.self_attn.v_proj.weight']), 0) + new['clip']['layers.5.attention.in_proj.bias'] = torch.cat((s['cond_stage_model.transformer.text_model.encoder.layers.5.self_attn.q_proj.bias'], s['cond_stage_model.transformer.text_model.encoder.layers.5.self_attn.k_proj.bias'], s['cond_stage_model.transformer.text_model.encoder.layers.5.self_attn.v_proj.bias']), 0) + new['clip']['layers.6.attention.in_proj.weight'] = torch.cat((s['cond_stage_model.transformer.text_model.encoder.layers.6.self_attn.q_proj.weight'], s['cond_stage_model.transformer.text_model.encoder.layers.6.self_attn.k_proj.weight'], s['cond_stage_model.transformer.text_model.encoder.layers.6.self_attn.v_proj.weight']), 0) + new['clip']['layers.6.attention.in_proj.bias'] = torch.cat((s['cond_stage_model.transformer.text_model.encoder.layers.6.self_attn.q_proj.bias'], s['cond_stage_model.transformer.text_model.encoder.layers.6.self_attn.k_proj.bias'], s['cond_stage_model.transformer.text_model.encoder.layers.6.self_attn.v_proj.bias']), 0) + new['clip']['layers.7.attention.in_proj.weight'] = torch.cat((s['cond_stage_model.transformer.text_model.encoder.layers.7.self_attn.q_proj.weight'], 
s['cond_stage_model.transformer.text_model.encoder.layers.7.self_attn.k_proj.weight'], s['cond_stage_model.transformer.text_model.encoder.layers.7.self_attn.v_proj.weight']), 0) + new['clip']['layers.7.attention.in_proj.bias'] = torch.cat((s['cond_stage_model.transformer.text_model.encoder.layers.7.self_attn.q_proj.bias'], s['cond_stage_model.transformer.text_model.encoder.layers.7.self_attn.k_proj.bias'], s['cond_stage_model.transformer.text_model.encoder.layers.7.self_attn.v_proj.bias']), 0) + new['clip']['layers.8.attention.in_proj.weight'] = torch.cat((s['cond_stage_model.transformer.text_model.encoder.layers.8.self_attn.q_proj.weight'], s['cond_stage_model.transformer.text_model.encoder.layers.8.self_attn.k_proj.weight'], s['cond_stage_model.transformer.text_model.encoder.layers.8.self_attn.v_proj.weight']), 0) + new['clip']['layers.8.attention.in_proj.bias'] = torch.cat((s['cond_stage_model.transformer.text_model.encoder.layers.8.self_attn.q_proj.bias'], s['cond_stage_model.transformer.text_model.encoder.layers.8.self_attn.k_proj.bias'], s['cond_stage_model.transformer.text_model.encoder.layers.8.self_attn.v_proj.bias']), 0) + new['clip']['layers.9.attention.in_proj.weight'] = torch.cat((s['cond_stage_model.transformer.text_model.encoder.layers.9.self_attn.q_proj.weight'], s['cond_stage_model.transformer.text_model.encoder.layers.9.self_attn.k_proj.weight'], s['cond_stage_model.transformer.text_model.encoder.layers.9.self_attn.v_proj.weight']), 0) + new['clip']['layers.9.attention.in_proj.bias'] = torch.cat((s['cond_stage_model.transformer.text_model.encoder.layers.9.self_attn.q_proj.bias'], s['cond_stage_model.transformer.text_model.encoder.layers.9.self_attn.k_proj.bias'], s['cond_stage_model.transformer.text_model.encoder.layers.9.self_attn.v_proj.bias']), 0) + new['clip']['layers.10.attention.in_proj.weight'] = torch.cat((s['cond_stage_model.transformer.text_model.encoder.layers.10.self_attn.q_proj.weight'], 
s['cond_stage_model.transformer.text_model.encoder.layers.10.self_attn.k_proj.weight'], s['cond_stage_model.transformer.text_model.encoder.layers.10.self_attn.v_proj.weight']), 0) + new['clip']['layers.10.attention.in_proj.bias'] = torch.cat((s['cond_stage_model.transformer.text_model.encoder.layers.10.self_attn.q_proj.bias'], s['cond_stage_model.transformer.text_model.encoder.layers.10.self_attn.k_proj.bias'], s['cond_stage_model.transformer.text_model.encoder.layers.10.self_attn.v_proj.bias']), 0) + new['clip']['layers.11.attention.in_proj.weight'] = torch.cat((s['cond_stage_model.transformer.text_model.encoder.layers.11.self_attn.q_proj.weight'], s['cond_stage_model.transformer.text_model.encoder.layers.11.self_attn.k_proj.weight'], s['cond_stage_model.transformer.text_model.encoder.layers.11.self_attn.v_proj.weight']), 0) + new['clip']['layers.11.attention.in_proj.bias'] = torch.cat((s['cond_stage_model.transformer.text_model.encoder.layers.11.self_attn.q_proj.bias'], s['cond_stage_model.transformer.text_model.encoder.layers.11.self_attn.k_proj.bias'], s['cond_stage_model.transformer.text_model.encoder.layers.11.self_attn.v_proj.bias']), 0) + return new + + diff --git a/stable_diffusion_pytorch/model_loader.py b/stable_diffusion_pytorch/model_loader.py index 9e5bd89..1756ad6 100644 --- a/stable_diffusion_pytorch/model_loader.py +++ b/stable_diffusion_pytorch/model_loader.py @@ -31,42 +31,86 @@ def make_compatible(state_dict): return state_dict -def load_clip(device): +# The following functions load the default models, used when the user supplied `models` is missing +# one or more of (clip, encoder, decoder, diffusion). 
+ +def load_clip(device, dtype=torch.float32): state_dict = torch.load(util.get_file_path('ckpt/clip.pt')) state_dict = make_compatible(state_dict) - clip = CLIP().to(device) + clip = CLIP().to(dtype).to(device) clip.load_state_dict(state_dict) return clip -def load_encoder(device): +def load_encoder(device, dtype=torch.float32): state_dict = torch.load(util.get_file_path('ckpt/encoder.pt')) state_dict = make_compatible(state_dict) - encoder = Encoder().to(device) + encoder = Encoder().to(dtype).to(device) encoder.load_state_dict(state_dict) return encoder -def load_decoder(device): +def load_decoder(device, dtype=torch.float32): state_dict = torch.load(util.get_file_path('ckpt/decoder.pt')) state_dict = make_compatible(state_dict) - decoder = Decoder().to(device) + decoder = Decoder().to(dtype).to(device) decoder.load_state_dict(state_dict) return decoder -def load_diffusion(device): +def load_diffusion(device, dtype=torch.float32): state_dict = torch.load(util.get_file_path('ckpt/diffusion.pt')) state_dict = make_compatible(state_dict) - diffusion = Diffusion().to(device) + diffusion = Diffusion().to(dtype).to(device) diffusion.load_state_dict(state_dict) return diffusion -def preload_models(device): +r""" + Create and load the 4 default models (clip, encoder, decoder, diffusion). + Args: + device (`str`): + The device to run the models on, passed to model.to() + dtype (`torch.dtype`, *optional*): + The data type of the model to create. Note that this can be different + from the data type of the checkpoint file, and pytorch will auto convert. 
+ Returns: + `Dict[str, torch.nn.Module]`: + The loaded models to be passed to pipeline.generate() + """ +def preload_models(device, dtype=torch.float32): return { - 'clip': load_clip(device), - 'encoder': load_encoder(device), - 'decoder': load_decoder(device), - 'diffusion': load_diffusion(device), - } \ No newline at end of file + 'clip': load_clip(device, dtype), + 'encoder': load_encoder(device, dtype), + 'decoder': load_decoder(device, dtype), + 'diffusion': load_diffusion(device, dtype), + } + +r""" + Create the 4 models that the pipeline expects and load the weights from state_dicts + (not an original stable diffusion state_dict!). + Args: + state_dicts (`Dict[str, Dict[str, torch.Tensor]]`): + A dict with 4 keys: clip, encoder, decoder, diffusion; each key's value is a dict of weights + for that model. You can pass in the dict returned by convert_from_sdmodel.split_state_dict(). + device (`str`): + The device to run the models on, passed to model.to() + dtype (`torch.dtype`, *optional*): + The data type of the model to create. Note that this can be different + from the data type of the checkpoint file, and pytorch will auto convert. 
+ Returns: + `Dict[str, torch.nn.Module]`: + The loaded models to be passed to pipeline.generate() + """ +def load_models(state_dicts, device, dtype=torch.float32): + models = {} + models['clip'] = CLIP().to(dtype).to(device) + models['encoder'] = Encoder().to(dtype).to(device) + models['decoder'] = Decoder().to(dtype).to(device) + models['diffusion'] = Diffusion().to(dtype).to(device) + + models['clip'].load_state_dict(state_dicts['clip']) + models['encoder'].load_state_dict(state_dicts['encoder']) + models['decoder'].load_state_dict(state_dicts['decoder']) + models['diffusion'].load_state_dict(state_dicts['diffusion']) + return models diff --git a/stable_diffusion_pytorch/pipeline.py b/stable_diffusion_pytorch/pipeline.py index b5d1868..3d16208 100644 --- a/stable_diffusion_pytorch/pipeline.py +++ b/stable_diffusion_pytorch/pipeline.py @@ -58,6 +58,8 @@ def generate( expense of slower inference. This parameter will be modulated by `strength`. models (`Dict[str, torch.nn.Module]`, *optional*): Preloaded models. If some or all models are not provided, they will be loaded dynamically. + Note that auto-loaded modules default to float32, so if you are using float16 models, you must + provide all 4 models. seed (`int`, *optional*): A seed to make generation deterministic. device (`str` or `torch.device`, *optional*):