From 185f69e1fed9500448ffbe7150bf79d4e53e4d6b Mon Sep 17 00:00:00 2001 From: a Date: Fri, 1 Mar 2024 15:41:32 +0000 Subject: [PATCH 01/11] add checkpoint_loader, support original stable diffusion checkpoints --- stable_diffusion_pytorch/checkpoint_loader.py | 1101 +++++++++++++++++ 1 file changed, 1101 insertions(+) create mode 100644 stable_diffusion_pytorch/checkpoint_loader.py diff --git a/stable_diffusion_pytorch/checkpoint_loader.py b/stable_diffusion_pytorch/checkpoint_loader.py new file mode 100644 index 0000000..c21d27a --- /dev/null +++ b/stable_diffusion_pytorch/checkpoint_loader.py @@ -0,0 +1,1101 @@ +import torch +import safetensors.torch +from . import Tokenizer, CLIP, Encoder, Decoder, Diffusion + +r""" + Create the 4 models that the pipeline expects and load the weights from state_dicts (not an original stable diffusion state_dict!). + Args: + state_dicts (`Dict[str, str]`): + A dict with 4 keys: clip, encoder, decoder, diffusion; each key's value is a dict of weights for that model. + You can pass in the dict returned by split_state_dict(). + device (`str`): + The device to run the models on, passed to model.to() + useHalfPrecision (`bool`, *optional*): + If true, use float16, otherwise float32. 
+ Returns: + `Dict[str, torch.nn.Module]`: + The loaded models to be passed to pipeline.generate() + """ +def load_models(state_dicts, device, useHalfPrecision=False): + models = {} + if useHalfPrecision: + models['clip'] = CLIP().to(device).half() + models['encoder'] = Encoder().to(device).half() + models['decoder'] = Decoder().to(device).half() + models['diffusion'] = Diffusion().to(device).half() + else: + models['clip'] = CLIP().to(device) + models['encoder'] = Encoder().to(device) + models['decoder'] = Decoder().to(device) + models['diffusion'] = Diffusion().to(device) + + models['clip'].load_state_dict(state_dicts['clip']) + models['encoder'].load_state_dict(state_dicts['encoder']) + models['decoder'].load_state_dict(state_dicts['decoder']) + models['diffusion'].load_state_dict(state_dicts['diffusion']) + return models + +r""" + Our library model implementation is laid out differently from the original stable diffusion, so + original SD state_dict can not be directly loaded. This function converts an original SD + state_dict to the 4 state_dicts (clip, encoder, decoder, diffusion) that our models expect. + Args: + state_dict (`Dict[str, str]`): + The original state_dict to convert. + Returns: + `Dict[str, Dict[str, str]]`: + The converted state dicts. 
+ """ +def split_state_dict(state_dict): + new = {} + new['diffusion'] = {} + new['encoder'] = {} + new['decoder'] = {} + new['clip'] = {} + + s = state_dict + new['diffusion']['time_embedding.linear_1.weight'] = s['model.diffusion_model.time_embed.0.weight'] + new['diffusion']['time_embedding.linear_1.bias'] = s['model.diffusion_model.time_embed.0.bias'] + new['diffusion']['time_embedding.linear_2.weight'] = s['model.diffusion_model.time_embed.2.weight'] + new['diffusion']['time_embedding.linear_2.bias'] = s['model.diffusion_model.time_embed.2.bias'] + new['diffusion']['unet.encoders.0.0.weight'] = s['model.diffusion_model.input_blocks.0.0.weight'] + new['diffusion']['unet.encoders.0.0.bias'] = s['model.diffusion_model.input_blocks.0.0.bias'] + new['diffusion']['unet.encoders.1.0.groupnorm_feature.weight'] = s['model.diffusion_model.input_blocks.1.0.in_layers.0.weight'] + new['diffusion']['unet.encoders.1.0.groupnorm_feature.bias'] = s['model.diffusion_model.input_blocks.1.0.in_layers.0.bias'] + new['diffusion']['unet.encoders.1.0.conv_feature.weight'] = s['model.diffusion_model.input_blocks.1.0.in_layers.2.weight'] + new['diffusion']['unet.encoders.1.0.conv_feature.bias'] = s['model.diffusion_model.input_blocks.1.0.in_layers.2.bias'] + new['diffusion']['unet.encoders.1.0.linear_time.weight'] = s['model.diffusion_model.input_blocks.1.0.emb_layers.1.weight'] + new['diffusion']['unet.encoders.1.0.linear_time.bias'] = s['model.diffusion_model.input_blocks.1.0.emb_layers.1.bias'] + new['diffusion']['unet.encoders.1.0.groupnorm_merged.weight'] = s['model.diffusion_model.input_blocks.1.0.out_layers.0.weight'] + new['diffusion']['unet.encoders.1.0.groupnorm_merged.bias'] = s['model.diffusion_model.input_blocks.1.0.out_layers.0.bias'] + new['diffusion']['unet.encoders.1.0.conv_merged.weight'] = s['model.diffusion_model.input_blocks.1.0.out_layers.3.weight'] + new['diffusion']['unet.encoders.1.0.conv_merged.bias'] = 
s['model.diffusion_model.input_blocks.1.0.out_layers.3.bias'] + new['diffusion']['unet.encoders.1.1.groupnorm.weight'] = s['model.diffusion_model.input_blocks.1.1.norm.weight'] + new['diffusion']['unet.encoders.1.1.groupnorm.bias'] = s['model.diffusion_model.input_blocks.1.1.norm.bias'] + new['diffusion']['unet.encoders.1.1.conv_input.weight'] = s['model.diffusion_model.input_blocks.1.1.proj_in.weight'] + new['diffusion']['unet.encoders.1.1.conv_input.bias'] = s['model.diffusion_model.input_blocks.1.1.proj_in.bias'] + new['diffusion']['unet.encoders.1.1.attention_1.out_proj.weight'] = s['model.diffusion_model.input_blocks.1.1.transformer_blocks.0.attn1.to_out.0.weight'] + new['diffusion']['unet.encoders.1.1.attention_1.out_proj.bias'] = s['model.diffusion_model.input_blocks.1.1.transformer_blocks.0.attn1.to_out.0.bias'] + new['diffusion']['unet.encoders.1.1.linear_geglu_1.weight'] = s['model.diffusion_model.input_blocks.1.1.transformer_blocks.0.ff.net.0.proj.weight'] + new['diffusion']['unet.encoders.1.1.linear_geglu_1.bias'] = s['model.diffusion_model.input_blocks.1.1.transformer_blocks.0.ff.net.0.proj.bias'] + new['diffusion']['unet.encoders.1.1.linear_geglu_2.weight'] = s['model.diffusion_model.input_blocks.1.1.transformer_blocks.0.ff.net.2.weight'] + new['diffusion']['unet.encoders.1.1.linear_geglu_2.bias'] = s['model.diffusion_model.input_blocks.1.1.transformer_blocks.0.ff.net.2.bias'] + new['diffusion']['unet.encoders.1.1.attention_2.q_proj.weight'] = s['model.diffusion_model.input_blocks.1.1.transformer_blocks.0.attn2.to_q.weight'] + new['diffusion']['unet.encoders.1.1.attention_2.k_proj.weight'] = s['model.diffusion_model.input_blocks.1.1.transformer_blocks.0.attn2.to_k.weight'] + new['diffusion']['unet.encoders.1.1.attention_2.v_proj.weight'] = s['model.diffusion_model.input_blocks.1.1.transformer_blocks.0.attn2.to_v.weight'] + new['diffusion']['unet.encoders.1.1.attention_2.out_proj.weight'] = 
s['model.diffusion_model.input_blocks.1.1.transformer_blocks.0.attn2.to_out.0.weight'] + new['diffusion']['unet.encoders.1.1.attention_2.out_proj.bias'] = s['model.diffusion_model.input_blocks.1.1.transformer_blocks.0.attn2.to_out.0.bias'] + new['diffusion']['unet.encoders.1.1.layernorm_1.weight'] = s['model.diffusion_model.input_blocks.1.1.transformer_blocks.0.norm1.weight'] + new['diffusion']['unet.encoders.1.1.layernorm_1.bias'] = s['model.diffusion_model.input_blocks.1.1.transformer_blocks.0.norm1.bias'] + new['diffusion']['unet.encoders.1.1.layernorm_2.weight'] = s['model.diffusion_model.input_blocks.1.1.transformer_blocks.0.norm2.weight'] + new['diffusion']['unet.encoders.1.1.layernorm_2.bias'] = s['model.diffusion_model.input_blocks.1.1.transformer_blocks.0.norm2.bias'] + new['diffusion']['unet.encoders.1.1.layernorm_3.weight'] = s['model.diffusion_model.input_blocks.1.1.transformer_blocks.0.norm3.weight'] + new['diffusion']['unet.encoders.1.1.layernorm_3.bias'] = s['model.diffusion_model.input_blocks.1.1.transformer_blocks.0.norm3.bias'] + new['diffusion']['unet.encoders.1.1.conv_output.weight'] = s['model.diffusion_model.input_blocks.1.1.proj_out.weight'] + new['diffusion']['unet.encoders.1.1.conv_output.bias'] = s['model.diffusion_model.input_blocks.1.1.proj_out.bias'] + new['diffusion']['unet.encoders.2.0.groupnorm_feature.weight'] = s['model.diffusion_model.input_blocks.2.0.in_layers.0.weight'] + new['diffusion']['unet.encoders.2.0.groupnorm_feature.bias'] = s['model.diffusion_model.input_blocks.2.0.in_layers.0.bias'] + new['diffusion']['unet.encoders.2.0.conv_feature.weight'] = s['model.diffusion_model.input_blocks.2.0.in_layers.2.weight'] + new['diffusion']['unet.encoders.2.0.conv_feature.bias'] = s['model.diffusion_model.input_blocks.2.0.in_layers.2.bias'] + new['diffusion']['unet.encoders.2.0.linear_time.weight'] = s['model.diffusion_model.input_blocks.2.0.emb_layers.1.weight'] + new['diffusion']['unet.encoders.2.0.linear_time.bias'] = 
s['model.diffusion_model.input_blocks.2.0.emb_layers.1.bias'] + new['diffusion']['unet.encoders.2.0.groupnorm_merged.weight'] = s['model.diffusion_model.input_blocks.2.0.out_layers.0.weight'] + new['diffusion']['unet.encoders.2.0.groupnorm_merged.bias'] = s['model.diffusion_model.input_blocks.2.0.out_layers.0.bias'] + new['diffusion']['unet.encoders.2.0.conv_merged.weight'] = s['model.diffusion_model.input_blocks.2.0.out_layers.3.weight'] + new['diffusion']['unet.encoders.2.0.conv_merged.bias'] = s['model.diffusion_model.input_blocks.2.0.out_layers.3.bias'] + new['diffusion']['unet.encoders.2.1.groupnorm.weight'] = s['model.diffusion_model.input_blocks.2.1.norm.weight'] + new['diffusion']['unet.encoders.2.1.groupnorm.bias'] = s['model.diffusion_model.input_blocks.2.1.norm.bias'] + new['diffusion']['unet.encoders.2.1.conv_input.weight'] = s['model.diffusion_model.input_blocks.2.1.proj_in.weight'] + new['diffusion']['unet.encoders.2.1.conv_input.bias'] = s['model.diffusion_model.input_blocks.2.1.proj_in.bias'] + new['diffusion']['unet.encoders.2.1.attention_1.out_proj.weight'] = s['model.diffusion_model.input_blocks.2.1.transformer_blocks.0.attn1.to_out.0.weight'] + new['diffusion']['unet.encoders.2.1.attention_1.out_proj.bias'] = s['model.diffusion_model.input_blocks.2.1.transformer_blocks.0.attn1.to_out.0.bias'] + new['diffusion']['unet.encoders.2.1.linear_geglu_1.weight'] = s['model.diffusion_model.input_blocks.2.1.transformer_blocks.0.ff.net.0.proj.weight'] + new['diffusion']['unet.encoders.2.1.linear_geglu_1.bias'] = s['model.diffusion_model.input_blocks.2.1.transformer_blocks.0.ff.net.0.proj.bias'] + new['diffusion']['unet.encoders.2.1.linear_geglu_2.weight'] = s['model.diffusion_model.input_blocks.2.1.transformer_blocks.0.ff.net.2.weight'] + new['diffusion']['unet.encoders.2.1.linear_geglu_2.bias'] = s['model.diffusion_model.input_blocks.2.1.transformer_blocks.0.ff.net.2.bias'] + new['diffusion']['unet.encoders.2.1.attention_2.q_proj.weight'] = 
s['model.diffusion_model.input_blocks.2.1.transformer_blocks.0.attn2.to_q.weight'] + new['diffusion']['unet.encoders.2.1.attention_2.k_proj.weight'] = s['model.diffusion_model.input_blocks.2.1.transformer_blocks.0.attn2.to_k.weight'] + new['diffusion']['unet.encoders.2.1.attention_2.v_proj.weight'] = s['model.diffusion_model.input_blocks.2.1.transformer_blocks.0.attn2.to_v.weight'] + new['diffusion']['unet.encoders.2.1.attention_2.out_proj.weight'] = s['model.diffusion_model.input_blocks.2.1.transformer_blocks.0.attn2.to_out.0.weight'] + new['diffusion']['unet.encoders.2.1.attention_2.out_proj.bias'] = s['model.diffusion_model.input_blocks.2.1.transformer_blocks.0.attn2.to_out.0.bias'] + new['diffusion']['unet.encoders.2.1.layernorm_1.weight'] = s['model.diffusion_model.input_blocks.2.1.transformer_blocks.0.norm1.weight'] + new['diffusion']['unet.encoders.2.1.layernorm_1.bias'] = s['model.diffusion_model.input_blocks.2.1.transformer_blocks.0.norm1.bias'] + new['diffusion']['unet.encoders.2.1.layernorm_2.weight'] = s['model.diffusion_model.input_blocks.2.1.transformer_blocks.0.norm2.weight'] + new['diffusion']['unet.encoders.2.1.layernorm_2.bias'] = s['model.diffusion_model.input_blocks.2.1.transformer_blocks.0.norm2.bias'] + new['diffusion']['unet.encoders.2.1.layernorm_3.weight'] = s['model.diffusion_model.input_blocks.2.1.transformer_blocks.0.norm3.weight'] + new['diffusion']['unet.encoders.2.1.layernorm_3.bias'] = s['model.diffusion_model.input_blocks.2.1.transformer_blocks.0.norm3.bias'] + new['diffusion']['unet.encoders.2.1.conv_output.weight'] = s['model.diffusion_model.input_blocks.2.1.proj_out.weight'] + new['diffusion']['unet.encoders.2.1.conv_output.bias'] = s['model.diffusion_model.input_blocks.2.1.proj_out.bias'] + new['diffusion']['unet.encoders.3.0.weight'] = s['model.diffusion_model.input_blocks.3.0.op.weight'] + new['diffusion']['unet.encoders.3.0.bias'] = s['model.diffusion_model.input_blocks.3.0.op.bias'] + 
new['diffusion']['unet.encoders.4.0.groupnorm_feature.weight'] = s['model.diffusion_model.input_blocks.4.0.in_layers.0.weight'] + new['diffusion']['unet.encoders.4.0.groupnorm_feature.bias'] = s['model.diffusion_model.input_blocks.4.0.in_layers.0.bias'] + new['diffusion']['unet.encoders.4.0.conv_feature.weight'] = s['model.diffusion_model.input_blocks.4.0.in_layers.2.weight'] + new['diffusion']['unet.encoders.4.0.conv_feature.bias'] = s['model.diffusion_model.input_blocks.4.0.in_layers.2.bias'] + new['diffusion']['unet.encoders.4.0.linear_time.weight'] = s['model.diffusion_model.input_blocks.4.0.emb_layers.1.weight'] + new['diffusion']['unet.encoders.4.0.linear_time.bias'] = s['model.diffusion_model.input_blocks.4.0.emb_layers.1.bias'] + new['diffusion']['unet.encoders.4.0.groupnorm_merged.weight'] = s['model.diffusion_model.input_blocks.4.0.out_layers.0.weight'] + new['diffusion']['unet.encoders.4.0.groupnorm_merged.bias'] = s['model.diffusion_model.input_blocks.4.0.out_layers.0.bias'] + new['diffusion']['unet.encoders.4.0.conv_merged.weight'] = s['model.diffusion_model.input_blocks.4.0.out_layers.3.weight'] + new['diffusion']['unet.encoders.4.0.conv_merged.bias'] = s['model.diffusion_model.input_blocks.4.0.out_layers.3.bias'] + new['diffusion']['unet.encoders.4.0.residual_layer.weight'] = s['model.diffusion_model.input_blocks.4.0.skip_connection.weight'] + new['diffusion']['unet.encoders.4.0.residual_layer.bias'] = s['model.diffusion_model.input_blocks.4.0.skip_connection.bias'] + new['diffusion']['unet.encoders.4.1.groupnorm.weight'] = s['model.diffusion_model.input_blocks.4.1.norm.weight'] + new['diffusion']['unet.encoders.4.1.groupnorm.bias'] = s['model.diffusion_model.input_blocks.4.1.norm.bias'] + new['diffusion']['unet.encoders.4.1.conv_input.weight'] = s['model.diffusion_model.input_blocks.4.1.proj_in.weight'] + new['diffusion']['unet.encoders.4.1.conv_input.bias'] = s['model.diffusion_model.input_blocks.4.1.proj_in.bias'] + 
new['diffusion']['unet.encoders.4.1.attention_1.out_proj.weight'] = s['model.diffusion_model.input_blocks.4.1.transformer_blocks.0.attn1.to_out.0.weight'] + new['diffusion']['unet.encoders.4.1.attention_1.out_proj.bias'] = s['model.diffusion_model.input_blocks.4.1.transformer_blocks.0.attn1.to_out.0.bias'] + new['diffusion']['unet.encoders.4.1.linear_geglu_1.weight'] = s['model.diffusion_model.input_blocks.4.1.transformer_blocks.0.ff.net.0.proj.weight'] + new['diffusion']['unet.encoders.4.1.linear_geglu_1.bias'] = s['model.diffusion_model.input_blocks.4.1.transformer_blocks.0.ff.net.0.proj.bias'] + new['diffusion']['unet.encoders.4.1.linear_geglu_2.weight'] = s['model.diffusion_model.input_blocks.4.1.transformer_blocks.0.ff.net.2.weight'] + new['diffusion']['unet.encoders.4.1.linear_geglu_2.bias'] = s['model.diffusion_model.input_blocks.4.1.transformer_blocks.0.ff.net.2.bias'] + new['diffusion']['unet.encoders.4.1.attention_2.q_proj.weight'] = s['model.diffusion_model.input_blocks.4.1.transformer_blocks.0.attn2.to_q.weight'] + new['diffusion']['unet.encoders.4.1.attention_2.k_proj.weight'] = s['model.diffusion_model.input_blocks.4.1.transformer_blocks.0.attn2.to_k.weight'] + new['diffusion']['unet.encoders.4.1.attention_2.v_proj.weight'] = s['model.diffusion_model.input_blocks.4.1.transformer_blocks.0.attn2.to_v.weight'] + new['diffusion']['unet.encoders.4.1.attention_2.out_proj.weight'] = s['model.diffusion_model.input_blocks.4.1.transformer_blocks.0.attn2.to_out.0.weight'] + new['diffusion']['unet.encoders.4.1.attention_2.out_proj.bias'] = s['model.diffusion_model.input_blocks.4.1.transformer_blocks.0.attn2.to_out.0.bias'] + new['diffusion']['unet.encoders.4.1.layernorm_1.weight'] = s['model.diffusion_model.input_blocks.4.1.transformer_blocks.0.norm1.weight'] + new['diffusion']['unet.encoders.4.1.layernorm_1.bias'] = s['model.diffusion_model.input_blocks.4.1.transformer_blocks.0.norm1.bias'] + new['diffusion']['unet.encoders.4.1.layernorm_2.weight'] = 
s['model.diffusion_model.input_blocks.4.1.transformer_blocks.0.norm2.weight'] + new['diffusion']['unet.encoders.4.1.layernorm_2.bias'] = s['model.diffusion_model.input_blocks.4.1.transformer_blocks.0.norm2.bias'] + new['diffusion']['unet.encoders.4.1.layernorm_3.weight'] = s['model.diffusion_model.input_blocks.4.1.transformer_blocks.0.norm3.weight'] + new['diffusion']['unet.encoders.4.1.layernorm_3.bias'] = s['model.diffusion_model.input_blocks.4.1.transformer_blocks.0.norm3.bias'] + new['diffusion']['unet.encoders.4.1.conv_output.weight'] = s['model.diffusion_model.input_blocks.4.1.proj_out.weight'] + new['diffusion']['unet.encoders.4.1.conv_output.bias'] = s['model.diffusion_model.input_blocks.4.1.proj_out.bias'] + new['diffusion']['unet.encoders.5.0.groupnorm_feature.weight'] = s['model.diffusion_model.input_blocks.5.0.in_layers.0.weight'] + new['diffusion']['unet.encoders.5.0.groupnorm_feature.bias'] = s['model.diffusion_model.input_blocks.5.0.in_layers.0.bias'] + new['diffusion']['unet.encoders.5.0.conv_feature.weight'] = s['model.diffusion_model.input_blocks.5.0.in_layers.2.weight'] + new['diffusion']['unet.encoders.5.0.conv_feature.bias'] = s['model.diffusion_model.input_blocks.5.0.in_layers.2.bias'] + new['diffusion']['unet.encoders.5.0.linear_time.weight'] = s['model.diffusion_model.input_blocks.5.0.emb_layers.1.weight'] + new['diffusion']['unet.encoders.5.0.linear_time.bias'] = s['model.diffusion_model.input_blocks.5.0.emb_layers.1.bias'] + new['diffusion']['unet.encoders.5.0.groupnorm_merged.weight'] = s['model.diffusion_model.input_blocks.5.0.out_layers.0.weight'] + new['diffusion']['unet.encoders.5.0.groupnorm_merged.bias'] = s['model.diffusion_model.input_blocks.5.0.out_layers.0.bias'] + new['diffusion']['unet.encoders.5.0.conv_merged.weight'] = s['model.diffusion_model.input_blocks.5.0.out_layers.3.weight'] + new['diffusion']['unet.encoders.5.0.conv_merged.bias'] = s['model.diffusion_model.input_blocks.5.0.out_layers.3.bias'] + 
new['diffusion']['unet.encoders.5.1.groupnorm.weight'] = s['model.diffusion_model.input_blocks.5.1.norm.weight'] + new['diffusion']['unet.encoders.5.1.groupnorm.bias'] = s['model.diffusion_model.input_blocks.5.1.norm.bias'] + new['diffusion']['unet.encoders.5.1.conv_input.weight'] = s['model.diffusion_model.input_blocks.5.1.proj_in.weight'] + new['diffusion']['unet.encoders.5.1.conv_input.bias'] = s['model.diffusion_model.input_blocks.5.1.proj_in.bias'] + new['diffusion']['unet.encoders.5.1.attention_1.out_proj.weight'] = s['model.diffusion_model.input_blocks.5.1.transformer_blocks.0.attn1.to_out.0.weight'] + new['diffusion']['unet.encoders.5.1.attention_1.out_proj.bias'] = s['model.diffusion_model.input_blocks.5.1.transformer_blocks.0.attn1.to_out.0.bias'] + new['diffusion']['unet.encoders.5.1.linear_geglu_1.weight'] = s['model.diffusion_model.input_blocks.5.1.transformer_blocks.0.ff.net.0.proj.weight'] + new['diffusion']['unet.encoders.5.1.linear_geglu_1.bias'] = s['model.diffusion_model.input_blocks.5.1.transformer_blocks.0.ff.net.0.proj.bias'] + new['diffusion']['unet.encoders.5.1.linear_geglu_2.weight'] = s['model.diffusion_model.input_blocks.5.1.transformer_blocks.0.ff.net.2.weight'] + new['diffusion']['unet.encoders.5.1.linear_geglu_2.bias'] = s['model.diffusion_model.input_blocks.5.1.transformer_blocks.0.ff.net.2.bias'] + new['diffusion']['unet.encoders.5.1.attention_2.q_proj.weight'] = s['model.diffusion_model.input_blocks.5.1.transformer_blocks.0.attn2.to_q.weight'] + new['diffusion']['unet.encoders.5.1.attention_2.k_proj.weight'] = s['model.diffusion_model.input_blocks.5.1.transformer_blocks.0.attn2.to_k.weight'] + new['diffusion']['unet.encoders.5.1.attention_2.v_proj.weight'] = s['model.diffusion_model.input_blocks.5.1.transformer_blocks.0.attn2.to_v.weight'] + new['diffusion']['unet.encoders.5.1.attention_2.out_proj.weight'] = s['model.diffusion_model.input_blocks.5.1.transformer_blocks.0.attn2.to_out.0.weight'] + 
new['diffusion']['unet.encoders.5.1.attention_2.out_proj.bias'] = s['model.diffusion_model.input_blocks.5.1.transformer_blocks.0.attn2.to_out.0.bias'] + new['diffusion']['unet.encoders.5.1.layernorm_1.weight'] = s['model.diffusion_model.input_blocks.5.1.transformer_blocks.0.norm1.weight'] + new['diffusion']['unet.encoders.5.1.layernorm_1.bias'] = s['model.diffusion_model.input_blocks.5.1.transformer_blocks.0.norm1.bias'] + new['diffusion']['unet.encoders.5.1.layernorm_2.weight'] = s['model.diffusion_model.input_blocks.5.1.transformer_blocks.0.norm2.weight'] + new['diffusion']['unet.encoders.5.1.layernorm_2.bias'] = s['model.diffusion_model.input_blocks.5.1.transformer_blocks.0.norm2.bias'] + new['diffusion']['unet.encoders.5.1.layernorm_3.weight'] = s['model.diffusion_model.input_blocks.5.1.transformer_blocks.0.norm3.weight'] + new['diffusion']['unet.encoders.5.1.layernorm_3.bias'] = s['model.diffusion_model.input_blocks.5.1.transformer_blocks.0.norm3.bias'] + new['diffusion']['unet.encoders.5.1.conv_output.weight'] = s['model.diffusion_model.input_blocks.5.1.proj_out.weight'] + new['diffusion']['unet.encoders.5.1.conv_output.bias'] = s['model.diffusion_model.input_blocks.5.1.proj_out.bias'] + new['diffusion']['unet.encoders.6.0.weight'] = s['model.diffusion_model.input_blocks.6.0.op.weight'] + new['diffusion']['unet.encoders.6.0.bias'] = s['model.diffusion_model.input_blocks.6.0.op.bias'] + new['diffusion']['unet.encoders.7.0.groupnorm_feature.weight'] = s['model.diffusion_model.input_blocks.7.0.in_layers.0.weight'] + new['diffusion']['unet.encoders.7.0.groupnorm_feature.bias'] = s['model.diffusion_model.input_blocks.7.0.in_layers.0.bias'] + new['diffusion']['unet.encoders.7.0.conv_feature.weight'] = s['model.diffusion_model.input_blocks.7.0.in_layers.2.weight'] + new['diffusion']['unet.encoders.7.0.conv_feature.bias'] = s['model.diffusion_model.input_blocks.7.0.in_layers.2.bias'] + new['diffusion']['unet.encoders.7.0.linear_time.weight'] = 
s['model.diffusion_model.input_blocks.7.0.emb_layers.1.weight'] + new['diffusion']['unet.encoders.7.0.linear_time.bias'] = s['model.diffusion_model.input_blocks.7.0.emb_layers.1.bias'] + new['diffusion']['unet.encoders.7.0.groupnorm_merged.weight'] = s['model.diffusion_model.input_blocks.7.0.out_layers.0.weight'] + new['diffusion']['unet.encoders.7.0.groupnorm_merged.bias'] = s['model.diffusion_model.input_blocks.7.0.out_layers.0.bias'] + new['diffusion']['unet.encoders.7.0.conv_merged.weight'] = s['model.diffusion_model.input_blocks.7.0.out_layers.3.weight'] + new['diffusion']['unet.encoders.7.0.conv_merged.bias'] = s['model.diffusion_model.input_blocks.7.0.out_layers.3.bias'] + new['diffusion']['unet.encoders.7.0.residual_layer.weight'] = s['model.diffusion_model.input_blocks.7.0.skip_connection.weight'] + new['diffusion']['unet.encoders.7.0.residual_layer.bias'] = s['model.diffusion_model.input_blocks.7.0.skip_connection.bias'] + new['diffusion']['unet.encoders.7.1.groupnorm.weight'] = s['model.diffusion_model.input_blocks.7.1.norm.weight'] + new['diffusion']['unet.encoders.7.1.groupnorm.bias'] = s['model.diffusion_model.input_blocks.7.1.norm.bias'] + new['diffusion']['unet.encoders.7.1.conv_input.weight'] = s['model.diffusion_model.input_blocks.7.1.proj_in.weight'] + new['diffusion']['unet.encoders.7.1.conv_input.bias'] = s['model.diffusion_model.input_blocks.7.1.proj_in.bias'] + new['diffusion']['unet.encoders.7.1.attention_1.out_proj.weight'] = s['model.diffusion_model.input_blocks.7.1.transformer_blocks.0.attn1.to_out.0.weight'] + new['diffusion']['unet.encoders.7.1.attention_1.out_proj.bias'] = s['model.diffusion_model.input_blocks.7.1.transformer_blocks.0.attn1.to_out.0.bias'] + new['diffusion']['unet.encoders.7.1.linear_geglu_1.weight'] = s['model.diffusion_model.input_blocks.7.1.transformer_blocks.0.ff.net.0.proj.weight'] + new['diffusion']['unet.encoders.7.1.linear_geglu_1.bias'] = 
s['model.diffusion_model.input_blocks.7.1.transformer_blocks.0.ff.net.0.proj.bias'] + new['diffusion']['unet.encoders.7.1.linear_geglu_2.weight'] = s['model.diffusion_model.input_blocks.7.1.transformer_blocks.0.ff.net.2.weight'] + new['diffusion']['unet.encoders.7.1.linear_geglu_2.bias'] = s['model.diffusion_model.input_blocks.7.1.transformer_blocks.0.ff.net.2.bias'] + new['diffusion']['unet.encoders.7.1.attention_2.q_proj.weight'] = s['model.diffusion_model.input_blocks.7.1.transformer_blocks.0.attn2.to_q.weight'] + new['diffusion']['unet.encoders.7.1.attention_2.k_proj.weight'] = s['model.diffusion_model.input_blocks.7.1.transformer_blocks.0.attn2.to_k.weight'] + new['diffusion']['unet.encoders.7.1.attention_2.v_proj.weight'] = s['model.diffusion_model.input_blocks.7.1.transformer_blocks.0.attn2.to_v.weight'] + new['diffusion']['unet.encoders.7.1.attention_2.out_proj.weight'] = s['model.diffusion_model.input_blocks.7.1.transformer_blocks.0.attn2.to_out.0.weight'] + new['diffusion']['unet.encoders.7.1.attention_2.out_proj.bias'] = s['model.diffusion_model.input_blocks.7.1.transformer_blocks.0.attn2.to_out.0.bias'] + new['diffusion']['unet.encoders.7.1.layernorm_1.weight'] = s['model.diffusion_model.input_blocks.7.1.transformer_blocks.0.norm1.weight'] + new['diffusion']['unet.encoders.7.1.layernorm_1.bias'] = s['model.diffusion_model.input_blocks.7.1.transformer_blocks.0.norm1.bias'] + new['diffusion']['unet.encoders.7.1.layernorm_2.weight'] = s['model.diffusion_model.input_blocks.7.1.transformer_blocks.0.norm2.weight'] + new['diffusion']['unet.encoders.7.1.layernorm_2.bias'] = s['model.diffusion_model.input_blocks.7.1.transformer_blocks.0.norm2.bias'] + new['diffusion']['unet.encoders.7.1.layernorm_3.weight'] = s['model.diffusion_model.input_blocks.7.1.transformer_blocks.0.norm3.weight'] + new['diffusion']['unet.encoders.7.1.layernorm_3.bias'] = s['model.diffusion_model.input_blocks.7.1.transformer_blocks.0.norm3.bias'] + 
new['diffusion']['unet.encoders.7.1.conv_output.weight'] = s['model.diffusion_model.input_blocks.7.1.proj_out.weight'] + new['diffusion']['unet.encoders.7.1.conv_output.bias'] = s['model.diffusion_model.input_blocks.7.1.proj_out.bias'] + new['diffusion']['unet.encoders.8.0.groupnorm_feature.weight'] = s['model.diffusion_model.input_blocks.8.0.in_layers.0.weight'] + new['diffusion']['unet.encoders.8.0.groupnorm_feature.bias'] = s['model.diffusion_model.input_blocks.8.0.in_layers.0.bias'] + new['diffusion']['unet.encoders.8.0.conv_feature.weight'] = s['model.diffusion_model.input_blocks.8.0.in_layers.2.weight'] + new['diffusion']['unet.encoders.8.0.conv_feature.bias'] = s['model.diffusion_model.input_blocks.8.0.in_layers.2.bias'] + new['diffusion']['unet.encoders.8.0.linear_time.weight'] = s['model.diffusion_model.input_blocks.8.0.emb_layers.1.weight'] + new['diffusion']['unet.encoders.8.0.linear_time.bias'] = s['model.diffusion_model.input_blocks.8.0.emb_layers.1.bias'] + new['diffusion']['unet.encoders.8.0.groupnorm_merged.weight'] = s['model.diffusion_model.input_blocks.8.0.out_layers.0.weight'] + new['diffusion']['unet.encoders.8.0.groupnorm_merged.bias'] = s['model.diffusion_model.input_blocks.8.0.out_layers.0.bias'] + new['diffusion']['unet.encoders.8.0.conv_merged.weight'] = s['model.diffusion_model.input_blocks.8.0.out_layers.3.weight'] + new['diffusion']['unet.encoders.8.0.conv_merged.bias'] = s['model.diffusion_model.input_blocks.8.0.out_layers.3.bias'] + new['diffusion']['unet.encoders.8.1.groupnorm.weight'] = s['model.diffusion_model.input_blocks.8.1.norm.weight'] + new['diffusion']['unet.encoders.8.1.groupnorm.bias'] = s['model.diffusion_model.input_blocks.8.1.norm.bias'] + new['diffusion']['unet.encoders.8.1.conv_input.weight'] = s['model.diffusion_model.input_blocks.8.1.proj_in.weight'] + new['diffusion']['unet.encoders.8.1.conv_input.bias'] = s['model.diffusion_model.input_blocks.8.1.proj_in.bias'] + 
new['diffusion']['unet.encoders.8.1.attention_1.out_proj.weight'] = s['model.diffusion_model.input_blocks.8.1.transformer_blocks.0.attn1.to_out.0.weight'] + new['diffusion']['unet.encoders.8.1.attention_1.out_proj.bias'] = s['model.diffusion_model.input_blocks.8.1.transformer_blocks.0.attn1.to_out.0.bias'] + new['diffusion']['unet.encoders.8.1.linear_geglu_1.weight'] = s['model.diffusion_model.input_blocks.8.1.transformer_blocks.0.ff.net.0.proj.weight'] + new['diffusion']['unet.encoders.8.1.linear_geglu_1.bias'] = s['model.diffusion_model.input_blocks.8.1.transformer_blocks.0.ff.net.0.proj.bias'] + new['diffusion']['unet.encoders.8.1.linear_geglu_2.weight'] = s['model.diffusion_model.input_blocks.8.1.transformer_blocks.0.ff.net.2.weight'] + new['diffusion']['unet.encoders.8.1.linear_geglu_2.bias'] = s['model.diffusion_model.input_blocks.8.1.transformer_blocks.0.ff.net.2.bias'] + new['diffusion']['unet.encoders.8.1.attention_2.q_proj.weight'] = s['model.diffusion_model.input_blocks.8.1.transformer_blocks.0.attn2.to_q.weight'] + new['diffusion']['unet.encoders.8.1.attention_2.k_proj.weight'] = s['model.diffusion_model.input_blocks.8.1.transformer_blocks.0.attn2.to_k.weight'] + new['diffusion']['unet.encoders.8.1.attention_2.v_proj.weight'] = s['model.diffusion_model.input_blocks.8.1.transformer_blocks.0.attn2.to_v.weight'] + new['diffusion']['unet.encoders.8.1.attention_2.out_proj.weight'] = s['model.diffusion_model.input_blocks.8.1.transformer_blocks.0.attn2.to_out.0.weight'] + new['diffusion']['unet.encoders.8.1.attention_2.out_proj.bias'] = s['model.diffusion_model.input_blocks.8.1.transformer_blocks.0.attn2.to_out.0.bias'] + new['diffusion']['unet.encoders.8.1.layernorm_1.weight'] = s['model.diffusion_model.input_blocks.8.1.transformer_blocks.0.norm1.weight'] + new['diffusion']['unet.encoders.8.1.layernorm_1.bias'] = s['model.diffusion_model.input_blocks.8.1.transformer_blocks.0.norm1.bias'] + new['diffusion']['unet.encoders.8.1.layernorm_2.weight'] = 
s['model.diffusion_model.input_blocks.8.1.transformer_blocks.0.norm2.weight'] + new['diffusion']['unet.encoders.8.1.layernorm_2.bias'] = s['model.diffusion_model.input_blocks.8.1.transformer_blocks.0.norm2.bias'] + new['diffusion']['unet.encoders.8.1.layernorm_3.weight'] = s['model.diffusion_model.input_blocks.8.1.transformer_blocks.0.norm3.weight'] + new['diffusion']['unet.encoders.8.1.layernorm_3.bias'] = s['model.diffusion_model.input_blocks.8.1.transformer_blocks.0.norm3.bias'] + new['diffusion']['unet.encoders.8.1.conv_output.weight'] = s['model.diffusion_model.input_blocks.8.1.proj_out.weight'] + new['diffusion']['unet.encoders.8.1.conv_output.bias'] = s['model.diffusion_model.input_blocks.8.1.proj_out.bias'] + new['diffusion']['unet.encoders.9.0.weight'] = s['model.diffusion_model.input_blocks.9.0.op.weight'] + new['diffusion']['unet.encoders.9.0.bias'] = s['model.diffusion_model.input_blocks.9.0.op.bias'] + new['diffusion']['unet.encoders.10.0.groupnorm_feature.weight'] = s['model.diffusion_model.input_blocks.10.0.in_layers.0.weight'] + new['diffusion']['unet.encoders.10.0.groupnorm_feature.bias'] = s['model.diffusion_model.input_blocks.10.0.in_layers.0.bias'] + new['diffusion']['unet.encoders.10.0.conv_feature.weight'] = s['model.diffusion_model.input_blocks.10.0.in_layers.2.weight'] + new['diffusion']['unet.encoders.10.0.conv_feature.bias'] = s['model.diffusion_model.input_blocks.10.0.in_layers.2.bias'] + new['diffusion']['unet.encoders.10.0.linear_time.weight'] = s['model.diffusion_model.input_blocks.10.0.emb_layers.1.weight'] + new['diffusion']['unet.encoders.10.0.linear_time.bias'] = s['model.diffusion_model.input_blocks.10.0.emb_layers.1.bias'] + new['diffusion']['unet.encoders.10.0.groupnorm_merged.weight'] = s['model.diffusion_model.input_blocks.10.0.out_layers.0.weight'] + new['diffusion']['unet.encoders.10.0.groupnorm_merged.bias'] = s['model.diffusion_model.input_blocks.10.0.out_layers.0.bias'] + 
new['diffusion']['unet.encoders.10.0.conv_merged.weight'] = s['model.diffusion_model.input_blocks.10.0.out_layers.3.weight'] + new['diffusion']['unet.encoders.10.0.conv_merged.bias'] = s['model.diffusion_model.input_blocks.10.0.out_layers.3.bias'] + new['diffusion']['unet.encoders.11.0.groupnorm_feature.weight'] = s['model.diffusion_model.input_blocks.11.0.in_layers.0.weight'] + new['diffusion']['unet.encoders.11.0.groupnorm_feature.bias'] = s['model.diffusion_model.input_blocks.11.0.in_layers.0.bias'] + new['diffusion']['unet.encoders.11.0.conv_feature.weight'] = s['model.diffusion_model.input_blocks.11.0.in_layers.2.weight'] + new['diffusion']['unet.encoders.11.0.conv_feature.bias'] = s['model.diffusion_model.input_blocks.11.0.in_layers.2.bias'] + new['diffusion']['unet.encoders.11.0.linear_time.weight'] = s['model.diffusion_model.input_blocks.11.0.emb_layers.1.weight'] + new['diffusion']['unet.encoders.11.0.linear_time.bias'] = s['model.diffusion_model.input_blocks.11.0.emb_layers.1.bias'] + new['diffusion']['unet.encoders.11.0.groupnorm_merged.weight'] = s['model.diffusion_model.input_blocks.11.0.out_layers.0.weight'] + new['diffusion']['unet.encoders.11.0.groupnorm_merged.bias'] = s['model.diffusion_model.input_blocks.11.0.out_layers.0.bias'] + new['diffusion']['unet.encoders.11.0.conv_merged.weight'] = s['model.diffusion_model.input_blocks.11.0.out_layers.3.weight'] + new['diffusion']['unet.encoders.11.0.conv_merged.bias'] = s['model.diffusion_model.input_blocks.11.0.out_layers.3.bias'] + new['diffusion']['unet.bottleneck.0.groupnorm_feature.weight'] = s['model.diffusion_model.middle_block.0.in_layers.0.weight'] + new['diffusion']['unet.bottleneck.0.groupnorm_feature.bias'] = s['model.diffusion_model.middle_block.0.in_layers.0.bias'] + new['diffusion']['unet.bottleneck.0.conv_feature.weight'] = s['model.diffusion_model.middle_block.0.in_layers.2.weight'] + new['diffusion']['unet.bottleneck.0.conv_feature.bias'] = 
s['model.diffusion_model.middle_block.0.in_layers.2.bias'] + new['diffusion']['unet.bottleneck.0.linear_time.weight'] = s['model.diffusion_model.middle_block.0.emb_layers.1.weight'] + new['diffusion']['unet.bottleneck.0.linear_time.bias'] = s['model.diffusion_model.middle_block.0.emb_layers.1.bias'] + new['diffusion']['unet.bottleneck.0.groupnorm_merged.weight'] = s['model.diffusion_model.middle_block.0.out_layers.0.weight'] + new['diffusion']['unet.bottleneck.0.groupnorm_merged.bias'] = s['model.diffusion_model.middle_block.0.out_layers.0.bias'] + new['diffusion']['unet.bottleneck.0.conv_merged.weight'] = s['model.diffusion_model.middle_block.0.out_layers.3.weight'] + new['diffusion']['unet.bottleneck.0.conv_merged.bias'] = s['model.diffusion_model.middle_block.0.out_layers.3.bias'] + new['diffusion']['unet.bottleneck.1.groupnorm.weight'] = s['model.diffusion_model.middle_block.1.norm.weight'] + new['diffusion']['unet.bottleneck.1.groupnorm.bias'] = s['model.diffusion_model.middle_block.1.norm.bias'] + new['diffusion']['unet.bottleneck.1.conv_input.weight'] = s['model.diffusion_model.middle_block.1.proj_in.weight'] + new['diffusion']['unet.bottleneck.1.conv_input.bias'] = s['model.diffusion_model.middle_block.1.proj_in.bias'] + new['diffusion']['unet.bottleneck.1.attention_1.out_proj.weight'] = s['model.diffusion_model.middle_block.1.transformer_blocks.0.attn1.to_out.0.weight'] + new['diffusion']['unet.bottleneck.1.attention_1.out_proj.bias'] = s['model.diffusion_model.middle_block.1.transformer_blocks.0.attn1.to_out.0.bias'] + new['diffusion']['unet.bottleneck.1.linear_geglu_1.weight'] = s['model.diffusion_model.middle_block.1.transformer_blocks.0.ff.net.0.proj.weight'] + new['diffusion']['unet.bottleneck.1.linear_geglu_1.bias'] = s['model.diffusion_model.middle_block.1.transformer_blocks.0.ff.net.0.proj.bias'] + new['diffusion']['unet.bottleneck.1.linear_geglu_2.weight'] = s['model.diffusion_model.middle_block.1.transformer_blocks.0.ff.net.2.weight'] + 
new['diffusion']['unet.bottleneck.1.linear_geglu_2.bias'] = s['model.diffusion_model.middle_block.1.transformer_blocks.0.ff.net.2.bias'] + new['diffusion']['unet.bottleneck.1.attention_2.q_proj.weight'] = s['model.diffusion_model.middle_block.1.transformer_blocks.0.attn2.to_q.weight'] + new['diffusion']['unet.bottleneck.1.attention_2.k_proj.weight'] = s['model.diffusion_model.middle_block.1.transformer_blocks.0.attn2.to_k.weight'] + new['diffusion']['unet.bottleneck.1.attention_2.v_proj.weight'] = s['model.diffusion_model.middle_block.1.transformer_blocks.0.attn2.to_v.weight'] + new['diffusion']['unet.bottleneck.1.attention_2.out_proj.weight'] = s['model.diffusion_model.middle_block.1.transformer_blocks.0.attn2.to_out.0.weight'] + new['diffusion']['unet.bottleneck.1.attention_2.out_proj.bias'] = s['model.diffusion_model.middle_block.1.transformer_blocks.0.attn2.to_out.0.bias'] + new['diffusion']['unet.bottleneck.1.layernorm_1.weight'] = s['model.diffusion_model.middle_block.1.transformer_blocks.0.norm1.weight'] + new['diffusion']['unet.bottleneck.1.layernorm_1.bias'] = s['model.diffusion_model.middle_block.1.transformer_blocks.0.norm1.bias'] + new['diffusion']['unet.bottleneck.1.layernorm_2.weight'] = s['model.diffusion_model.middle_block.1.transformer_blocks.0.norm2.weight'] + new['diffusion']['unet.bottleneck.1.layernorm_2.bias'] = s['model.diffusion_model.middle_block.1.transformer_blocks.0.norm2.bias'] + new['diffusion']['unet.bottleneck.1.layernorm_3.weight'] = s['model.diffusion_model.middle_block.1.transformer_blocks.0.norm3.weight'] + new['diffusion']['unet.bottleneck.1.layernorm_3.bias'] = s['model.diffusion_model.middle_block.1.transformer_blocks.0.norm3.bias'] + new['diffusion']['unet.bottleneck.1.conv_output.weight'] = s['model.diffusion_model.middle_block.1.proj_out.weight'] + new['diffusion']['unet.bottleneck.1.conv_output.bias'] = s['model.diffusion_model.middle_block.1.proj_out.bias'] + new['diffusion']['unet.bottleneck.2.groupnorm_feature.weight'] 
= s['model.diffusion_model.middle_block.2.in_layers.0.weight'] + new['diffusion']['unet.bottleneck.2.groupnorm_feature.bias'] = s['model.diffusion_model.middle_block.2.in_layers.0.bias'] + new['diffusion']['unet.bottleneck.2.conv_feature.weight'] = s['model.diffusion_model.middle_block.2.in_layers.2.weight'] + new['diffusion']['unet.bottleneck.2.conv_feature.bias'] = s['model.diffusion_model.middle_block.2.in_layers.2.bias'] + new['diffusion']['unet.bottleneck.2.linear_time.weight'] = s['model.diffusion_model.middle_block.2.emb_layers.1.weight'] + new['diffusion']['unet.bottleneck.2.linear_time.bias'] = s['model.diffusion_model.middle_block.2.emb_layers.1.bias'] + new['diffusion']['unet.bottleneck.2.groupnorm_merged.weight'] = s['model.diffusion_model.middle_block.2.out_layers.0.weight'] + new['diffusion']['unet.bottleneck.2.groupnorm_merged.bias'] = s['model.diffusion_model.middle_block.2.out_layers.0.bias'] + new['diffusion']['unet.bottleneck.2.conv_merged.weight'] = s['model.diffusion_model.middle_block.2.out_layers.3.weight'] + new['diffusion']['unet.bottleneck.2.conv_merged.bias'] = s['model.diffusion_model.middle_block.2.out_layers.3.bias'] + new['diffusion']['unet.decoders.0.0.groupnorm_feature.weight'] = s['model.diffusion_model.output_blocks.0.0.in_layers.0.weight'] + new['diffusion']['unet.decoders.0.0.groupnorm_feature.bias'] = s['model.diffusion_model.output_blocks.0.0.in_layers.0.bias'] + new['diffusion']['unet.decoders.0.0.conv_feature.weight'] = s['model.diffusion_model.output_blocks.0.0.in_layers.2.weight'] + new['diffusion']['unet.decoders.0.0.conv_feature.bias'] = s['model.diffusion_model.output_blocks.0.0.in_layers.2.bias'] + new['diffusion']['unet.decoders.0.0.linear_time.weight'] = s['model.diffusion_model.output_blocks.0.0.emb_layers.1.weight'] + new['diffusion']['unet.decoders.0.0.linear_time.bias'] = s['model.diffusion_model.output_blocks.0.0.emb_layers.1.bias'] + new['diffusion']['unet.decoders.0.0.groupnorm_merged.weight'] = 
s['model.diffusion_model.output_blocks.0.0.out_layers.0.weight'] + new['diffusion']['unet.decoders.0.0.groupnorm_merged.bias'] = s['model.diffusion_model.output_blocks.0.0.out_layers.0.bias'] + new['diffusion']['unet.decoders.0.0.conv_merged.weight'] = s['model.diffusion_model.output_blocks.0.0.out_layers.3.weight'] + new['diffusion']['unet.decoders.0.0.conv_merged.bias'] = s['model.diffusion_model.output_blocks.0.0.out_layers.3.bias'] + new['diffusion']['unet.decoders.0.0.residual_layer.weight'] = s['model.diffusion_model.output_blocks.0.0.skip_connection.weight'] + new['diffusion']['unet.decoders.0.0.residual_layer.bias'] = s['model.diffusion_model.output_blocks.0.0.skip_connection.bias'] + new['diffusion']['unet.decoders.1.0.groupnorm_feature.weight'] = s['model.diffusion_model.output_blocks.1.0.in_layers.0.weight'] + new['diffusion']['unet.decoders.1.0.groupnorm_feature.bias'] = s['model.diffusion_model.output_blocks.1.0.in_layers.0.bias'] + new['diffusion']['unet.decoders.1.0.conv_feature.weight'] = s['model.diffusion_model.output_blocks.1.0.in_layers.2.weight'] + new['diffusion']['unet.decoders.1.0.conv_feature.bias'] = s['model.diffusion_model.output_blocks.1.0.in_layers.2.bias'] + new['diffusion']['unet.decoders.1.0.linear_time.weight'] = s['model.diffusion_model.output_blocks.1.0.emb_layers.1.weight'] + new['diffusion']['unet.decoders.1.0.linear_time.bias'] = s['model.diffusion_model.output_blocks.1.0.emb_layers.1.bias'] + new['diffusion']['unet.decoders.1.0.groupnorm_merged.weight'] = s['model.diffusion_model.output_blocks.1.0.out_layers.0.weight'] + new['diffusion']['unet.decoders.1.0.groupnorm_merged.bias'] = s['model.diffusion_model.output_blocks.1.0.out_layers.0.bias'] + new['diffusion']['unet.decoders.1.0.conv_merged.weight'] = s['model.diffusion_model.output_blocks.1.0.out_layers.3.weight'] + new['diffusion']['unet.decoders.1.0.conv_merged.bias'] = s['model.diffusion_model.output_blocks.1.0.out_layers.3.bias'] + 
new['diffusion']['unet.decoders.1.0.residual_layer.weight'] = s['model.diffusion_model.output_blocks.1.0.skip_connection.weight'] + new['diffusion']['unet.decoders.1.0.residual_layer.bias'] = s['model.diffusion_model.output_blocks.1.0.skip_connection.bias'] + new['diffusion']['unet.decoders.2.0.groupnorm_feature.weight'] = s['model.diffusion_model.output_blocks.2.0.in_layers.0.weight'] + new['diffusion']['unet.decoders.2.0.groupnorm_feature.bias'] = s['model.diffusion_model.output_blocks.2.0.in_layers.0.bias'] + new['diffusion']['unet.decoders.2.0.conv_feature.weight'] = s['model.diffusion_model.output_blocks.2.0.in_layers.2.weight'] + new['diffusion']['unet.decoders.2.0.conv_feature.bias'] = s['model.diffusion_model.output_blocks.2.0.in_layers.2.bias'] + new['diffusion']['unet.decoders.2.0.linear_time.weight'] = s['model.diffusion_model.output_blocks.2.0.emb_layers.1.weight'] + new['diffusion']['unet.decoders.2.0.linear_time.bias'] = s['model.diffusion_model.output_blocks.2.0.emb_layers.1.bias'] + new['diffusion']['unet.decoders.2.0.groupnorm_merged.weight'] = s['model.diffusion_model.output_blocks.2.0.out_layers.0.weight'] + new['diffusion']['unet.decoders.2.0.groupnorm_merged.bias'] = s['model.diffusion_model.output_blocks.2.0.out_layers.0.bias'] + new['diffusion']['unet.decoders.2.0.conv_merged.weight'] = s['model.diffusion_model.output_blocks.2.0.out_layers.3.weight'] + new['diffusion']['unet.decoders.2.0.conv_merged.bias'] = s['model.diffusion_model.output_blocks.2.0.out_layers.3.bias'] + new['diffusion']['unet.decoders.2.0.residual_layer.weight'] = s['model.diffusion_model.output_blocks.2.0.skip_connection.weight'] + new['diffusion']['unet.decoders.2.0.residual_layer.bias'] = s['model.diffusion_model.output_blocks.2.0.skip_connection.bias'] + new['diffusion']['unet.decoders.2.1.conv.weight'] = s['model.diffusion_model.output_blocks.2.1.conv.weight'] + new['diffusion']['unet.decoders.2.1.conv.bias'] = s['model.diffusion_model.output_blocks.2.1.conv.bias'] + 
new['diffusion']['unet.decoders.3.0.groupnorm_feature.weight'] = s['model.diffusion_model.output_blocks.3.0.in_layers.0.weight'] + new['diffusion']['unet.decoders.3.0.groupnorm_feature.bias'] = s['model.diffusion_model.output_blocks.3.0.in_layers.0.bias'] + new['diffusion']['unet.decoders.3.0.conv_feature.weight'] = s['model.diffusion_model.output_blocks.3.0.in_layers.2.weight'] + new['diffusion']['unet.decoders.3.0.conv_feature.bias'] = s['model.diffusion_model.output_blocks.3.0.in_layers.2.bias'] + new['diffusion']['unet.decoders.3.0.linear_time.weight'] = s['model.diffusion_model.output_blocks.3.0.emb_layers.1.weight'] + new['diffusion']['unet.decoders.3.0.linear_time.bias'] = s['model.diffusion_model.output_blocks.3.0.emb_layers.1.bias'] + new['diffusion']['unet.decoders.3.0.groupnorm_merged.weight'] = s['model.diffusion_model.output_blocks.3.0.out_layers.0.weight'] + new['diffusion']['unet.decoders.3.0.groupnorm_merged.bias'] = s['model.diffusion_model.output_blocks.3.0.out_layers.0.bias'] + new['diffusion']['unet.decoders.3.0.conv_merged.weight'] = s['model.diffusion_model.output_blocks.3.0.out_layers.3.weight'] + new['diffusion']['unet.decoders.3.0.conv_merged.bias'] = s['model.diffusion_model.output_blocks.3.0.out_layers.3.bias'] + new['diffusion']['unet.decoders.3.0.residual_layer.weight'] = s['model.diffusion_model.output_blocks.3.0.skip_connection.weight'] + new['diffusion']['unet.decoders.3.0.residual_layer.bias'] = s['model.diffusion_model.output_blocks.3.0.skip_connection.bias'] + new['diffusion']['unet.decoders.3.1.groupnorm.weight'] = s['model.diffusion_model.output_blocks.3.1.norm.weight'] + new['diffusion']['unet.decoders.3.1.groupnorm.bias'] = s['model.diffusion_model.output_blocks.3.1.norm.bias'] + new['diffusion']['unet.decoders.3.1.conv_input.weight'] = s['model.diffusion_model.output_blocks.3.1.proj_in.weight'] + new['diffusion']['unet.decoders.3.1.conv_input.bias'] = s['model.diffusion_model.output_blocks.3.1.proj_in.bias'] + 
new['diffusion']['unet.decoders.3.1.attention_1.out_proj.weight'] = s['model.diffusion_model.output_blocks.3.1.transformer_blocks.0.attn1.to_out.0.weight'] + new['diffusion']['unet.decoders.3.1.attention_1.out_proj.bias'] = s['model.diffusion_model.output_blocks.3.1.transformer_blocks.0.attn1.to_out.0.bias'] + new['diffusion']['unet.decoders.3.1.linear_geglu_1.weight'] = s['model.diffusion_model.output_blocks.3.1.transformer_blocks.0.ff.net.0.proj.weight'] + new['diffusion']['unet.decoders.3.1.linear_geglu_1.bias'] = s['model.diffusion_model.output_blocks.3.1.transformer_blocks.0.ff.net.0.proj.bias'] + new['diffusion']['unet.decoders.3.1.linear_geglu_2.weight'] = s['model.diffusion_model.output_blocks.3.1.transformer_blocks.0.ff.net.2.weight'] + new['diffusion']['unet.decoders.3.1.linear_geglu_2.bias'] = s['model.diffusion_model.output_blocks.3.1.transformer_blocks.0.ff.net.2.bias'] + new['diffusion']['unet.decoders.3.1.attention_2.q_proj.weight'] = s['model.diffusion_model.output_blocks.3.1.transformer_blocks.0.attn2.to_q.weight'] + new['diffusion']['unet.decoders.3.1.attention_2.k_proj.weight'] = s['model.diffusion_model.output_blocks.3.1.transformer_blocks.0.attn2.to_k.weight'] + new['diffusion']['unet.decoders.3.1.attention_2.v_proj.weight'] = s['model.diffusion_model.output_blocks.3.1.transformer_blocks.0.attn2.to_v.weight'] + new['diffusion']['unet.decoders.3.1.attention_2.out_proj.weight'] = s['model.diffusion_model.output_blocks.3.1.transformer_blocks.0.attn2.to_out.0.weight'] + new['diffusion']['unet.decoders.3.1.attention_2.out_proj.bias'] = s['model.diffusion_model.output_blocks.3.1.transformer_blocks.0.attn2.to_out.0.bias'] + new['diffusion']['unet.decoders.3.1.layernorm_1.weight'] = s['model.diffusion_model.output_blocks.3.1.transformer_blocks.0.norm1.weight'] + new['diffusion']['unet.decoders.3.1.layernorm_1.bias'] = s['model.diffusion_model.output_blocks.3.1.transformer_blocks.0.norm1.bias'] + new['diffusion']['unet.decoders.3.1.layernorm_2.weight'] 
= s['model.diffusion_model.output_blocks.3.1.transformer_blocks.0.norm2.weight'] + new['diffusion']['unet.decoders.3.1.layernorm_2.bias'] = s['model.diffusion_model.output_blocks.3.1.transformer_blocks.0.norm2.bias'] + new['diffusion']['unet.decoders.3.1.layernorm_3.weight'] = s['model.diffusion_model.output_blocks.3.1.transformer_blocks.0.norm3.weight'] + new['diffusion']['unet.decoders.3.1.layernorm_3.bias'] = s['model.diffusion_model.output_blocks.3.1.transformer_blocks.0.norm3.bias'] + new['diffusion']['unet.decoders.3.1.conv_output.weight'] = s['model.diffusion_model.output_blocks.3.1.proj_out.weight'] + new['diffusion']['unet.decoders.3.1.conv_output.bias'] = s['model.diffusion_model.output_blocks.3.1.proj_out.bias'] + new['diffusion']['unet.decoders.4.0.groupnorm_feature.weight'] = s['model.diffusion_model.output_blocks.4.0.in_layers.0.weight'] + new['diffusion']['unet.decoders.4.0.groupnorm_feature.bias'] = s['model.diffusion_model.output_blocks.4.0.in_layers.0.bias'] + new['diffusion']['unet.decoders.4.0.conv_feature.weight'] = s['model.diffusion_model.output_blocks.4.0.in_layers.2.weight'] + new['diffusion']['unet.decoders.4.0.conv_feature.bias'] = s['model.diffusion_model.output_blocks.4.0.in_layers.2.bias'] + new['diffusion']['unet.decoders.4.0.linear_time.weight'] = s['model.diffusion_model.output_blocks.4.0.emb_layers.1.weight'] + new['diffusion']['unet.decoders.4.0.linear_time.bias'] = s['model.diffusion_model.output_blocks.4.0.emb_layers.1.bias'] + new['diffusion']['unet.decoders.4.0.groupnorm_merged.weight'] = s['model.diffusion_model.output_blocks.4.0.out_layers.0.weight'] + new['diffusion']['unet.decoders.4.0.groupnorm_merged.bias'] = s['model.diffusion_model.output_blocks.4.0.out_layers.0.bias'] + new['diffusion']['unet.decoders.4.0.conv_merged.weight'] = s['model.diffusion_model.output_blocks.4.0.out_layers.3.weight'] + new['diffusion']['unet.decoders.4.0.conv_merged.bias'] = s['model.diffusion_model.output_blocks.4.0.out_layers.3.bias'] + 
new['diffusion']['unet.decoders.4.0.residual_layer.weight'] = s['model.diffusion_model.output_blocks.4.0.skip_connection.weight'] + new['diffusion']['unet.decoders.4.0.residual_layer.bias'] = s['model.diffusion_model.output_blocks.4.0.skip_connection.bias'] + new['diffusion']['unet.decoders.4.1.groupnorm.weight'] = s['model.diffusion_model.output_blocks.4.1.norm.weight'] + new['diffusion']['unet.decoders.4.1.groupnorm.bias'] = s['model.diffusion_model.output_blocks.4.1.norm.bias'] + new['diffusion']['unet.decoders.4.1.conv_input.weight'] = s['model.diffusion_model.output_blocks.4.1.proj_in.weight'] + new['diffusion']['unet.decoders.4.1.conv_input.bias'] = s['model.diffusion_model.output_blocks.4.1.proj_in.bias'] + new['diffusion']['unet.decoders.4.1.attention_1.out_proj.weight'] = s['model.diffusion_model.output_blocks.4.1.transformer_blocks.0.attn1.to_out.0.weight'] + new['diffusion']['unet.decoders.4.1.attention_1.out_proj.bias'] = s['model.diffusion_model.output_blocks.4.1.transformer_blocks.0.attn1.to_out.0.bias'] + new['diffusion']['unet.decoders.4.1.linear_geglu_1.weight'] = s['model.diffusion_model.output_blocks.4.1.transformer_blocks.0.ff.net.0.proj.weight'] + new['diffusion']['unet.decoders.4.1.linear_geglu_1.bias'] = s['model.diffusion_model.output_blocks.4.1.transformer_blocks.0.ff.net.0.proj.bias'] + new['diffusion']['unet.decoders.4.1.linear_geglu_2.weight'] = s['model.diffusion_model.output_blocks.4.1.transformer_blocks.0.ff.net.2.weight'] + new['diffusion']['unet.decoders.4.1.linear_geglu_2.bias'] = s['model.diffusion_model.output_blocks.4.1.transformer_blocks.0.ff.net.2.bias'] + new['diffusion']['unet.decoders.4.1.attention_2.q_proj.weight'] = s['model.diffusion_model.output_blocks.4.1.transformer_blocks.0.attn2.to_q.weight'] + new['diffusion']['unet.decoders.4.1.attention_2.k_proj.weight'] = s['model.diffusion_model.output_blocks.4.1.transformer_blocks.0.attn2.to_k.weight'] + new['diffusion']['unet.decoders.4.1.attention_2.v_proj.weight'] = 
s['model.diffusion_model.output_blocks.4.1.transformer_blocks.0.attn2.to_v.weight'] + new['diffusion']['unet.decoders.4.1.attention_2.out_proj.weight'] = s['model.diffusion_model.output_blocks.4.1.transformer_blocks.0.attn2.to_out.0.weight'] + new['diffusion']['unet.decoders.4.1.attention_2.out_proj.bias'] = s['model.diffusion_model.output_blocks.4.1.transformer_blocks.0.attn2.to_out.0.bias'] + new['diffusion']['unet.decoders.4.1.layernorm_1.weight'] = s['model.diffusion_model.output_blocks.4.1.transformer_blocks.0.norm1.weight'] + new['diffusion']['unet.decoders.4.1.layernorm_1.bias'] = s['model.diffusion_model.output_blocks.4.1.transformer_blocks.0.norm1.bias'] + new['diffusion']['unet.decoders.4.1.layernorm_2.weight'] = s['model.diffusion_model.output_blocks.4.1.transformer_blocks.0.norm2.weight'] + new['diffusion']['unet.decoders.4.1.layernorm_2.bias'] = s['model.diffusion_model.output_blocks.4.1.transformer_blocks.0.norm2.bias'] + new['diffusion']['unet.decoders.4.1.layernorm_3.weight'] = s['model.diffusion_model.output_blocks.4.1.transformer_blocks.0.norm3.weight'] + new['diffusion']['unet.decoders.4.1.layernorm_3.bias'] = s['model.diffusion_model.output_blocks.4.1.transformer_blocks.0.norm3.bias'] + new['diffusion']['unet.decoders.4.1.conv_output.weight'] = s['model.diffusion_model.output_blocks.4.1.proj_out.weight'] + new['diffusion']['unet.decoders.4.1.conv_output.bias'] = s['model.diffusion_model.output_blocks.4.1.proj_out.bias'] + new['diffusion']['unet.decoders.5.0.groupnorm_feature.weight'] = s['model.diffusion_model.output_blocks.5.0.in_layers.0.weight'] + new['diffusion']['unet.decoders.5.0.groupnorm_feature.bias'] = s['model.diffusion_model.output_blocks.5.0.in_layers.0.bias'] + new['diffusion']['unet.decoders.5.0.conv_feature.weight'] = s['model.diffusion_model.output_blocks.5.0.in_layers.2.weight'] + new['diffusion']['unet.decoders.5.0.conv_feature.bias'] = s['model.diffusion_model.output_blocks.5.0.in_layers.2.bias'] + 
new['diffusion']['unet.decoders.5.0.linear_time.weight'] = s['model.diffusion_model.output_blocks.5.0.emb_layers.1.weight'] + new['diffusion']['unet.decoders.5.0.linear_time.bias'] = s['model.diffusion_model.output_blocks.5.0.emb_layers.1.bias'] + new['diffusion']['unet.decoders.5.0.groupnorm_merged.weight'] = s['model.diffusion_model.output_blocks.5.0.out_layers.0.weight'] + new['diffusion']['unet.decoders.5.0.groupnorm_merged.bias'] = s['model.diffusion_model.output_blocks.5.0.out_layers.0.bias'] + new['diffusion']['unet.decoders.5.0.conv_merged.weight'] = s['model.diffusion_model.output_blocks.5.0.out_layers.3.weight'] + new['diffusion']['unet.decoders.5.0.conv_merged.bias'] = s['model.diffusion_model.output_blocks.5.0.out_layers.3.bias'] + new['diffusion']['unet.decoders.5.0.residual_layer.weight'] = s['model.diffusion_model.output_blocks.5.0.skip_connection.weight'] + new['diffusion']['unet.decoders.5.0.residual_layer.bias'] = s['model.diffusion_model.output_blocks.5.0.skip_connection.bias'] + new['diffusion']['unet.decoders.5.1.groupnorm.weight'] = s['model.diffusion_model.output_blocks.5.1.norm.weight'] + new['diffusion']['unet.decoders.5.1.groupnorm.bias'] = s['model.diffusion_model.output_blocks.5.1.norm.bias'] + new['diffusion']['unet.decoders.5.1.conv_input.weight'] = s['model.diffusion_model.output_blocks.5.1.proj_in.weight'] + new['diffusion']['unet.decoders.5.1.conv_input.bias'] = s['model.diffusion_model.output_blocks.5.1.proj_in.bias'] + new['diffusion']['unet.decoders.5.1.attention_1.out_proj.weight'] = s['model.diffusion_model.output_blocks.5.1.transformer_blocks.0.attn1.to_out.0.weight'] + new['diffusion']['unet.decoders.5.1.attention_1.out_proj.bias'] = s['model.diffusion_model.output_blocks.5.1.transformer_blocks.0.attn1.to_out.0.bias'] + new['diffusion']['unet.decoders.5.1.linear_geglu_1.weight'] = s['model.diffusion_model.output_blocks.5.1.transformer_blocks.0.ff.net.0.proj.weight'] + new['diffusion']['unet.decoders.5.1.linear_geglu_1.bias'] 
= s['model.diffusion_model.output_blocks.5.1.transformer_blocks.0.ff.net.0.proj.bias'] + new['diffusion']['unet.decoders.5.1.linear_geglu_2.weight'] = s['model.diffusion_model.output_blocks.5.1.transformer_blocks.0.ff.net.2.weight'] + new['diffusion']['unet.decoders.5.1.linear_geglu_2.bias'] = s['model.diffusion_model.output_blocks.5.1.transformer_blocks.0.ff.net.2.bias'] + new['diffusion']['unet.decoders.5.1.attention_2.q_proj.weight'] = s['model.diffusion_model.output_blocks.5.1.transformer_blocks.0.attn2.to_q.weight'] + new['diffusion']['unet.decoders.5.1.attention_2.k_proj.weight'] = s['model.diffusion_model.output_blocks.5.1.transformer_blocks.0.attn2.to_k.weight'] + new['diffusion']['unet.decoders.5.1.attention_2.v_proj.weight'] = s['model.diffusion_model.output_blocks.5.1.transformer_blocks.0.attn2.to_v.weight'] + new['diffusion']['unet.decoders.5.1.attention_2.out_proj.weight'] = s['model.diffusion_model.output_blocks.5.1.transformer_blocks.0.attn2.to_out.0.weight'] + new['diffusion']['unet.decoders.5.1.attention_2.out_proj.bias'] = s['model.diffusion_model.output_blocks.5.1.transformer_blocks.0.attn2.to_out.0.bias'] + new['diffusion']['unet.decoders.5.1.layernorm_1.weight'] = s['model.diffusion_model.output_blocks.5.1.transformer_blocks.0.norm1.weight'] + new['diffusion']['unet.decoders.5.1.layernorm_1.bias'] = s['model.diffusion_model.output_blocks.5.1.transformer_blocks.0.norm1.bias'] + new['diffusion']['unet.decoders.5.1.layernorm_2.weight'] = s['model.diffusion_model.output_blocks.5.1.transformer_blocks.0.norm2.weight'] + new['diffusion']['unet.decoders.5.1.layernorm_2.bias'] = s['model.diffusion_model.output_blocks.5.1.transformer_blocks.0.norm2.bias'] + new['diffusion']['unet.decoders.5.1.layernorm_3.weight'] = s['model.diffusion_model.output_blocks.5.1.transformer_blocks.0.norm3.weight'] + new['diffusion']['unet.decoders.5.1.layernorm_3.bias'] = s['model.diffusion_model.output_blocks.5.1.transformer_blocks.0.norm3.bias'] + 
new['diffusion']['unet.decoders.5.1.conv_output.weight'] = s['model.diffusion_model.output_blocks.5.1.proj_out.weight'] + new['diffusion']['unet.decoders.5.1.conv_output.bias'] = s['model.diffusion_model.output_blocks.5.1.proj_out.bias'] + new['diffusion']['unet.decoders.5.2.conv.weight'] = s['model.diffusion_model.output_blocks.5.2.conv.weight'] + new['diffusion']['unet.decoders.5.2.conv.bias'] = s['model.diffusion_model.output_blocks.5.2.conv.bias'] + new['diffusion']['unet.decoders.6.0.groupnorm_feature.weight'] = s['model.diffusion_model.output_blocks.6.0.in_layers.0.weight'] + new['diffusion']['unet.decoders.6.0.groupnorm_feature.bias'] = s['model.diffusion_model.output_blocks.6.0.in_layers.0.bias'] + new['diffusion']['unet.decoders.6.0.conv_feature.weight'] = s['model.diffusion_model.output_blocks.6.0.in_layers.2.weight'] + new['diffusion']['unet.decoders.6.0.conv_feature.bias'] = s['model.diffusion_model.output_blocks.6.0.in_layers.2.bias'] + new['diffusion']['unet.decoders.6.0.linear_time.weight'] = s['model.diffusion_model.output_blocks.6.0.emb_layers.1.weight'] + new['diffusion']['unet.decoders.6.0.linear_time.bias'] = s['model.diffusion_model.output_blocks.6.0.emb_layers.1.bias'] + new['diffusion']['unet.decoders.6.0.groupnorm_merged.weight'] = s['model.diffusion_model.output_blocks.6.0.out_layers.0.weight'] + new['diffusion']['unet.decoders.6.0.groupnorm_merged.bias'] = s['model.diffusion_model.output_blocks.6.0.out_layers.0.bias'] + new['diffusion']['unet.decoders.6.0.conv_merged.weight'] = s['model.diffusion_model.output_blocks.6.0.out_layers.3.weight'] + new['diffusion']['unet.decoders.6.0.conv_merged.bias'] = s['model.diffusion_model.output_blocks.6.0.out_layers.3.bias'] + new['diffusion']['unet.decoders.6.0.residual_layer.weight'] = s['model.diffusion_model.output_blocks.6.0.skip_connection.weight'] + new['diffusion']['unet.decoders.6.0.residual_layer.bias'] = s['model.diffusion_model.output_blocks.6.0.skip_connection.bias'] + 
new['diffusion']['unet.decoders.6.1.groupnorm.weight'] = s['model.diffusion_model.output_blocks.6.1.norm.weight'] + new['diffusion']['unet.decoders.6.1.groupnorm.bias'] = s['model.diffusion_model.output_blocks.6.1.norm.bias'] + new['diffusion']['unet.decoders.6.1.conv_input.weight'] = s['model.diffusion_model.output_blocks.6.1.proj_in.weight'] + new['diffusion']['unet.decoders.6.1.conv_input.bias'] = s['model.diffusion_model.output_blocks.6.1.proj_in.bias'] + new['diffusion']['unet.decoders.6.1.attention_1.out_proj.weight'] = s['model.diffusion_model.output_blocks.6.1.transformer_blocks.0.attn1.to_out.0.weight'] + new['diffusion']['unet.decoders.6.1.attention_1.out_proj.bias'] = s['model.diffusion_model.output_blocks.6.1.transformer_blocks.0.attn1.to_out.0.bias'] + new['diffusion']['unet.decoders.6.1.linear_geglu_1.weight'] = s['model.diffusion_model.output_blocks.6.1.transformer_blocks.0.ff.net.0.proj.weight'] + new['diffusion']['unet.decoders.6.1.linear_geglu_1.bias'] = s['model.diffusion_model.output_blocks.6.1.transformer_blocks.0.ff.net.0.proj.bias'] + new['diffusion']['unet.decoders.6.1.linear_geglu_2.weight'] = s['model.diffusion_model.output_blocks.6.1.transformer_blocks.0.ff.net.2.weight'] + new['diffusion']['unet.decoders.6.1.linear_geglu_2.bias'] = s['model.diffusion_model.output_blocks.6.1.transformer_blocks.0.ff.net.2.bias'] + new['diffusion']['unet.decoders.6.1.attention_2.q_proj.weight'] = s['model.diffusion_model.output_blocks.6.1.transformer_blocks.0.attn2.to_q.weight'] + new['diffusion']['unet.decoders.6.1.attention_2.k_proj.weight'] = s['model.diffusion_model.output_blocks.6.1.transformer_blocks.0.attn2.to_k.weight'] + new['diffusion']['unet.decoders.6.1.attention_2.v_proj.weight'] = s['model.diffusion_model.output_blocks.6.1.transformer_blocks.0.attn2.to_v.weight'] + new['diffusion']['unet.decoders.6.1.attention_2.out_proj.weight'] = s['model.diffusion_model.output_blocks.6.1.transformer_blocks.0.attn2.to_out.0.weight'] + 
new['diffusion']['unet.decoders.6.1.attention_2.out_proj.bias'] = s['model.diffusion_model.output_blocks.6.1.transformer_blocks.0.attn2.to_out.0.bias'] + new['diffusion']['unet.decoders.6.1.layernorm_1.weight'] = s['model.diffusion_model.output_blocks.6.1.transformer_blocks.0.norm1.weight'] + new['diffusion']['unet.decoders.6.1.layernorm_1.bias'] = s['model.diffusion_model.output_blocks.6.1.transformer_blocks.0.norm1.bias'] + new['diffusion']['unet.decoders.6.1.layernorm_2.weight'] = s['model.diffusion_model.output_blocks.6.1.transformer_blocks.0.norm2.weight'] + new['diffusion']['unet.decoders.6.1.layernorm_2.bias'] = s['model.diffusion_model.output_blocks.6.1.transformer_blocks.0.norm2.bias'] + new['diffusion']['unet.decoders.6.1.layernorm_3.weight'] = s['model.diffusion_model.output_blocks.6.1.transformer_blocks.0.norm3.weight'] + new['diffusion']['unet.decoders.6.1.layernorm_3.bias'] = s['model.diffusion_model.output_blocks.6.1.transformer_blocks.0.norm3.bias'] + new['diffusion']['unet.decoders.6.1.conv_output.weight'] = s['model.diffusion_model.output_blocks.6.1.proj_out.weight'] + new['diffusion']['unet.decoders.6.1.conv_output.bias'] = s['model.diffusion_model.output_blocks.6.1.proj_out.bias'] + new['diffusion']['unet.decoders.7.0.groupnorm_feature.weight'] = s['model.diffusion_model.output_blocks.7.0.in_layers.0.weight'] + new['diffusion']['unet.decoders.7.0.groupnorm_feature.bias'] = s['model.diffusion_model.output_blocks.7.0.in_layers.0.bias'] + new['diffusion']['unet.decoders.7.0.conv_feature.weight'] = s['model.diffusion_model.output_blocks.7.0.in_layers.2.weight'] + new['diffusion']['unet.decoders.7.0.conv_feature.bias'] = s['model.diffusion_model.output_blocks.7.0.in_layers.2.bias'] + new['diffusion']['unet.decoders.7.0.linear_time.weight'] = s['model.diffusion_model.output_blocks.7.0.emb_layers.1.weight'] + new['diffusion']['unet.decoders.7.0.linear_time.bias'] = s['model.diffusion_model.output_blocks.7.0.emb_layers.1.bias'] + 
# ---------------------------------------------------------------------------
# UNet decoder (output_blocks 7..11) + final head, then the first VAE-encoder
# layers.  The original spelled every key assignment out by hand; the tables
# and loops below emit the exact same key/value mapping with far less
# copy-paste surface.  Only dict *insertion order* can differ, which
# load_state_dict() does not care about.
# ---------------------------------------------------------------------------

# ResBlock sub-modules: (our name, original SD name); each has weight+bias.
_RES = [
    ('groupnorm_feature', 'in_layers.0'),
    ('conv_feature', 'in_layers.2'),
    ('linear_time', 'emb_layers.1'),
    ('groupnorm_merged', 'out_layers.0'),
    ('conv_merged', 'out_layers.3'),
    ('residual_layer', 'skip_connection'),
]
# SpatialTransformer sub-modules that map 1:1 (weight+bias each).
# NOTE(review): attn1 q/k/v and the attn2 q/k/v *biases* are absent here —
# SD's to_q/to_k/to_v carry no bias, and attn1's in-projection is presumably
# assembled elsewhere in this function (it needs concatenation) — verify.
_ATTN = [
    ('groupnorm', 'norm'),
    ('conv_input', 'proj_in'),
    ('attention_1.out_proj', 'transformer_blocks.0.attn1.to_out.0'),
    ('linear_geglu_1', 'transformer_blocks.0.ff.net.0.proj'),
    ('linear_geglu_2', 'transformer_blocks.0.ff.net.2'),
    ('attention_2.out_proj', 'transformer_blocks.0.attn2.to_out.0'),
    ('layernorm_1', 'transformer_blocks.0.norm1'),
    ('layernorm_2', 'transformer_blocks.0.norm2'),
    ('layernorm_3', 'transformer_blocks.0.norm3'),
    ('conv_output', 'proj_out'),
]
# VAE ResnetBlock sub-modules (weight+bias each).
_VRES = [
    ('groupnorm_1', 'norm1'),
    ('conv_1', 'conv1'),
    ('groupnorm_2', 'norm2'),
    ('conv_2', 'conv2'),
]

def _copy_wb(dst, dst_prefix, src_prefix, pairs):
    # Copy <prefix>.<name>.{weight,bias} for every (ours, theirs) pair.
    for ours, theirs in pairs:
        for p in ('weight', 'bias'):
            dst[f'{dst_prefix}.{ours}.{p}'] = s[f'{src_prefix}.{theirs}.{p}']

_OUT = 'model.diffusion_model.output_blocks'

# Tail of decoder 7's ResBlock (its first half is mapped before this point).
_copy_wb(new['diffusion'], 'unet.decoders.7.0', f'{_OUT}.7.0', _RES[3:])
# Decoders 8..11 each carry a full ResBlock, shortcut included.
for _i in (8, 9, 10, 11):
    _copy_wb(new['diffusion'], f'unet.decoders.{_i}.0', f'{_OUT}.{_i}.0', _RES)
# Transformer block of decoders 7..11, plus the bias-less attn2 projections.
for _i in (7, 8, 9, 10, 11):
    _dst, _src = f'unet.decoders.{_i}.1', f'{_OUT}.{_i}.1'
    _copy_wb(new['diffusion'], _dst, _src, _ATTN)
    for _q in ('q', 'k', 'v'):
        new['diffusion'][f'{_dst}.attention_2.{_q}_proj.weight'] = \
            s[f'{_src}.transformer_blocks.0.attn2.to_{_q}.weight']
# Upsample conv that only decoder 8 has at this depth.
_copy_wb(new['diffusion'], 'unet.decoders.8.2', f'{_OUT}.8.2', [('conv', 'conv')])
# Final GroupNorm + output conv of the UNet.
_copy_wb(new['diffusion'], 'final', 'model.diffusion_model.out',
         [('groupnorm', '0'), ('conv', '2')])

_ENC = 'first_stage_model.encoder'
# Encoder stem conv, then the two down.0 resnet blocks (these carry no
# nin_shortcut keys in the checkpoint) and the down.0 downsampler.
for _p in ('weight', 'bias'):
    new['encoder'][f'0.{_p}'] = s[f'{_ENC}.conv_in.{_p}']
_copy_wb(new['encoder'], '1', f'{_ENC}.down.0.block.0', _VRES)
_copy_wb(new['encoder'], '2', f'{_ENC}.down.0.block.1', _VRES)
for _p in ('weight', 'bias'):
    new['encoder'][f'3.{_p}'] = s[f'{_ENC}.down.0.downsample.conv.{_p}']
# Head of down.1.block.0 (index 4); its remaining layers are mapped below.
_copy_wb(new['encoder'], '4', f'{_ENC}.down.1.block.0', _VRES[:1])
# ---------------------------------------------------------------------------
# Remaining VAE-encoder layers and the middle of the VAE decoder.  Tables +
# loops replace the hand-written assignments; the resulting key/value
# mapping is identical (only dict insertion order can differ, which
# load_state_dict() ignores).
# ---------------------------------------------------------------------------
_VRES = [  # VAE ResnetBlock sub-modules (weight+bias each)
    ('groupnorm_1', 'norm1'),
    ('conv_1', 'conv1'),
    ('groupnorm_2', 'norm2'),
    ('conv_2', 'conv2'),
]
_SHORTCUT = ('residual_layer', 'nin_shortcut')

def _copy_wb(dst, dst_prefix, src_prefix, pairs):
    # Copy <prefix>.<name>.{weight,bias} for every (ours, theirs) pair.
    for ours, theirs in pairs:
        for p in ('weight', 'bias'):
            dst[f'{dst_prefix}.{ours}.{p}'] = s[f'{src_prefix}.{theirs}.{p}']

_ENC = 'first_stage_model.encoder'
_DEC = 'first_stage_model.decoder'

# Tail of down.1.block.0 (index 4) — its groupnorm_1 is mapped above.
_copy_wb(new['encoder'], '4', f'{_ENC}.down.1.block.0', _VRES[1:] + [_SHORTCUT])
_copy_wb(new['encoder'], '5', f'{_ENC}.down.1.block.1', _VRES)
for _p in ('weight', 'bias'):
    new['encoder'][f'6.{_p}'] = s[f'{_ENC}.down.1.downsample.conv.{_p}']
_copy_wb(new['encoder'], '7', f'{_ENC}.down.2.block.0', _VRES + [_SHORTCUT])
_copy_wb(new['encoder'], '8', f'{_ENC}.down.2.block.1', _VRES)
for _p in ('weight', 'bias'):
    new['encoder'][f'9.{_p}'] = s[f'{_ENC}.down.2.downsample.conv.{_p}']
# down.3 blocks carry no nin_shortcut keys in the checkpoint.
_copy_wb(new['encoder'], '10', f'{_ENC}.down.3.block.0', _VRES)
_copy_wb(new['encoder'], '11', f'{_ENC}.down.3.block.1', _VRES)
_copy_wb(new['encoder'], '12', f'{_ENC}.mid.block_1', _VRES)
# Mid attention: only the norm and the out-projection *bias* map 1:1 here.
# NOTE(review): the attention q/k/v/out-proj weights are 1x1 convs in the SD
# checkpoint and are presumably reshaped elsewhere in this function — verify.
_copy_wb(new['encoder'], '13', f'{_ENC}.mid.attn_1', [('groupnorm', 'norm')])
new['encoder']['13.attention.out_proj.bias'] = s[f'{_ENC}.mid.attn_1.proj_out.bias']
_copy_wb(new['encoder'], '14', f'{_ENC}.mid.block_2', _VRES)
# Encoder tail (norm_out, conv_out) and the decoder stem conv.
for _p in ('weight', 'bias'):
    new['encoder'][f'15.{_p}'] = s[f'{_ENC}.norm_out.{_p}']
    new['encoder'][f'17.{_p}'] = s[f'{_ENC}.conv_out.{_p}']
    new['decoder'][f'1.{_p}'] = s[f'{_DEC}.conv_in.{_p}']
_copy_wb(new['decoder'], '2', f'{_DEC}.mid.block_1', _VRES)
# Decoder mid attention: same 1:1 subset as the encoder's (see NOTE above).
_copy_wb(new['decoder'], '3', f'{_DEC}.mid.attn_1', [('groupnorm', 'norm')])
new['decoder']['3.attention.out_proj.bias'] = s[f'{_DEC}.mid.attn_1.proj_out.bias']
_copy_wb(new['decoder'], '4', f'{_DEC}.mid.block_2', _VRES)
# Head of up.0.block.0 (index 20); its remaining layers are mapped below.
_copy_wb(new['decoder'], '20', f'{_DEC}.up.0.block.0', _VRES[:2])
new['decoder']['20.groupnorm_2.weight'] = s[f'{_DEC}.up.0.block.0.norm2.weight']
# ---------------------------------------------------------------------------
# VAE-decoder upsampling path (up.0 fully, up.1, and the start of up.2).
# Tables + loops replace the hand-written assignments; the key/value mapping
# is unchanged (dict insertion order aside, which load_state_dict() ignores).
# ---------------------------------------------------------------------------
_VRES = [  # VAE ResnetBlock sub-modules (weight+bias each)
    ('groupnorm_1', 'norm1'),
    ('conv_1', 'conv1'),
    ('groupnorm_2', 'norm2'),
    ('conv_2', 'conv2'),
]
_SHORTCUT = ('residual_layer', 'nin_shortcut')

def _copy_wb(dst, dst_prefix, src_prefix, pairs):
    # Copy <prefix>.<name>.{weight,bias} for every (ours, theirs) pair.
    for ours, theirs in pairs:
        for p in ('weight', 'bias'):
            dst[f'{dst_prefix}.{ours}.{p}'] = s[f'{src_prefix}.{theirs}.{p}']

_DEC = 'first_stage_model.decoder'

# Tail of up.0.block.0 (index 20); its first layers are mapped above.
new['decoder']['20.groupnorm_2.bias'] = s[f'{_DEC}.up.0.block.0.norm2.bias']
_copy_wb(new['decoder'], '20', f'{_DEC}.up.0.block.0', [_VRES[3], _SHORTCUT])
_copy_wb(new['decoder'], '21', f'{_DEC}.up.0.block.1', _VRES)
_copy_wb(new['decoder'], '22', f'{_DEC}.up.0.block.2', _VRES)
# up.1.block.0 carries a nin_shortcut in the checkpoint; its siblings don't.
_copy_wb(new['decoder'], '15', f'{_DEC}.up.1.block.0', _VRES + [_SHORTCUT])
_copy_wb(new['decoder'], '16', f'{_DEC}.up.1.block.1', _VRES)
_copy_wb(new['decoder'], '17', f'{_DEC}.up.1.block.2', _VRES)
for _p in ('weight', 'bias'):
    new['decoder'][f'19.{_p}'] = s[f'{_DEC}.up.1.upsample.conv.{_p}']
# up.2.block.0 has no nin_shortcut keys in the checkpoint.
_copy_wb(new['decoder'], '10', f'{_DEC}.up.2.block.0', _VRES)
# Head of up.2.block.1 (index 11); its remaining layers follow below.
_copy_wb(new['decoder'], '11', f'{_DEC}.up.2.block.1', _VRES[:2])
new['decoder']['11.groupnorm_2.weight'] = s['first_stage_model.decoder.up.2.block.1.norm2.weight'] + new['decoder']['11.groupnorm_2.bias'] = s['first_stage_model.decoder.up.2.block.1.norm2.bias'] + new['decoder']['11.conv_2.weight'] = s['first_stage_model.decoder.up.2.block.1.conv2.weight'] + new['decoder']['11.conv_2.bias'] = s['first_stage_model.decoder.up.2.block.1.conv2.bias'] + new['decoder']['12.groupnorm_1.weight'] = s['first_stage_model.decoder.up.2.block.2.norm1.weight'] + new['decoder']['12.groupnorm_1.bias'] = s['first_stage_model.decoder.up.2.block.2.norm1.bias'] + new['decoder']['12.conv_1.weight'] = s['first_stage_model.decoder.up.2.block.2.conv1.weight'] + new['decoder']['12.conv_1.bias'] = s['first_stage_model.decoder.up.2.block.2.conv1.bias'] + new['decoder']['12.groupnorm_2.weight'] = s['first_stage_model.decoder.up.2.block.2.norm2.weight'] + new['decoder']['12.groupnorm_2.bias'] = s['first_stage_model.decoder.up.2.block.2.norm2.bias'] + new['decoder']['12.conv_2.weight'] = s['first_stage_model.decoder.up.2.block.2.conv2.weight'] + new['decoder']['12.conv_2.bias'] = s['first_stage_model.decoder.up.2.block.2.conv2.bias'] + new['decoder']['14.weight'] = s['first_stage_model.decoder.up.2.upsample.conv.weight'] + new['decoder']['14.bias'] = s['first_stage_model.decoder.up.2.upsample.conv.bias'] + new['decoder']['5.groupnorm_1.weight'] = s['first_stage_model.decoder.up.3.block.0.norm1.weight'] + new['decoder']['5.groupnorm_1.bias'] = s['first_stage_model.decoder.up.3.block.0.norm1.bias'] + new['decoder']['5.conv_1.weight'] = s['first_stage_model.decoder.up.3.block.0.conv1.weight'] + new['decoder']['5.conv_1.bias'] = s['first_stage_model.decoder.up.3.block.0.conv1.bias'] + new['decoder']['5.groupnorm_2.weight'] = s['first_stage_model.decoder.up.3.block.0.norm2.weight'] + new['decoder']['5.groupnorm_2.bias'] = s['first_stage_model.decoder.up.3.block.0.norm2.bias'] + new['decoder']['5.conv_2.weight'] = 
s['first_stage_model.decoder.up.3.block.0.conv2.weight'] + new['decoder']['5.conv_2.bias'] = s['first_stage_model.decoder.up.3.block.0.conv2.bias'] + new['decoder']['6.groupnorm_1.weight'] = s['first_stage_model.decoder.up.3.block.1.norm1.weight'] + new['decoder']['6.groupnorm_1.bias'] = s['first_stage_model.decoder.up.3.block.1.norm1.bias'] + new['decoder']['6.conv_1.weight'] = s['first_stage_model.decoder.up.3.block.1.conv1.weight'] + new['decoder']['6.conv_1.bias'] = s['first_stage_model.decoder.up.3.block.1.conv1.bias'] + new['decoder']['6.groupnorm_2.weight'] = s['first_stage_model.decoder.up.3.block.1.norm2.weight'] + new['decoder']['6.groupnorm_2.bias'] = s['first_stage_model.decoder.up.3.block.1.norm2.bias'] + new['decoder']['6.conv_2.weight'] = s['first_stage_model.decoder.up.3.block.1.conv2.weight'] + new['decoder']['6.conv_2.bias'] = s['first_stage_model.decoder.up.3.block.1.conv2.bias'] + new['decoder']['7.groupnorm_1.weight'] = s['first_stage_model.decoder.up.3.block.2.norm1.weight'] + new['decoder']['7.groupnorm_1.bias'] = s['first_stage_model.decoder.up.3.block.2.norm1.bias'] + new['decoder']['7.conv_1.weight'] = s['first_stage_model.decoder.up.3.block.2.conv1.weight'] + new['decoder']['7.conv_1.bias'] = s['first_stage_model.decoder.up.3.block.2.conv1.bias'] + new['decoder']['7.groupnorm_2.weight'] = s['first_stage_model.decoder.up.3.block.2.norm2.weight'] + new['decoder']['7.groupnorm_2.bias'] = s['first_stage_model.decoder.up.3.block.2.norm2.bias'] + new['decoder']['7.conv_2.weight'] = s['first_stage_model.decoder.up.3.block.2.conv2.weight'] + new['decoder']['7.conv_2.bias'] = s['first_stage_model.decoder.up.3.block.2.conv2.bias'] + new['decoder']['9.weight'] = s['first_stage_model.decoder.up.3.upsample.conv.weight'] + new['decoder']['9.bias'] = s['first_stage_model.decoder.up.3.upsample.conv.bias'] + new['decoder']['23.weight'] = s['first_stage_model.decoder.norm_out.weight'] + new['decoder']['23.bias'] = 
s['first_stage_model.decoder.norm_out.bias'] + new['decoder']['25.weight'] = s['first_stage_model.decoder.conv_out.weight'] + new['decoder']['25.bias'] = s['first_stage_model.decoder.conv_out.bias'] + new['encoder']['18.weight'] = s['first_stage_model.quant_conv.weight'] + new['encoder']['18.bias'] = s['first_stage_model.quant_conv.bias'] + new['decoder']['0.weight'] = s['first_stage_model.post_quant_conv.weight'] + new['decoder']['0.bias'] = s['first_stage_model.post_quant_conv.bias'] + new['clip']['embedding.token_embedding.weight'] = s['cond_stage_model.transformer.text_model.embeddings.token_embedding.weight'] + new['clip']['embedding.position_value'] = s['cond_stage_model.transformer.text_model.embeddings.position_embedding.weight'] + new['clip']['layers.0.attention.out_proj.weight'] = s['cond_stage_model.transformer.text_model.encoder.layers.0.self_attn.out_proj.weight'] + new['clip']['layers.0.attention.out_proj.bias'] = s['cond_stage_model.transformer.text_model.encoder.layers.0.self_attn.out_proj.bias'] + new['clip']['layers.0.layernorm_1.weight'] = s['cond_stage_model.transformer.text_model.encoder.layers.0.layer_norm1.weight'] + new['clip']['layers.0.layernorm_1.bias'] = s['cond_stage_model.transformer.text_model.encoder.layers.0.layer_norm1.bias'] + new['clip']['layers.0.linear_1.weight'] = s['cond_stage_model.transformer.text_model.encoder.layers.0.mlp.fc1.weight'] + new['clip']['layers.0.linear_1.bias'] = s['cond_stage_model.transformer.text_model.encoder.layers.0.mlp.fc1.bias'] + new['clip']['layers.0.linear_2.weight'] = s['cond_stage_model.transformer.text_model.encoder.layers.0.mlp.fc2.weight'] + new['clip']['layers.0.linear_2.bias'] = s['cond_stage_model.transformer.text_model.encoder.layers.0.mlp.fc2.bias'] + new['clip']['layers.0.layernorm_2.weight'] = s['cond_stage_model.transformer.text_model.encoder.layers.0.layer_norm2.weight'] + new['clip']['layers.0.layernorm_2.bias'] = 
s['cond_stage_model.transformer.text_model.encoder.layers.0.layer_norm2.bias'] + new['clip']['layers.1.attention.out_proj.weight'] = s['cond_stage_model.transformer.text_model.encoder.layers.1.self_attn.out_proj.weight'] + new['clip']['layers.1.attention.out_proj.bias'] = s['cond_stage_model.transformer.text_model.encoder.layers.1.self_attn.out_proj.bias'] + new['clip']['layers.1.layernorm_1.weight'] = s['cond_stage_model.transformer.text_model.encoder.layers.1.layer_norm1.weight'] + new['clip']['layers.1.layernorm_1.bias'] = s['cond_stage_model.transformer.text_model.encoder.layers.1.layer_norm1.bias'] + new['clip']['layers.1.linear_1.weight'] = s['cond_stage_model.transformer.text_model.encoder.layers.1.mlp.fc1.weight'] + new['clip']['layers.1.linear_1.bias'] = s['cond_stage_model.transformer.text_model.encoder.layers.1.mlp.fc1.bias'] + new['clip']['layers.1.linear_2.weight'] = s['cond_stage_model.transformer.text_model.encoder.layers.1.mlp.fc2.weight'] + new['clip']['layers.1.linear_2.bias'] = s['cond_stage_model.transformer.text_model.encoder.layers.1.mlp.fc2.bias'] + new['clip']['layers.1.layernorm_2.weight'] = s['cond_stage_model.transformer.text_model.encoder.layers.1.layer_norm2.weight'] + new['clip']['layers.1.layernorm_2.bias'] = s['cond_stage_model.transformer.text_model.encoder.layers.1.layer_norm2.bias'] + new['clip']['layers.2.attention.out_proj.weight'] = s['cond_stage_model.transformer.text_model.encoder.layers.2.self_attn.out_proj.weight'] + new['clip']['layers.2.attention.out_proj.bias'] = s['cond_stage_model.transformer.text_model.encoder.layers.2.self_attn.out_proj.bias'] + new['clip']['layers.2.layernorm_1.weight'] = s['cond_stage_model.transformer.text_model.encoder.layers.2.layer_norm1.weight'] + new['clip']['layers.2.layernorm_1.bias'] = s['cond_stage_model.transformer.text_model.encoder.layers.2.layer_norm1.bias'] + new['clip']['layers.2.linear_1.weight'] = s['cond_stage_model.transformer.text_model.encoder.layers.2.mlp.fc1.weight'] + 
new['clip']['layers.2.linear_1.bias'] = s['cond_stage_model.transformer.text_model.encoder.layers.2.mlp.fc1.bias'] + new['clip']['layers.2.linear_2.weight'] = s['cond_stage_model.transformer.text_model.encoder.layers.2.mlp.fc2.weight'] + new['clip']['layers.2.linear_2.bias'] = s['cond_stage_model.transformer.text_model.encoder.layers.2.mlp.fc2.bias'] + new['clip']['layers.2.layernorm_2.weight'] = s['cond_stage_model.transformer.text_model.encoder.layers.2.layer_norm2.weight'] + new['clip']['layers.2.layernorm_2.bias'] = s['cond_stage_model.transformer.text_model.encoder.layers.2.layer_norm2.bias'] + new['clip']['layers.3.attention.out_proj.weight'] = s['cond_stage_model.transformer.text_model.encoder.layers.3.self_attn.out_proj.weight'] + new['clip']['layers.3.attention.out_proj.bias'] = s['cond_stage_model.transformer.text_model.encoder.layers.3.self_attn.out_proj.bias'] + new['clip']['layers.3.layernorm_1.weight'] = s['cond_stage_model.transformer.text_model.encoder.layers.3.layer_norm1.weight'] + new['clip']['layers.3.layernorm_1.bias'] = s['cond_stage_model.transformer.text_model.encoder.layers.3.layer_norm1.bias'] + new['clip']['layers.3.linear_1.weight'] = s['cond_stage_model.transformer.text_model.encoder.layers.3.mlp.fc1.weight'] + new['clip']['layers.3.linear_1.bias'] = s['cond_stage_model.transformer.text_model.encoder.layers.3.mlp.fc1.bias'] + new['clip']['layers.3.linear_2.weight'] = s['cond_stage_model.transformer.text_model.encoder.layers.3.mlp.fc2.weight'] + new['clip']['layers.3.linear_2.bias'] = s['cond_stage_model.transformer.text_model.encoder.layers.3.mlp.fc2.bias'] + new['clip']['layers.3.layernorm_2.weight'] = s['cond_stage_model.transformer.text_model.encoder.layers.3.layer_norm2.weight'] + new['clip']['layers.3.layernorm_2.bias'] = s['cond_stage_model.transformer.text_model.encoder.layers.3.layer_norm2.bias'] + new['clip']['layers.4.attention.out_proj.weight'] = 
s['cond_stage_model.transformer.text_model.encoder.layers.4.self_attn.out_proj.weight'] + new['clip']['layers.4.attention.out_proj.bias'] = s['cond_stage_model.transformer.text_model.encoder.layers.4.self_attn.out_proj.bias'] + new['clip']['layers.4.layernorm_1.weight'] = s['cond_stage_model.transformer.text_model.encoder.layers.4.layer_norm1.weight'] + new['clip']['layers.4.layernorm_1.bias'] = s['cond_stage_model.transformer.text_model.encoder.layers.4.layer_norm1.bias'] + new['clip']['layers.4.linear_1.weight'] = s['cond_stage_model.transformer.text_model.encoder.layers.4.mlp.fc1.weight'] + new['clip']['layers.4.linear_1.bias'] = s['cond_stage_model.transformer.text_model.encoder.layers.4.mlp.fc1.bias'] + new['clip']['layers.4.linear_2.weight'] = s['cond_stage_model.transformer.text_model.encoder.layers.4.mlp.fc2.weight'] + new['clip']['layers.4.linear_2.bias'] = s['cond_stage_model.transformer.text_model.encoder.layers.4.mlp.fc2.bias'] + new['clip']['layers.4.layernorm_2.weight'] = s['cond_stage_model.transformer.text_model.encoder.layers.4.layer_norm2.weight'] + new['clip']['layers.4.layernorm_2.bias'] = s['cond_stage_model.transformer.text_model.encoder.layers.4.layer_norm2.bias'] + new['clip']['layers.5.attention.out_proj.weight'] = s['cond_stage_model.transformer.text_model.encoder.layers.5.self_attn.out_proj.weight'] + new['clip']['layers.5.attention.out_proj.bias'] = s['cond_stage_model.transformer.text_model.encoder.layers.5.self_attn.out_proj.bias'] + new['clip']['layers.5.layernorm_1.weight'] = s['cond_stage_model.transformer.text_model.encoder.layers.5.layer_norm1.weight'] + new['clip']['layers.5.layernorm_1.bias'] = s['cond_stage_model.transformer.text_model.encoder.layers.5.layer_norm1.bias'] + new['clip']['layers.5.linear_1.weight'] = s['cond_stage_model.transformer.text_model.encoder.layers.5.mlp.fc1.weight'] + new['clip']['layers.5.linear_1.bias'] = s['cond_stage_model.transformer.text_model.encoder.layers.5.mlp.fc1.bias'] + 
new['clip']['layers.5.linear_2.weight'] = s['cond_stage_model.transformer.text_model.encoder.layers.5.mlp.fc2.weight'] + new['clip']['layers.5.linear_2.bias'] = s['cond_stage_model.transformer.text_model.encoder.layers.5.mlp.fc2.bias'] + new['clip']['layers.5.layernorm_2.weight'] = s['cond_stage_model.transformer.text_model.encoder.layers.5.layer_norm2.weight'] + new['clip']['layers.5.layernorm_2.bias'] = s['cond_stage_model.transformer.text_model.encoder.layers.5.layer_norm2.bias'] + new['clip']['layers.6.attention.out_proj.weight'] = s['cond_stage_model.transformer.text_model.encoder.layers.6.self_attn.out_proj.weight'] + new['clip']['layers.6.attention.out_proj.bias'] = s['cond_stage_model.transformer.text_model.encoder.layers.6.self_attn.out_proj.bias'] + new['clip']['layers.6.layernorm_1.weight'] = s['cond_stage_model.transformer.text_model.encoder.layers.6.layer_norm1.weight'] + new['clip']['layers.6.layernorm_1.bias'] = s['cond_stage_model.transformer.text_model.encoder.layers.6.layer_norm1.bias'] + new['clip']['layers.6.linear_1.weight'] = s['cond_stage_model.transformer.text_model.encoder.layers.6.mlp.fc1.weight'] + new['clip']['layers.6.linear_1.bias'] = s['cond_stage_model.transformer.text_model.encoder.layers.6.mlp.fc1.bias'] + new['clip']['layers.6.linear_2.weight'] = s['cond_stage_model.transformer.text_model.encoder.layers.6.mlp.fc2.weight'] + new['clip']['layers.6.linear_2.bias'] = s['cond_stage_model.transformer.text_model.encoder.layers.6.mlp.fc2.bias'] + new['clip']['layers.6.layernorm_2.weight'] = s['cond_stage_model.transformer.text_model.encoder.layers.6.layer_norm2.weight'] + new['clip']['layers.6.layernorm_2.bias'] = s['cond_stage_model.transformer.text_model.encoder.layers.6.layer_norm2.bias'] + new['clip']['layers.7.attention.out_proj.weight'] = s['cond_stage_model.transformer.text_model.encoder.layers.7.self_attn.out_proj.weight'] + new['clip']['layers.7.attention.out_proj.bias'] = 
s['cond_stage_model.transformer.text_model.encoder.layers.7.self_attn.out_proj.bias'] + new['clip']['layers.7.layernorm_1.weight'] = s['cond_stage_model.transformer.text_model.encoder.layers.7.layer_norm1.weight'] + new['clip']['layers.7.layernorm_1.bias'] = s['cond_stage_model.transformer.text_model.encoder.layers.7.layer_norm1.bias'] + new['clip']['layers.7.linear_1.weight'] = s['cond_stage_model.transformer.text_model.encoder.layers.7.mlp.fc1.weight'] + new['clip']['layers.7.linear_1.bias'] = s['cond_stage_model.transformer.text_model.encoder.layers.7.mlp.fc1.bias'] + new['clip']['layers.7.linear_2.weight'] = s['cond_stage_model.transformer.text_model.encoder.layers.7.mlp.fc2.weight'] + new['clip']['layers.7.linear_2.bias'] = s['cond_stage_model.transformer.text_model.encoder.layers.7.mlp.fc2.bias'] + new['clip']['layers.7.layernorm_2.weight'] = s['cond_stage_model.transformer.text_model.encoder.layers.7.layer_norm2.weight'] + new['clip']['layers.7.layernorm_2.bias'] = s['cond_stage_model.transformer.text_model.encoder.layers.7.layer_norm2.bias'] + new['clip']['layers.8.attention.out_proj.weight'] = s['cond_stage_model.transformer.text_model.encoder.layers.8.self_attn.out_proj.weight'] + new['clip']['layers.8.attention.out_proj.bias'] = s['cond_stage_model.transformer.text_model.encoder.layers.8.self_attn.out_proj.bias'] + new['clip']['layers.8.layernorm_1.weight'] = s['cond_stage_model.transformer.text_model.encoder.layers.8.layer_norm1.weight'] + new['clip']['layers.8.layernorm_1.bias'] = s['cond_stage_model.transformer.text_model.encoder.layers.8.layer_norm1.bias'] + new['clip']['layers.8.linear_1.weight'] = s['cond_stage_model.transformer.text_model.encoder.layers.8.mlp.fc1.weight'] + new['clip']['layers.8.linear_1.bias'] = s['cond_stage_model.transformer.text_model.encoder.layers.8.mlp.fc1.bias'] + new['clip']['layers.8.linear_2.weight'] = s['cond_stage_model.transformer.text_model.encoder.layers.8.mlp.fc2.weight'] + new['clip']['layers.8.linear_2.bias'] = 
s['cond_stage_model.transformer.text_model.encoder.layers.8.mlp.fc2.bias'] + new['clip']['layers.8.layernorm_2.weight'] = s['cond_stage_model.transformer.text_model.encoder.layers.8.layer_norm2.weight'] + new['clip']['layers.8.layernorm_2.bias'] = s['cond_stage_model.transformer.text_model.encoder.layers.8.layer_norm2.bias'] + new['clip']['layers.9.attention.out_proj.weight'] = s['cond_stage_model.transformer.text_model.encoder.layers.9.self_attn.out_proj.weight'] + new['clip']['layers.9.attention.out_proj.bias'] = s['cond_stage_model.transformer.text_model.encoder.layers.9.self_attn.out_proj.bias'] + new['clip']['layers.9.layernorm_1.weight'] = s['cond_stage_model.transformer.text_model.encoder.layers.9.layer_norm1.weight'] + new['clip']['layers.9.layernorm_1.bias'] = s['cond_stage_model.transformer.text_model.encoder.layers.9.layer_norm1.bias'] + new['clip']['layers.9.linear_1.weight'] = s['cond_stage_model.transformer.text_model.encoder.layers.9.mlp.fc1.weight'] + new['clip']['layers.9.linear_1.bias'] = s['cond_stage_model.transformer.text_model.encoder.layers.9.mlp.fc1.bias'] + new['clip']['layers.9.linear_2.weight'] = s['cond_stage_model.transformer.text_model.encoder.layers.9.mlp.fc2.weight'] + new['clip']['layers.9.linear_2.bias'] = s['cond_stage_model.transformer.text_model.encoder.layers.9.mlp.fc2.bias'] + new['clip']['layers.9.layernorm_2.weight'] = s['cond_stage_model.transformer.text_model.encoder.layers.9.layer_norm2.weight'] + new['clip']['layers.9.layernorm_2.bias'] = s['cond_stage_model.transformer.text_model.encoder.layers.9.layer_norm2.bias'] + new['clip']['layers.10.attention.out_proj.weight'] = s['cond_stage_model.transformer.text_model.encoder.layers.10.self_attn.out_proj.weight'] + new['clip']['layers.10.attention.out_proj.bias'] = s['cond_stage_model.transformer.text_model.encoder.layers.10.self_attn.out_proj.bias'] + new['clip']['layers.10.layernorm_1.weight'] = 
s['cond_stage_model.transformer.text_model.encoder.layers.10.layer_norm1.weight'] + new['clip']['layers.10.layernorm_1.bias'] = s['cond_stage_model.transformer.text_model.encoder.layers.10.layer_norm1.bias'] + new['clip']['layers.10.linear_1.weight'] = s['cond_stage_model.transformer.text_model.encoder.layers.10.mlp.fc1.weight'] + new['clip']['layers.10.linear_1.bias'] = s['cond_stage_model.transformer.text_model.encoder.layers.10.mlp.fc1.bias'] + new['clip']['layers.10.linear_2.weight'] = s['cond_stage_model.transformer.text_model.encoder.layers.10.mlp.fc2.weight'] + new['clip']['layers.10.linear_2.bias'] = s['cond_stage_model.transformer.text_model.encoder.layers.10.mlp.fc2.bias'] + new['clip']['layers.10.layernorm_2.weight'] = s['cond_stage_model.transformer.text_model.encoder.layers.10.layer_norm2.weight'] + new['clip']['layers.10.layernorm_2.bias'] = s['cond_stage_model.transformer.text_model.encoder.layers.10.layer_norm2.bias'] + new['clip']['layers.11.attention.out_proj.weight'] = s['cond_stage_model.transformer.text_model.encoder.layers.11.self_attn.out_proj.weight'] + new['clip']['layers.11.attention.out_proj.bias'] = s['cond_stage_model.transformer.text_model.encoder.layers.11.self_attn.out_proj.bias'] + new['clip']['layers.11.layernorm_1.weight'] = s['cond_stage_model.transformer.text_model.encoder.layers.11.layer_norm1.weight'] + new['clip']['layers.11.layernorm_1.bias'] = s['cond_stage_model.transformer.text_model.encoder.layers.11.layer_norm1.bias'] + new['clip']['layers.11.linear_1.weight'] = s['cond_stage_model.transformer.text_model.encoder.layers.11.mlp.fc1.weight'] + new['clip']['layers.11.linear_1.bias'] = s['cond_stage_model.transformer.text_model.encoder.layers.11.mlp.fc1.bias'] + new['clip']['layers.11.linear_2.weight'] = s['cond_stage_model.transformer.text_model.encoder.layers.11.mlp.fc2.weight'] + new['clip']['layers.11.linear_2.bias'] = s['cond_stage_model.transformer.text_model.encoder.layers.11.mlp.fc2.bias'] + 
new['clip']['layers.11.layernorm_2.weight'] = s['cond_stage_model.transformer.text_model.encoder.layers.11.layer_norm2.weight'] + new['clip']['layers.11.layernorm_2.bias'] = s['cond_stage_model.transformer.text_model.encoder.layers.11.layer_norm2.bias'] + new['clip']['layernorm.weight'] = s['cond_stage_model.transformer.text_model.final_layer_norm.weight'] + new['clip']['layernorm.bias'] = s['cond_stage_model.transformer.text_model.final_layer_norm.bias'] + new['diffusion']['unet.encoders.1.1.attention_1.in_proj.weight'] = torch.cat((s['model.diffusion_model.input_blocks.1.1.transformer_blocks.0.attn1.to_q.weight'], s['model.diffusion_model.input_blocks.1.1.transformer_blocks.0.attn1.to_k.weight'], s['model.diffusion_model.input_blocks.1.1.transformer_blocks.0.attn1.to_v.weight']), 0) + new['diffusion']['unet.encoders.2.1.attention_1.in_proj.weight'] = torch.cat((s['model.diffusion_model.input_blocks.2.1.transformer_blocks.0.attn1.to_q.weight'], s['model.diffusion_model.input_blocks.2.1.transformer_blocks.0.attn1.to_k.weight'], s['model.diffusion_model.input_blocks.2.1.transformer_blocks.0.attn1.to_v.weight']), 0) + new['diffusion']['unet.encoders.4.1.attention_1.in_proj.weight'] = torch.cat((s['model.diffusion_model.input_blocks.4.1.transformer_blocks.0.attn1.to_q.weight'], s['model.diffusion_model.input_blocks.4.1.transformer_blocks.0.attn1.to_k.weight'], s['model.diffusion_model.input_blocks.4.1.transformer_blocks.0.attn1.to_v.weight']), 0) + new['diffusion']['unet.encoders.5.1.attention_1.in_proj.weight'] = torch.cat((s['model.diffusion_model.input_blocks.5.1.transformer_blocks.0.attn1.to_q.weight'], s['model.diffusion_model.input_blocks.5.1.transformer_blocks.0.attn1.to_k.weight'], s['model.diffusion_model.input_blocks.5.1.transformer_blocks.0.attn1.to_v.weight']), 0) + new['diffusion']['unet.encoders.7.1.attention_1.in_proj.weight'] = torch.cat((s['model.diffusion_model.input_blocks.7.1.transformer_blocks.0.attn1.to_q.weight'], 
s['model.diffusion_model.input_blocks.7.1.transformer_blocks.0.attn1.to_k.weight'], s['model.diffusion_model.input_blocks.7.1.transformer_blocks.0.attn1.to_v.weight']), 0) + new['diffusion']['unet.encoders.8.1.attention_1.in_proj.weight'] = torch.cat((s['model.diffusion_model.input_blocks.8.1.transformer_blocks.0.attn1.to_q.weight'], s['model.diffusion_model.input_blocks.8.1.transformer_blocks.0.attn1.to_k.weight'], s['model.diffusion_model.input_blocks.8.1.transformer_blocks.0.attn1.to_v.weight']), 0) + new['diffusion']['unet.bottleneck.1.attention_1.in_proj.weight'] = torch.cat((s['model.diffusion_model.middle_block.1.transformer_blocks.0.attn1.to_q.weight'], s['model.diffusion_model.middle_block.1.transformer_blocks.0.attn1.to_k.weight'], s['model.diffusion_model.middle_block.1.transformer_blocks.0.attn1.to_v.weight']), 0) + new['diffusion']['unet.decoders.3.1.attention_1.in_proj.weight'] = torch.cat((s['model.diffusion_model.output_blocks.3.1.transformer_blocks.0.attn1.to_q.weight'], s['model.diffusion_model.output_blocks.3.1.transformer_blocks.0.attn1.to_k.weight'], s['model.diffusion_model.output_blocks.3.1.transformer_blocks.0.attn1.to_v.weight']), 0) + new['diffusion']['unet.decoders.4.1.attention_1.in_proj.weight'] = torch.cat((s['model.diffusion_model.output_blocks.4.1.transformer_blocks.0.attn1.to_q.weight'], s['model.diffusion_model.output_blocks.4.1.transformer_blocks.0.attn1.to_k.weight'], s['model.diffusion_model.output_blocks.4.1.transformer_blocks.0.attn1.to_v.weight']), 0) + new['diffusion']['unet.decoders.5.1.attention_1.in_proj.weight'] = torch.cat((s['model.diffusion_model.output_blocks.5.1.transformer_blocks.0.attn1.to_q.weight'], s['model.diffusion_model.output_blocks.5.1.transformer_blocks.0.attn1.to_k.weight'], s['model.diffusion_model.output_blocks.5.1.transformer_blocks.0.attn1.to_v.weight']), 0) + new['diffusion']['unet.decoders.6.1.attention_1.in_proj.weight'] = 
torch.cat((s['model.diffusion_model.output_blocks.6.1.transformer_blocks.0.attn1.to_q.weight'], s['model.diffusion_model.output_blocks.6.1.transformer_blocks.0.attn1.to_k.weight'], s['model.diffusion_model.output_blocks.6.1.transformer_blocks.0.attn1.to_v.weight']), 0) + new['diffusion']['unet.decoders.7.1.attention_1.in_proj.weight'] = torch.cat((s['model.diffusion_model.output_blocks.7.1.transformer_blocks.0.attn1.to_q.weight'], s['model.diffusion_model.output_blocks.7.1.transformer_blocks.0.attn1.to_k.weight'], s['model.diffusion_model.output_blocks.7.1.transformer_blocks.0.attn1.to_v.weight']), 0) + new['diffusion']['unet.decoders.8.1.attention_1.in_proj.weight'] = torch.cat((s['model.diffusion_model.output_blocks.8.1.transformer_blocks.0.attn1.to_q.weight'], s['model.diffusion_model.output_blocks.8.1.transformer_blocks.0.attn1.to_k.weight'], s['model.diffusion_model.output_blocks.8.1.transformer_blocks.0.attn1.to_v.weight']), 0) + new['diffusion']['unet.decoders.9.1.attention_1.in_proj.weight'] = torch.cat((s['model.diffusion_model.output_blocks.9.1.transformer_blocks.0.attn1.to_q.weight'], s['model.diffusion_model.output_blocks.9.1.transformer_blocks.0.attn1.to_k.weight'], s['model.diffusion_model.output_blocks.9.1.transformer_blocks.0.attn1.to_v.weight']), 0) + new['diffusion']['unet.decoders.10.1.attention_1.in_proj.weight'] = torch.cat((s['model.diffusion_model.output_blocks.10.1.transformer_blocks.0.attn1.to_q.weight'], s['model.diffusion_model.output_blocks.10.1.transformer_blocks.0.attn1.to_k.weight'], s['model.diffusion_model.output_blocks.10.1.transformer_blocks.0.attn1.to_v.weight']), 0) + new['diffusion']['unet.decoders.11.1.attention_1.in_proj.weight'] = torch.cat((s['model.diffusion_model.output_blocks.11.1.transformer_blocks.0.attn1.to_q.weight'], s['model.diffusion_model.output_blocks.11.1.transformer_blocks.0.attn1.to_k.weight'], s['model.diffusion_model.output_blocks.11.1.transformer_blocks.0.attn1.to_v.weight']), 0) + 
new['encoder']['13.attention.in_proj.weight'] = torch.cat((s['first_stage_model.encoder.mid.attn_1.q.weight'], s['first_stage_model.encoder.mid.attn_1.k.weight'], s['first_stage_model.encoder.mid.attn_1.v.weight']), 0).reshape((1536, 512)) + new['encoder']['13.attention.in_proj.bias'] = torch.cat((s['first_stage_model.encoder.mid.attn_1.q.bias'], s['first_stage_model.encoder.mid.attn_1.k.bias'], s['first_stage_model.encoder.mid.attn_1.v.bias']), 0) + new['encoder']['13.attention.out_proj.weight'] = s['first_stage_model.encoder.mid.attn_1.proj_out.weight'].reshape((512, 512)) + new['decoder']['3.attention.in_proj.weight'] = torch.cat((s['first_stage_model.decoder.mid.attn_1.q.weight'], s['first_stage_model.decoder.mid.attn_1.k.weight'], s['first_stage_model.decoder.mid.attn_1.v.weight']), 0).reshape((1536, 512)) + new['decoder']['3.attention.in_proj.bias'] = torch.cat((s['first_stage_model.decoder.mid.attn_1.q.bias'], s['first_stage_model.decoder.mid.attn_1.k.bias'], s['first_stage_model.decoder.mid.attn_1.v.bias']), 0) + new['decoder']['3.attention.out_proj.weight'] = s['first_stage_model.decoder.mid.attn_1.proj_out.weight'].reshape((512, 512)) + new['clip']['layers.0.attention.in_proj.weight'] = torch.cat((s['cond_stage_model.transformer.text_model.encoder.layers.0.self_attn.q_proj.weight'], s['cond_stage_model.transformer.text_model.encoder.layers.0.self_attn.k_proj.weight'], s['cond_stage_model.transformer.text_model.encoder.layers.0.self_attn.v_proj.weight']), 0) + new['clip']['layers.0.attention.in_proj.bias'] = torch.cat((s['cond_stage_model.transformer.text_model.encoder.layers.0.self_attn.q_proj.bias'], s['cond_stage_model.transformer.text_model.encoder.layers.0.self_attn.k_proj.bias'], s['cond_stage_model.transformer.text_model.encoder.layers.0.self_attn.v_proj.bias']), 0) + new['clip']['layers.1.attention.in_proj.weight'] = torch.cat((s['cond_stage_model.transformer.text_model.encoder.layers.1.self_attn.q_proj.weight'], 
s['cond_stage_model.transformer.text_model.encoder.layers.1.self_attn.k_proj.weight'], s['cond_stage_model.transformer.text_model.encoder.layers.1.self_attn.v_proj.weight']), 0) + new['clip']['layers.1.attention.in_proj.bias'] = torch.cat((s['cond_stage_model.transformer.text_model.encoder.layers.1.self_attn.q_proj.bias'], s['cond_stage_model.transformer.text_model.encoder.layers.1.self_attn.k_proj.bias'], s['cond_stage_model.transformer.text_model.encoder.layers.1.self_attn.v_proj.bias']), 0) + new['clip']['layers.2.attention.in_proj.weight'] = torch.cat((s['cond_stage_model.transformer.text_model.encoder.layers.2.self_attn.q_proj.weight'], s['cond_stage_model.transformer.text_model.encoder.layers.2.self_attn.k_proj.weight'], s['cond_stage_model.transformer.text_model.encoder.layers.2.self_attn.v_proj.weight']), 0) + new['clip']['layers.2.attention.in_proj.bias'] = torch.cat((s['cond_stage_model.transformer.text_model.encoder.layers.2.self_attn.q_proj.bias'], s['cond_stage_model.transformer.text_model.encoder.layers.2.self_attn.k_proj.bias'], s['cond_stage_model.transformer.text_model.encoder.layers.2.self_attn.v_proj.bias']), 0) + new['clip']['layers.3.attention.in_proj.weight'] = torch.cat((s['cond_stage_model.transformer.text_model.encoder.layers.3.self_attn.q_proj.weight'], s['cond_stage_model.transformer.text_model.encoder.layers.3.self_attn.k_proj.weight'], s['cond_stage_model.transformer.text_model.encoder.layers.3.self_attn.v_proj.weight']), 0) + new['clip']['layers.3.attention.in_proj.bias'] = torch.cat((s['cond_stage_model.transformer.text_model.encoder.layers.3.self_attn.q_proj.bias'], s['cond_stage_model.transformer.text_model.encoder.layers.3.self_attn.k_proj.bias'], s['cond_stage_model.transformer.text_model.encoder.layers.3.self_attn.v_proj.bias']), 0) + new['clip']['layers.4.attention.in_proj.weight'] = torch.cat((s['cond_stage_model.transformer.text_model.encoder.layers.4.self_attn.q_proj.weight'], 
s['cond_stage_model.transformer.text_model.encoder.layers.4.self_attn.k_proj.weight'], s['cond_stage_model.transformer.text_model.encoder.layers.4.self_attn.v_proj.weight']), 0) + new['clip']['layers.4.attention.in_proj.bias'] = torch.cat((s['cond_stage_model.transformer.text_model.encoder.layers.4.self_attn.q_proj.bias'], s['cond_stage_model.transformer.text_model.encoder.layers.4.self_attn.k_proj.bias'], s['cond_stage_model.transformer.text_model.encoder.layers.4.self_attn.v_proj.bias']), 0) + new['clip']['layers.5.attention.in_proj.weight'] = torch.cat((s['cond_stage_model.transformer.text_model.encoder.layers.5.self_attn.q_proj.weight'], s['cond_stage_model.transformer.text_model.encoder.layers.5.self_attn.k_proj.weight'], s['cond_stage_model.transformer.text_model.encoder.layers.5.self_attn.v_proj.weight']), 0) + new['clip']['layers.5.attention.in_proj.bias'] = torch.cat((s['cond_stage_model.transformer.text_model.encoder.layers.5.self_attn.q_proj.bias'], s['cond_stage_model.transformer.text_model.encoder.layers.5.self_attn.k_proj.bias'], s['cond_stage_model.transformer.text_model.encoder.layers.5.self_attn.v_proj.bias']), 0) + new['clip']['layers.6.attention.in_proj.weight'] = torch.cat((s['cond_stage_model.transformer.text_model.encoder.layers.6.self_attn.q_proj.weight'], s['cond_stage_model.transformer.text_model.encoder.layers.6.self_attn.k_proj.weight'], s['cond_stage_model.transformer.text_model.encoder.layers.6.self_attn.v_proj.weight']), 0) + new['clip']['layers.6.attention.in_proj.bias'] = torch.cat((s['cond_stage_model.transformer.text_model.encoder.layers.6.self_attn.q_proj.bias'], s['cond_stage_model.transformer.text_model.encoder.layers.6.self_attn.k_proj.bias'], s['cond_stage_model.transformer.text_model.encoder.layers.6.self_attn.v_proj.bias']), 0) + new['clip']['layers.7.attention.in_proj.weight'] = torch.cat((s['cond_stage_model.transformer.text_model.encoder.layers.7.self_attn.q_proj.weight'], 
s['cond_stage_model.transformer.text_model.encoder.layers.7.self_attn.k_proj.weight'], s['cond_stage_model.transformer.text_model.encoder.layers.7.self_attn.v_proj.weight']), 0) + new['clip']['layers.7.attention.in_proj.bias'] = torch.cat((s['cond_stage_model.transformer.text_model.encoder.layers.7.self_attn.q_proj.bias'], s['cond_stage_model.transformer.text_model.encoder.layers.7.self_attn.k_proj.bias'], s['cond_stage_model.transformer.text_model.encoder.layers.7.self_attn.v_proj.bias']), 0) + new['clip']['layers.8.attention.in_proj.weight'] = torch.cat((s['cond_stage_model.transformer.text_model.encoder.layers.8.self_attn.q_proj.weight'], s['cond_stage_model.transformer.text_model.encoder.layers.8.self_attn.k_proj.weight'], s['cond_stage_model.transformer.text_model.encoder.layers.8.self_attn.v_proj.weight']), 0) + new['clip']['layers.8.attention.in_proj.bias'] = torch.cat((s['cond_stage_model.transformer.text_model.encoder.layers.8.self_attn.q_proj.bias'], s['cond_stage_model.transformer.text_model.encoder.layers.8.self_attn.k_proj.bias'], s['cond_stage_model.transformer.text_model.encoder.layers.8.self_attn.v_proj.bias']), 0) + new['clip']['layers.9.attention.in_proj.weight'] = torch.cat((s['cond_stage_model.transformer.text_model.encoder.layers.9.self_attn.q_proj.weight'], s['cond_stage_model.transformer.text_model.encoder.layers.9.self_attn.k_proj.weight'], s['cond_stage_model.transformer.text_model.encoder.layers.9.self_attn.v_proj.weight']), 0) + new['clip']['layers.9.attention.in_proj.bias'] = torch.cat((s['cond_stage_model.transformer.text_model.encoder.layers.9.self_attn.q_proj.bias'], s['cond_stage_model.transformer.text_model.encoder.layers.9.self_attn.k_proj.bias'], s['cond_stage_model.transformer.text_model.encoder.layers.9.self_attn.v_proj.bias']), 0) + new['clip']['layers.10.attention.in_proj.weight'] = torch.cat((s['cond_stage_model.transformer.text_model.encoder.layers.10.self_attn.q_proj.weight'], 
s['cond_stage_model.transformer.text_model.encoder.layers.10.self_attn.k_proj.weight'], s['cond_stage_model.transformer.text_model.encoder.layers.10.self_attn.v_proj.weight']), 0) + new['clip']['layers.10.attention.in_proj.bias'] = torch.cat((s['cond_stage_model.transformer.text_model.encoder.layers.10.self_attn.q_proj.bias'], s['cond_stage_model.transformer.text_model.encoder.layers.10.self_attn.k_proj.bias'], s['cond_stage_model.transformer.text_model.encoder.layers.10.self_attn.v_proj.bias']), 0) + new['clip']['layers.11.attention.in_proj.weight'] = torch.cat((s['cond_stage_model.transformer.text_model.encoder.layers.11.self_attn.q_proj.weight'], s['cond_stage_model.transformer.text_model.encoder.layers.11.self_attn.k_proj.weight'], s['cond_stage_model.transformer.text_model.encoder.layers.11.self_attn.v_proj.weight']), 0) + new['clip']['layers.11.attention.in_proj.bias'] = torch.cat((s['cond_stage_model.transformer.text_model.encoder.layers.11.self_attn.q_proj.bias'], s['cond_stage_model.transformer.text_model.encoder.layers.11.self_attn.k_proj.bias'], s['cond_stage_model.transformer.text_model.encoder.layers.11.self_attn.v_proj.bias']), 0) + return new + + From 05bbb2b10896a36c0694d33d6a48d97da9995026 Mon Sep 17 00:00:00 2001 From: a Date: Fri, 1 Mar 2024 15:59:39 +0000 Subject: [PATCH 02/11] add a demo for loading an original stable diffusion checkpoint and generating an image --- .gitignore | 3 ++- demo_loadmodel.py | 49 +++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 51 insertions(+), 1 deletion(-) create mode 100644 demo_loadmodel.py diff --git a/.gitignore b/.gitignore index 8084e7b..7c63b63 100644 --- a/.gitignore +++ b/.gitignore @@ -201,4 +201,5 @@ cython_debug/ data/ ddata/ migrators/ -data.*.tar \ No newline at end of file +data.*.tar +out/ diff --git a/demo_loadmodel.py b/demo_loadmodel.py new file mode 100644 index 0000000..27b65a9 --- /dev/null +++ b/demo_loadmodel.py @@ -0,0 +1,49 @@ +# python3 +# A demo of loading a third party 
model (everythingV5) meant for the original stable diffusion code, and generating images. +import os +import safetensors.torch +import torch +from stable_diffusion_pytorch import pipeline, checkpoint_loader + +# Runs on intel gpu by default, to run on cuda remove the following import and change 'xpu' to 'cuda'. +import intel_extension_for_pytorch as ipex +device = 'xpu' + + +# where to store generated images +outDir = './out' + +# path to SD 1.4/1.5 based model safetensors file +modelPath = '/stor/download2/anything_inkBase.safetensors' + +# if true, use float16, otherwise float32 +useHalfPrecision = True + + + +os.makedirs(outDir, exist_ok=True) + +# load the checkpoint file and convert to half precision if needed +state_dict = safetensors.torch.load_file(modelPath) +if useHalfPrecision: + for x in state_dict: + state_dict[x] = state_dict[x].half() + +# convert to the state_dicts format that our library expects +state_dicts = checkpoint_loader.split_state_dict(state_dict) + +# create the model objects, and apply the weights in state_dicts +models = checkpoint_loader.load_models(state_dicts, device, useHalfPrecision) + +steps = 40 +seed = 12345 +prompt = '1girl,living room,silver leotard,navel,bunny girl,cute,silver hairs,black eyes,cleavage,(leaning_forward:1.6),cafe,black stocking,papilla,all fours' +negativePrompt = 'bad anatomy,bad hands,missing fingers,extra fingers,three hands,three legs,bad arms,missing legs,missing arms,poorly drawn face,bad face,fused face,cloned face,three crus,fused feet,fused thigh,extra crus,ugly fingers,horn,realistic photo,huge eyes,worst face,2girl,long fingers,disconnected limbs,worst quality,normal quality,low quality,low res,blurry,text,watermark,logo,banner,extra digits,cropped,jpeg artifacts,signature,username,error,sketch ,duplicate,ugly,monochrome,horror,geometry,mutation,disgusting' + +fileName = prompt.replace(' ', '_').replace('\\', '\').replace(',', '-') + '.' 
+ str(seed) + '.png' + +images = pipeline.generate([prompt], uncond_prompts=[negativePrompt], + models=models, n_inference_steps=steps, seed=seed, device=device, + height=768, width=512) + +images[0].save(outDir + '/' + fileName) From abe8465ea3b9795c5b1db0ba866c70e1cceed85d Mon Sep 17 00:00:00 2001 From: a Date: Fri, 1 Mar 2024 16:03:07 +0000 Subject: [PATCH 03/11] demo_loadmodel: don't put colons in filename (not allowed on windows) --- demo_loadmodel.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/demo_loadmodel.py b/demo_loadmodel.py index 27b65a9..112e035 100644 --- a/demo_loadmodel.py +++ b/demo_loadmodel.py @@ -40,7 +40,7 @@ prompt = '1girl,living room,silver leotard,navel,bunny girl,cute,silver hairs,black eyes,cleavage,(leaning_forward:1.6),cafe,black stocking,papilla,all fours' negativePrompt = 'bad anatomy,bad hands,missing fingers,extra fingers,three hands,three legs,bad arms,missing legs,missing arms,poorly drawn face,bad face,fused face,cloned face,three crus,fused feet,fused thigh,extra crus,ugly fingers,horn,realistic photo,huge eyes,worst face,2girl,long fingers,disconnected limbs,worst quality,normal quality,low quality,low res,blurry,text,watermark,logo,banner,extra digits,cropped,jpeg artifacts,signature,username,error,sketch ,duplicate,ugly,monochrome,horror,geometry,mutation,disgusting' -fileName = prompt.replace(' ', '_').replace('\\', '\').replace(',', '-') + '.' + str(seed) + '.png' +fileName = prompt.replace(' ', '_').replace('\\', '\').replace(':', '⦂').replace(',', '-') + '.' 
+ str(seed) + '.png' images = pipeline.generate([prompt], uncond_prompts=[negativePrompt], models=models, n_inference_steps=steps, seed=seed, device=device, From 38fb152537ec7d3fd6fc3fdd7c499797d0c3c7e8 Mon Sep 17 00:00:00 2001 From: a Date: Fri, 1 Mar 2024 16:06:05 +0000 Subject: [PATCH 04/11] demo_loadmodel: add link for the model used --- demo_loadmodel.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/demo_loadmodel.py b/demo_loadmodel.py index 112e035..27588a7 100644 --- a/demo_loadmodel.py +++ b/demo_loadmodel.py @@ -14,7 +14,7 @@ outDir = './out' # path to SD 1.4/1.5 based model safetensors file -modelPath = '/stor/download2/anything_inkBase.safetensors' +modelPath = '/stor/download2/anything_inkBase.safetensors' # download from https://civitai.com/models/9409?modelVersionId=90854 # if true, use float16, otherwise float32 useHalfPrecision = True From 20f7f972fd949e06569833dfe61e883e21c65bcf Mon Sep 17 00:00:00 2001 From: a Date: Mon, 4 Mar 2024 07:22:53 +0000 Subject: [PATCH 05/11] move checkpoint_loader to convert_from_sdmodel and model_loader; update example --- .gitignore | 1 + demo_loadmodel.py | 49 ------------- demo_sdmodel_generate_image.py | 46 ++++++++++++ ...oint_loader.py => convert_from_sdmodel.py} | 36 +--------- stable_diffusion_pytorch/model_loader.py | 72 +++++++++++++++---- 5 files changed, 107 insertions(+), 97 deletions(-) delete mode 100644 demo_loadmodel.py create mode 100644 demo_sdmodel_generate_image.py rename stable_diffusion_pytorch/{checkpoint_loader.py => convert_from_sdmodel.py} (98%) diff --git a/.gitignore b/.gitignore index 7c63b63..f4104ec 100644 --- a/.gitignore +++ b/.gitignore @@ -202,4 +202,5 @@ data/ ddata/ migrators/ data.*.tar +sd-v1-5.safetensors out/ diff --git a/demo_loadmodel.py b/demo_loadmodel.py deleted file mode 100644 index 27588a7..0000000 --- a/demo_loadmodel.py +++ /dev/null @@ -1,49 +0,0 @@ -# python3 -# A demo of loading a third party model (everythingV5) meant for the original stable 
diffusion code, and generating images. -import os -import safetensors.torch -import torch -from stable_diffusion_pytorch import pipeline, checkpoint_loader - -# Runs on intel gpu by default, to run on cuda remove the following import and change 'xpu' to 'cuda'. -import intel_extension_for_pytorch as ipex -device = 'xpu' - - -# where to store generated images -outDir = './out' - -# path to SD 1.4/1.5 based model safetensors file -modelPath = '/stor/download2/anything_inkBase.safetensors' # download from https://civitai.com/models/9409?modelVersionId=90854 - -# if true, use float16, otherwise float32 -useHalfPrecision = True - - - -os.makedirs(outDir, exist_ok=True) - -# load the checkpoint file and convert to half precision if needed -state_dict = safetensors.torch.load_file(modelPath) -if useHalfPrecision: - for x in state_dict: - state_dict[x] = state_dict[x].half() - -# convert to the state_dicts format that our library expects -state_dicts = checkpoint_loader.split_state_dict(state_dict) - -# create the model objects, and apply the weights in state_dicts -models = checkpoint_loader.load_models(state_dicts, device, useHalfPrecision) - -steps = 40 -seed = 12345 -prompt = '1girl,living room,silver leotard,navel,bunny girl,cute,silver hairs,black eyes,cleavage,(leaning_forward:1.6),cafe,black stocking,papilla,all fours' -negativePrompt = 'bad anatomy,bad hands,missing fingers,extra fingers,three hands,three legs,bad arms,missing legs,missing arms,poorly drawn face,bad face,fused face,cloned face,three crus,fused feet,fused thigh,extra crus,ugly fingers,horn,realistic photo,huge eyes,worst face,2girl,long fingers,disconnected limbs,worst quality,normal quality,low quality,low res,blurry,text,watermark,logo,banner,extra digits,cropped,jpeg artifacts,signature,username,error,sketch ,duplicate,ugly,monochrome,horror,geometry,mutation,disgusting' - -fileName = prompt.replace(' ', '_').replace('\\', '\').replace(':', '⦂').replace(',', '-') + '.' 
+ str(seed) + '.png' - -images = pipeline.generate([prompt], uncond_prompts=[negativePrompt], - models=models, n_inference_steps=steps, seed=seed, device=device, - height=768, width=512) - -images[0].save(outDir + '/' + fileName) diff --git a/demo_sdmodel_generate_image.py b/demo_sdmodel_generate_image.py new file mode 100644 index 0000000..f408d56 --- /dev/null +++ b/demo_sdmodel_generate_image.py @@ -0,0 +1,46 @@ +# python3 +# A demo of loading a third party model (everythingV5) meant for the original stable diffusion code, and generating images. +import os +import safetensors.torch +import torch +from stable_diffusion_pytorch import pipeline, convert_from_sdmodel, model_loader + +# Runs on intel gpu by default, to run on cuda remove the following import and change 'xpu' to 'cuda'. +import intel_extension_for_pytorch as ipex +device = 'xpu' + + +# where to store generated images +outDir = './out' + +# path to SD 1.4/1.5 based model safetensors file +modelPath = './sd-v1-5.safetensors' # download from https://huggingface.co/tabtap/sd-v1-5.safetensor/tree/main + +# either float16 or float32 +dtype = torch.float16 + + + +os.makedirs(outDir, exist_ok=True) + +# load the checkpoint file +state_dict = safetensors.torch.load_file(modelPath) + +# convert to the state_dicts format that our library expects +state_dicts = convert_from_sdmodel.split_state_dict(state_dict) + +# create the model objects, and apply the weights in state_dicts +models = model_loader.load_models(state_dicts, device, dtype) + +steps = 40 +seed = 12345 +prompt = '1girl,cirno,anime style' +negativePrompt = 'bad anatomy,bad hands,missing fingers,extra fingers' + +fileName = prompt.replace(' ', '_').replace('\\', '\').replace(':', '⦂').replace(',', '-') + '.' 
+ str(seed) + '.png' + +images = pipeline.generate([prompt], uncond_prompts=[negativePrompt], + models=models, n_inference_steps=steps, seed=seed, device=device, + height=768, width=512) + +images[0].save(outDir + '/' + fileName) diff --git a/stable_diffusion_pytorch/checkpoint_loader.py b/stable_diffusion_pytorch/convert_from_sdmodel.py similarity index 98% rename from stable_diffusion_pytorch/checkpoint_loader.py rename to stable_diffusion_pytorch/convert_from_sdmodel.py index c21d27a..3f09b1b 100644 --- a/stable_diffusion_pytorch/checkpoint_loader.py +++ b/stable_diffusion_pytorch/convert_from_sdmodel.py @@ -2,43 +2,11 @@ import safetensors.torch from . import Tokenizer, CLIP, Encoder, Decoder, Diffusion -r""" - Create the 4 models that the pipeline expects and load the weights from state_dicts (not an original stable diffusion state_dict!). - Args: - state_dicts (`Dict[str, str]`): - A dict with 4 keys: clip, encoder, decoder, diffusion; each key's value is a dict of weights for that model. - You can pass in the dict returned by split_state_dict(). - device (`str`): - The device to run the models on, passed to model.to() - useHalfPrecision (`bool`, *optional*): - If true, use float16, otherwise float32. 
- Returns: - `Dict[str, torch.nn.Module]`: - The loaded models to be passed to pipeline.generate() - """ -def load_models(state_dicts, device, useHalfPrecision=False): - models = {} - if useHalfPrecision: - models['clip'] = CLIP().to(device).half() - models['encoder'] = Encoder().to(device).half() - models['decoder'] = Decoder().to(device).half() - models['diffusion'] = Diffusion().to(device).half() - else: - models['clip'] = CLIP().to(device) - models['encoder'] = Encoder().to(device) - models['decoder'] = Decoder().to(device) - models['diffusion'] = Diffusion().to(device) - - models['clip'].load_state_dict(state_dicts['clip']) - models['encoder'].load_state_dict(state_dicts['encoder']) - models['decoder'].load_state_dict(state_dicts['decoder']) - models['diffusion'].load_state_dict(state_dicts['diffusion']) - return models - r""" Our library model implementation is laid out differently from the original stable diffusion, so original SD state_dict can not be directly loaded. This function converts an original SD - state_dict to the 4 state_dicts (clip, encoder, decoder, diffusion) that our models expect. + state_dict to the 4 state_dicts (clip, encoder, decoder, diffusion) that can be passed to + model_loader.load_models() Args: state_dict (`Dict[str, str]`): The original state_dict to convert. diff --git a/stable_diffusion_pytorch/model_loader.py b/stable_diffusion_pytorch/model_loader.py index 9e5bd89..a87dc4c 100644 --- a/stable_diffusion_pytorch/model_loader.py +++ b/stable_diffusion_pytorch/model_loader.py @@ -31,42 +31,86 @@ def make_compatible(state_dict): return state_dict -def load_clip(device): +# The following functions load the default models, used when the user supplied `models` is missing +# one or more of (clip, encoder, decoder, diffusion). 
+ +def load_clip(device, dtype): state_dict = torch.load(util.get_file_path('ckpt/clip.pt')) state_dict = make_compatible(state_dict) - clip = CLIP().to(device) + clip = CLIP().to(dtype).to(device) clip.load_state_dict(state_dict) return clip -def load_encoder(device): +def load_encoder(device, dtype): state_dict = torch.load(util.get_file_path('ckpt/encoder.pt')) state_dict = make_compatible(state_dict) - encoder = Encoder().to(device) + encoder = Encoder().to(dtype).to(device) encoder.load_state_dict(state_dict) return encoder -def load_decoder(device): +def load_decoder(device, dtype): state_dict = torch.load(util.get_file_path('ckpt/decoder.pt')) state_dict = make_compatible(state_dict) - decoder = Decoder().to(device) + decoder = Decoder().to(dtype).to(device) decoder.load_state_dict(state_dict) return decoder -def load_diffusion(device): +def load_diffusion(device, dtype): state_dict = torch.load(util.get_file_path('ckpt/diffusion.pt')) state_dict = make_compatible(state_dict) - diffusion = Diffusion().to(device) + diffusion = Diffusion().to(dtype).to(device) diffusion.load_state_dict(state_dict) return diffusion -def preload_models(device): +r""" + Create and load the 4 default models (clip, encoder, decoder, diffusion). + Args: + device (`str`): + The device to run the models on, passed to model.to() + dtype (`torch.dtype`, *optional*): + The data type of the model to create. Note that this can be different + from the data type of the checkpoint file, and pytorch will auto convert. 
+ Returns: + `Dict[str, torch.nn.Module]`: + The loaded models to be passed to pipeline.generate() + """ +def preload_models(device, dtype=torch.float32): return { - 'clip': load_clip(device), - 'encoder': load_encoder(device), - 'decoder': load_decoder(device), - 'diffusion': load_diffusion(device), - } \ No newline at end of file + 'clip': load_clip(device, dtype), + 'encoder': load_encoder(device, dtype), + 'decoder': load_decoder(device, dtype), + 'diffusion': load_diffusion(device, dtype), + } + +r""" + Create the 4 models that the pipeline expects and load the weights from state_dicts + (not an original stable diffusion state_dict!). + Args: + state_dicts (`Dict[str, str]`): + A dict with 4 keys: clip, encoder, decoder, diffusion; each key's value is a dict of weights + for that model. You can pass in the dict returned by convert_from_sdmodel.split_state_dict(). + device (`str`): + The device to run the models on, passed to model.to() + dtype (`torch.dtype`, *optional*): + The data type of the model to create. Note that this can be different + from the data type of the checkpoint file, and pytorch will auto convert. 
+ Returns: + `Dict[str, torch.nn.Module]`: + The loaded models to be passed to pipeline.generate() + """ +def load_models(state_dicts, device, dtype=torch.float32): + models = {} + models['clip'] = CLIP().to(dtype).to(device) + models['encoder'] = Encoder().to(dtype).to(device) + models['decoder'] = Decoder().to(dtype).to(device) + models['diffusion'] = Diffusion().to(dtype).to(device) + + models['clip'].load_state_dict(state_dicts['clip']) + models['encoder'].load_state_dict(state_dicts['encoder']) + models['decoder'].load_state_dict(state_dicts['decoder']) + models['diffusion'].load_state_dict(state_dicts['diffusion']) + return models From 536e04ab0414ff50b45e32fe8164780c944184eb Mon Sep 17 00:00:00 2001 From: a Date: Mon, 4 Mar 2024 08:02:36 +0000 Subject: [PATCH 06/11] demo_sdmodel_generate_image.py: convert tabs to spaces --- demo_sdmodel_generate_image.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/demo_sdmodel_generate_image.py b/demo_sdmodel_generate_image.py index f408d56..efaf0a2 100644 --- a/demo_sdmodel_generate_image.py +++ b/demo_sdmodel_generate_image.py @@ -40,7 +40,7 @@ fileName = prompt.replace(' ', '_').replace('\\', '\').replace(':', '⦂').replace(',', '-') + '.' 
+ str(seed) + '.png' images = pipeline.generate([prompt], uncond_prompts=[negativePrompt], - models=models, n_inference_steps=steps, seed=seed, device=device, - height=768, width=512) + models=models, n_inference_steps=steps, seed=seed, device=device, + height=768, width=512) images[0].save(outDir + '/' + fileName) From 29222fcd7119ad844746b4c1f47000c7e3b55beb Mon Sep 17 00:00:00 2001 From: a Date: Mon, 4 Mar 2024 08:02:46 +0000 Subject: [PATCH 07/11] add convert_model.py --- convert_model.py | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) create mode 100644 convert_model.py diff --git a/convert_model.py b/convert_model.py new file mode 100644 index 0000000..671239c --- /dev/null +++ b/convert_model.py @@ -0,0 +1,28 @@ +import argparse +import safetensors.torch +from stable_diffusion_pytorch import convert_from_sdmodel + +parser = argparse.ArgumentParser() +parser.add_argument( + "--sd_model", type=str, required=True, + help="Stable Diffusion model to load in safetensors format" +) +parser.add_argument( + "--save_to", type=str, required=True, + help="The prefix of the path to save to, for example \"./mymodel_\" " \ + "will save 4 files, mymodel_clip.safetensors, mymodel_decoder.safetensors, " \ + "mymodel_encoder.safetensors, and mymodel_diffusion.safetensors" +) + +args = parser.parse_args() + +# load the checkpoint file +state_dict = safetensors.torch.load_file(args.sd_model) + +# convert to the state_dicts format that our library expects +state_dicts = convert_from_sdmodel.split_state_dict(state_dict) + +for key in state_dicts: + outPath = args.save_to + key + '.safetensors' + print('Writing', outPath) + safetensors.torch.save_file(state_dicts[key], outPath) From bfb931ab72c60a28fe962882a110f4d704fc81f5 Mon Sep 17 00:00:00 2001 From: a Date: Mon, 4 Mar 2024 08:06:45 +0000 Subject: [PATCH 08/11] default dtype float32 --- stable_diffusion_pytorch/model_loader.py | 8 ++++---- stable_diffusion_pytorch/pipeline.py | 2 ++ 2 files changed, 6 
insertions(+), 4 deletions(-) diff --git a/stable_diffusion_pytorch/model_loader.py b/stable_diffusion_pytorch/model_loader.py index a87dc4c..1756ad6 100644 --- a/stable_diffusion_pytorch/model_loader.py +++ b/stable_diffusion_pytorch/model_loader.py @@ -34,7 +34,7 @@ def make_compatible(state_dict): # The following functions load the default models, used when the user supplied `models` is missing # one or more of (clip, encoder, decoder, diffusion). -def load_clip(device, dtype): +def load_clip(device, dtype=torch.float32): state_dict = torch.load(util.get_file_path('ckpt/clip.pt')) state_dict = make_compatible(state_dict) @@ -42,7 +42,7 @@ def load_clip(device, dtype): clip.load_state_dict(state_dict) return clip -def load_encoder(device, dtype): +def load_encoder(device, dtype=torch.float32): state_dict = torch.load(util.get_file_path('ckpt/encoder.pt')) state_dict = make_compatible(state_dict) @@ -50,7 +50,7 @@ def load_encoder(device, dtype): encoder.load_state_dict(state_dict) return encoder -def load_decoder(device, dtype): +def load_decoder(device, dtype=torch.float32): state_dict = torch.load(util.get_file_path('ckpt/decoder.pt')) state_dict = make_compatible(state_dict) @@ -58,7 +58,7 @@ def load_decoder(device, dtype): decoder.load_state_dict(state_dict) return decoder -def load_diffusion(device, dtype): +def load_diffusion(device, dtype=torch.float32): state_dict = torch.load(util.get_file_path('ckpt/diffusion.pt')) state_dict = make_compatible(state_dict) diff --git a/stable_diffusion_pytorch/pipeline.py b/stable_diffusion_pytorch/pipeline.py index b5d1868..3d16208 100644 --- a/stable_diffusion_pytorch/pipeline.py +++ b/stable_diffusion_pytorch/pipeline.py @@ -58,6 +58,8 @@ def generate( expense of slower inference. This parameter will be modulated by `strength`. models (`Dict[str, torch.nn.Module]`, *optional*): Preloaded models. If some or all models are not provided, they will be loaded dynamically. 
+ Note that auto-loaded modules default to float32, so if you are using float16 models, you must + provide all 4 models. seed (`int`, *optional*): A seed to make generation deterministic. device (`str` or `torch.device`, *optional*): From 95b1e25b7879a1fa09103db7b9ebf373cbbd4de4 Mon Sep 17 00:00:00 2001 From: a Date: Mon, 4 Mar 2024 08:16:49 +0000 Subject: [PATCH 09/11] convert_model.py: add help text --- convert_model.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/convert_model.py b/convert_model.py index 671239c..c72460f 100644 --- a/convert_model.py +++ b/convert_model.py @@ -2,7 +2,7 @@ import safetensors.torch from stable_diffusion_pytorch import convert_from_sdmodel -parser = argparse.ArgumentParser() +parser = argparse.ArgumentParser(description='Convert an original stable diffusion checkpoint file in safetensor format to 4 separate safetensors for this library.') parser.add_argument( "--sd_model", type=str, required=True, help="Stable Diffusion model to load in safetensors format" From a02642de422544440719a96c858f84fc4b21e03e Mon Sep 17 00:00:00 2001 From: a Date: Mon, 4 Mar 2024 08:25:13 +0000 Subject: [PATCH 10/11] README.md: add example of how to load original sd checkpoint --- README.md | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/README.md b/README.md index 5abcb59..55e567d 100644 --- a/README.md +++ b/README.md @@ -95,6 +95,21 @@ prompts = ["a photograph of an astronaut riding a horse"] images = pipeline.generate(prompts, models=models, device='cuda', idle_device='cpu') ``` +Load an original stable diffusion model in safetensors format: +```py +import safetensors.torch +from stable_diffusion_pytorch import convert_from_sdmodel, model_loader + +# load the checkpoint file +state_dict = safetensors.torch.load_file('/PATH/TO/sd-v1-5.safetensors') + +# split the state_dict into the 4 state_dicts for our models +state_dicts = convert_from_sdmodel.split_state_dict(state_dict) + +# create the model objects, and 
apply the weights in state_dicts +models = model_loader.load_models(state_dicts, 'cpu') +``` + Image-to-image generation: ```py from PIL import Image From 411fd8e02444448a114425a60d1c844698cbae6d Mon Sep 17 00:00:00 2001 From: a Date: Mon, 4 Mar 2024 08:31:08 +0000 Subject: [PATCH 11/11] README.md: make sd model example more complete --- README.md | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 55e567d..add3be1 100644 --- a/README.md +++ b/README.md @@ -98,7 +98,7 @@ images = pipeline.generate(prompts, models=models, device='cuda', idle_device='c Load an original stable diffusion model in safetensors format: ```py import safetensors.torch -from stable_diffusion_pytorch import convert_from_sdmodel, model_loader +from stable_diffusion_pytorch import pipeline, convert_from_sdmodel, model_loader # load the checkpoint file state_dict = safetensors.torch.load_file('/PATH/TO/sd-v1-5.safetensors') @@ -108,6 +108,9 @@ state_dicts = convert_from_sdmodel.split_state_dict(state_dict) # create the model objects, and apply the weights in state_dicts models = model_loader.load_models(state_dicts, 'cpu') + +prompts = ["a photograph of an astronaut riding a horse"] +images = pipeline.generate(prompts, models=models, device='cuda', idle_device='cpu') ``` Image-to-image generation: