From 185f69e1fed9500448ffbe7150bf79d4e53e4d6b Mon Sep 17 00:00:00 2001
From: a
Date: Fri, 1 Mar 2024 15:41:32 +0000
Subject: [PATCH 01/11] add checkpoint_loader, support original stable
diffusion checkpoints
---
stable_diffusion_pytorch/checkpoint_loader.py | 1101 +++++++++++++++++
1 file changed, 1101 insertions(+)
create mode 100644 stable_diffusion_pytorch/checkpoint_loader.py
diff --git a/stable_diffusion_pytorch/checkpoint_loader.py b/stable_diffusion_pytorch/checkpoint_loader.py
new file mode 100644
index 0000000..c21d27a
--- /dev/null
+++ b/stable_diffusion_pytorch/checkpoint_loader.py
@@ -0,0 +1,1101 @@
+import torch
+import safetensors.torch
+from . import Tokenizer, CLIP, Encoder, Decoder, Diffusion
+
+def load_models(state_dicts, device, useHalfPrecision=False):
+    r"""
+    Create the 4 models that the pipeline expects and load their weights.
+
+    Note: the state_dicts must already be in this library's layout (use
+    split_state_dict() to convert an original Stable Diffusion checkpoint).
+
+    Args:
+        state_dicts (`Dict[str, Dict[str, torch.Tensor]]`):
+            A dict with 4 keys: clip, encoder, decoder, diffusion; each
+            value is the state_dict of weights for that model.
+        device (`str`):
+            The device to run the models on, passed to model.to().
+        useHalfPrecision (`bool`, *optional*, defaults to False):
+            If True, convert the models to float16; otherwise keep float32.
+    Returns:
+        `Dict[str, torch.nn.Module]`:
+            The loaded models to be passed to pipeline.generate()
+    """
+    # Map each pipeline key to its model class; building the models in a
+    # loop avoids duplicating the half/full-precision branches.
+    model_classes = {'clip': CLIP, 'encoder': Encoder,
+                     'decoder': Decoder, 'diffusion': Diffusion}
+    models = {}
+    for name, cls in model_classes.items():
+        model = cls().to(device)
+        if useHalfPrecision:
+            # half() casts all parameters/buffers to float16.
+            model = model.half()
+        model.load_state_dict(state_dicts[name])
+        models[name] = model
+    return models
+
+r"""
+    Our library's model implementation is laid out differently from the original Stable Diffusion, so
+    an original SD state_dict cannot be loaded directly. This function converts an original SD
+    state_dict into the 4 state_dicts (clip, encoder, decoder, diffusion) that our models expect.
+    Args:
+        state_dict (`Dict[str, torch.Tensor]`):
+            The original state_dict to convert.
+    Returns:
+        `Dict[str, Dict[str, torch.Tensor]]`:
+            The converted state_dicts.
+    """
+def split_state_dict(state_dict):
+ new = {}
+ new['diffusion'] = {}
+ new['encoder'] = {}
+ new['decoder'] = {}
+ new['clip'] = {}
+
+ s = state_dict
+ new['diffusion']['time_embedding.linear_1.weight'] = s['model.diffusion_model.time_embed.0.weight']
+ new['diffusion']['time_embedding.linear_1.bias'] = s['model.diffusion_model.time_embed.0.bias']
+ new['diffusion']['time_embedding.linear_2.weight'] = s['model.diffusion_model.time_embed.2.weight']
+ new['diffusion']['time_embedding.linear_2.bias'] = s['model.diffusion_model.time_embed.2.bias']
+ new['diffusion']['unet.encoders.0.0.weight'] = s['model.diffusion_model.input_blocks.0.0.weight']
+ new['diffusion']['unet.encoders.0.0.bias'] = s['model.diffusion_model.input_blocks.0.0.bias']
+ new['diffusion']['unet.encoders.1.0.groupnorm_feature.weight'] = s['model.diffusion_model.input_blocks.1.0.in_layers.0.weight']
+ new['diffusion']['unet.encoders.1.0.groupnorm_feature.bias'] = s['model.diffusion_model.input_blocks.1.0.in_layers.0.bias']
+ new['diffusion']['unet.encoders.1.0.conv_feature.weight'] = s['model.diffusion_model.input_blocks.1.0.in_layers.2.weight']
+ new['diffusion']['unet.encoders.1.0.conv_feature.bias'] = s['model.diffusion_model.input_blocks.1.0.in_layers.2.bias']
+ new['diffusion']['unet.encoders.1.0.linear_time.weight'] = s['model.diffusion_model.input_blocks.1.0.emb_layers.1.weight']
+ new['diffusion']['unet.encoders.1.0.linear_time.bias'] = s['model.diffusion_model.input_blocks.1.0.emb_layers.1.bias']
+ new['diffusion']['unet.encoders.1.0.groupnorm_merged.weight'] = s['model.diffusion_model.input_blocks.1.0.out_layers.0.weight']
+ new['diffusion']['unet.encoders.1.0.groupnorm_merged.bias'] = s['model.diffusion_model.input_blocks.1.0.out_layers.0.bias']
+ new['diffusion']['unet.encoders.1.0.conv_merged.weight'] = s['model.diffusion_model.input_blocks.1.0.out_layers.3.weight']
+ new['diffusion']['unet.encoders.1.0.conv_merged.bias'] = s['model.diffusion_model.input_blocks.1.0.out_layers.3.bias']
+ new['diffusion']['unet.encoders.1.1.groupnorm.weight'] = s['model.diffusion_model.input_blocks.1.1.norm.weight']
+ new['diffusion']['unet.encoders.1.1.groupnorm.bias'] = s['model.diffusion_model.input_blocks.1.1.norm.bias']
+ new['diffusion']['unet.encoders.1.1.conv_input.weight'] = s['model.diffusion_model.input_blocks.1.1.proj_in.weight']
+ new['diffusion']['unet.encoders.1.1.conv_input.bias'] = s['model.diffusion_model.input_blocks.1.1.proj_in.bias']
+ new['diffusion']['unet.encoders.1.1.attention_1.out_proj.weight'] = s['model.diffusion_model.input_blocks.1.1.transformer_blocks.0.attn1.to_out.0.weight']
+ new['diffusion']['unet.encoders.1.1.attention_1.out_proj.bias'] = s['model.diffusion_model.input_blocks.1.1.transformer_blocks.0.attn1.to_out.0.bias']
+ new['diffusion']['unet.encoders.1.1.linear_geglu_1.weight'] = s['model.diffusion_model.input_blocks.1.1.transformer_blocks.0.ff.net.0.proj.weight']
+ new['diffusion']['unet.encoders.1.1.linear_geglu_1.bias'] = s['model.diffusion_model.input_blocks.1.1.transformer_blocks.0.ff.net.0.proj.bias']
+ new['diffusion']['unet.encoders.1.1.linear_geglu_2.weight'] = s['model.diffusion_model.input_blocks.1.1.transformer_blocks.0.ff.net.2.weight']
+ new['diffusion']['unet.encoders.1.1.linear_geglu_2.bias'] = s['model.diffusion_model.input_blocks.1.1.transformer_blocks.0.ff.net.2.bias']
+ new['diffusion']['unet.encoders.1.1.attention_2.q_proj.weight'] = s['model.diffusion_model.input_blocks.1.1.transformer_blocks.0.attn2.to_q.weight']
+ new['diffusion']['unet.encoders.1.1.attention_2.k_proj.weight'] = s['model.diffusion_model.input_blocks.1.1.transformer_blocks.0.attn2.to_k.weight']
+ new['diffusion']['unet.encoders.1.1.attention_2.v_proj.weight'] = s['model.diffusion_model.input_blocks.1.1.transformer_blocks.0.attn2.to_v.weight']
+ new['diffusion']['unet.encoders.1.1.attention_2.out_proj.weight'] = s['model.diffusion_model.input_blocks.1.1.transformer_blocks.0.attn2.to_out.0.weight']
+ new['diffusion']['unet.encoders.1.1.attention_2.out_proj.bias'] = s['model.diffusion_model.input_blocks.1.1.transformer_blocks.0.attn2.to_out.0.bias']
+ new['diffusion']['unet.encoders.1.1.layernorm_1.weight'] = s['model.diffusion_model.input_blocks.1.1.transformer_blocks.0.norm1.weight']
+ new['diffusion']['unet.encoders.1.1.layernorm_1.bias'] = s['model.diffusion_model.input_blocks.1.1.transformer_blocks.0.norm1.bias']
+ new['diffusion']['unet.encoders.1.1.layernorm_2.weight'] = s['model.diffusion_model.input_blocks.1.1.transformer_blocks.0.norm2.weight']
+ new['diffusion']['unet.encoders.1.1.layernorm_2.bias'] = s['model.diffusion_model.input_blocks.1.1.transformer_blocks.0.norm2.bias']
+ new['diffusion']['unet.encoders.1.1.layernorm_3.weight'] = s['model.diffusion_model.input_blocks.1.1.transformer_blocks.0.norm3.weight']
+ new['diffusion']['unet.encoders.1.1.layernorm_3.bias'] = s['model.diffusion_model.input_blocks.1.1.transformer_blocks.0.norm3.bias']
+ new['diffusion']['unet.encoders.1.1.conv_output.weight'] = s['model.diffusion_model.input_blocks.1.1.proj_out.weight']
+ new['diffusion']['unet.encoders.1.1.conv_output.bias'] = s['model.diffusion_model.input_blocks.1.1.proj_out.bias']
+ new['diffusion']['unet.encoders.2.0.groupnorm_feature.weight'] = s['model.diffusion_model.input_blocks.2.0.in_layers.0.weight']
+ new['diffusion']['unet.encoders.2.0.groupnorm_feature.bias'] = s['model.diffusion_model.input_blocks.2.0.in_layers.0.bias']
+ new['diffusion']['unet.encoders.2.0.conv_feature.weight'] = s['model.diffusion_model.input_blocks.2.0.in_layers.2.weight']
+ new['diffusion']['unet.encoders.2.0.conv_feature.bias'] = s['model.diffusion_model.input_blocks.2.0.in_layers.2.bias']
+ new['diffusion']['unet.encoders.2.0.linear_time.weight'] = s['model.diffusion_model.input_blocks.2.0.emb_layers.1.weight']
+ new['diffusion']['unet.encoders.2.0.linear_time.bias'] = s['model.diffusion_model.input_blocks.2.0.emb_layers.1.bias']
+ new['diffusion']['unet.encoders.2.0.groupnorm_merged.weight'] = s['model.diffusion_model.input_blocks.2.0.out_layers.0.weight']
+ new['diffusion']['unet.encoders.2.0.groupnorm_merged.bias'] = s['model.diffusion_model.input_blocks.2.0.out_layers.0.bias']
+ new['diffusion']['unet.encoders.2.0.conv_merged.weight'] = s['model.diffusion_model.input_blocks.2.0.out_layers.3.weight']
+ new['diffusion']['unet.encoders.2.0.conv_merged.bias'] = s['model.diffusion_model.input_blocks.2.0.out_layers.3.bias']
+ new['diffusion']['unet.encoders.2.1.groupnorm.weight'] = s['model.diffusion_model.input_blocks.2.1.norm.weight']
+ new['diffusion']['unet.encoders.2.1.groupnorm.bias'] = s['model.diffusion_model.input_blocks.2.1.norm.bias']
+ new['diffusion']['unet.encoders.2.1.conv_input.weight'] = s['model.diffusion_model.input_blocks.2.1.proj_in.weight']
+ new['diffusion']['unet.encoders.2.1.conv_input.bias'] = s['model.diffusion_model.input_blocks.2.1.proj_in.bias']
+ new['diffusion']['unet.encoders.2.1.attention_1.out_proj.weight'] = s['model.diffusion_model.input_blocks.2.1.transformer_blocks.0.attn1.to_out.0.weight']
+ new['diffusion']['unet.encoders.2.1.attention_1.out_proj.bias'] = s['model.diffusion_model.input_blocks.2.1.transformer_blocks.0.attn1.to_out.0.bias']
+ new['diffusion']['unet.encoders.2.1.linear_geglu_1.weight'] = s['model.diffusion_model.input_blocks.2.1.transformer_blocks.0.ff.net.0.proj.weight']
+ new['diffusion']['unet.encoders.2.1.linear_geglu_1.bias'] = s['model.diffusion_model.input_blocks.2.1.transformer_blocks.0.ff.net.0.proj.bias']
+ new['diffusion']['unet.encoders.2.1.linear_geglu_2.weight'] = s['model.diffusion_model.input_blocks.2.1.transformer_blocks.0.ff.net.2.weight']
+ new['diffusion']['unet.encoders.2.1.linear_geglu_2.bias'] = s['model.diffusion_model.input_blocks.2.1.transformer_blocks.0.ff.net.2.bias']
+ new['diffusion']['unet.encoders.2.1.attention_2.q_proj.weight'] = s['model.diffusion_model.input_blocks.2.1.transformer_blocks.0.attn2.to_q.weight']
+ new['diffusion']['unet.encoders.2.1.attention_2.k_proj.weight'] = s['model.diffusion_model.input_blocks.2.1.transformer_blocks.0.attn2.to_k.weight']
+ new['diffusion']['unet.encoders.2.1.attention_2.v_proj.weight'] = s['model.diffusion_model.input_blocks.2.1.transformer_blocks.0.attn2.to_v.weight']
+ new['diffusion']['unet.encoders.2.1.attention_2.out_proj.weight'] = s['model.diffusion_model.input_blocks.2.1.transformer_blocks.0.attn2.to_out.0.weight']
+ new['diffusion']['unet.encoders.2.1.attention_2.out_proj.bias'] = s['model.diffusion_model.input_blocks.2.1.transformer_blocks.0.attn2.to_out.0.bias']
+ new['diffusion']['unet.encoders.2.1.layernorm_1.weight'] = s['model.diffusion_model.input_blocks.2.1.transformer_blocks.0.norm1.weight']
+ new['diffusion']['unet.encoders.2.1.layernorm_1.bias'] = s['model.diffusion_model.input_blocks.2.1.transformer_blocks.0.norm1.bias']
+ new['diffusion']['unet.encoders.2.1.layernorm_2.weight'] = s['model.diffusion_model.input_blocks.2.1.transformer_blocks.0.norm2.weight']
+ new['diffusion']['unet.encoders.2.1.layernorm_2.bias'] = s['model.diffusion_model.input_blocks.2.1.transformer_blocks.0.norm2.bias']
+ new['diffusion']['unet.encoders.2.1.layernorm_3.weight'] = s['model.diffusion_model.input_blocks.2.1.transformer_blocks.0.norm3.weight']
+ new['diffusion']['unet.encoders.2.1.layernorm_3.bias'] = s['model.diffusion_model.input_blocks.2.1.transformer_blocks.0.norm3.bias']
+ new['diffusion']['unet.encoders.2.1.conv_output.weight'] = s['model.diffusion_model.input_blocks.2.1.proj_out.weight']
+ new['diffusion']['unet.encoders.2.1.conv_output.bias'] = s['model.diffusion_model.input_blocks.2.1.proj_out.bias']
+ new['diffusion']['unet.encoders.3.0.weight'] = s['model.diffusion_model.input_blocks.3.0.op.weight']
+ new['diffusion']['unet.encoders.3.0.bias'] = s['model.diffusion_model.input_blocks.3.0.op.bias']
+ new['diffusion']['unet.encoders.4.0.groupnorm_feature.weight'] = s['model.diffusion_model.input_blocks.4.0.in_layers.0.weight']
+ new['diffusion']['unet.encoders.4.0.groupnorm_feature.bias'] = s['model.diffusion_model.input_blocks.4.0.in_layers.0.bias']
+ new['diffusion']['unet.encoders.4.0.conv_feature.weight'] = s['model.diffusion_model.input_blocks.4.0.in_layers.2.weight']
+ new['diffusion']['unet.encoders.4.0.conv_feature.bias'] = s['model.diffusion_model.input_blocks.4.0.in_layers.2.bias']
+ new['diffusion']['unet.encoders.4.0.linear_time.weight'] = s['model.diffusion_model.input_blocks.4.0.emb_layers.1.weight']
+ new['diffusion']['unet.encoders.4.0.linear_time.bias'] = s['model.diffusion_model.input_blocks.4.0.emb_layers.1.bias']
+ new['diffusion']['unet.encoders.4.0.groupnorm_merged.weight'] = s['model.diffusion_model.input_blocks.4.0.out_layers.0.weight']
+ new['diffusion']['unet.encoders.4.0.groupnorm_merged.bias'] = s['model.diffusion_model.input_blocks.4.0.out_layers.0.bias']
+ new['diffusion']['unet.encoders.4.0.conv_merged.weight'] = s['model.diffusion_model.input_blocks.4.0.out_layers.3.weight']
+ new['diffusion']['unet.encoders.4.0.conv_merged.bias'] = s['model.diffusion_model.input_blocks.4.0.out_layers.3.bias']
+ new['diffusion']['unet.encoders.4.0.residual_layer.weight'] = s['model.diffusion_model.input_blocks.4.0.skip_connection.weight']
+ new['diffusion']['unet.encoders.4.0.residual_layer.bias'] = s['model.diffusion_model.input_blocks.4.0.skip_connection.bias']
+ new['diffusion']['unet.encoders.4.1.groupnorm.weight'] = s['model.diffusion_model.input_blocks.4.1.norm.weight']
+ new['diffusion']['unet.encoders.4.1.groupnorm.bias'] = s['model.diffusion_model.input_blocks.4.1.norm.bias']
+ new['diffusion']['unet.encoders.4.1.conv_input.weight'] = s['model.diffusion_model.input_blocks.4.1.proj_in.weight']
+ new['diffusion']['unet.encoders.4.1.conv_input.bias'] = s['model.diffusion_model.input_blocks.4.1.proj_in.bias']
+ new['diffusion']['unet.encoders.4.1.attention_1.out_proj.weight'] = s['model.diffusion_model.input_blocks.4.1.transformer_blocks.0.attn1.to_out.0.weight']
+ new['diffusion']['unet.encoders.4.1.attention_1.out_proj.bias'] = s['model.diffusion_model.input_blocks.4.1.transformer_blocks.0.attn1.to_out.0.bias']
+ new['diffusion']['unet.encoders.4.1.linear_geglu_1.weight'] = s['model.diffusion_model.input_blocks.4.1.transformer_blocks.0.ff.net.0.proj.weight']
+ new['diffusion']['unet.encoders.4.1.linear_geglu_1.bias'] = s['model.diffusion_model.input_blocks.4.1.transformer_blocks.0.ff.net.0.proj.bias']
+ new['diffusion']['unet.encoders.4.1.linear_geglu_2.weight'] = s['model.diffusion_model.input_blocks.4.1.transformer_blocks.0.ff.net.2.weight']
+ new['diffusion']['unet.encoders.4.1.linear_geglu_2.bias'] = s['model.diffusion_model.input_blocks.4.1.transformer_blocks.0.ff.net.2.bias']
+ new['diffusion']['unet.encoders.4.1.attention_2.q_proj.weight'] = s['model.diffusion_model.input_blocks.4.1.transformer_blocks.0.attn2.to_q.weight']
+ new['diffusion']['unet.encoders.4.1.attention_2.k_proj.weight'] = s['model.diffusion_model.input_blocks.4.1.transformer_blocks.0.attn2.to_k.weight']
+ new['diffusion']['unet.encoders.4.1.attention_2.v_proj.weight'] = s['model.diffusion_model.input_blocks.4.1.transformer_blocks.0.attn2.to_v.weight']
+ new['diffusion']['unet.encoders.4.1.attention_2.out_proj.weight'] = s['model.diffusion_model.input_blocks.4.1.transformer_blocks.0.attn2.to_out.0.weight']
+ new['diffusion']['unet.encoders.4.1.attention_2.out_proj.bias'] = s['model.diffusion_model.input_blocks.4.1.transformer_blocks.0.attn2.to_out.0.bias']
+ new['diffusion']['unet.encoders.4.1.layernorm_1.weight'] = s['model.diffusion_model.input_blocks.4.1.transformer_blocks.0.norm1.weight']
+ new['diffusion']['unet.encoders.4.1.layernorm_1.bias'] = s['model.diffusion_model.input_blocks.4.1.transformer_blocks.0.norm1.bias']
+ new['diffusion']['unet.encoders.4.1.layernorm_2.weight'] = s['model.diffusion_model.input_blocks.4.1.transformer_blocks.0.norm2.weight']
+ new['diffusion']['unet.encoders.4.1.layernorm_2.bias'] = s['model.diffusion_model.input_blocks.4.1.transformer_blocks.0.norm2.bias']
+ new['diffusion']['unet.encoders.4.1.layernorm_3.weight'] = s['model.diffusion_model.input_blocks.4.1.transformer_blocks.0.norm3.weight']
+ new['diffusion']['unet.encoders.4.1.layernorm_3.bias'] = s['model.diffusion_model.input_blocks.4.1.transformer_blocks.0.norm3.bias']
+ new['diffusion']['unet.encoders.4.1.conv_output.weight'] = s['model.diffusion_model.input_blocks.4.1.proj_out.weight']
+ new['diffusion']['unet.encoders.4.1.conv_output.bias'] = s['model.diffusion_model.input_blocks.4.1.proj_out.bias']
+ new['diffusion']['unet.encoders.5.0.groupnorm_feature.weight'] = s['model.diffusion_model.input_blocks.5.0.in_layers.0.weight']
+ new['diffusion']['unet.encoders.5.0.groupnorm_feature.bias'] = s['model.diffusion_model.input_blocks.5.0.in_layers.0.bias']
+ new['diffusion']['unet.encoders.5.0.conv_feature.weight'] = s['model.diffusion_model.input_blocks.5.0.in_layers.2.weight']
+ new['diffusion']['unet.encoders.5.0.conv_feature.bias'] = s['model.diffusion_model.input_blocks.5.0.in_layers.2.bias']
+ new['diffusion']['unet.encoders.5.0.linear_time.weight'] = s['model.diffusion_model.input_blocks.5.0.emb_layers.1.weight']
+ new['diffusion']['unet.encoders.5.0.linear_time.bias'] = s['model.diffusion_model.input_blocks.5.0.emb_layers.1.bias']
+ new['diffusion']['unet.encoders.5.0.groupnorm_merged.weight'] = s['model.diffusion_model.input_blocks.5.0.out_layers.0.weight']
+ new['diffusion']['unet.encoders.5.0.groupnorm_merged.bias'] = s['model.diffusion_model.input_blocks.5.0.out_layers.0.bias']
+ new['diffusion']['unet.encoders.5.0.conv_merged.weight'] = s['model.diffusion_model.input_blocks.5.0.out_layers.3.weight']
+ new['diffusion']['unet.encoders.5.0.conv_merged.bias'] = s['model.diffusion_model.input_blocks.5.0.out_layers.3.bias']
+ new['diffusion']['unet.encoders.5.1.groupnorm.weight'] = s['model.diffusion_model.input_blocks.5.1.norm.weight']
+ new['diffusion']['unet.encoders.5.1.groupnorm.bias'] = s['model.diffusion_model.input_blocks.5.1.norm.bias']
+ new['diffusion']['unet.encoders.5.1.conv_input.weight'] = s['model.diffusion_model.input_blocks.5.1.proj_in.weight']
+ new['diffusion']['unet.encoders.5.1.conv_input.bias'] = s['model.diffusion_model.input_blocks.5.1.proj_in.bias']
+ new['diffusion']['unet.encoders.5.1.attention_1.out_proj.weight'] = s['model.diffusion_model.input_blocks.5.1.transformer_blocks.0.attn1.to_out.0.weight']
+ new['diffusion']['unet.encoders.5.1.attention_1.out_proj.bias'] = s['model.diffusion_model.input_blocks.5.1.transformer_blocks.0.attn1.to_out.0.bias']
+ new['diffusion']['unet.encoders.5.1.linear_geglu_1.weight'] = s['model.diffusion_model.input_blocks.5.1.transformer_blocks.0.ff.net.0.proj.weight']
+ new['diffusion']['unet.encoders.5.1.linear_geglu_1.bias'] = s['model.diffusion_model.input_blocks.5.1.transformer_blocks.0.ff.net.0.proj.bias']
+ new['diffusion']['unet.encoders.5.1.linear_geglu_2.weight'] = s['model.diffusion_model.input_blocks.5.1.transformer_blocks.0.ff.net.2.weight']
+ new['diffusion']['unet.encoders.5.1.linear_geglu_2.bias'] = s['model.diffusion_model.input_blocks.5.1.transformer_blocks.0.ff.net.2.bias']
+ new['diffusion']['unet.encoders.5.1.attention_2.q_proj.weight'] = s['model.diffusion_model.input_blocks.5.1.transformer_blocks.0.attn2.to_q.weight']
+ new['diffusion']['unet.encoders.5.1.attention_2.k_proj.weight'] = s['model.diffusion_model.input_blocks.5.1.transformer_blocks.0.attn2.to_k.weight']
+ new['diffusion']['unet.encoders.5.1.attention_2.v_proj.weight'] = s['model.diffusion_model.input_blocks.5.1.transformer_blocks.0.attn2.to_v.weight']
+ new['diffusion']['unet.encoders.5.1.attention_2.out_proj.weight'] = s['model.diffusion_model.input_blocks.5.1.transformer_blocks.0.attn2.to_out.0.weight']
+ new['diffusion']['unet.encoders.5.1.attention_2.out_proj.bias'] = s['model.diffusion_model.input_blocks.5.1.transformer_blocks.0.attn2.to_out.0.bias']
+ new['diffusion']['unet.encoders.5.1.layernorm_1.weight'] = s['model.diffusion_model.input_blocks.5.1.transformer_blocks.0.norm1.weight']
+ new['diffusion']['unet.encoders.5.1.layernorm_1.bias'] = s['model.diffusion_model.input_blocks.5.1.transformer_blocks.0.norm1.bias']
+ new['diffusion']['unet.encoders.5.1.layernorm_2.weight'] = s['model.diffusion_model.input_blocks.5.1.transformer_blocks.0.norm2.weight']
+ new['diffusion']['unet.encoders.5.1.layernorm_2.bias'] = s['model.diffusion_model.input_blocks.5.1.transformer_blocks.0.norm2.bias']
+ new['diffusion']['unet.encoders.5.1.layernorm_3.weight'] = s['model.diffusion_model.input_blocks.5.1.transformer_blocks.0.norm3.weight']
+ new['diffusion']['unet.encoders.5.1.layernorm_3.bias'] = s['model.diffusion_model.input_blocks.5.1.transformer_blocks.0.norm3.bias']
+ new['diffusion']['unet.encoders.5.1.conv_output.weight'] = s['model.diffusion_model.input_blocks.5.1.proj_out.weight']
+ new['diffusion']['unet.encoders.5.1.conv_output.bias'] = s['model.diffusion_model.input_blocks.5.1.proj_out.bias']
+ new['diffusion']['unet.encoders.6.0.weight'] = s['model.diffusion_model.input_blocks.6.0.op.weight']
+ new['diffusion']['unet.encoders.6.0.bias'] = s['model.diffusion_model.input_blocks.6.0.op.bias']
+ new['diffusion']['unet.encoders.7.0.groupnorm_feature.weight'] = s['model.diffusion_model.input_blocks.7.0.in_layers.0.weight']
+ new['diffusion']['unet.encoders.7.0.groupnorm_feature.bias'] = s['model.diffusion_model.input_blocks.7.0.in_layers.0.bias']
+ new['diffusion']['unet.encoders.7.0.conv_feature.weight'] = s['model.diffusion_model.input_blocks.7.0.in_layers.2.weight']
+ new['diffusion']['unet.encoders.7.0.conv_feature.bias'] = s['model.diffusion_model.input_blocks.7.0.in_layers.2.bias']
+ new['diffusion']['unet.encoders.7.0.linear_time.weight'] = s['model.diffusion_model.input_blocks.7.0.emb_layers.1.weight']
+ new['diffusion']['unet.encoders.7.0.linear_time.bias'] = s['model.diffusion_model.input_blocks.7.0.emb_layers.1.bias']
+ new['diffusion']['unet.encoders.7.0.groupnorm_merged.weight'] = s['model.diffusion_model.input_blocks.7.0.out_layers.0.weight']
+ new['diffusion']['unet.encoders.7.0.groupnorm_merged.bias'] = s['model.diffusion_model.input_blocks.7.0.out_layers.0.bias']
+ new['diffusion']['unet.encoders.7.0.conv_merged.weight'] = s['model.diffusion_model.input_blocks.7.0.out_layers.3.weight']
+ new['diffusion']['unet.encoders.7.0.conv_merged.bias'] = s['model.diffusion_model.input_blocks.7.0.out_layers.3.bias']
+ new['diffusion']['unet.encoders.7.0.residual_layer.weight'] = s['model.diffusion_model.input_blocks.7.0.skip_connection.weight']
+ new['diffusion']['unet.encoders.7.0.residual_layer.bias'] = s['model.diffusion_model.input_blocks.7.0.skip_connection.bias']
+ new['diffusion']['unet.encoders.7.1.groupnorm.weight'] = s['model.diffusion_model.input_blocks.7.1.norm.weight']
+ new['diffusion']['unet.encoders.7.1.groupnorm.bias'] = s['model.diffusion_model.input_blocks.7.1.norm.bias']
+ new['diffusion']['unet.encoders.7.1.conv_input.weight'] = s['model.diffusion_model.input_blocks.7.1.proj_in.weight']
+ new['diffusion']['unet.encoders.7.1.conv_input.bias'] = s['model.diffusion_model.input_blocks.7.1.proj_in.bias']
+ new['diffusion']['unet.encoders.7.1.attention_1.out_proj.weight'] = s['model.diffusion_model.input_blocks.7.1.transformer_blocks.0.attn1.to_out.0.weight']
+ new['diffusion']['unet.encoders.7.1.attention_1.out_proj.bias'] = s['model.diffusion_model.input_blocks.7.1.transformer_blocks.0.attn1.to_out.0.bias']
+ new['diffusion']['unet.encoders.7.1.linear_geglu_1.weight'] = s['model.diffusion_model.input_blocks.7.1.transformer_blocks.0.ff.net.0.proj.weight']
+ new['diffusion']['unet.encoders.7.1.linear_geglu_1.bias'] = s['model.diffusion_model.input_blocks.7.1.transformer_blocks.0.ff.net.0.proj.bias']
+ new['diffusion']['unet.encoders.7.1.linear_geglu_2.weight'] = s['model.diffusion_model.input_blocks.7.1.transformer_blocks.0.ff.net.2.weight']
+ new['diffusion']['unet.encoders.7.1.linear_geglu_2.bias'] = s['model.diffusion_model.input_blocks.7.1.transformer_blocks.0.ff.net.2.bias']
+ new['diffusion']['unet.encoders.7.1.attention_2.q_proj.weight'] = s['model.diffusion_model.input_blocks.7.1.transformer_blocks.0.attn2.to_q.weight']
+ new['diffusion']['unet.encoders.7.1.attention_2.k_proj.weight'] = s['model.diffusion_model.input_blocks.7.1.transformer_blocks.0.attn2.to_k.weight']
+ new['diffusion']['unet.encoders.7.1.attention_2.v_proj.weight'] = s['model.diffusion_model.input_blocks.7.1.transformer_blocks.0.attn2.to_v.weight']
+ new['diffusion']['unet.encoders.7.1.attention_2.out_proj.weight'] = s['model.diffusion_model.input_blocks.7.1.transformer_blocks.0.attn2.to_out.0.weight']
+ new['diffusion']['unet.encoders.7.1.attention_2.out_proj.bias'] = s['model.diffusion_model.input_blocks.7.1.transformer_blocks.0.attn2.to_out.0.bias']
+ new['diffusion']['unet.encoders.7.1.layernorm_1.weight'] = s['model.diffusion_model.input_blocks.7.1.transformer_blocks.0.norm1.weight']
+ new['diffusion']['unet.encoders.7.1.layernorm_1.bias'] = s['model.diffusion_model.input_blocks.7.1.transformer_blocks.0.norm1.bias']
+ new['diffusion']['unet.encoders.7.1.layernorm_2.weight'] = s['model.diffusion_model.input_blocks.7.1.transformer_blocks.0.norm2.weight']
+ new['diffusion']['unet.encoders.7.1.layernorm_2.bias'] = s['model.diffusion_model.input_blocks.7.1.transformer_blocks.0.norm2.bias']
+ new['diffusion']['unet.encoders.7.1.layernorm_3.weight'] = s['model.diffusion_model.input_blocks.7.1.transformer_blocks.0.norm3.weight']
+ new['diffusion']['unet.encoders.7.1.layernorm_3.bias'] = s['model.diffusion_model.input_blocks.7.1.transformer_blocks.0.norm3.bias']
+ new['diffusion']['unet.encoders.7.1.conv_output.weight'] = s['model.diffusion_model.input_blocks.7.1.proj_out.weight']
+ new['diffusion']['unet.encoders.7.1.conv_output.bias'] = s['model.diffusion_model.input_blocks.7.1.proj_out.bias']
+ new['diffusion']['unet.encoders.8.0.groupnorm_feature.weight'] = s['model.diffusion_model.input_blocks.8.0.in_layers.0.weight']
+ new['diffusion']['unet.encoders.8.0.groupnorm_feature.bias'] = s['model.diffusion_model.input_blocks.8.0.in_layers.0.bias']
+ new['diffusion']['unet.encoders.8.0.conv_feature.weight'] = s['model.diffusion_model.input_blocks.8.0.in_layers.2.weight']
+ new['diffusion']['unet.encoders.8.0.conv_feature.bias'] = s['model.diffusion_model.input_blocks.8.0.in_layers.2.bias']
+ new['diffusion']['unet.encoders.8.0.linear_time.weight'] = s['model.diffusion_model.input_blocks.8.0.emb_layers.1.weight']
+ new['diffusion']['unet.encoders.8.0.linear_time.bias'] = s['model.diffusion_model.input_blocks.8.0.emb_layers.1.bias']
+ new['diffusion']['unet.encoders.8.0.groupnorm_merged.weight'] = s['model.diffusion_model.input_blocks.8.0.out_layers.0.weight']
+ new['diffusion']['unet.encoders.8.0.groupnorm_merged.bias'] = s['model.diffusion_model.input_blocks.8.0.out_layers.0.bias']
+ new['diffusion']['unet.encoders.8.0.conv_merged.weight'] = s['model.diffusion_model.input_blocks.8.0.out_layers.3.weight']
+ new['diffusion']['unet.encoders.8.0.conv_merged.bias'] = s['model.diffusion_model.input_blocks.8.0.out_layers.3.bias']
+ new['diffusion']['unet.encoders.8.1.groupnorm.weight'] = s['model.diffusion_model.input_blocks.8.1.norm.weight']
+ new['diffusion']['unet.encoders.8.1.groupnorm.bias'] = s['model.diffusion_model.input_blocks.8.1.norm.bias']
+ new['diffusion']['unet.encoders.8.1.conv_input.weight'] = s['model.diffusion_model.input_blocks.8.1.proj_in.weight']
+ new['diffusion']['unet.encoders.8.1.conv_input.bias'] = s['model.diffusion_model.input_blocks.8.1.proj_in.bias']
+ new['diffusion']['unet.encoders.8.1.attention_1.out_proj.weight'] = s['model.diffusion_model.input_blocks.8.1.transformer_blocks.0.attn1.to_out.0.weight']
+ new['diffusion']['unet.encoders.8.1.attention_1.out_proj.bias'] = s['model.diffusion_model.input_blocks.8.1.transformer_blocks.0.attn1.to_out.0.bias']
+ new['diffusion']['unet.encoders.8.1.linear_geglu_1.weight'] = s['model.diffusion_model.input_blocks.8.1.transformer_blocks.0.ff.net.0.proj.weight']
+ new['diffusion']['unet.encoders.8.1.linear_geglu_1.bias'] = s['model.diffusion_model.input_blocks.8.1.transformer_blocks.0.ff.net.0.proj.bias']
+ new['diffusion']['unet.encoders.8.1.linear_geglu_2.weight'] = s['model.diffusion_model.input_blocks.8.1.transformer_blocks.0.ff.net.2.weight']
+ new['diffusion']['unet.encoders.8.1.linear_geglu_2.bias'] = s['model.diffusion_model.input_blocks.8.1.transformer_blocks.0.ff.net.2.bias']
+ new['diffusion']['unet.encoders.8.1.attention_2.q_proj.weight'] = s['model.diffusion_model.input_blocks.8.1.transformer_blocks.0.attn2.to_q.weight']
+ new['diffusion']['unet.encoders.8.1.attention_2.k_proj.weight'] = s['model.diffusion_model.input_blocks.8.1.transformer_blocks.0.attn2.to_k.weight']
+ new['diffusion']['unet.encoders.8.1.attention_2.v_proj.weight'] = s['model.diffusion_model.input_blocks.8.1.transformer_blocks.0.attn2.to_v.weight']
+ new['diffusion']['unet.encoders.8.1.attention_2.out_proj.weight'] = s['model.diffusion_model.input_blocks.8.1.transformer_blocks.0.attn2.to_out.0.weight']
+ new['diffusion']['unet.encoders.8.1.attention_2.out_proj.bias'] = s['model.diffusion_model.input_blocks.8.1.transformer_blocks.0.attn2.to_out.0.bias']
+ new['diffusion']['unet.encoders.8.1.layernorm_1.weight'] = s['model.diffusion_model.input_blocks.8.1.transformer_blocks.0.norm1.weight']
+ new['diffusion']['unet.encoders.8.1.layernorm_1.bias'] = s['model.diffusion_model.input_blocks.8.1.transformer_blocks.0.norm1.bias']
+ new['diffusion']['unet.encoders.8.1.layernorm_2.weight'] = s['model.diffusion_model.input_blocks.8.1.transformer_blocks.0.norm2.weight']
+ new['diffusion']['unet.encoders.8.1.layernorm_2.bias'] = s['model.diffusion_model.input_blocks.8.1.transformer_blocks.0.norm2.bias']
+ new['diffusion']['unet.encoders.8.1.layernorm_3.weight'] = s['model.diffusion_model.input_blocks.8.1.transformer_blocks.0.norm3.weight']
+ new['diffusion']['unet.encoders.8.1.layernorm_3.bias'] = s['model.diffusion_model.input_blocks.8.1.transformer_blocks.0.norm3.bias']
+ new['diffusion']['unet.encoders.8.1.conv_output.weight'] = s['model.diffusion_model.input_blocks.8.1.proj_out.weight']
+ new['diffusion']['unet.encoders.8.1.conv_output.bias'] = s['model.diffusion_model.input_blocks.8.1.proj_out.bias']
+ new['diffusion']['unet.encoders.9.0.weight'] = s['model.diffusion_model.input_blocks.9.0.op.weight']
+ new['diffusion']['unet.encoders.9.0.bias'] = s['model.diffusion_model.input_blocks.9.0.op.bias']
+ new['diffusion']['unet.encoders.10.0.groupnorm_feature.weight'] = s['model.diffusion_model.input_blocks.10.0.in_layers.0.weight']
+ new['diffusion']['unet.encoders.10.0.groupnorm_feature.bias'] = s['model.diffusion_model.input_blocks.10.0.in_layers.0.bias']
+ new['diffusion']['unet.encoders.10.0.conv_feature.weight'] = s['model.diffusion_model.input_blocks.10.0.in_layers.2.weight']
+ new['diffusion']['unet.encoders.10.0.conv_feature.bias'] = s['model.diffusion_model.input_blocks.10.0.in_layers.2.bias']
+ new['diffusion']['unet.encoders.10.0.linear_time.weight'] = s['model.diffusion_model.input_blocks.10.0.emb_layers.1.weight']
+ new['diffusion']['unet.encoders.10.0.linear_time.bias'] = s['model.diffusion_model.input_blocks.10.0.emb_layers.1.bias']
+ new['diffusion']['unet.encoders.10.0.groupnorm_merged.weight'] = s['model.diffusion_model.input_blocks.10.0.out_layers.0.weight']
+ new['diffusion']['unet.encoders.10.0.groupnorm_merged.bias'] = s['model.diffusion_model.input_blocks.10.0.out_layers.0.bias']
+ new['diffusion']['unet.encoders.10.0.conv_merged.weight'] = s['model.diffusion_model.input_blocks.10.0.out_layers.3.weight']
+ new['diffusion']['unet.encoders.10.0.conv_merged.bias'] = s['model.diffusion_model.input_blocks.10.0.out_layers.3.bias']
+ new['diffusion']['unet.encoders.11.0.groupnorm_feature.weight'] = s['model.diffusion_model.input_blocks.11.0.in_layers.0.weight']
+ new['diffusion']['unet.encoders.11.0.groupnorm_feature.bias'] = s['model.diffusion_model.input_blocks.11.0.in_layers.0.bias']
+ new['diffusion']['unet.encoders.11.0.conv_feature.weight'] = s['model.diffusion_model.input_blocks.11.0.in_layers.2.weight']
+ new['diffusion']['unet.encoders.11.0.conv_feature.bias'] = s['model.diffusion_model.input_blocks.11.0.in_layers.2.bias']
+ new['diffusion']['unet.encoders.11.0.linear_time.weight'] = s['model.diffusion_model.input_blocks.11.0.emb_layers.1.weight']
+ new['diffusion']['unet.encoders.11.0.linear_time.bias'] = s['model.diffusion_model.input_blocks.11.0.emb_layers.1.bias']
+ new['diffusion']['unet.encoders.11.0.groupnorm_merged.weight'] = s['model.diffusion_model.input_blocks.11.0.out_layers.0.weight']
+ new['diffusion']['unet.encoders.11.0.groupnorm_merged.bias'] = s['model.diffusion_model.input_blocks.11.0.out_layers.0.bias']
+ new['diffusion']['unet.encoders.11.0.conv_merged.weight'] = s['model.diffusion_model.input_blocks.11.0.out_layers.3.weight']
+ new['diffusion']['unet.encoders.11.0.conv_merged.bias'] = s['model.diffusion_model.input_blocks.11.0.out_layers.3.bias']
+ new['diffusion']['unet.bottleneck.0.groupnorm_feature.weight'] = s['model.diffusion_model.middle_block.0.in_layers.0.weight']
+ new['diffusion']['unet.bottleneck.0.groupnorm_feature.bias'] = s['model.diffusion_model.middle_block.0.in_layers.0.bias']
+ new['diffusion']['unet.bottleneck.0.conv_feature.weight'] = s['model.diffusion_model.middle_block.0.in_layers.2.weight']
+ new['diffusion']['unet.bottleneck.0.conv_feature.bias'] = s['model.diffusion_model.middle_block.0.in_layers.2.bias']
+ new['diffusion']['unet.bottleneck.0.linear_time.weight'] = s['model.diffusion_model.middle_block.0.emb_layers.1.weight']
+ new['diffusion']['unet.bottleneck.0.linear_time.bias'] = s['model.diffusion_model.middle_block.0.emb_layers.1.bias']
+ new['diffusion']['unet.bottleneck.0.groupnorm_merged.weight'] = s['model.diffusion_model.middle_block.0.out_layers.0.weight']
+ new['diffusion']['unet.bottleneck.0.groupnorm_merged.bias'] = s['model.diffusion_model.middle_block.0.out_layers.0.bias']
+ new['diffusion']['unet.bottleneck.0.conv_merged.weight'] = s['model.diffusion_model.middle_block.0.out_layers.3.weight']
+ new['diffusion']['unet.bottleneck.0.conv_merged.bias'] = s['model.diffusion_model.middle_block.0.out_layers.3.bias']
+ new['diffusion']['unet.bottleneck.1.groupnorm.weight'] = s['model.diffusion_model.middle_block.1.norm.weight']
+ new['diffusion']['unet.bottleneck.1.groupnorm.bias'] = s['model.diffusion_model.middle_block.1.norm.bias']
+ new['diffusion']['unet.bottleneck.1.conv_input.weight'] = s['model.diffusion_model.middle_block.1.proj_in.weight']
+ new['diffusion']['unet.bottleneck.1.conv_input.bias'] = s['model.diffusion_model.middle_block.1.proj_in.bias']
+ new['diffusion']['unet.bottleneck.1.attention_1.out_proj.weight'] = s['model.diffusion_model.middle_block.1.transformer_blocks.0.attn1.to_out.0.weight']
+ new['diffusion']['unet.bottleneck.1.attention_1.out_proj.bias'] = s['model.diffusion_model.middle_block.1.transformer_blocks.0.attn1.to_out.0.bias']
+ new['diffusion']['unet.bottleneck.1.linear_geglu_1.weight'] = s['model.diffusion_model.middle_block.1.transformer_blocks.0.ff.net.0.proj.weight']
+ new['diffusion']['unet.bottleneck.1.linear_geglu_1.bias'] = s['model.diffusion_model.middle_block.1.transformer_blocks.0.ff.net.0.proj.bias']
+ new['diffusion']['unet.bottleneck.1.linear_geglu_2.weight'] = s['model.diffusion_model.middle_block.1.transformer_blocks.0.ff.net.2.weight']
+ new['diffusion']['unet.bottleneck.1.linear_geglu_2.bias'] = s['model.diffusion_model.middle_block.1.transformer_blocks.0.ff.net.2.bias']
+ new['diffusion']['unet.bottleneck.1.attention_2.q_proj.weight'] = s['model.diffusion_model.middle_block.1.transformer_blocks.0.attn2.to_q.weight']
+ new['diffusion']['unet.bottleneck.1.attention_2.k_proj.weight'] = s['model.diffusion_model.middle_block.1.transformer_blocks.0.attn2.to_k.weight']
+ new['diffusion']['unet.bottleneck.1.attention_2.v_proj.weight'] = s['model.diffusion_model.middle_block.1.transformer_blocks.0.attn2.to_v.weight']
+ new['diffusion']['unet.bottleneck.1.attention_2.out_proj.weight'] = s['model.diffusion_model.middle_block.1.transformer_blocks.0.attn2.to_out.0.weight']
+ new['diffusion']['unet.bottleneck.1.attention_2.out_proj.bias'] = s['model.diffusion_model.middle_block.1.transformer_blocks.0.attn2.to_out.0.bias']
+ new['diffusion']['unet.bottleneck.1.layernorm_1.weight'] = s['model.diffusion_model.middle_block.1.transformer_blocks.0.norm1.weight']
+ new['diffusion']['unet.bottleneck.1.layernorm_1.bias'] = s['model.diffusion_model.middle_block.1.transformer_blocks.0.norm1.bias']
+ new['diffusion']['unet.bottleneck.1.layernorm_2.weight'] = s['model.diffusion_model.middle_block.1.transformer_blocks.0.norm2.weight']
+ new['diffusion']['unet.bottleneck.1.layernorm_2.bias'] = s['model.diffusion_model.middle_block.1.transformer_blocks.0.norm2.bias']
+ new['diffusion']['unet.bottleneck.1.layernorm_3.weight'] = s['model.diffusion_model.middle_block.1.transformer_blocks.0.norm3.weight']
+ new['diffusion']['unet.bottleneck.1.layernorm_3.bias'] = s['model.diffusion_model.middle_block.1.transformer_blocks.0.norm3.bias']
+ new['diffusion']['unet.bottleneck.1.conv_output.weight'] = s['model.diffusion_model.middle_block.1.proj_out.weight']
+ new['diffusion']['unet.bottleneck.1.conv_output.bias'] = s['model.diffusion_model.middle_block.1.proj_out.bias']
+ new['diffusion']['unet.bottleneck.2.groupnorm_feature.weight'] = s['model.diffusion_model.middle_block.2.in_layers.0.weight']
+ new['diffusion']['unet.bottleneck.2.groupnorm_feature.bias'] = s['model.diffusion_model.middle_block.2.in_layers.0.bias']
+ new['diffusion']['unet.bottleneck.2.conv_feature.weight'] = s['model.diffusion_model.middle_block.2.in_layers.2.weight']
+ new['diffusion']['unet.bottleneck.2.conv_feature.bias'] = s['model.diffusion_model.middle_block.2.in_layers.2.bias']
+ new['diffusion']['unet.bottleneck.2.linear_time.weight'] = s['model.diffusion_model.middle_block.2.emb_layers.1.weight']
+ new['diffusion']['unet.bottleneck.2.linear_time.bias'] = s['model.diffusion_model.middle_block.2.emb_layers.1.bias']
+ new['diffusion']['unet.bottleneck.2.groupnorm_merged.weight'] = s['model.diffusion_model.middle_block.2.out_layers.0.weight']
+ new['diffusion']['unet.bottleneck.2.groupnorm_merged.bias'] = s['model.diffusion_model.middle_block.2.out_layers.0.bias']
+ new['diffusion']['unet.bottleneck.2.conv_merged.weight'] = s['model.diffusion_model.middle_block.2.out_layers.3.weight']
+ new['diffusion']['unet.bottleneck.2.conv_merged.bias'] = s['model.diffusion_model.middle_block.2.out_layers.3.bias']
+ new['diffusion']['unet.decoders.0.0.groupnorm_feature.weight'] = s['model.diffusion_model.output_blocks.0.0.in_layers.0.weight']
+ new['diffusion']['unet.decoders.0.0.groupnorm_feature.bias'] = s['model.diffusion_model.output_blocks.0.0.in_layers.0.bias']
+ new['diffusion']['unet.decoders.0.0.conv_feature.weight'] = s['model.diffusion_model.output_blocks.0.0.in_layers.2.weight']
+ new['diffusion']['unet.decoders.0.0.conv_feature.bias'] = s['model.diffusion_model.output_blocks.0.0.in_layers.2.bias']
+ new['diffusion']['unet.decoders.0.0.linear_time.weight'] = s['model.diffusion_model.output_blocks.0.0.emb_layers.1.weight']
+ new['diffusion']['unet.decoders.0.0.linear_time.bias'] = s['model.diffusion_model.output_blocks.0.0.emb_layers.1.bias']
+ new['diffusion']['unet.decoders.0.0.groupnorm_merged.weight'] = s['model.diffusion_model.output_blocks.0.0.out_layers.0.weight']
+ new['diffusion']['unet.decoders.0.0.groupnorm_merged.bias'] = s['model.diffusion_model.output_blocks.0.0.out_layers.0.bias']
+ new['diffusion']['unet.decoders.0.0.conv_merged.weight'] = s['model.diffusion_model.output_blocks.0.0.out_layers.3.weight']
+ new['diffusion']['unet.decoders.0.0.conv_merged.bias'] = s['model.diffusion_model.output_blocks.0.0.out_layers.3.bias']
+ new['diffusion']['unet.decoders.0.0.residual_layer.weight'] = s['model.diffusion_model.output_blocks.0.0.skip_connection.weight']
+ new['diffusion']['unet.decoders.0.0.residual_layer.bias'] = s['model.diffusion_model.output_blocks.0.0.skip_connection.bias']
+ new['diffusion']['unet.decoders.1.0.groupnorm_feature.weight'] = s['model.diffusion_model.output_blocks.1.0.in_layers.0.weight']
+ new['diffusion']['unet.decoders.1.0.groupnorm_feature.bias'] = s['model.diffusion_model.output_blocks.1.0.in_layers.0.bias']
+ new['diffusion']['unet.decoders.1.0.conv_feature.weight'] = s['model.diffusion_model.output_blocks.1.0.in_layers.2.weight']
+ new['diffusion']['unet.decoders.1.0.conv_feature.bias'] = s['model.diffusion_model.output_blocks.1.0.in_layers.2.bias']
+ new['diffusion']['unet.decoders.1.0.linear_time.weight'] = s['model.diffusion_model.output_blocks.1.0.emb_layers.1.weight']
+ new['diffusion']['unet.decoders.1.0.linear_time.bias'] = s['model.diffusion_model.output_blocks.1.0.emb_layers.1.bias']
+ new['diffusion']['unet.decoders.1.0.groupnorm_merged.weight'] = s['model.diffusion_model.output_blocks.1.0.out_layers.0.weight']
+ new['diffusion']['unet.decoders.1.0.groupnorm_merged.bias'] = s['model.diffusion_model.output_blocks.1.0.out_layers.0.bias']
+ new['diffusion']['unet.decoders.1.0.conv_merged.weight'] = s['model.diffusion_model.output_blocks.1.0.out_layers.3.weight']
+ new['diffusion']['unet.decoders.1.0.conv_merged.bias'] = s['model.diffusion_model.output_blocks.1.0.out_layers.3.bias']
+ new['diffusion']['unet.decoders.1.0.residual_layer.weight'] = s['model.diffusion_model.output_blocks.1.0.skip_connection.weight']
+ new['diffusion']['unet.decoders.1.0.residual_layer.bias'] = s['model.diffusion_model.output_blocks.1.0.skip_connection.bias']
+ new['diffusion']['unet.decoders.2.0.groupnorm_feature.weight'] = s['model.diffusion_model.output_blocks.2.0.in_layers.0.weight']
+ new['diffusion']['unet.decoders.2.0.groupnorm_feature.bias'] = s['model.diffusion_model.output_blocks.2.0.in_layers.0.bias']
+ new['diffusion']['unet.decoders.2.0.conv_feature.weight'] = s['model.diffusion_model.output_blocks.2.0.in_layers.2.weight']
+ new['diffusion']['unet.decoders.2.0.conv_feature.bias'] = s['model.diffusion_model.output_blocks.2.0.in_layers.2.bias']
+ new['diffusion']['unet.decoders.2.0.linear_time.weight'] = s['model.diffusion_model.output_blocks.2.0.emb_layers.1.weight']
+ new['diffusion']['unet.decoders.2.0.linear_time.bias'] = s['model.diffusion_model.output_blocks.2.0.emb_layers.1.bias']
+ new['diffusion']['unet.decoders.2.0.groupnorm_merged.weight'] = s['model.diffusion_model.output_blocks.2.0.out_layers.0.weight']
+ new['diffusion']['unet.decoders.2.0.groupnorm_merged.bias'] = s['model.diffusion_model.output_blocks.2.0.out_layers.0.bias']
+ new['diffusion']['unet.decoders.2.0.conv_merged.weight'] = s['model.diffusion_model.output_blocks.2.0.out_layers.3.weight']
+ new['diffusion']['unet.decoders.2.0.conv_merged.bias'] = s['model.diffusion_model.output_blocks.2.0.out_layers.3.bias']
+ new['diffusion']['unet.decoders.2.0.residual_layer.weight'] = s['model.diffusion_model.output_blocks.2.0.skip_connection.weight']
+ new['diffusion']['unet.decoders.2.0.residual_layer.bias'] = s['model.diffusion_model.output_blocks.2.0.skip_connection.bias']
+ new['diffusion']['unet.decoders.2.1.conv.weight'] = s['model.diffusion_model.output_blocks.2.1.conv.weight']
+ new['diffusion']['unet.decoders.2.1.conv.bias'] = s['model.diffusion_model.output_blocks.2.1.conv.bias']
+ new['diffusion']['unet.decoders.3.0.groupnorm_feature.weight'] = s['model.diffusion_model.output_blocks.3.0.in_layers.0.weight']
+ new['diffusion']['unet.decoders.3.0.groupnorm_feature.bias'] = s['model.diffusion_model.output_blocks.3.0.in_layers.0.bias']
+ new['diffusion']['unet.decoders.3.0.conv_feature.weight'] = s['model.diffusion_model.output_blocks.3.0.in_layers.2.weight']
+ new['diffusion']['unet.decoders.3.0.conv_feature.bias'] = s['model.diffusion_model.output_blocks.3.0.in_layers.2.bias']
+ new['diffusion']['unet.decoders.3.0.linear_time.weight'] = s['model.diffusion_model.output_blocks.3.0.emb_layers.1.weight']
+ new['diffusion']['unet.decoders.3.0.linear_time.bias'] = s['model.diffusion_model.output_blocks.3.0.emb_layers.1.bias']
+ new['diffusion']['unet.decoders.3.0.groupnorm_merged.weight'] = s['model.diffusion_model.output_blocks.3.0.out_layers.0.weight']
+ new['diffusion']['unet.decoders.3.0.groupnorm_merged.bias'] = s['model.diffusion_model.output_blocks.3.0.out_layers.0.bias']
+ new['diffusion']['unet.decoders.3.0.conv_merged.weight'] = s['model.diffusion_model.output_blocks.3.0.out_layers.3.weight']
+ new['diffusion']['unet.decoders.3.0.conv_merged.bias'] = s['model.diffusion_model.output_blocks.3.0.out_layers.3.bias']
+ new['diffusion']['unet.decoders.3.0.residual_layer.weight'] = s['model.diffusion_model.output_blocks.3.0.skip_connection.weight']
+ new['diffusion']['unet.decoders.3.0.residual_layer.bias'] = s['model.diffusion_model.output_blocks.3.0.skip_connection.bias']
+ new['diffusion']['unet.decoders.3.1.groupnorm.weight'] = s['model.diffusion_model.output_blocks.3.1.norm.weight']
+ new['diffusion']['unet.decoders.3.1.groupnorm.bias'] = s['model.diffusion_model.output_blocks.3.1.norm.bias']
+ new['diffusion']['unet.decoders.3.1.conv_input.weight'] = s['model.diffusion_model.output_blocks.3.1.proj_in.weight']
+ new['diffusion']['unet.decoders.3.1.conv_input.bias'] = s['model.diffusion_model.output_blocks.3.1.proj_in.bias']
+ new['diffusion']['unet.decoders.3.1.attention_1.out_proj.weight'] = s['model.diffusion_model.output_blocks.3.1.transformer_blocks.0.attn1.to_out.0.weight']
+ new['diffusion']['unet.decoders.3.1.attention_1.out_proj.bias'] = s['model.diffusion_model.output_blocks.3.1.transformer_blocks.0.attn1.to_out.0.bias']
+ new['diffusion']['unet.decoders.3.1.linear_geglu_1.weight'] = s['model.diffusion_model.output_blocks.3.1.transformer_blocks.0.ff.net.0.proj.weight']
+ new['diffusion']['unet.decoders.3.1.linear_geglu_1.bias'] = s['model.diffusion_model.output_blocks.3.1.transformer_blocks.0.ff.net.0.proj.bias']
+ new['diffusion']['unet.decoders.3.1.linear_geglu_2.weight'] = s['model.diffusion_model.output_blocks.3.1.transformer_blocks.0.ff.net.2.weight']
+ new['diffusion']['unet.decoders.3.1.linear_geglu_2.bias'] = s['model.diffusion_model.output_blocks.3.1.transformer_blocks.0.ff.net.2.bias']
+ new['diffusion']['unet.decoders.3.1.attention_2.q_proj.weight'] = s['model.diffusion_model.output_blocks.3.1.transformer_blocks.0.attn2.to_q.weight']
+ new['diffusion']['unet.decoders.3.1.attention_2.k_proj.weight'] = s['model.diffusion_model.output_blocks.3.1.transformer_blocks.0.attn2.to_k.weight']
+ new['diffusion']['unet.decoders.3.1.attention_2.v_proj.weight'] = s['model.diffusion_model.output_blocks.3.1.transformer_blocks.0.attn2.to_v.weight']
+ new['diffusion']['unet.decoders.3.1.attention_2.out_proj.weight'] = s['model.diffusion_model.output_blocks.3.1.transformer_blocks.0.attn2.to_out.0.weight']
+ new['diffusion']['unet.decoders.3.1.attention_2.out_proj.bias'] = s['model.diffusion_model.output_blocks.3.1.transformer_blocks.0.attn2.to_out.0.bias']
+ new['diffusion']['unet.decoders.3.1.layernorm_1.weight'] = s['model.diffusion_model.output_blocks.3.1.transformer_blocks.0.norm1.weight']
+ new['diffusion']['unet.decoders.3.1.layernorm_1.bias'] = s['model.diffusion_model.output_blocks.3.1.transformer_blocks.0.norm1.bias']
+ new['diffusion']['unet.decoders.3.1.layernorm_2.weight'] = s['model.diffusion_model.output_blocks.3.1.transformer_blocks.0.norm2.weight']
+ new['diffusion']['unet.decoders.3.1.layernorm_2.bias'] = s['model.diffusion_model.output_blocks.3.1.transformer_blocks.0.norm2.bias']
+ new['diffusion']['unet.decoders.3.1.layernorm_3.weight'] = s['model.diffusion_model.output_blocks.3.1.transformer_blocks.0.norm3.weight']
+ new['diffusion']['unet.decoders.3.1.layernorm_3.bias'] = s['model.diffusion_model.output_blocks.3.1.transformer_blocks.0.norm3.bias']
+ new['diffusion']['unet.decoders.3.1.conv_output.weight'] = s['model.diffusion_model.output_blocks.3.1.proj_out.weight']
+ new['diffusion']['unet.decoders.3.1.conv_output.bias'] = s['model.diffusion_model.output_blocks.3.1.proj_out.bias']
+ new['diffusion']['unet.decoders.4.0.groupnorm_feature.weight'] = s['model.diffusion_model.output_blocks.4.0.in_layers.0.weight']
+ new['diffusion']['unet.decoders.4.0.groupnorm_feature.bias'] = s['model.diffusion_model.output_blocks.4.0.in_layers.0.bias']
+ new['diffusion']['unet.decoders.4.0.conv_feature.weight'] = s['model.diffusion_model.output_blocks.4.0.in_layers.2.weight']
+ new['diffusion']['unet.decoders.4.0.conv_feature.bias'] = s['model.diffusion_model.output_blocks.4.0.in_layers.2.bias']
+ new['diffusion']['unet.decoders.4.0.linear_time.weight'] = s['model.diffusion_model.output_blocks.4.0.emb_layers.1.weight']
+ new['diffusion']['unet.decoders.4.0.linear_time.bias'] = s['model.diffusion_model.output_blocks.4.0.emb_layers.1.bias']
+ new['diffusion']['unet.decoders.4.0.groupnorm_merged.weight'] = s['model.diffusion_model.output_blocks.4.0.out_layers.0.weight']
+ new['diffusion']['unet.decoders.4.0.groupnorm_merged.bias'] = s['model.diffusion_model.output_blocks.4.0.out_layers.0.bias']
+ new['diffusion']['unet.decoders.4.0.conv_merged.weight'] = s['model.diffusion_model.output_blocks.4.0.out_layers.3.weight']
+ new['diffusion']['unet.decoders.4.0.conv_merged.bias'] = s['model.diffusion_model.output_blocks.4.0.out_layers.3.bias']
+ new['diffusion']['unet.decoders.4.0.residual_layer.weight'] = s['model.diffusion_model.output_blocks.4.0.skip_connection.weight']
+ new['diffusion']['unet.decoders.4.0.residual_layer.bias'] = s['model.diffusion_model.output_blocks.4.0.skip_connection.bias']
+ new['diffusion']['unet.decoders.4.1.groupnorm.weight'] = s['model.diffusion_model.output_blocks.4.1.norm.weight']
+ new['diffusion']['unet.decoders.4.1.groupnorm.bias'] = s['model.diffusion_model.output_blocks.4.1.norm.bias']
+ new['diffusion']['unet.decoders.4.1.conv_input.weight'] = s['model.diffusion_model.output_blocks.4.1.proj_in.weight']
+ new['diffusion']['unet.decoders.4.1.conv_input.bias'] = s['model.diffusion_model.output_blocks.4.1.proj_in.bias']
+ new['diffusion']['unet.decoders.4.1.attention_1.out_proj.weight'] = s['model.diffusion_model.output_blocks.4.1.transformer_blocks.0.attn1.to_out.0.weight']
+ new['diffusion']['unet.decoders.4.1.attention_1.out_proj.bias'] = s['model.diffusion_model.output_blocks.4.1.transformer_blocks.0.attn1.to_out.0.bias']
+ new['diffusion']['unet.decoders.4.1.linear_geglu_1.weight'] = s['model.diffusion_model.output_blocks.4.1.transformer_blocks.0.ff.net.0.proj.weight']
+ new['diffusion']['unet.decoders.4.1.linear_geglu_1.bias'] = s['model.diffusion_model.output_blocks.4.1.transformer_blocks.0.ff.net.0.proj.bias']
+ new['diffusion']['unet.decoders.4.1.linear_geglu_2.weight'] = s['model.diffusion_model.output_blocks.4.1.transformer_blocks.0.ff.net.2.weight']
+ new['diffusion']['unet.decoders.4.1.linear_geglu_2.bias'] = s['model.diffusion_model.output_blocks.4.1.transformer_blocks.0.ff.net.2.bias']
+ new['diffusion']['unet.decoders.4.1.attention_2.q_proj.weight'] = s['model.diffusion_model.output_blocks.4.1.transformer_blocks.0.attn2.to_q.weight']
+ new['diffusion']['unet.decoders.4.1.attention_2.k_proj.weight'] = s['model.diffusion_model.output_blocks.4.1.transformer_blocks.0.attn2.to_k.weight']
+ new['diffusion']['unet.decoders.4.1.attention_2.v_proj.weight'] = s['model.diffusion_model.output_blocks.4.1.transformer_blocks.0.attn2.to_v.weight']
+ new['diffusion']['unet.decoders.4.1.attention_2.out_proj.weight'] = s['model.diffusion_model.output_blocks.4.1.transformer_blocks.0.attn2.to_out.0.weight']
+ new['diffusion']['unet.decoders.4.1.attention_2.out_proj.bias'] = s['model.diffusion_model.output_blocks.4.1.transformer_blocks.0.attn2.to_out.0.bias']
+ new['diffusion']['unet.decoders.4.1.layernorm_1.weight'] = s['model.diffusion_model.output_blocks.4.1.transformer_blocks.0.norm1.weight']
+ new['diffusion']['unet.decoders.4.1.layernorm_1.bias'] = s['model.diffusion_model.output_blocks.4.1.transformer_blocks.0.norm1.bias']
+ new['diffusion']['unet.decoders.4.1.layernorm_2.weight'] = s['model.diffusion_model.output_blocks.4.1.transformer_blocks.0.norm2.weight']
+ new['diffusion']['unet.decoders.4.1.layernorm_2.bias'] = s['model.diffusion_model.output_blocks.4.1.transformer_blocks.0.norm2.bias']
+ new['diffusion']['unet.decoders.4.1.layernorm_3.weight'] = s['model.diffusion_model.output_blocks.4.1.transformer_blocks.0.norm3.weight']
+ new['diffusion']['unet.decoders.4.1.layernorm_3.bias'] = s['model.diffusion_model.output_blocks.4.1.transformer_blocks.0.norm3.bias']
+ new['diffusion']['unet.decoders.4.1.conv_output.weight'] = s['model.diffusion_model.output_blocks.4.1.proj_out.weight']
+ new['diffusion']['unet.decoders.4.1.conv_output.bias'] = s['model.diffusion_model.output_blocks.4.1.proj_out.bias']
+ new['diffusion']['unet.decoders.5.0.groupnorm_feature.weight'] = s['model.diffusion_model.output_blocks.5.0.in_layers.0.weight']
+ new['diffusion']['unet.decoders.5.0.groupnorm_feature.bias'] = s['model.diffusion_model.output_blocks.5.0.in_layers.0.bias']
+ new['diffusion']['unet.decoders.5.0.conv_feature.weight'] = s['model.diffusion_model.output_blocks.5.0.in_layers.2.weight']
+ new['diffusion']['unet.decoders.5.0.conv_feature.bias'] = s['model.diffusion_model.output_blocks.5.0.in_layers.2.bias']
+ new['diffusion']['unet.decoders.5.0.linear_time.weight'] = s['model.diffusion_model.output_blocks.5.0.emb_layers.1.weight']
+ new['diffusion']['unet.decoders.5.0.linear_time.bias'] = s['model.diffusion_model.output_blocks.5.0.emb_layers.1.bias']
+ new['diffusion']['unet.decoders.5.0.groupnorm_merged.weight'] = s['model.diffusion_model.output_blocks.5.0.out_layers.0.weight']
+ new['diffusion']['unet.decoders.5.0.groupnorm_merged.bias'] = s['model.diffusion_model.output_blocks.5.0.out_layers.0.bias']
+ new['diffusion']['unet.decoders.5.0.conv_merged.weight'] = s['model.diffusion_model.output_blocks.5.0.out_layers.3.weight']
+ new['diffusion']['unet.decoders.5.0.conv_merged.bias'] = s['model.diffusion_model.output_blocks.5.0.out_layers.3.bias']
+ new['diffusion']['unet.decoders.5.0.residual_layer.weight'] = s['model.diffusion_model.output_blocks.5.0.skip_connection.weight']
+ new['diffusion']['unet.decoders.5.0.residual_layer.bias'] = s['model.diffusion_model.output_blocks.5.0.skip_connection.bias']
+ new['diffusion']['unet.decoders.5.1.groupnorm.weight'] = s['model.diffusion_model.output_blocks.5.1.norm.weight']
+ new['diffusion']['unet.decoders.5.1.groupnorm.bias'] = s['model.diffusion_model.output_blocks.5.1.norm.bias']
+ new['diffusion']['unet.decoders.5.1.conv_input.weight'] = s['model.diffusion_model.output_blocks.5.1.proj_in.weight']
+ new['diffusion']['unet.decoders.5.1.conv_input.bias'] = s['model.diffusion_model.output_blocks.5.1.proj_in.bias']
+ new['diffusion']['unet.decoders.5.1.attention_1.out_proj.weight'] = s['model.diffusion_model.output_blocks.5.1.transformer_blocks.0.attn1.to_out.0.weight']
+ new['diffusion']['unet.decoders.5.1.attention_1.out_proj.bias'] = s['model.diffusion_model.output_blocks.5.1.transformer_blocks.0.attn1.to_out.0.bias']
+ new['diffusion']['unet.decoders.5.1.linear_geglu_1.weight'] = s['model.diffusion_model.output_blocks.5.1.transformer_blocks.0.ff.net.0.proj.weight']
+ new['diffusion']['unet.decoders.5.1.linear_geglu_1.bias'] = s['model.diffusion_model.output_blocks.5.1.transformer_blocks.0.ff.net.0.proj.bias']
+ new['diffusion']['unet.decoders.5.1.linear_geglu_2.weight'] = s['model.diffusion_model.output_blocks.5.1.transformer_blocks.0.ff.net.2.weight']
+ new['diffusion']['unet.decoders.5.1.linear_geglu_2.bias'] = s['model.diffusion_model.output_blocks.5.1.transformer_blocks.0.ff.net.2.bias']
+ new['diffusion']['unet.decoders.5.1.attention_2.q_proj.weight'] = s['model.diffusion_model.output_blocks.5.1.transformer_blocks.0.attn2.to_q.weight']
+ new['diffusion']['unet.decoders.5.1.attention_2.k_proj.weight'] = s['model.diffusion_model.output_blocks.5.1.transformer_blocks.0.attn2.to_k.weight']
+ new['diffusion']['unet.decoders.5.1.attention_2.v_proj.weight'] = s['model.diffusion_model.output_blocks.5.1.transformer_blocks.0.attn2.to_v.weight']
+ new['diffusion']['unet.decoders.5.1.attention_2.out_proj.weight'] = s['model.diffusion_model.output_blocks.5.1.transformer_blocks.0.attn2.to_out.0.weight']
+ new['diffusion']['unet.decoders.5.1.attention_2.out_proj.bias'] = s['model.diffusion_model.output_blocks.5.1.transformer_blocks.0.attn2.to_out.0.bias']
+ new['diffusion']['unet.decoders.5.1.layernorm_1.weight'] = s['model.diffusion_model.output_blocks.5.1.transformer_blocks.0.norm1.weight']
+ new['diffusion']['unet.decoders.5.1.layernorm_1.bias'] = s['model.diffusion_model.output_blocks.5.1.transformer_blocks.0.norm1.bias']
+ new['diffusion']['unet.decoders.5.1.layernorm_2.weight'] = s['model.diffusion_model.output_blocks.5.1.transformer_blocks.0.norm2.weight']
+ new['diffusion']['unet.decoders.5.1.layernorm_2.bias'] = s['model.diffusion_model.output_blocks.5.1.transformer_blocks.0.norm2.bias']
+ new['diffusion']['unet.decoders.5.1.layernorm_3.weight'] = s['model.diffusion_model.output_blocks.5.1.transformer_blocks.0.norm3.weight']
+ new['diffusion']['unet.decoders.5.1.layernorm_3.bias'] = s['model.diffusion_model.output_blocks.5.1.transformer_blocks.0.norm3.bias']
+ new['diffusion']['unet.decoders.5.1.conv_output.weight'] = s['model.diffusion_model.output_blocks.5.1.proj_out.weight']
+ new['diffusion']['unet.decoders.5.1.conv_output.bias'] = s['model.diffusion_model.output_blocks.5.1.proj_out.bias']
+ new['diffusion']['unet.decoders.5.2.conv.weight'] = s['model.diffusion_model.output_blocks.5.2.conv.weight']
+ new['diffusion']['unet.decoders.5.2.conv.bias'] = s['model.diffusion_model.output_blocks.5.2.conv.bias']
+ new['diffusion']['unet.decoders.6.0.groupnorm_feature.weight'] = s['model.diffusion_model.output_blocks.6.0.in_layers.0.weight']
+ new['diffusion']['unet.decoders.6.0.groupnorm_feature.bias'] = s['model.diffusion_model.output_blocks.6.0.in_layers.0.bias']
+ new['diffusion']['unet.decoders.6.0.conv_feature.weight'] = s['model.diffusion_model.output_blocks.6.0.in_layers.2.weight']
+ new['diffusion']['unet.decoders.6.0.conv_feature.bias'] = s['model.diffusion_model.output_blocks.6.0.in_layers.2.bias']
+ new['diffusion']['unet.decoders.6.0.linear_time.weight'] = s['model.diffusion_model.output_blocks.6.0.emb_layers.1.weight']
+ new['diffusion']['unet.decoders.6.0.linear_time.bias'] = s['model.diffusion_model.output_blocks.6.0.emb_layers.1.bias']
+ new['diffusion']['unet.decoders.6.0.groupnorm_merged.weight'] = s['model.diffusion_model.output_blocks.6.0.out_layers.0.weight']
+ new['diffusion']['unet.decoders.6.0.groupnorm_merged.bias'] = s['model.diffusion_model.output_blocks.6.0.out_layers.0.bias']
+ new['diffusion']['unet.decoders.6.0.conv_merged.weight'] = s['model.diffusion_model.output_blocks.6.0.out_layers.3.weight']
+ new['diffusion']['unet.decoders.6.0.conv_merged.bias'] = s['model.diffusion_model.output_blocks.6.0.out_layers.3.bias']
+ new['diffusion']['unet.decoders.6.0.residual_layer.weight'] = s['model.diffusion_model.output_blocks.6.0.skip_connection.weight']
+ new['diffusion']['unet.decoders.6.0.residual_layer.bias'] = s['model.diffusion_model.output_blocks.6.0.skip_connection.bias']
+ new['diffusion']['unet.decoders.6.1.groupnorm.weight'] = s['model.diffusion_model.output_blocks.6.1.norm.weight']
+ new['diffusion']['unet.decoders.6.1.groupnorm.bias'] = s['model.diffusion_model.output_blocks.6.1.norm.bias']
+ new['diffusion']['unet.decoders.6.1.conv_input.weight'] = s['model.diffusion_model.output_blocks.6.1.proj_in.weight']
+ new['diffusion']['unet.decoders.6.1.conv_input.bias'] = s['model.diffusion_model.output_blocks.6.1.proj_in.bias']
+ new['diffusion']['unet.decoders.6.1.attention_1.out_proj.weight'] = s['model.diffusion_model.output_blocks.6.1.transformer_blocks.0.attn1.to_out.0.weight']
+ new['diffusion']['unet.decoders.6.1.attention_1.out_proj.bias'] = s['model.diffusion_model.output_blocks.6.1.transformer_blocks.0.attn1.to_out.0.bias']
+ new['diffusion']['unet.decoders.6.1.linear_geglu_1.weight'] = s['model.diffusion_model.output_blocks.6.1.transformer_blocks.0.ff.net.0.proj.weight']
+ new['diffusion']['unet.decoders.6.1.linear_geglu_1.bias'] = s['model.diffusion_model.output_blocks.6.1.transformer_blocks.0.ff.net.0.proj.bias']
+ new['diffusion']['unet.decoders.6.1.linear_geglu_2.weight'] = s['model.diffusion_model.output_blocks.6.1.transformer_blocks.0.ff.net.2.weight']
+ new['diffusion']['unet.decoders.6.1.linear_geglu_2.bias'] = s['model.diffusion_model.output_blocks.6.1.transformer_blocks.0.ff.net.2.bias']
+ new['diffusion']['unet.decoders.6.1.attention_2.q_proj.weight'] = s['model.diffusion_model.output_blocks.6.1.transformer_blocks.0.attn2.to_q.weight']
+ new['diffusion']['unet.decoders.6.1.attention_2.k_proj.weight'] = s['model.diffusion_model.output_blocks.6.1.transformer_blocks.0.attn2.to_k.weight']
+ new['diffusion']['unet.decoders.6.1.attention_2.v_proj.weight'] = s['model.diffusion_model.output_blocks.6.1.transformer_blocks.0.attn2.to_v.weight']
+ new['diffusion']['unet.decoders.6.1.attention_2.out_proj.weight'] = s['model.diffusion_model.output_blocks.6.1.transformer_blocks.0.attn2.to_out.0.weight']
+ new['diffusion']['unet.decoders.6.1.attention_2.out_proj.bias'] = s['model.diffusion_model.output_blocks.6.1.transformer_blocks.0.attn2.to_out.0.bias']
+ new['diffusion']['unet.decoders.6.1.layernorm_1.weight'] = s['model.diffusion_model.output_blocks.6.1.transformer_blocks.0.norm1.weight']
+ new['diffusion']['unet.decoders.6.1.layernorm_1.bias'] = s['model.diffusion_model.output_blocks.6.1.transformer_blocks.0.norm1.bias']
+ new['diffusion']['unet.decoders.6.1.layernorm_2.weight'] = s['model.diffusion_model.output_blocks.6.1.transformer_blocks.0.norm2.weight']
+ new['diffusion']['unet.decoders.6.1.layernorm_2.bias'] = s['model.diffusion_model.output_blocks.6.1.transformer_blocks.0.norm2.bias']
+ new['diffusion']['unet.decoders.6.1.layernorm_3.weight'] = s['model.diffusion_model.output_blocks.6.1.transformer_blocks.0.norm3.weight']
+ new['diffusion']['unet.decoders.6.1.layernorm_3.bias'] = s['model.diffusion_model.output_blocks.6.1.transformer_blocks.0.norm3.bias']
+ new['diffusion']['unet.decoders.6.1.conv_output.weight'] = s['model.diffusion_model.output_blocks.6.1.proj_out.weight']
+ new['diffusion']['unet.decoders.6.1.conv_output.bias'] = s['model.diffusion_model.output_blocks.6.1.proj_out.bias']
+ new['diffusion']['unet.decoders.7.0.groupnorm_feature.weight'] = s['model.diffusion_model.output_blocks.7.0.in_layers.0.weight']
+ new['diffusion']['unet.decoders.7.0.groupnorm_feature.bias'] = s['model.diffusion_model.output_blocks.7.0.in_layers.0.bias']
+ new['diffusion']['unet.decoders.7.0.conv_feature.weight'] = s['model.diffusion_model.output_blocks.7.0.in_layers.2.weight']
+ new['diffusion']['unet.decoders.7.0.conv_feature.bias'] = s['model.diffusion_model.output_blocks.7.0.in_layers.2.bias']
+ new['diffusion']['unet.decoders.7.0.linear_time.weight'] = s['model.diffusion_model.output_blocks.7.0.emb_layers.1.weight']
+ new['diffusion']['unet.decoders.7.0.linear_time.bias'] = s['model.diffusion_model.output_blocks.7.0.emb_layers.1.bias']
+ new['diffusion']['unet.decoders.7.0.groupnorm_merged.weight'] = s['model.diffusion_model.output_blocks.7.0.out_layers.0.weight']
+ new['diffusion']['unet.decoders.7.0.groupnorm_merged.bias'] = s['model.diffusion_model.output_blocks.7.0.out_layers.0.bias']
+ new['diffusion']['unet.decoders.7.0.conv_merged.weight'] = s['model.diffusion_model.output_blocks.7.0.out_layers.3.weight']
+ new['diffusion']['unet.decoders.7.0.conv_merged.bias'] = s['model.diffusion_model.output_blocks.7.0.out_layers.3.bias']
+ new['diffusion']['unet.decoders.7.0.residual_layer.weight'] = s['model.diffusion_model.output_blocks.7.0.skip_connection.weight']
+ new['diffusion']['unet.decoders.7.0.residual_layer.bias'] = s['model.diffusion_model.output_blocks.7.0.skip_connection.bias']
+ new['diffusion']['unet.decoders.7.1.groupnorm.weight'] = s['model.diffusion_model.output_blocks.7.1.norm.weight']
+ new['diffusion']['unet.decoders.7.1.groupnorm.bias'] = s['model.diffusion_model.output_blocks.7.1.norm.bias']
+ new['diffusion']['unet.decoders.7.1.conv_input.weight'] = s['model.diffusion_model.output_blocks.7.1.proj_in.weight']
+ new['diffusion']['unet.decoders.7.1.conv_input.bias'] = s['model.diffusion_model.output_blocks.7.1.proj_in.bias']
+ new['diffusion']['unet.decoders.7.1.attention_1.out_proj.weight'] = s['model.diffusion_model.output_blocks.7.1.transformer_blocks.0.attn1.to_out.0.weight']
+ new['diffusion']['unet.decoders.7.1.attention_1.out_proj.bias'] = s['model.diffusion_model.output_blocks.7.1.transformer_blocks.0.attn1.to_out.0.bias']
+ new['diffusion']['unet.decoders.7.1.linear_geglu_1.weight'] = s['model.diffusion_model.output_blocks.7.1.transformer_blocks.0.ff.net.0.proj.weight']
+ new['diffusion']['unet.decoders.7.1.linear_geglu_1.bias'] = s['model.diffusion_model.output_blocks.7.1.transformer_blocks.0.ff.net.0.proj.bias']
+ new['diffusion']['unet.decoders.7.1.linear_geglu_2.weight'] = s['model.diffusion_model.output_blocks.7.1.transformer_blocks.0.ff.net.2.weight']
+ new['diffusion']['unet.decoders.7.1.linear_geglu_2.bias'] = s['model.diffusion_model.output_blocks.7.1.transformer_blocks.0.ff.net.2.bias']
+ new['diffusion']['unet.decoders.7.1.attention_2.q_proj.weight'] = s['model.diffusion_model.output_blocks.7.1.transformer_blocks.0.attn2.to_q.weight']
+ new['diffusion']['unet.decoders.7.1.attention_2.k_proj.weight'] = s['model.diffusion_model.output_blocks.7.1.transformer_blocks.0.attn2.to_k.weight']
+ new['diffusion']['unet.decoders.7.1.attention_2.v_proj.weight'] = s['model.diffusion_model.output_blocks.7.1.transformer_blocks.0.attn2.to_v.weight']
+ new['diffusion']['unet.decoders.7.1.attention_2.out_proj.weight'] = s['model.diffusion_model.output_blocks.7.1.transformer_blocks.0.attn2.to_out.0.weight']
+ new['diffusion']['unet.decoders.7.1.attention_2.out_proj.bias'] = s['model.diffusion_model.output_blocks.7.1.transformer_blocks.0.attn2.to_out.0.bias']
+ new['diffusion']['unet.decoders.7.1.layernorm_1.weight'] = s['model.diffusion_model.output_blocks.7.1.transformer_blocks.0.norm1.weight']
+ new['diffusion']['unet.decoders.7.1.layernorm_1.bias'] = s['model.diffusion_model.output_blocks.7.1.transformer_blocks.0.norm1.bias']
+ new['diffusion']['unet.decoders.7.1.layernorm_2.weight'] = s['model.diffusion_model.output_blocks.7.1.transformer_blocks.0.norm2.weight']
+ new['diffusion']['unet.decoders.7.1.layernorm_2.bias'] = s['model.diffusion_model.output_blocks.7.1.transformer_blocks.0.norm2.bias']
+ new['diffusion']['unet.decoders.7.1.layernorm_3.weight'] = s['model.diffusion_model.output_blocks.7.1.transformer_blocks.0.norm3.weight']
+ new['diffusion']['unet.decoders.7.1.layernorm_3.bias'] = s['model.diffusion_model.output_blocks.7.1.transformer_blocks.0.norm3.bias']
+ new['diffusion']['unet.decoders.7.1.conv_output.weight'] = s['model.diffusion_model.output_blocks.7.1.proj_out.weight']
+ new['diffusion']['unet.decoders.7.1.conv_output.bias'] = s['model.diffusion_model.output_blocks.7.1.proj_out.bias']
+ new['diffusion']['unet.decoders.8.0.groupnorm_feature.weight'] = s['model.diffusion_model.output_blocks.8.0.in_layers.0.weight']
+ new['diffusion']['unet.decoders.8.0.groupnorm_feature.bias'] = s['model.diffusion_model.output_blocks.8.0.in_layers.0.bias']
+ new['diffusion']['unet.decoders.8.0.conv_feature.weight'] = s['model.diffusion_model.output_blocks.8.0.in_layers.2.weight']
+ new['diffusion']['unet.decoders.8.0.conv_feature.bias'] = s['model.diffusion_model.output_blocks.8.0.in_layers.2.bias']
+ new['diffusion']['unet.decoders.8.0.linear_time.weight'] = s['model.diffusion_model.output_blocks.8.0.emb_layers.1.weight']
+ new['diffusion']['unet.decoders.8.0.linear_time.bias'] = s['model.diffusion_model.output_blocks.8.0.emb_layers.1.bias']
+ new['diffusion']['unet.decoders.8.0.groupnorm_merged.weight'] = s['model.diffusion_model.output_blocks.8.0.out_layers.0.weight']
+ new['diffusion']['unet.decoders.8.0.groupnorm_merged.bias'] = s['model.diffusion_model.output_blocks.8.0.out_layers.0.bias']
+ new['diffusion']['unet.decoders.8.0.conv_merged.weight'] = s['model.diffusion_model.output_blocks.8.0.out_layers.3.weight']
+ new['diffusion']['unet.decoders.8.0.conv_merged.bias'] = s['model.diffusion_model.output_blocks.8.0.out_layers.3.bias']
+ new['diffusion']['unet.decoders.8.0.residual_layer.weight'] = s['model.diffusion_model.output_blocks.8.0.skip_connection.weight']
+ new['diffusion']['unet.decoders.8.0.residual_layer.bias'] = s['model.diffusion_model.output_blocks.8.0.skip_connection.bias']
+ new['diffusion']['unet.decoders.8.1.groupnorm.weight'] = s['model.diffusion_model.output_blocks.8.1.norm.weight']
+ new['diffusion']['unet.decoders.8.1.groupnorm.bias'] = s['model.diffusion_model.output_blocks.8.1.norm.bias']
+ new['diffusion']['unet.decoders.8.1.conv_input.weight'] = s['model.diffusion_model.output_blocks.8.1.proj_in.weight']
+ new['diffusion']['unet.decoders.8.1.conv_input.bias'] = s['model.diffusion_model.output_blocks.8.1.proj_in.bias']
+ new['diffusion']['unet.decoders.8.1.attention_1.out_proj.weight'] = s['model.diffusion_model.output_blocks.8.1.transformer_blocks.0.attn1.to_out.0.weight']
+ new['diffusion']['unet.decoders.8.1.attention_1.out_proj.bias'] = s['model.diffusion_model.output_blocks.8.1.transformer_blocks.0.attn1.to_out.0.bias']
+ new['diffusion']['unet.decoders.8.1.linear_geglu_1.weight'] = s['model.diffusion_model.output_blocks.8.1.transformer_blocks.0.ff.net.0.proj.weight']
+ new['diffusion']['unet.decoders.8.1.linear_geglu_1.bias'] = s['model.diffusion_model.output_blocks.8.1.transformer_blocks.0.ff.net.0.proj.bias']
+ new['diffusion']['unet.decoders.8.1.linear_geglu_2.weight'] = s['model.diffusion_model.output_blocks.8.1.transformer_blocks.0.ff.net.2.weight']
+ new['diffusion']['unet.decoders.8.1.linear_geglu_2.bias'] = s['model.diffusion_model.output_blocks.8.1.transformer_blocks.0.ff.net.2.bias']
+ new['diffusion']['unet.decoders.8.1.attention_2.q_proj.weight'] = s['model.diffusion_model.output_blocks.8.1.transformer_blocks.0.attn2.to_q.weight']
+ new['diffusion']['unet.decoders.8.1.attention_2.k_proj.weight'] = s['model.diffusion_model.output_blocks.8.1.transformer_blocks.0.attn2.to_k.weight']
+ new['diffusion']['unet.decoders.8.1.attention_2.v_proj.weight'] = s['model.diffusion_model.output_blocks.8.1.transformer_blocks.0.attn2.to_v.weight']
+ new['diffusion']['unet.decoders.8.1.attention_2.out_proj.weight'] = s['model.diffusion_model.output_blocks.8.1.transformer_blocks.0.attn2.to_out.0.weight']
+ new['diffusion']['unet.decoders.8.1.attention_2.out_proj.bias'] = s['model.diffusion_model.output_blocks.8.1.transformer_blocks.0.attn2.to_out.0.bias']
+ new['diffusion']['unet.decoders.8.1.layernorm_1.weight'] = s['model.diffusion_model.output_blocks.8.1.transformer_blocks.0.norm1.weight']
+ new['diffusion']['unet.decoders.8.1.layernorm_1.bias'] = s['model.diffusion_model.output_blocks.8.1.transformer_blocks.0.norm1.bias']
+ new['diffusion']['unet.decoders.8.1.layernorm_2.weight'] = s['model.diffusion_model.output_blocks.8.1.transformer_blocks.0.norm2.weight']
+ new['diffusion']['unet.decoders.8.1.layernorm_2.bias'] = s['model.diffusion_model.output_blocks.8.1.transformer_blocks.0.norm2.bias']
+ new['diffusion']['unet.decoders.8.1.layernorm_3.weight'] = s['model.diffusion_model.output_blocks.8.1.transformer_blocks.0.norm3.weight']
+ new['diffusion']['unet.decoders.8.1.layernorm_3.bias'] = s['model.diffusion_model.output_blocks.8.1.transformer_blocks.0.norm3.bias']
+ new['diffusion']['unet.decoders.8.1.conv_output.weight'] = s['model.diffusion_model.output_blocks.8.1.proj_out.weight']
+ new['diffusion']['unet.decoders.8.1.conv_output.bias'] = s['model.diffusion_model.output_blocks.8.1.proj_out.bias']
+ new['diffusion']['unet.decoders.8.2.conv.weight'] = s['model.diffusion_model.output_blocks.8.2.conv.weight']
+ new['diffusion']['unet.decoders.8.2.conv.bias'] = s['model.diffusion_model.output_blocks.8.2.conv.bias']
+ new['diffusion']['unet.decoders.9.0.groupnorm_feature.weight'] = s['model.diffusion_model.output_blocks.9.0.in_layers.0.weight']
+ new['diffusion']['unet.decoders.9.0.groupnorm_feature.bias'] = s['model.diffusion_model.output_blocks.9.0.in_layers.0.bias']
+ new['diffusion']['unet.decoders.9.0.conv_feature.weight'] = s['model.diffusion_model.output_blocks.9.0.in_layers.2.weight']
+ new['diffusion']['unet.decoders.9.0.conv_feature.bias'] = s['model.diffusion_model.output_blocks.9.0.in_layers.2.bias']
+ new['diffusion']['unet.decoders.9.0.linear_time.weight'] = s['model.diffusion_model.output_blocks.9.0.emb_layers.1.weight']
+ new['diffusion']['unet.decoders.9.0.linear_time.bias'] = s['model.diffusion_model.output_blocks.9.0.emb_layers.1.bias']
+ new['diffusion']['unet.decoders.9.0.groupnorm_merged.weight'] = s['model.diffusion_model.output_blocks.9.0.out_layers.0.weight']
+ new['diffusion']['unet.decoders.9.0.groupnorm_merged.bias'] = s['model.diffusion_model.output_blocks.9.0.out_layers.0.bias']
+ new['diffusion']['unet.decoders.9.0.conv_merged.weight'] = s['model.diffusion_model.output_blocks.9.0.out_layers.3.weight']
+ new['diffusion']['unet.decoders.9.0.conv_merged.bias'] = s['model.diffusion_model.output_blocks.9.0.out_layers.3.bias']
+ new['diffusion']['unet.decoders.9.0.residual_layer.weight'] = s['model.diffusion_model.output_blocks.9.0.skip_connection.weight']
+ new['diffusion']['unet.decoders.9.0.residual_layer.bias'] = s['model.diffusion_model.output_blocks.9.0.skip_connection.bias']
+ new['diffusion']['unet.decoders.9.1.groupnorm.weight'] = s['model.diffusion_model.output_blocks.9.1.norm.weight']
+ new['diffusion']['unet.decoders.9.1.groupnorm.bias'] = s['model.diffusion_model.output_blocks.9.1.norm.bias']
+ new['diffusion']['unet.decoders.9.1.conv_input.weight'] = s['model.diffusion_model.output_blocks.9.1.proj_in.weight']
+ new['diffusion']['unet.decoders.9.1.conv_input.bias'] = s['model.diffusion_model.output_blocks.9.1.proj_in.bias']
+ new['diffusion']['unet.decoders.9.1.attention_1.out_proj.weight'] = s['model.diffusion_model.output_blocks.9.1.transformer_blocks.0.attn1.to_out.0.weight']
+ new['diffusion']['unet.decoders.9.1.attention_1.out_proj.bias'] = s['model.diffusion_model.output_blocks.9.1.transformer_blocks.0.attn1.to_out.0.bias']
+ new['diffusion']['unet.decoders.9.1.linear_geglu_1.weight'] = s['model.diffusion_model.output_blocks.9.1.transformer_blocks.0.ff.net.0.proj.weight']
+ new['diffusion']['unet.decoders.9.1.linear_geglu_1.bias'] = s['model.diffusion_model.output_blocks.9.1.transformer_blocks.0.ff.net.0.proj.bias']
+ new['diffusion']['unet.decoders.9.1.linear_geglu_2.weight'] = s['model.diffusion_model.output_blocks.9.1.transformer_blocks.0.ff.net.2.weight']
+ new['diffusion']['unet.decoders.9.1.linear_geglu_2.bias'] = s['model.diffusion_model.output_blocks.9.1.transformer_blocks.0.ff.net.2.bias']
+ new['diffusion']['unet.decoders.9.1.attention_2.q_proj.weight'] = s['model.diffusion_model.output_blocks.9.1.transformer_blocks.0.attn2.to_q.weight']
+ new['diffusion']['unet.decoders.9.1.attention_2.k_proj.weight'] = s['model.diffusion_model.output_blocks.9.1.transformer_blocks.0.attn2.to_k.weight']
+ new['diffusion']['unet.decoders.9.1.attention_2.v_proj.weight'] = s['model.diffusion_model.output_blocks.9.1.transformer_blocks.0.attn2.to_v.weight']
+ new['diffusion']['unet.decoders.9.1.attention_2.out_proj.weight'] = s['model.diffusion_model.output_blocks.9.1.transformer_blocks.0.attn2.to_out.0.weight']
+ new['diffusion']['unet.decoders.9.1.attention_2.out_proj.bias'] = s['model.diffusion_model.output_blocks.9.1.transformer_blocks.0.attn2.to_out.0.bias']
+ new['diffusion']['unet.decoders.9.1.layernorm_1.weight'] = s['model.diffusion_model.output_blocks.9.1.transformer_blocks.0.norm1.weight']
+ new['diffusion']['unet.decoders.9.1.layernorm_1.bias'] = s['model.diffusion_model.output_blocks.9.1.transformer_blocks.0.norm1.bias']
+ new['diffusion']['unet.decoders.9.1.layernorm_2.weight'] = s['model.diffusion_model.output_blocks.9.1.transformer_blocks.0.norm2.weight']
+ new['diffusion']['unet.decoders.9.1.layernorm_2.bias'] = s['model.diffusion_model.output_blocks.9.1.transformer_blocks.0.norm2.bias']
+ new['diffusion']['unet.decoders.9.1.layernorm_3.weight'] = s['model.diffusion_model.output_blocks.9.1.transformer_blocks.0.norm3.weight']
+ new['diffusion']['unet.decoders.9.1.layernorm_3.bias'] = s['model.diffusion_model.output_blocks.9.1.transformer_blocks.0.norm3.bias']
+ new['diffusion']['unet.decoders.9.1.conv_output.weight'] = s['model.diffusion_model.output_blocks.9.1.proj_out.weight']
+ new['diffusion']['unet.decoders.9.1.conv_output.bias'] = s['model.diffusion_model.output_blocks.9.1.proj_out.bias']
+ new['diffusion']['unet.decoders.10.0.groupnorm_feature.weight'] = s['model.diffusion_model.output_blocks.10.0.in_layers.0.weight']
+ new['diffusion']['unet.decoders.10.0.groupnorm_feature.bias'] = s['model.diffusion_model.output_blocks.10.0.in_layers.0.bias']
+ new['diffusion']['unet.decoders.10.0.conv_feature.weight'] = s['model.diffusion_model.output_blocks.10.0.in_layers.2.weight']
+ new['diffusion']['unet.decoders.10.0.conv_feature.bias'] = s['model.diffusion_model.output_blocks.10.0.in_layers.2.bias']
+ new['diffusion']['unet.decoders.10.0.linear_time.weight'] = s['model.diffusion_model.output_blocks.10.0.emb_layers.1.weight']
+ new['diffusion']['unet.decoders.10.0.linear_time.bias'] = s['model.diffusion_model.output_blocks.10.0.emb_layers.1.bias']
+ new['diffusion']['unet.decoders.10.0.groupnorm_merged.weight'] = s['model.diffusion_model.output_blocks.10.0.out_layers.0.weight']
+ new['diffusion']['unet.decoders.10.0.groupnorm_merged.bias'] = s['model.diffusion_model.output_blocks.10.0.out_layers.0.bias']
+ new['diffusion']['unet.decoders.10.0.conv_merged.weight'] = s['model.diffusion_model.output_blocks.10.0.out_layers.3.weight']
+ new['diffusion']['unet.decoders.10.0.conv_merged.bias'] = s['model.diffusion_model.output_blocks.10.0.out_layers.3.bias']
+ new['diffusion']['unet.decoders.10.0.residual_layer.weight'] = s['model.diffusion_model.output_blocks.10.0.skip_connection.weight']
+ new['diffusion']['unet.decoders.10.0.residual_layer.bias'] = s['model.diffusion_model.output_blocks.10.0.skip_connection.bias']
+ new['diffusion']['unet.decoders.10.1.groupnorm.weight'] = s['model.diffusion_model.output_blocks.10.1.norm.weight']
+ new['diffusion']['unet.decoders.10.1.groupnorm.bias'] = s['model.diffusion_model.output_blocks.10.1.norm.bias']
+ new['diffusion']['unet.decoders.10.1.conv_input.weight'] = s['model.diffusion_model.output_blocks.10.1.proj_in.weight']
+ new['diffusion']['unet.decoders.10.1.conv_input.bias'] = s['model.diffusion_model.output_blocks.10.1.proj_in.bias']
+ new['diffusion']['unet.decoders.10.1.attention_1.out_proj.weight'] = s['model.diffusion_model.output_blocks.10.1.transformer_blocks.0.attn1.to_out.0.weight']
+ new['diffusion']['unet.decoders.10.1.attention_1.out_proj.bias'] = s['model.diffusion_model.output_blocks.10.1.transformer_blocks.0.attn1.to_out.0.bias']
+ new['diffusion']['unet.decoders.10.1.linear_geglu_1.weight'] = s['model.diffusion_model.output_blocks.10.1.transformer_blocks.0.ff.net.0.proj.weight']
+ new['diffusion']['unet.decoders.10.1.linear_geglu_1.bias'] = s['model.diffusion_model.output_blocks.10.1.transformer_blocks.0.ff.net.0.proj.bias']
+ new['diffusion']['unet.decoders.10.1.linear_geglu_2.weight'] = s['model.diffusion_model.output_blocks.10.1.transformer_blocks.0.ff.net.2.weight']
+ new['diffusion']['unet.decoders.10.1.linear_geglu_2.bias'] = s['model.diffusion_model.output_blocks.10.1.transformer_blocks.0.ff.net.2.bias']
+ new['diffusion']['unet.decoders.10.1.attention_2.q_proj.weight'] = s['model.diffusion_model.output_blocks.10.1.transformer_blocks.0.attn2.to_q.weight']
+ new['diffusion']['unet.decoders.10.1.attention_2.k_proj.weight'] = s['model.diffusion_model.output_blocks.10.1.transformer_blocks.0.attn2.to_k.weight']
+ new['diffusion']['unet.decoders.10.1.attention_2.v_proj.weight'] = s['model.diffusion_model.output_blocks.10.1.transformer_blocks.0.attn2.to_v.weight']
+ new['diffusion']['unet.decoders.10.1.attention_2.out_proj.weight'] = s['model.diffusion_model.output_blocks.10.1.transformer_blocks.0.attn2.to_out.0.weight']
+ new['diffusion']['unet.decoders.10.1.attention_2.out_proj.bias'] = s['model.diffusion_model.output_blocks.10.1.transformer_blocks.0.attn2.to_out.0.bias']
+ new['diffusion']['unet.decoders.10.1.layernorm_1.weight'] = s['model.diffusion_model.output_blocks.10.1.transformer_blocks.0.norm1.weight']
+ new['diffusion']['unet.decoders.10.1.layernorm_1.bias'] = s['model.diffusion_model.output_blocks.10.1.transformer_blocks.0.norm1.bias']
+ new['diffusion']['unet.decoders.10.1.layernorm_2.weight'] = s['model.diffusion_model.output_blocks.10.1.transformer_blocks.0.norm2.weight']
+ new['diffusion']['unet.decoders.10.1.layernorm_2.bias'] = s['model.diffusion_model.output_blocks.10.1.transformer_blocks.0.norm2.bias']
+ new['diffusion']['unet.decoders.10.1.layernorm_3.weight'] = s['model.diffusion_model.output_blocks.10.1.transformer_blocks.0.norm3.weight']
+ new['diffusion']['unet.decoders.10.1.layernorm_3.bias'] = s['model.diffusion_model.output_blocks.10.1.transformer_blocks.0.norm3.bias']
+ new['diffusion']['unet.decoders.10.1.conv_output.weight'] = s['model.diffusion_model.output_blocks.10.1.proj_out.weight']
+ new['diffusion']['unet.decoders.10.1.conv_output.bias'] = s['model.diffusion_model.output_blocks.10.1.proj_out.bias']
+ new['diffusion']['unet.decoders.11.0.groupnorm_feature.weight'] = s['model.diffusion_model.output_blocks.11.0.in_layers.0.weight']
+ new['diffusion']['unet.decoders.11.0.groupnorm_feature.bias'] = s['model.diffusion_model.output_blocks.11.0.in_layers.0.bias']
+ new['diffusion']['unet.decoders.11.0.conv_feature.weight'] = s['model.diffusion_model.output_blocks.11.0.in_layers.2.weight']
+ new['diffusion']['unet.decoders.11.0.conv_feature.bias'] = s['model.diffusion_model.output_blocks.11.0.in_layers.2.bias']
+ new['diffusion']['unet.decoders.11.0.linear_time.weight'] = s['model.diffusion_model.output_blocks.11.0.emb_layers.1.weight']
+ new['diffusion']['unet.decoders.11.0.linear_time.bias'] = s['model.diffusion_model.output_blocks.11.0.emb_layers.1.bias']
+ new['diffusion']['unet.decoders.11.0.groupnorm_merged.weight'] = s['model.diffusion_model.output_blocks.11.0.out_layers.0.weight']
+ new['diffusion']['unet.decoders.11.0.groupnorm_merged.bias'] = s['model.diffusion_model.output_blocks.11.0.out_layers.0.bias']
+ new['diffusion']['unet.decoders.11.0.conv_merged.weight'] = s['model.diffusion_model.output_blocks.11.0.out_layers.3.weight']
+ new['diffusion']['unet.decoders.11.0.conv_merged.bias'] = s['model.diffusion_model.output_blocks.11.0.out_layers.3.bias']
+ new['diffusion']['unet.decoders.11.0.residual_layer.weight'] = s['model.diffusion_model.output_blocks.11.0.skip_connection.weight']
+ new['diffusion']['unet.decoders.11.0.residual_layer.bias'] = s['model.diffusion_model.output_blocks.11.0.skip_connection.bias']
+ new['diffusion']['unet.decoders.11.1.groupnorm.weight'] = s['model.diffusion_model.output_blocks.11.1.norm.weight']
+ new['diffusion']['unet.decoders.11.1.groupnorm.bias'] = s['model.diffusion_model.output_blocks.11.1.norm.bias']
+ new['diffusion']['unet.decoders.11.1.conv_input.weight'] = s['model.diffusion_model.output_blocks.11.1.proj_in.weight']
+ new['diffusion']['unet.decoders.11.1.conv_input.bias'] = s['model.diffusion_model.output_blocks.11.1.proj_in.bias']
+ new['diffusion']['unet.decoders.11.1.attention_1.out_proj.weight'] = s['model.diffusion_model.output_blocks.11.1.transformer_blocks.0.attn1.to_out.0.weight']
+ new['diffusion']['unet.decoders.11.1.attention_1.out_proj.bias'] = s['model.diffusion_model.output_blocks.11.1.transformer_blocks.0.attn1.to_out.0.bias']
+ new['diffusion']['unet.decoders.11.1.linear_geglu_1.weight'] = s['model.diffusion_model.output_blocks.11.1.transformer_blocks.0.ff.net.0.proj.weight']
+ new['diffusion']['unet.decoders.11.1.linear_geglu_1.bias'] = s['model.diffusion_model.output_blocks.11.1.transformer_blocks.0.ff.net.0.proj.bias']
+ new['diffusion']['unet.decoders.11.1.linear_geglu_2.weight'] = s['model.diffusion_model.output_blocks.11.1.transformer_blocks.0.ff.net.2.weight']
+ new['diffusion']['unet.decoders.11.1.linear_geglu_2.bias'] = s['model.diffusion_model.output_blocks.11.1.transformer_blocks.0.ff.net.2.bias']
+ new['diffusion']['unet.decoders.11.1.attention_2.q_proj.weight'] = s['model.diffusion_model.output_blocks.11.1.transformer_blocks.0.attn2.to_q.weight']
+ new['diffusion']['unet.decoders.11.1.attention_2.k_proj.weight'] = s['model.diffusion_model.output_blocks.11.1.transformer_blocks.0.attn2.to_k.weight']
+ new['diffusion']['unet.decoders.11.1.attention_2.v_proj.weight'] = s['model.diffusion_model.output_blocks.11.1.transformer_blocks.0.attn2.to_v.weight']
+ new['diffusion']['unet.decoders.11.1.attention_2.out_proj.weight'] = s['model.diffusion_model.output_blocks.11.1.transformer_blocks.0.attn2.to_out.0.weight']
+ new['diffusion']['unet.decoders.11.1.attention_2.out_proj.bias'] = s['model.diffusion_model.output_blocks.11.1.transformer_blocks.0.attn2.to_out.0.bias']
+ new['diffusion']['unet.decoders.11.1.layernorm_1.weight'] = s['model.diffusion_model.output_blocks.11.1.transformer_blocks.0.norm1.weight']
+ new['diffusion']['unet.decoders.11.1.layernorm_1.bias'] = s['model.diffusion_model.output_blocks.11.1.transformer_blocks.0.norm1.bias']
+ new['diffusion']['unet.decoders.11.1.layernorm_2.weight'] = s['model.diffusion_model.output_blocks.11.1.transformer_blocks.0.norm2.weight']
+ new['diffusion']['unet.decoders.11.1.layernorm_2.bias'] = s['model.diffusion_model.output_blocks.11.1.transformer_blocks.0.norm2.bias']
+ new['diffusion']['unet.decoders.11.1.layernorm_3.weight'] = s['model.diffusion_model.output_blocks.11.1.transformer_blocks.0.norm3.weight']
+ new['diffusion']['unet.decoders.11.1.layernorm_3.bias'] = s['model.diffusion_model.output_blocks.11.1.transformer_blocks.0.norm3.bias']
+ new['diffusion']['unet.decoders.11.1.conv_output.weight'] = s['model.diffusion_model.output_blocks.11.1.proj_out.weight']
+ new['diffusion']['unet.decoders.11.1.conv_output.bias'] = s['model.diffusion_model.output_blocks.11.1.proj_out.bias']
+ new['diffusion']['final.groupnorm.weight'] = s['model.diffusion_model.out.0.weight']
+ new['diffusion']['final.groupnorm.bias'] = s['model.diffusion_model.out.0.bias']
+ new['diffusion']['final.conv.weight'] = s['model.diffusion_model.out.2.weight']
+ new['diffusion']['final.conv.bias'] = s['model.diffusion_model.out.2.bias']
+ new['encoder']['0.weight'] = s['first_stage_model.encoder.conv_in.weight']
+ new['encoder']['0.bias'] = s['first_stage_model.encoder.conv_in.bias']
+ new['encoder']['1.groupnorm_1.weight'] = s['first_stage_model.encoder.down.0.block.0.norm1.weight']
+ new['encoder']['1.groupnorm_1.bias'] = s['first_stage_model.encoder.down.0.block.0.norm1.bias']
+ new['encoder']['1.conv_1.weight'] = s['first_stage_model.encoder.down.0.block.0.conv1.weight']
+ new['encoder']['1.conv_1.bias'] = s['first_stage_model.encoder.down.0.block.0.conv1.bias']
+ new['encoder']['1.groupnorm_2.weight'] = s['first_stage_model.encoder.down.0.block.0.norm2.weight']
+ new['encoder']['1.groupnorm_2.bias'] = s['first_stage_model.encoder.down.0.block.0.norm2.bias']
+ new['encoder']['1.conv_2.weight'] = s['first_stage_model.encoder.down.0.block.0.conv2.weight']
+ new['encoder']['1.conv_2.bias'] = s['first_stage_model.encoder.down.0.block.0.conv2.bias']
+ new['encoder']['2.groupnorm_1.weight'] = s['first_stage_model.encoder.down.0.block.1.norm1.weight']
+ new['encoder']['2.groupnorm_1.bias'] = s['first_stage_model.encoder.down.0.block.1.norm1.bias']
+ new['encoder']['2.conv_1.weight'] = s['first_stage_model.encoder.down.0.block.1.conv1.weight']
+ new['encoder']['2.conv_1.bias'] = s['first_stage_model.encoder.down.0.block.1.conv1.bias']
+ new['encoder']['2.groupnorm_2.weight'] = s['first_stage_model.encoder.down.0.block.1.norm2.weight']
+ new['encoder']['2.groupnorm_2.bias'] = s['first_stage_model.encoder.down.0.block.1.norm2.bias']
+ new['encoder']['2.conv_2.weight'] = s['first_stage_model.encoder.down.0.block.1.conv2.weight']
+ new['encoder']['2.conv_2.bias'] = s['first_stage_model.encoder.down.0.block.1.conv2.bias']
+ new['encoder']['3.weight'] = s['first_stage_model.encoder.down.0.downsample.conv.weight']
+ new['encoder']['3.bias'] = s['first_stage_model.encoder.down.0.downsample.conv.bias']
+ new['encoder']['4.groupnorm_1.weight'] = s['first_stage_model.encoder.down.1.block.0.norm1.weight']
+ new['encoder']['4.groupnorm_1.bias'] = s['first_stage_model.encoder.down.1.block.0.norm1.bias']
+ new['encoder']['4.conv_1.weight'] = s['first_stage_model.encoder.down.1.block.0.conv1.weight']
+ new['encoder']['4.conv_1.bias'] = s['first_stage_model.encoder.down.1.block.0.conv1.bias']
+ new['encoder']['4.groupnorm_2.weight'] = s['first_stage_model.encoder.down.1.block.0.norm2.weight']
+ new['encoder']['4.groupnorm_2.bias'] = s['first_stage_model.encoder.down.1.block.0.norm2.bias']
+ new['encoder']['4.conv_2.weight'] = s['first_stage_model.encoder.down.1.block.0.conv2.weight']
+ new['encoder']['4.conv_2.bias'] = s['first_stage_model.encoder.down.1.block.0.conv2.bias']
+ new['encoder']['4.residual_layer.weight'] = s['first_stage_model.encoder.down.1.block.0.nin_shortcut.weight']
+ new['encoder']['4.residual_layer.bias'] = s['first_stage_model.encoder.down.1.block.0.nin_shortcut.bias']
+ new['encoder']['5.groupnorm_1.weight'] = s['first_stage_model.encoder.down.1.block.1.norm1.weight']
+ new['encoder']['5.groupnorm_1.bias'] = s['first_stage_model.encoder.down.1.block.1.norm1.bias']
+ new['encoder']['5.conv_1.weight'] = s['first_stage_model.encoder.down.1.block.1.conv1.weight']
+ new['encoder']['5.conv_1.bias'] = s['first_stage_model.encoder.down.1.block.1.conv1.bias']
+ new['encoder']['5.groupnorm_2.weight'] = s['first_stage_model.encoder.down.1.block.1.norm2.weight']
+ new['encoder']['5.groupnorm_2.bias'] = s['first_stage_model.encoder.down.1.block.1.norm2.bias']
+ new['encoder']['5.conv_2.weight'] = s['first_stage_model.encoder.down.1.block.1.conv2.weight']
+ new['encoder']['5.conv_2.bias'] = s['first_stage_model.encoder.down.1.block.1.conv2.bias']
+ new['encoder']['6.weight'] = s['first_stage_model.encoder.down.1.downsample.conv.weight']
+ new['encoder']['6.bias'] = s['first_stage_model.encoder.down.1.downsample.conv.bias']
+ new['encoder']['7.groupnorm_1.weight'] = s['first_stage_model.encoder.down.2.block.0.norm1.weight']
+ new['encoder']['7.groupnorm_1.bias'] = s['first_stage_model.encoder.down.2.block.0.norm1.bias']
+ new['encoder']['7.conv_1.weight'] = s['first_stage_model.encoder.down.2.block.0.conv1.weight']
+ new['encoder']['7.conv_1.bias'] = s['first_stage_model.encoder.down.2.block.0.conv1.bias']
+ new['encoder']['7.groupnorm_2.weight'] = s['first_stage_model.encoder.down.2.block.0.norm2.weight']
+ new['encoder']['7.groupnorm_2.bias'] = s['first_stage_model.encoder.down.2.block.0.norm2.bias']
+ new['encoder']['7.conv_2.weight'] = s['first_stage_model.encoder.down.2.block.0.conv2.weight']
+ new['encoder']['7.conv_2.bias'] = s['first_stage_model.encoder.down.2.block.0.conv2.bias']
+ new['encoder']['7.residual_layer.weight'] = s['first_stage_model.encoder.down.2.block.0.nin_shortcut.weight']
+ new['encoder']['7.residual_layer.bias'] = s['first_stage_model.encoder.down.2.block.0.nin_shortcut.bias']
+ new['encoder']['8.groupnorm_1.weight'] = s['first_stage_model.encoder.down.2.block.1.norm1.weight']
+ new['encoder']['8.groupnorm_1.bias'] = s['first_stage_model.encoder.down.2.block.1.norm1.bias']
+ new['encoder']['8.conv_1.weight'] = s['first_stage_model.encoder.down.2.block.1.conv1.weight']
+ new['encoder']['8.conv_1.bias'] = s['first_stage_model.encoder.down.2.block.1.conv1.bias']
+ new['encoder']['8.groupnorm_2.weight'] = s['first_stage_model.encoder.down.2.block.1.norm2.weight']
+ new['encoder']['8.groupnorm_2.bias'] = s['first_stage_model.encoder.down.2.block.1.norm2.bias']
+ new['encoder']['8.conv_2.weight'] = s['first_stage_model.encoder.down.2.block.1.conv2.weight']
+ new['encoder']['8.conv_2.bias'] = s['first_stage_model.encoder.down.2.block.1.conv2.bias']
+ new['encoder']['9.weight'] = s['first_stage_model.encoder.down.2.downsample.conv.weight']
+ new['encoder']['9.bias'] = s['first_stage_model.encoder.down.2.downsample.conv.bias']
+ new['encoder']['10.groupnorm_1.weight'] = s['first_stage_model.encoder.down.3.block.0.norm1.weight']
+ new['encoder']['10.groupnorm_1.bias'] = s['first_stage_model.encoder.down.3.block.0.norm1.bias']
+ new['encoder']['10.conv_1.weight'] = s['first_stage_model.encoder.down.3.block.0.conv1.weight']
+ new['encoder']['10.conv_1.bias'] = s['first_stage_model.encoder.down.3.block.0.conv1.bias']
+ new['encoder']['10.groupnorm_2.weight'] = s['first_stage_model.encoder.down.3.block.0.norm2.weight']
+ new['encoder']['10.groupnorm_2.bias'] = s['first_stage_model.encoder.down.3.block.0.norm2.bias']
+ new['encoder']['10.conv_2.weight'] = s['first_stage_model.encoder.down.3.block.0.conv2.weight']
+ new['encoder']['10.conv_2.bias'] = s['first_stage_model.encoder.down.3.block.0.conv2.bias']
+ new['encoder']['11.groupnorm_1.weight'] = s['first_stage_model.encoder.down.3.block.1.norm1.weight']
+ new['encoder']['11.groupnorm_1.bias'] = s['first_stage_model.encoder.down.3.block.1.norm1.bias']
+ new['encoder']['11.conv_1.weight'] = s['first_stage_model.encoder.down.3.block.1.conv1.weight']
+ new['encoder']['11.conv_1.bias'] = s['first_stage_model.encoder.down.3.block.1.conv1.bias']
+ new['encoder']['11.groupnorm_2.weight'] = s['first_stage_model.encoder.down.3.block.1.norm2.weight']
+ new['encoder']['11.groupnorm_2.bias'] = s['first_stage_model.encoder.down.3.block.1.norm2.bias']
+ new['encoder']['11.conv_2.weight'] = s['first_stage_model.encoder.down.3.block.1.conv2.weight']
+ new['encoder']['11.conv_2.bias'] = s['first_stage_model.encoder.down.3.block.1.conv2.bias']
+ new['encoder']['12.groupnorm_1.weight'] = s['first_stage_model.encoder.mid.block_1.norm1.weight']
+ new['encoder']['12.groupnorm_1.bias'] = s['first_stage_model.encoder.mid.block_1.norm1.bias']
+ new['encoder']['12.conv_1.weight'] = s['first_stage_model.encoder.mid.block_1.conv1.weight']
+ new['encoder']['12.conv_1.bias'] = s['first_stage_model.encoder.mid.block_1.conv1.bias']
+ new['encoder']['12.groupnorm_2.weight'] = s['first_stage_model.encoder.mid.block_1.norm2.weight']
+ new['encoder']['12.groupnorm_2.bias'] = s['first_stage_model.encoder.mid.block_1.norm2.bias']
+ new['encoder']['12.conv_2.weight'] = s['first_stage_model.encoder.mid.block_1.conv2.weight']
+ new['encoder']['12.conv_2.bias'] = s['first_stage_model.encoder.mid.block_1.conv2.bias']
+ new['encoder']['13.groupnorm.weight'] = s['first_stage_model.encoder.mid.attn_1.norm.weight']
+ new['encoder']['13.groupnorm.bias'] = s['first_stage_model.encoder.mid.attn_1.norm.bias']
+ new['encoder']['13.attention.out_proj.bias'] = s['first_stage_model.encoder.mid.attn_1.proj_out.bias']
+ new['encoder']['14.groupnorm_1.weight'] = s['first_stage_model.encoder.mid.block_2.norm1.weight']
+ new['encoder']['14.groupnorm_1.bias'] = s['first_stage_model.encoder.mid.block_2.norm1.bias']
+ new['encoder']['14.conv_1.weight'] = s['first_stage_model.encoder.mid.block_2.conv1.weight']
+ new['encoder']['14.conv_1.bias'] = s['first_stage_model.encoder.mid.block_2.conv1.bias']
+ new['encoder']['14.groupnorm_2.weight'] = s['first_stage_model.encoder.mid.block_2.norm2.weight']
+ new['encoder']['14.groupnorm_2.bias'] = s['first_stage_model.encoder.mid.block_2.norm2.bias']
+ new['encoder']['14.conv_2.weight'] = s['first_stage_model.encoder.mid.block_2.conv2.weight']
+ new['encoder']['14.conv_2.bias'] = s['first_stage_model.encoder.mid.block_2.conv2.bias']
+ new['encoder']['15.weight'] = s['first_stage_model.encoder.norm_out.weight']
+ new['encoder']['15.bias'] = s['first_stage_model.encoder.norm_out.bias']
+ new['encoder']['17.weight'] = s['first_stage_model.encoder.conv_out.weight']
+ new['encoder']['17.bias'] = s['first_stage_model.encoder.conv_out.bias']
+ new['decoder']['1.weight'] = s['first_stage_model.decoder.conv_in.weight']
+ new['decoder']['1.bias'] = s['first_stage_model.decoder.conv_in.bias']
+ new['decoder']['2.groupnorm_1.weight'] = s['first_stage_model.decoder.mid.block_1.norm1.weight']
+ new['decoder']['2.groupnorm_1.bias'] = s['first_stage_model.decoder.mid.block_1.norm1.bias']
+ new['decoder']['2.conv_1.weight'] = s['first_stage_model.decoder.mid.block_1.conv1.weight']
+ new['decoder']['2.conv_1.bias'] = s['first_stage_model.decoder.mid.block_1.conv1.bias']
+ new['decoder']['2.groupnorm_2.weight'] = s['first_stage_model.decoder.mid.block_1.norm2.weight']
+ new['decoder']['2.groupnorm_2.bias'] = s['first_stage_model.decoder.mid.block_1.norm2.bias']
+ new['decoder']['2.conv_2.weight'] = s['first_stage_model.decoder.mid.block_1.conv2.weight']
+ new['decoder']['2.conv_2.bias'] = s['first_stage_model.decoder.mid.block_1.conv2.bias']
+ new['decoder']['3.groupnorm.weight'] = s['first_stage_model.decoder.mid.attn_1.norm.weight']
+ new['decoder']['3.groupnorm.bias'] = s['first_stage_model.decoder.mid.attn_1.norm.bias']
+ new['decoder']['3.attention.out_proj.bias'] = s['first_stage_model.decoder.mid.attn_1.proj_out.bias']
+ new['decoder']['4.groupnorm_1.weight'] = s['first_stage_model.decoder.mid.block_2.norm1.weight']
+ new['decoder']['4.groupnorm_1.bias'] = s['first_stage_model.decoder.mid.block_2.norm1.bias']
+ new['decoder']['4.conv_1.weight'] = s['first_stage_model.decoder.mid.block_2.conv1.weight']
+ new['decoder']['4.conv_1.bias'] = s['first_stage_model.decoder.mid.block_2.conv1.bias']
+ new['decoder']['4.groupnorm_2.weight'] = s['first_stage_model.decoder.mid.block_2.norm2.weight']
+ new['decoder']['4.groupnorm_2.bias'] = s['first_stage_model.decoder.mid.block_2.norm2.bias']
+ new['decoder']['4.conv_2.weight'] = s['first_stage_model.decoder.mid.block_2.conv2.weight']
+ new['decoder']['4.conv_2.bias'] = s['first_stage_model.decoder.mid.block_2.conv2.bias']
+ new['decoder']['20.groupnorm_1.weight'] = s['first_stage_model.decoder.up.0.block.0.norm1.weight']
+ new['decoder']['20.groupnorm_1.bias'] = s['first_stage_model.decoder.up.0.block.0.norm1.bias']
+ new['decoder']['20.conv_1.weight'] = s['first_stage_model.decoder.up.0.block.0.conv1.weight']
+ new['decoder']['20.conv_1.bias'] = s['first_stage_model.decoder.up.0.block.0.conv1.bias']
+ new['decoder']['20.groupnorm_2.weight'] = s['first_stage_model.decoder.up.0.block.0.norm2.weight']
+ new['decoder']['20.groupnorm_2.bias'] = s['first_stage_model.decoder.up.0.block.0.norm2.bias']
+ new['decoder']['20.conv_2.weight'] = s['first_stage_model.decoder.up.0.block.0.conv2.weight']
+ new['decoder']['20.conv_2.bias'] = s['first_stage_model.decoder.up.0.block.0.conv2.bias']
+ new['decoder']['20.residual_layer.weight'] = s['first_stage_model.decoder.up.0.block.0.nin_shortcut.weight']
+ new['decoder']['20.residual_layer.bias'] = s['first_stage_model.decoder.up.0.block.0.nin_shortcut.bias']
+ new['decoder']['21.groupnorm_1.weight'] = s['first_stage_model.decoder.up.0.block.1.norm1.weight']
+ new['decoder']['21.groupnorm_1.bias'] = s['first_stage_model.decoder.up.0.block.1.norm1.bias']
+ new['decoder']['21.conv_1.weight'] = s['first_stage_model.decoder.up.0.block.1.conv1.weight']
+ new['decoder']['21.conv_1.bias'] = s['first_stage_model.decoder.up.0.block.1.conv1.bias']
+ new['decoder']['21.groupnorm_2.weight'] = s['first_stage_model.decoder.up.0.block.1.norm2.weight']
+ new['decoder']['21.groupnorm_2.bias'] = s['first_stage_model.decoder.up.0.block.1.norm2.bias']
+ new['decoder']['21.conv_2.weight'] = s['first_stage_model.decoder.up.0.block.1.conv2.weight']
+ new['decoder']['21.conv_2.bias'] = s['first_stage_model.decoder.up.0.block.1.conv2.bias']
+ new['decoder']['22.groupnorm_1.weight'] = s['first_stage_model.decoder.up.0.block.2.norm1.weight']
+ new['decoder']['22.groupnorm_1.bias'] = s['first_stage_model.decoder.up.0.block.2.norm1.bias']
+ new['decoder']['22.conv_1.weight'] = s['first_stage_model.decoder.up.0.block.2.conv1.weight']
+ new['decoder']['22.conv_1.bias'] = s['first_stage_model.decoder.up.0.block.2.conv1.bias']
+ new['decoder']['22.groupnorm_2.weight'] = s['first_stage_model.decoder.up.0.block.2.norm2.weight']
+ new['decoder']['22.groupnorm_2.bias'] = s['first_stage_model.decoder.up.0.block.2.norm2.bias']
+ new['decoder']['22.conv_2.weight'] = s['first_stage_model.decoder.up.0.block.2.conv2.weight']
+ new['decoder']['22.conv_2.bias'] = s['first_stage_model.decoder.up.0.block.2.conv2.bias']
+ new['decoder']['15.groupnorm_1.weight'] = s['first_stage_model.decoder.up.1.block.0.norm1.weight']
+ new['decoder']['15.groupnorm_1.bias'] = s['first_stage_model.decoder.up.1.block.0.norm1.bias']
+ new['decoder']['15.conv_1.weight'] = s['first_stage_model.decoder.up.1.block.0.conv1.weight']
+ new['decoder']['15.conv_1.bias'] = s['first_stage_model.decoder.up.1.block.0.conv1.bias']
+ new['decoder']['15.groupnorm_2.weight'] = s['first_stage_model.decoder.up.1.block.0.norm2.weight']
+ new['decoder']['15.groupnorm_2.bias'] = s['first_stage_model.decoder.up.1.block.0.norm2.bias']
+ new['decoder']['15.conv_2.weight'] = s['first_stage_model.decoder.up.1.block.0.conv2.weight']
+ new['decoder']['15.conv_2.bias'] = s['first_stage_model.decoder.up.1.block.0.conv2.bias']
+ new['decoder']['15.residual_layer.weight'] = s['first_stage_model.decoder.up.1.block.0.nin_shortcut.weight']
+ new['decoder']['15.residual_layer.bias'] = s['first_stage_model.decoder.up.1.block.0.nin_shortcut.bias']
+ new['decoder']['16.groupnorm_1.weight'] = s['first_stage_model.decoder.up.1.block.1.norm1.weight']
+ new['decoder']['16.groupnorm_1.bias'] = s['first_stage_model.decoder.up.1.block.1.norm1.bias']
+ new['decoder']['16.conv_1.weight'] = s['first_stage_model.decoder.up.1.block.1.conv1.weight']
+ new['decoder']['16.conv_1.bias'] = s['first_stage_model.decoder.up.1.block.1.conv1.bias']
+ new['decoder']['16.groupnorm_2.weight'] = s['first_stage_model.decoder.up.1.block.1.norm2.weight']
+ new['decoder']['16.groupnorm_2.bias'] = s['first_stage_model.decoder.up.1.block.1.norm2.bias']
+ new['decoder']['16.conv_2.weight'] = s['first_stage_model.decoder.up.1.block.1.conv2.weight']
+ new['decoder']['16.conv_2.bias'] = s['first_stage_model.decoder.up.1.block.1.conv2.bias']
+ new['decoder']['17.groupnorm_1.weight'] = s['first_stage_model.decoder.up.1.block.2.norm1.weight']
+ new['decoder']['17.groupnorm_1.bias'] = s['first_stage_model.decoder.up.1.block.2.norm1.bias']
+ new['decoder']['17.conv_1.weight'] = s['first_stage_model.decoder.up.1.block.2.conv1.weight']
+ new['decoder']['17.conv_1.bias'] = s['first_stage_model.decoder.up.1.block.2.conv1.bias']
+ new['decoder']['17.groupnorm_2.weight'] = s['first_stage_model.decoder.up.1.block.2.norm2.weight']
+ new['decoder']['17.groupnorm_2.bias'] = s['first_stage_model.decoder.up.1.block.2.norm2.bias']
+ new['decoder']['17.conv_2.weight'] = s['first_stage_model.decoder.up.1.block.2.conv2.weight']
+ new['decoder']['17.conv_2.bias'] = s['first_stage_model.decoder.up.1.block.2.conv2.bias']
+ new['decoder']['19.weight'] = s['first_stage_model.decoder.up.1.upsample.conv.weight']
+ new['decoder']['19.bias'] = s['first_stage_model.decoder.up.1.upsample.conv.bias']
+ new['decoder']['10.groupnorm_1.weight'] = s['first_stage_model.decoder.up.2.block.0.norm1.weight']
+ new['decoder']['10.groupnorm_1.bias'] = s['first_stage_model.decoder.up.2.block.0.norm1.bias']
+ new['decoder']['10.conv_1.weight'] = s['first_stage_model.decoder.up.2.block.0.conv1.weight']
+ new['decoder']['10.conv_1.bias'] = s['first_stage_model.decoder.up.2.block.0.conv1.bias']
+ new['decoder']['10.groupnorm_2.weight'] = s['first_stage_model.decoder.up.2.block.0.norm2.weight']
+ new['decoder']['10.groupnorm_2.bias'] = s['first_stage_model.decoder.up.2.block.0.norm2.bias']
+ new['decoder']['10.conv_2.weight'] = s['first_stage_model.decoder.up.2.block.0.conv2.weight']
+ new['decoder']['10.conv_2.bias'] = s['first_stage_model.decoder.up.2.block.0.conv2.bias']
+ new['decoder']['11.groupnorm_1.weight'] = s['first_stage_model.decoder.up.2.block.1.norm1.weight']
+ new['decoder']['11.groupnorm_1.bias'] = s['first_stage_model.decoder.up.2.block.1.norm1.bias']
+ new['decoder']['11.conv_1.weight'] = s['first_stage_model.decoder.up.2.block.1.conv1.weight']
+ new['decoder']['11.conv_1.bias'] = s['first_stage_model.decoder.up.2.block.1.conv1.bias']
+ new['decoder']['11.groupnorm_2.weight'] = s['first_stage_model.decoder.up.2.block.1.norm2.weight']
+ new['decoder']['11.groupnorm_2.bias'] = s['first_stage_model.decoder.up.2.block.1.norm2.bias']
+ new['decoder']['11.conv_2.weight'] = s['first_stage_model.decoder.up.2.block.1.conv2.weight']
+ new['decoder']['11.conv_2.bias'] = s['first_stage_model.decoder.up.2.block.1.conv2.bias']
+ new['decoder']['12.groupnorm_1.weight'] = s['first_stage_model.decoder.up.2.block.2.norm1.weight']
+ new['decoder']['12.groupnorm_1.bias'] = s['first_stage_model.decoder.up.2.block.2.norm1.bias']
+ new['decoder']['12.conv_1.weight'] = s['first_stage_model.decoder.up.2.block.2.conv1.weight']
+ new['decoder']['12.conv_1.bias'] = s['first_stage_model.decoder.up.2.block.2.conv1.bias']
+ new['decoder']['12.groupnorm_2.weight'] = s['first_stage_model.decoder.up.2.block.2.norm2.weight']
+ new['decoder']['12.groupnorm_2.bias'] = s['first_stage_model.decoder.up.2.block.2.norm2.bias']
+ new['decoder']['12.conv_2.weight'] = s['first_stage_model.decoder.up.2.block.2.conv2.weight']
+ new['decoder']['12.conv_2.bias'] = s['first_stage_model.decoder.up.2.block.2.conv2.bias']
+ new['decoder']['14.weight'] = s['first_stage_model.decoder.up.2.upsample.conv.weight']
+ new['decoder']['14.bias'] = s['first_stage_model.decoder.up.2.upsample.conv.bias']
+ new['decoder']['5.groupnorm_1.weight'] = s['first_stage_model.decoder.up.3.block.0.norm1.weight']
+ new['decoder']['5.groupnorm_1.bias'] = s['first_stage_model.decoder.up.3.block.0.norm1.bias']
+ new['decoder']['5.conv_1.weight'] = s['first_stage_model.decoder.up.3.block.0.conv1.weight']
+ new['decoder']['5.conv_1.bias'] = s['first_stage_model.decoder.up.3.block.0.conv1.bias']
+ new['decoder']['5.groupnorm_2.weight'] = s['first_stage_model.decoder.up.3.block.0.norm2.weight']
+ new['decoder']['5.groupnorm_2.bias'] = s['first_stage_model.decoder.up.3.block.0.norm2.bias']
+ new['decoder']['5.conv_2.weight'] = s['first_stage_model.decoder.up.3.block.0.conv2.weight']
+ new['decoder']['5.conv_2.bias'] = s['first_stage_model.decoder.up.3.block.0.conv2.bias']
+ new['decoder']['6.groupnorm_1.weight'] = s['first_stage_model.decoder.up.3.block.1.norm1.weight']
+ new['decoder']['6.groupnorm_1.bias'] = s['first_stage_model.decoder.up.3.block.1.norm1.bias']
+ new['decoder']['6.conv_1.weight'] = s['first_stage_model.decoder.up.3.block.1.conv1.weight']
+ new['decoder']['6.conv_1.bias'] = s['first_stage_model.decoder.up.3.block.1.conv1.bias']
+ new['decoder']['6.groupnorm_2.weight'] = s['first_stage_model.decoder.up.3.block.1.norm2.weight']
+ new['decoder']['6.groupnorm_2.bias'] = s['first_stage_model.decoder.up.3.block.1.norm2.bias']
+ new['decoder']['6.conv_2.weight'] = s['first_stage_model.decoder.up.3.block.1.conv2.weight']
+ new['decoder']['6.conv_2.bias'] = s['first_stage_model.decoder.up.3.block.1.conv2.bias']
+ new['decoder']['7.groupnorm_1.weight'] = s['first_stage_model.decoder.up.3.block.2.norm1.weight']
+ new['decoder']['7.groupnorm_1.bias'] = s['first_stage_model.decoder.up.3.block.2.norm1.bias']
+ new['decoder']['7.conv_1.weight'] = s['first_stage_model.decoder.up.3.block.2.conv1.weight']
+ new['decoder']['7.conv_1.bias'] = s['first_stage_model.decoder.up.3.block.2.conv1.bias']
+ new['decoder']['7.groupnorm_2.weight'] = s['first_stage_model.decoder.up.3.block.2.norm2.weight']
+ new['decoder']['7.groupnorm_2.bias'] = s['first_stage_model.decoder.up.3.block.2.norm2.bias']
+ new['decoder']['7.conv_2.weight'] = s['first_stage_model.decoder.up.3.block.2.conv2.weight']
+ new['decoder']['7.conv_2.bias'] = s['first_stage_model.decoder.up.3.block.2.conv2.bias']
+ new['decoder']['9.weight'] = s['first_stage_model.decoder.up.3.upsample.conv.weight']
+ new['decoder']['9.bias'] = s['first_stage_model.decoder.up.3.upsample.conv.bias']
+ new['decoder']['23.weight'] = s['first_stage_model.decoder.norm_out.weight']
+ new['decoder']['23.bias'] = s['first_stage_model.decoder.norm_out.bias']
+ new['decoder']['25.weight'] = s['first_stage_model.decoder.conv_out.weight']
+ new['decoder']['25.bias'] = s['first_stage_model.decoder.conv_out.bias']
+ new['encoder']['18.weight'] = s['first_stage_model.quant_conv.weight']
+ new['encoder']['18.bias'] = s['first_stage_model.quant_conv.bias']
+ new['decoder']['0.weight'] = s['first_stage_model.post_quant_conv.weight']
+ new['decoder']['0.bias'] = s['first_stage_model.post_quant_conv.bias']
+ new['clip']['embedding.token_embedding.weight'] = s['cond_stage_model.transformer.text_model.embeddings.token_embedding.weight']
+ new['clip']['embedding.position_value'] = s['cond_stage_model.transformer.text_model.embeddings.position_embedding.weight']
+ new['clip']['layers.0.attention.out_proj.weight'] = s['cond_stage_model.transformer.text_model.encoder.layers.0.self_attn.out_proj.weight']
+ new['clip']['layers.0.attention.out_proj.bias'] = s['cond_stage_model.transformer.text_model.encoder.layers.0.self_attn.out_proj.bias']
+ new['clip']['layers.0.layernorm_1.weight'] = s['cond_stage_model.transformer.text_model.encoder.layers.0.layer_norm1.weight']
+ new['clip']['layers.0.layernorm_1.bias'] = s['cond_stage_model.transformer.text_model.encoder.layers.0.layer_norm1.bias']
+ new['clip']['layers.0.linear_1.weight'] = s['cond_stage_model.transformer.text_model.encoder.layers.0.mlp.fc1.weight']
+ new['clip']['layers.0.linear_1.bias'] = s['cond_stage_model.transformer.text_model.encoder.layers.0.mlp.fc1.bias']
+ new['clip']['layers.0.linear_2.weight'] = s['cond_stage_model.transformer.text_model.encoder.layers.0.mlp.fc2.weight']
+ new['clip']['layers.0.linear_2.bias'] = s['cond_stage_model.transformer.text_model.encoder.layers.0.mlp.fc2.bias']
+ new['clip']['layers.0.layernorm_2.weight'] = s['cond_stage_model.transformer.text_model.encoder.layers.0.layer_norm2.weight']
+ new['clip']['layers.0.layernorm_2.bias'] = s['cond_stage_model.transformer.text_model.encoder.layers.0.layer_norm2.bias']
+ new['clip']['layers.1.attention.out_proj.weight'] = s['cond_stage_model.transformer.text_model.encoder.layers.1.self_attn.out_proj.weight']
+ new['clip']['layers.1.attention.out_proj.bias'] = s['cond_stage_model.transformer.text_model.encoder.layers.1.self_attn.out_proj.bias']
+ new['clip']['layers.1.layernorm_1.weight'] = s['cond_stage_model.transformer.text_model.encoder.layers.1.layer_norm1.weight']
+ new['clip']['layers.1.layernorm_1.bias'] = s['cond_stage_model.transformer.text_model.encoder.layers.1.layer_norm1.bias']
+ new['clip']['layers.1.linear_1.weight'] = s['cond_stage_model.transformer.text_model.encoder.layers.1.mlp.fc1.weight']
+ new['clip']['layers.1.linear_1.bias'] = s['cond_stage_model.transformer.text_model.encoder.layers.1.mlp.fc1.bias']
+ new['clip']['layers.1.linear_2.weight'] = s['cond_stage_model.transformer.text_model.encoder.layers.1.mlp.fc2.weight']
+ new['clip']['layers.1.linear_2.bias'] = s['cond_stage_model.transformer.text_model.encoder.layers.1.mlp.fc2.bias']
+ new['clip']['layers.1.layernorm_2.weight'] = s['cond_stage_model.transformer.text_model.encoder.layers.1.layer_norm2.weight']
+ new['clip']['layers.1.layernorm_2.bias'] = s['cond_stage_model.transformer.text_model.encoder.layers.1.layer_norm2.bias']
+ new['clip']['layers.2.attention.out_proj.weight'] = s['cond_stage_model.transformer.text_model.encoder.layers.2.self_attn.out_proj.weight']
+ new['clip']['layers.2.attention.out_proj.bias'] = s['cond_stage_model.transformer.text_model.encoder.layers.2.self_attn.out_proj.bias']
+ new['clip']['layers.2.layernorm_1.weight'] = s['cond_stage_model.transformer.text_model.encoder.layers.2.layer_norm1.weight']
+ new['clip']['layers.2.layernorm_1.bias'] = s['cond_stage_model.transformer.text_model.encoder.layers.2.layer_norm1.bias']
+ new['clip']['layers.2.linear_1.weight'] = s['cond_stage_model.transformer.text_model.encoder.layers.2.mlp.fc1.weight']
+ new['clip']['layers.2.linear_1.bias'] = s['cond_stage_model.transformer.text_model.encoder.layers.2.mlp.fc1.bias']
+ new['clip']['layers.2.linear_2.weight'] = s['cond_stage_model.transformer.text_model.encoder.layers.2.mlp.fc2.weight']
+ new['clip']['layers.2.linear_2.bias'] = s['cond_stage_model.transformer.text_model.encoder.layers.2.mlp.fc2.bias']
+ new['clip']['layers.2.layernorm_2.weight'] = s['cond_stage_model.transformer.text_model.encoder.layers.2.layer_norm2.weight']
+ new['clip']['layers.2.layernorm_2.bias'] = s['cond_stage_model.transformer.text_model.encoder.layers.2.layer_norm2.bias']
+ new['clip']['layers.3.attention.out_proj.weight'] = s['cond_stage_model.transformer.text_model.encoder.layers.3.self_attn.out_proj.weight']
+ new['clip']['layers.3.attention.out_proj.bias'] = s['cond_stage_model.transformer.text_model.encoder.layers.3.self_attn.out_proj.bias']
+ new['clip']['layers.3.layernorm_1.weight'] = s['cond_stage_model.transformer.text_model.encoder.layers.3.layer_norm1.weight']
+ new['clip']['layers.3.layernorm_1.bias'] = s['cond_stage_model.transformer.text_model.encoder.layers.3.layer_norm1.bias']
+ new['clip']['layers.3.linear_1.weight'] = s['cond_stage_model.transformer.text_model.encoder.layers.3.mlp.fc1.weight']
+ new['clip']['layers.3.linear_1.bias'] = s['cond_stage_model.transformer.text_model.encoder.layers.3.mlp.fc1.bias']
+ new['clip']['layers.3.linear_2.weight'] = s['cond_stage_model.transformer.text_model.encoder.layers.3.mlp.fc2.weight']
+ new['clip']['layers.3.linear_2.bias'] = s['cond_stage_model.transformer.text_model.encoder.layers.3.mlp.fc2.bias']
+ new['clip']['layers.3.layernorm_2.weight'] = s['cond_stage_model.transformer.text_model.encoder.layers.3.layer_norm2.weight']
+ new['clip']['layers.3.layernorm_2.bias'] = s['cond_stage_model.transformer.text_model.encoder.layers.3.layer_norm2.bias']
+ new['clip']['layers.4.attention.out_proj.weight'] = s['cond_stage_model.transformer.text_model.encoder.layers.4.self_attn.out_proj.weight']
+ new['clip']['layers.4.attention.out_proj.bias'] = s['cond_stage_model.transformer.text_model.encoder.layers.4.self_attn.out_proj.bias']
+ new['clip']['layers.4.layernorm_1.weight'] = s['cond_stage_model.transformer.text_model.encoder.layers.4.layer_norm1.weight']
+ new['clip']['layers.4.layernorm_1.bias'] = s['cond_stage_model.transformer.text_model.encoder.layers.4.layer_norm1.bias']
+ new['clip']['layers.4.linear_1.weight'] = s['cond_stage_model.transformer.text_model.encoder.layers.4.mlp.fc1.weight']
+ new['clip']['layers.4.linear_1.bias'] = s['cond_stage_model.transformer.text_model.encoder.layers.4.mlp.fc1.bias']
+ new['clip']['layers.4.linear_2.weight'] = s['cond_stage_model.transformer.text_model.encoder.layers.4.mlp.fc2.weight']
+ new['clip']['layers.4.linear_2.bias'] = s['cond_stage_model.transformer.text_model.encoder.layers.4.mlp.fc2.bias']
+ new['clip']['layers.4.layernorm_2.weight'] = s['cond_stage_model.transformer.text_model.encoder.layers.4.layer_norm2.weight']
+ new['clip']['layers.4.layernorm_2.bias'] = s['cond_stage_model.transformer.text_model.encoder.layers.4.layer_norm2.bias']
+ new['clip']['layers.5.attention.out_proj.weight'] = s['cond_stage_model.transformer.text_model.encoder.layers.5.self_attn.out_proj.weight']
+ new['clip']['layers.5.attention.out_proj.bias'] = s['cond_stage_model.transformer.text_model.encoder.layers.5.self_attn.out_proj.bias']
+ new['clip']['layers.5.layernorm_1.weight'] = s['cond_stage_model.transformer.text_model.encoder.layers.5.layer_norm1.weight']
+ new['clip']['layers.5.layernorm_1.bias'] = s['cond_stage_model.transformer.text_model.encoder.layers.5.layer_norm1.bias']
+ new['clip']['layers.5.linear_1.weight'] = s['cond_stage_model.transformer.text_model.encoder.layers.5.mlp.fc1.weight']
+ new['clip']['layers.5.linear_1.bias'] = s['cond_stage_model.transformer.text_model.encoder.layers.5.mlp.fc1.bias']
+ new['clip']['layers.5.linear_2.weight'] = s['cond_stage_model.transformer.text_model.encoder.layers.5.mlp.fc2.weight']
+ new['clip']['layers.5.linear_2.bias'] = s['cond_stage_model.transformer.text_model.encoder.layers.5.mlp.fc2.bias']
+ new['clip']['layers.5.layernorm_2.weight'] = s['cond_stage_model.transformer.text_model.encoder.layers.5.layer_norm2.weight']
+ new['clip']['layers.5.layernorm_2.bias'] = s['cond_stage_model.transformer.text_model.encoder.layers.5.layer_norm2.bias']
+ new['clip']['layers.6.attention.out_proj.weight'] = s['cond_stage_model.transformer.text_model.encoder.layers.6.self_attn.out_proj.weight']
+ new['clip']['layers.6.attention.out_proj.bias'] = s['cond_stage_model.transformer.text_model.encoder.layers.6.self_attn.out_proj.bias']
+ new['clip']['layers.6.layernorm_1.weight'] = s['cond_stage_model.transformer.text_model.encoder.layers.6.layer_norm1.weight']
+ new['clip']['layers.6.layernorm_1.bias'] = s['cond_stage_model.transformer.text_model.encoder.layers.6.layer_norm1.bias']
+ new['clip']['layers.6.linear_1.weight'] = s['cond_stage_model.transformer.text_model.encoder.layers.6.mlp.fc1.weight']
+ new['clip']['layers.6.linear_1.bias'] = s['cond_stage_model.transformer.text_model.encoder.layers.6.mlp.fc1.bias']
+ new['clip']['layers.6.linear_2.weight'] = s['cond_stage_model.transformer.text_model.encoder.layers.6.mlp.fc2.weight']
+ new['clip']['layers.6.linear_2.bias'] = s['cond_stage_model.transformer.text_model.encoder.layers.6.mlp.fc2.bias']
+ new['clip']['layers.6.layernorm_2.weight'] = s['cond_stage_model.transformer.text_model.encoder.layers.6.layer_norm2.weight']
+ new['clip']['layers.6.layernorm_2.bias'] = s['cond_stage_model.transformer.text_model.encoder.layers.6.layer_norm2.bias']
+ new['clip']['layers.7.attention.out_proj.weight'] = s['cond_stage_model.transformer.text_model.encoder.layers.7.self_attn.out_proj.weight']
+ new['clip']['layers.7.attention.out_proj.bias'] = s['cond_stage_model.transformer.text_model.encoder.layers.7.self_attn.out_proj.bias']
+ new['clip']['layers.7.layernorm_1.weight'] = s['cond_stage_model.transformer.text_model.encoder.layers.7.layer_norm1.weight']
+ new['clip']['layers.7.layernorm_1.bias'] = s['cond_stage_model.transformer.text_model.encoder.layers.7.layer_norm1.bias']
+ new['clip']['layers.7.linear_1.weight'] = s['cond_stage_model.transformer.text_model.encoder.layers.7.mlp.fc1.weight']
+ new['clip']['layers.7.linear_1.bias'] = s['cond_stage_model.transformer.text_model.encoder.layers.7.mlp.fc1.bias']
+ new['clip']['layers.7.linear_2.weight'] = s['cond_stage_model.transformer.text_model.encoder.layers.7.mlp.fc2.weight']
+ new['clip']['layers.7.linear_2.bias'] = s['cond_stage_model.transformer.text_model.encoder.layers.7.mlp.fc2.bias']
+ new['clip']['layers.7.layernorm_2.weight'] = s['cond_stage_model.transformer.text_model.encoder.layers.7.layer_norm2.weight']
+ new['clip']['layers.7.layernorm_2.bias'] = s['cond_stage_model.transformer.text_model.encoder.layers.7.layer_norm2.bias']
+ new['clip']['layers.8.attention.out_proj.weight'] = s['cond_stage_model.transformer.text_model.encoder.layers.8.self_attn.out_proj.weight']
+ new['clip']['layers.8.attention.out_proj.bias'] = s['cond_stage_model.transformer.text_model.encoder.layers.8.self_attn.out_proj.bias']
+ new['clip']['layers.8.layernorm_1.weight'] = s['cond_stage_model.transformer.text_model.encoder.layers.8.layer_norm1.weight']
+ new['clip']['layers.8.layernorm_1.bias'] = s['cond_stage_model.transformer.text_model.encoder.layers.8.layer_norm1.bias']
+ new['clip']['layers.8.linear_1.weight'] = s['cond_stage_model.transformer.text_model.encoder.layers.8.mlp.fc1.weight']
+ new['clip']['layers.8.linear_1.bias'] = s['cond_stage_model.transformer.text_model.encoder.layers.8.mlp.fc1.bias']
+ new['clip']['layers.8.linear_2.weight'] = s['cond_stage_model.transformer.text_model.encoder.layers.8.mlp.fc2.weight']
+ new['clip']['layers.8.linear_2.bias'] = s['cond_stage_model.transformer.text_model.encoder.layers.8.mlp.fc2.bias']
+ new['clip']['layers.8.layernorm_2.weight'] = s['cond_stage_model.transformer.text_model.encoder.layers.8.layer_norm2.weight']
+ new['clip']['layers.8.layernorm_2.bias'] = s['cond_stage_model.transformer.text_model.encoder.layers.8.layer_norm2.bias']
+ new['clip']['layers.9.attention.out_proj.weight'] = s['cond_stage_model.transformer.text_model.encoder.layers.9.self_attn.out_proj.weight']
+ new['clip']['layers.9.attention.out_proj.bias'] = s['cond_stage_model.transformer.text_model.encoder.layers.9.self_attn.out_proj.bias']
+ new['clip']['layers.9.layernorm_1.weight'] = s['cond_stage_model.transformer.text_model.encoder.layers.9.layer_norm1.weight']
+ new['clip']['layers.9.layernorm_1.bias'] = s['cond_stage_model.transformer.text_model.encoder.layers.9.layer_norm1.bias']
+ new['clip']['layers.9.linear_1.weight'] = s['cond_stage_model.transformer.text_model.encoder.layers.9.mlp.fc1.weight']
+ new['clip']['layers.9.linear_1.bias'] = s['cond_stage_model.transformer.text_model.encoder.layers.9.mlp.fc1.bias']
+ new['clip']['layers.9.linear_2.weight'] = s['cond_stage_model.transformer.text_model.encoder.layers.9.mlp.fc2.weight']
+ new['clip']['layers.9.linear_2.bias'] = s['cond_stage_model.transformer.text_model.encoder.layers.9.mlp.fc2.bias']
+ new['clip']['layers.9.layernorm_2.weight'] = s['cond_stage_model.transformer.text_model.encoder.layers.9.layer_norm2.weight']
+ new['clip']['layers.9.layernorm_2.bias'] = s['cond_stage_model.transformer.text_model.encoder.layers.9.layer_norm2.bias']
+ new['clip']['layers.10.attention.out_proj.weight'] = s['cond_stage_model.transformer.text_model.encoder.layers.10.self_attn.out_proj.weight']
+ new['clip']['layers.10.attention.out_proj.bias'] = s['cond_stage_model.transformer.text_model.encoder.layers.10.self_attn.out_proj.bias']
+ new['clip']['layers.10.layernorm_1.weight'] = s['cond_stage_model.transformer.text_model.encoder.layers.10.layer_norm1.weight']
+ new['clip']['layers.10.layernorm_1.bias'] = s['cond_stage_model.transformer.text_model.encoder.layers.10.layer_norm1.bias']
+ new['clip']['layers.10.linear_1.weight'] = s['cond_stage_model.transformer.text_model.encoder.layers.10.mlp.fc1.weight']
+ new['clip']['layers.10.linear_1.bias'] = s['cond_stage_model.transformer.text_model.encoder.layers.10.mlp.fc1.bias']
+ new['clip']['layers.10.linear_2.weight'] = s['cond_stage_model.transformer.text_model.encoder.layers.10.mlp.fc2.weight']
+ new['clip']['layers.10.linear_2.bias'] = s['cond_stage_model.transformer.text_model.encoder.layers.10.mlp.fc2.bias']
+ new['clip']['layers.10.layernorm_2.weight'] = s['cond_stage_model.transformer.text_model.encoder.layers.10.layer_norm2.weight']
+ new['clip']['layers.10.layernorm_2.bias'] = s['cond_stage_model.transformer.text_model.encoder.layers.10.layer_norm2.bias']
+ new['clip']['layers.11.attention.out_proj.weight'] = s['cond_stage_model.transformer.text_model.encoder.layers.11.self_attn.out_proj.weight']
+ new['clip']['layers.11.attention.out_proj.bias'] = s['cond_stage_model.transformer.text_model.encoder.layers.11.self_attn.out_proj.bias']
+ new['clip']['layers.11.layernorm_1.weight'] = s['cond_stage_model.transformer.text_model.encoder.layers.11.layer_norm1.weight']
+ new['clip']['layers.11.layernorm_1.bias'] = s['cond_stage_model.transformer.text_model.encoder.layers.11.layer_norm1.bias']
+ new['clip']['layers.11.linear_1.weight'] = s['cond_stage_model.transformer.text_model.encoder.layers.11.mlp.fc1.weight']
+ new['clip']['layers.11.linear_1.bias'] = s['cond_stage_model.transformer.text_model.encoder.layers.11.mlp.fc1.bias']
+ new['clip']['layers.11.linear_2.weight'] = s['cond_stage_model.transformer.text_model.encoder.layers.11.mlp.fc2.weight']
+ new['clip']['layers.11.linear_2.bias'] = s['cond_stage_model.transformer.text_model.encoder.layers.11.mlp.fc2.bias']
+ new['clip']['layers.11.layernorm_2.weight'] = s['cond_stage_model.transformer.text_model.encoder.layers.11.layer_norm2.weight']
+ new['clip']['layers.11.layernorm_2.bias'] = s['cond_stage_model.transformer.text_model.encoder.layers.11.layer_norm2.bias']
+ new['clip']['layernorm.weight'] = s['cond_stage_model.transformer.text_model.final_layer_norm.weight']
+ new['clip']['layernorm.bias'] = s['cond_stage_model.transformer.text_model.final_layer_norm.bias']
+ new['diffusion']['unet.encoders.1.1.attention_1.in_proj.weight'] = torch.cat((s['model.diffusion_model.input_blocks.1.1.transformer_blocks.0.attn1.to_q.weight'], s['model.diffusion_model.input_blocks.1.1.transformer_blocks.0.attn1.to_k.weight'], s['model.diffusion_model.input_blocks.1.1.transformer_blocks.0.attn1.to_v.weight']), 0)
+ new['diffusion']['unet.encoders.2.1.attention_1.in_proj.weight'] = torch.cat((s['model.diffusion_model.input_blocks.2.1.transformer_blocks.0.attn1.to_q.weight'], s['model.diffusion_model.input_blocks.2.1.transformer_blocks.0.attn1.to_k.weight'], s['model.diffusion_model.input_blocks.2.1.transformer_blocks.0.attn1.to_v.weight']), 0)
+ new['diffusion']['unet.encoders.4.1.attention_1.in_proj.weight'] = torch.cat((s['model.diffusion_model.input_blocks.4.1.transformer_blocks.0.attn1.to_q.weight'], s['model.diffusion_model.input_blocks.4.1.transformer_blocks.0.attn1.to_k.weight'], s['model.diffusion_model.input_blocks.4.1.transformer_blocks.0.attn1.to_v.weight']), 0)
+ new['diffusion']['unet.encoders.5.1.attention_1.in_proj.weight'] = torch.cat((s['model.diffusion_model.input_blocks.5.1.transformer_blocks.0.attn1.to_q.weight'], s['model.diffusion_model.input_blocks.5.1.transformer_blocks.0.attn1.to_k.weight'], s['model.diffusion_model.input_blocks.5.1.transformer_blocks.0.attn1.to_v.weight']), 0)
+ new['diffusion']['unet.encoders.7.1.attention_1.in_proj.weight'] = torch.cat((s['model.diffusion_model.input_blocks.7.1.transformer_blocks.0.attn1.to_q.weight'], s['model.diffusion_model.input_blocks.7.1.transformer_blocks.0.attn1.to_k.weight'], s['model.diffusion_model.input_blocks.7.1.transformer_blocks.0.attn1.to_v.weight']), 0)
+ new['diffusion']['unet.encoders.8.1.attention_1.in_proj.weight'] = torch.cat((s['model.diffusion_model.input_blocks.8.1.transformer_blocks.0.attn1.to_q.weight'], s['model.diffusion_model.input_blocks.8.1.transformer_blocks.0.attn1.to_k.weight'], s['model.diffusion_model.input_blocks.8.1.transformer_blocks.0.attn1.to_v.weight']), 0)
+ new['diffusion']['unet.bottleneck.1.attention_1.in_proj.weight'] = torch.cat((s['model.diffusion_model.middle_block.1.transformer_blocks.0.attn1.to_q.weight'], s['model.diffusion_model.middle_block.1.transformer_blocks.0.attn1.to_k.weight'], s['model.diffusion_model.middle_block.1.transformer_blocks.0.attn1.to_v.weight']), 0)
+ new['diffusion']['unet.decoders.3.1.attention_1.in_proj.weight'] = torch.cat((s['model.diffusion_model.output_blocks.3.1.transformer_blocks.0.attn1.to_q.weight'], s['model.diffusion_model.output_blocks.3.1.transformer_blocks.0.attn1.to_k.weight'], s['model.diffusion_model.output_blocks.3.1.transformer_blocks.0.attn1.to_v.weight']), 0)
+ new['diffusion']['unet.decoders.4.1.attention_1.in_proj.weight'] = torch.cat((s['model.diffusion_model.output_blocks.4.1.transformer_blocks.0.attn1.to_q.weight'], s['model.diffusion_model.output_blocks.4.1.transformer_blocks.0.attn1.to_k.weight'], s['model.diffusion_model.output_blocks.4.1.transformer_blocks.0.attn1.to_v.weight']), 0)
+ new['diffusion']['unet.decoders.5.1.attention_1.in_proj.weight'] = torch.cat((s['model.diffusion_model.output_blocks.5.1.transformer_blocks.0.attn1.to_q.weight'], s['model.diffusion_model.output_blocks.5.1.transformer_blocks.0.attn1.to_k.weight'], s['model.diffusion_model.output_blocks.5.1.transformer_blocks.0.attn1.to_v.weight']), 0)
+ new['diffusion']['unet.decoders.6.1.attention_1.in_proj.weight'] = torch.cat((s['model.diffusion_model.output_blocks.6.1.transformer_blocks.0.attn1.to_q.weight'], s['model.diffusion_model.output_blocks.6.1.transformer_blocks.0.attn1.to_k.weight'], s['model.diffusion_model.output_blocks.6.1.transformer_blocks.0.attn1.to_v.weight']), 0)
+ new['diffusion']['unet.decoders.7.1.attention_1.in_proj.weight'] = torch.cat((s['model.diffusion_model.output_blocks.7.1.transformer_blocks.0.attn1.to_q.weight'], s['model.diffusion_model.output_blocks.7.1.transformer_blocks.0.attn1.to_k.weight'], s['model.diffusion_model.output_blocks.7.1.transformer_blocks.0.attn1.to_v.weight']), 0)
+ new['diffusion']['unet.decoders.8.1.attention_1.in_proj.weight'] = torch.cat((s['model.diffusion_model.output_blocks.8.1.transformer_blocks.0.attn1.to_q.weight'], s['model.diffusion_model.output_blocks.8.1.transformer_blocks.0.attn1.to_k.weight'], s['model.diffusion_model.output_blocks.8.1.transformer_blocks.0.attn1.to_v.weight']), 0)
+ new['diffusion']['unet.decoders.9.1.attention_1.in_proj.weight'] = torch.cat((s['model.diffusion_model.output_blocks.9.1.transformer_blocks.0.attn1.to_q.weight'], s['model.diffusion_model.output_blocks.9.1.transformer_blocks.0.attn1.to_k.weight'], s['model.diffusion_model.output_blocks.9.1.transformer_blocks.0.attn1.to_v.weight']), 0)
+ new['diffusion']['unet.decoders.10.1.attention_1.in_proj.weight'] = torch.cat((s['model.diffusion_model.output_blocks.10.1.transformer_blocks.0.attn1.to_q.weight'], s['model.diffusion_model.output_blocks.10.1.transformer_blocks.0.attn1.to_k.weight'], s['model.diffusion_model.output_blocks.10.1.transformer_blocks.0.attn1.to_v.weight']), 0)
+ new['diffusion']['unet.decoders.11.1.attention_1.in_proj.weight'] = torch.cat((s['model.diffusion_model.output_blocks.11.1.transformer_blocks.0.attn1.to_q.weight'], s['model.diffusion_model.output_blocks.11.1.transformer_blocks.0.attn1.to_k.weight'], s['model.diffusion_model.output_blocks.11.1.transformer_blocks.0.attn1.to_v.weight']), 0)
+ new['encoder']['13.attention.in_proj.weight'] = torch.cat((s['first_stage_model.encoder.mid.attn_1.q.weight'], s['first_stage_model.encoder.mid.attn_1.k.weight'], s['first_stage_model.encoder.mid.attn_1.v.weight']), 0).reshape((1536, 512))
+ new['encoder']['13.attention.in_proj.bias'] = torch.cat((s['first_stage_model.encoder.mid.attn_1.q.bias'], s['first_stage_model.encoder.mid.attn_1.k.bias'], s['first_stage_model.encoder.mid.attn_1.v.bias']), 0)
+ new['encoder']['13.attention.out_proj.weight'] = s['first_stage_model.encoder.mid.attn_1.proj_out.weight'].reshape((512, 512))
+ new['decoder']['3.attention.in_proj.weight'] = torch.cat((s['first_stage_model.decoder.mid.attn_1.q.weight'], s['first_stage_model.decoder.mid.attn_1.k.weight'], s['first_stage_model.decoder.mid.attn_1.v.weight']), 0).reshape((1536, 512))
+ new['decoder']['3.attention.in_proj.bias'] = torch.cat((s['first_stage_model.decoder.mid.attn_1.q.bias'], s['first_stage_model.decoder.mid.attn_1.k.bias'], s['first_stage_model.decoder.mid.attn_1.v.bias']), 0)
+ new['decoder']['3.attention.out_proj.weight'] = s['first_stage_model.decoder.mid.attn_1.proj_out.weight'].reshape((512, 512))
+ new['clip']['layers.0.attention.in_proj.weight'] = torch.cat((s['cond_stage_model.transformer.text_model.encoder.layers.0.self_attn.q_proj.weight'], s['cond_stage_model.transformer.text_model.encoder.layers.0.self_attn.k_proj.weight'], s['cond_stage_model.transformer.text_model.encoder.layers.0.self_attn.v_proj.weight']), 0)
+ new['clip']['layers.0.attention.in_proj.bias'] = torch.cat((s['cond_stage_model.transformer.text_model.encoder.layers.0.self_attn.q_proj.bias'], s['cond_stage_model.transformer.text_model.encoder.layers.0.self_attn.k_proj.bias'], s['cond_stage_model.transformer.text_model.encoder.layers.0.self_attn.v_proj.bias']), 0)
+ new['clip']['layers.1.attention.in_proj.weight'] = torch.cat((s['cond_stage_model.transformer.text_model.encoder.layers.1.self_attn.q_proj.weight'], s['cond_stage_model.transformer.text_model.encoder.layers.1.self_attn.k_proj.weight'], s['cond_stage_model.transformer.text_model.encoder.layers.1.self_attn.v_proj.weight']), 0)
+ new['clip']['layers.1.attention.in_proj.bias'] = torch.cat((s['cond_stage_model.transformer.text_model.encoder.layers.1.self_attn.q_proj.bias'], s['cond_stage_model.transformer.text_model.encoder.layers.1.self_attn.k_proj.bias'], s['cond_stage_model.transformer.text_model.encoder.layers.1.self_attn.v_proj.bias']), 0)
+ new['clip']['layers.2.attention.in_proj.weight'] = torch.cat((s['cond_stage_model.transformer.text_model.encoder.layers.2.self_attn.q_proj.weight'], s['cond_stage_model.transformer.text_model.encoder.layers.2.self_attn.k_proj.weight'], s['cond_stage_model.transformer.text_model.encoder.layers.2.self_attn.v_proj.weight']), 0)
+ new['clip']['layers.2.attention.in_proj.bias'] = torch.cat((s['cond_stage_model.transformer.text_model.encoder.layers.2.self_attn.q_proj.bias'], s['cond_stage_model.transformer.text_model.encoder.layers.2.self_attn.k_proj.bias'], s['cond_stage_model.transformer.text_model.encoder.layers.2.self_attn.v_proj.bias']), 0)
+ new['clip']['layers.3.attention.in_proj.weight'] = torch.cat((s['cond_stage_model.transformer.text_model.encoder.layers.3.self_attn.q_proj.weight'], s['cond_stage_model.transformer.text_model.encoder.layers.3.self_attn.k_proj.weight'], s['cond_stage_model.transformer.text_model.encoder.layers.3.self_attn.v_proj.weight']), 0)
+ new['clip']['layers.3.attention.in_proj.bias'] = torch.cat((s['cond_stage_model.transformer.text_model.encoder.layers.3.self_attn.q_proj.bias'], s['cond_stage_model.transformer.text_model.encoder.layers.3.self_attn.k_proj.bias'], s['cond_stage_model.transformer.text_model.encoder.layers.3.self_attn.v_proj.bias']), 0)
+ new['clip']['layers.4.attention.in_proj.weight'] = torch.cat((s['cond_stage_model.transformer.text_model.encoder.layers.4.self_attn.q_proj.weight'], s['cond_stage_model.transformer.text_model.encoder.layers.4.self_attn.k_proj.weight'], s['cond_stage_model.transformer.text_model.encoder.layers.4.self_attn.v_proj.weight']), 0)
+ new['clip']['layers.4.attention.in_proj.bias'] = torch.cat((s['cond_stage_model.transformer.text_model.encoder.layers.4.self_attn.q_proj.bias'], s['cond_stage_model.transformer.text_model.encoder.layers.4.self_attn.k_proj.bias'], s['cond_stage_model.transformer.text_model.encoder.layers.4.self_attn.v_proj.bias']), 0)
+ new['clip']['layers.5.attention.in_proj.weight'] = torch.cat((s['cond_stage_model.transformer.text_model.encoder.layers.5.self_attn.q_proj.weight'], s['cond_stage_model.transformer.text_model.encoder.layers.5.self_attn.k_proj.weight'], s['cond_stage_model.transformer.text_model.encoder.layers.5.self_attn.v_proj.weight']), 0)
+ new['clip']['layers.5.attention.in_proj.bias'] = torch.cat((s['cond_stage_model.transformer.text_model.encoder.layers.5.self_attn.q_proj.bias'], s['cond_stage_model.transformer.text_model.encoder.layers.5.self_attn.k_proj.bias'], s['cond_stage_model.transformer.text_model.encoder.layers.5.self_attn.v_proj.bias']), 0)
+ new['clip']['layers.6.attention.in_proj.weight'] = torch.cat((s['cond_stage_model.transformer.text_model.encoder.layers.6.self_attn.q_proj.weight'], s['cond_stage_model.transformer.text_model.encoder.layers.6.self_attn.k_proj.weight'], s['cond_stage_model.transformer.text_model.encoder.layers.6.self_attn.v_proj.weight']), 0)
+ new['clip']['layers.6.attention.in_proj.bias'] = torch.cat((s['cond_stage_model.transformer.text_model.encoder.layers.6.self_attn.q_proj.bias'], s['cond_stage_model.transformer.text_model.encoder.layers.6.self_attn.k_proj.bias'], s['cond_stage_model.transformer.text_model.encoder.layers.6.self_attn.v_proj.bias']), 0)
+ new['clip']['layers.7.attention.in_proj.weight'] = torch.cat((s['cond_stage_model.transformer.text_model.encoder.layers.7.self_attn.q_proj.weight'], s['cond_stage_model.transformer.text_model.encoder.layers.7.self_attn.k_proj.weight'], s['cond_stage_model.transformer.text_model.encoder.layers.7.self_attn.v_proj.weight']), 0)
+ new['clip']['layers.7.attention.in_proj.bias'] = torch.cat((s['cond_stage_model.transformer.text_model.encoder.layers.7.self_attn.q_proj.bias'], s['cond_stage_model.transformer.text_model.encoder.layers.7.self_attn.k_proj.bias'], s['cond_stage_model.transformer.text_model.encoder.layers.7.self_attn.v_proj.bias']), 0)
+ new['clip']['layers.8.attention.in_proj.weight'] = torch.cat((s['cond_stage_model.transformer.text_model.encoder.layers.8.self_attn.q_proj.weight'], s['cond_stage_model.transformer.text_model.encoder.layers.8.self_attn.k_proj.weight'], s['cond_stage_model.transformer.text_model.encoder.layers.8.self_attn.v_proj.weight']), 0)
+ new['clip']['layers.8.attention.in_proj.bias'] = torch.cat((s['cond_stage_model.transformer.text_model.encoder.layers.8.self_attn.q_proj.bias'], s['cond_stage_model.transformer.text_model.encoder.layers.8.self_attn.k_proj.bias'], s['cond_stage_model.transformer.text_model.encoder.layers.8.self_attn.v_proj.bias']), 0)
+ new['clip']['layers.9.attention.in_proj.weight'] = torch.cat((s['cond_stage_model.transformer.text_model.encoder.layers.9.self_attn.q_proj.weight'], s['cond_stage_model.transformer.text_model.encoder.layers.9.self_attn.k_proj.weight'], s['cond_stage_model.transformer.text_model.encoder.layers.9.self_attn.v_proj.weight']), 0)
+ new['clip']['layers.9.attention.in_proj.bias'] = torch.cat((s['cond_stage_model.transformer.text_model.encoder.layers.9.self_attn.q_proj.bias'], s['cond_stage_model.transformer.text_model.encoder.layers.9.self_attn.k_proj.bias'], s['cond_stage_model.transformer.text_model.encoder.layers.9.self_attn.v_proj.bias']), 0)
+ new['clip']['layers.10.attention.in_proj.weight'] = torch.cat((s['cond_stage_model.transformer.text_model.encoder.layers.10.self_attn.q_proj.weight'], s['cond_stage_model.transformer.text_model.encoder.layers.10.self_attn.k_proj.weight'], s['cond_stage_model.transformer.text_model.encoder.layers.10.self_attn.v_proj.weight']), 0)
+ new['clip']['layers.10.attention.in_proj.bias'] = torch.cat((s['cond_stage_model.transformer.text_model.encoder.layers.10.self_attn.q_proj.bias'], s['cond_stage_model.transformer.text_model.encoder.layers.10.self_attn.k_proj.bias'], s['cond_stage_model.transformer.text_model.encoder.layers.10.self_attn.v_proj.bias']), 0)
+ new['clip']['layers.11.attention.in_proj.weight'] = torch.cat((s['cond_stage_model.transformer.text_model.encoder.layers.11.self_attn.q_proj.weight'], s['cond_stage_model.transformer.text_model.encoder.layers.11.self_attn.k_proj.weight'], s['cond_stage_model.transformer.text_model.encoder.layers.11.self_attn.v_proj.weight']), 0)
+ new['clip']['layers.11.attention.in_proj.bias'] = torch.cat((s['cond_stage_model.transformer.text_model.encoder.layers.11.self_attn.q_proj.bias'], s['cond_stage_model.transformer.text_model.encoder.layers.11.self_attn.k_proj.bias'], s['cond_stage_model.transformer.text_model.encoder.layers.11.self_attn.v_proj.bias']), 0)
+ return new
+
+
From 05bbb2b10896a36c0694d33d6a48d97da9995026 Mon Sep 17 00:00:00 2001
From: a
Date: Fri, 1 Mar 2024 15:59:39 +0000
Subject: [PATCH 02/11] add a demo for loading an original stable diffusion
checkpoint and generating an image
---
.gitignore | 3 ++-
demo_loadmodel.py | 49 +++++++++++++++++++++++++++++++++++++++++++++++
2 files changed, 51 insertions(+), 1 deletion(-)
create mode 100644 demo_loadmodel.py
diff --git a/.gitignore b/.gitignore
index 8084e7b..7c63b63 100644
--- a/.gitignore
+++ b/.gitignore
@@ -201,4 +201,5 @@ cython_debug/
data/
ddata/
migrators/
-data.*.tar
\ No newline at end of file
+data.*.tar
+out/
diff --git a/demo_loadmodel.py b/demo_loadmodel.py
new file mode 100644
index 0000000..27b65a9
--- /dev/null
+++ b/demo_loadmodel.py
@@ -0,0 +1,49 @@
+# python3
+# A demo of loading a third party model (everythingV5) meant for the original stable diffusion code, and generating images.
+import os
+import safetensors.torch
+import torch
+from stable_diffusion_pytorch import pipeline, checkpoint_loader
+
+# Runs on intel gpu by default, to run on cuda remove the following import and change 'xpu' to 'cuda'.
+import intel_extension_for_pytorch as ipex
+device = 'xpu'
+
+
+# where to store generated images
+outDir = './out'
+
+# path to SD 1.4/1.5 based model safetensors file
+modelPath = '/stor/download2/anything_inkBase.safetensors'
+
+# if true, use float16, otherwise float32
+useHalfPrecision = True
+
+
+
+os.makedirs(outDir, exist_ok=True)
+
+# load the checkpoint file and convert to half precision if needed
+state_dict = safetensors.torch.load_file(modelPath)
+if useHalfPrecision:
+ for x in state_dict:
+ state_dict[x] = state_dict[x].half()
+
+# convert to the state_dicts format that our library expects
+state_dicts = checkpoint_loader.split_state_dict(state_dict)
+
+# create the model objects, and apply the weights in state_dicts
+models = checkpoint_loader.load_models(state_dicts, device, useHalfPrecision)
+
+steps = 40
+seed = 12345
+prompt = '1girl,living room,silver leotard,navel,bunny girl,cute,silver hairs,black eyes,cleavage,(leaning_forward:1.6),cafe,black stocking,papilla,all fours'
+negativePrompt = 'bad anatomy,bad hands,missing fingers,extra fingers,three hands,three legs,bad arms,missing legs,missing arms,poorly drawn face,bad face,fused face,cloned face,three crus,fused feet,fused thigh,extra crus,ugly fingers,horn,realistic photo,huge eyes,worst face,2girl,long fingers,disconnected limbs,worst quality,normal quality,low quality,low res,blurry,text,watermark,logo,banner,extra digits,cropped,jpeg artifacts,signature,username,error,sketch ,duplicate,ugly,monochrome,horror,geometry,mutation,disgusting'
+
+fileName = prompt.replace(' ', '_').replace('\\', '_').replace(',', '-') + '.' + str(seed) + '.png'
+
+images = pipeline.generate([prompt], uncond_prompts=[negativePrompt],
+ models=models, n_inference_steps=steps, seed=seed, device=device,
+ height=768, width=512)
+
+images[0].save(outDir + '/' + fileName)
From abe8465ea3b9795c5b1db0ba866c70e1cceed85d Mon Sep 17 00:00:00 2001
From: a
Date: Fri, 1 Mar 2024 16:03:07 +0000
Subject: [PATCH 03/11] demo_loadmodel: don't put colons in filename (not
allowed on windows)
---
demo_loadmodel.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/demo_loadmodel.py b/demo_loadmodel.py
index 27b65a9..112e035 100644
--- a/demo_loadmodel.py
+++ b/demo_loadmodel.py
@@ -40,7 +40,7 @@
prompt = '1girl,living room,silver leotard,navel,bunny girl,cute,silver hairs,black eyes,cleavage,(leaning_forward:1.6),cafe,black stocking,papilla,all fours'
negativePrompt = 'bad anatomy,bad hands,missing fingers,extra fingers,three hands,three legs,bad arms,missing legs,missing arms,poorly drawn face,bad face,fused face,cloned face,three crus,fused feet,fused thigh,extra crus,ugly fingers,horn,realistic photo,huge eyes,worst face,2girl,long fingers,disconnected limbs,worst quality,normal quality,low quality,low res,blurry,text,watermark,logo,banner,extra digits,cropped,jpeg artifacts,signature,username,error,sketch ,duplicate,ugly,monochrome,horror,geometry,mutation,disgusting'
-fileName = prompt.replace(' ', '_').replace('\\', '_').replace(',', '-') + '.' + str(seed) + '.png'
+fileName = prompt.replace(' ', '_').replace('\\', '_').replace(':', '⦂').replace(',', '-') + '.' + str(seed) + '.png'
images = pipeline.generate([prompt], uncond_prompts=[negativePrompt],
models=models, n_inference_steps=steps, seed=seed, device=device,
From 38fb152537ec7d3fd6fc3fdd7c499797d0c3c7e8 Mon Sep 17 00:00:00 2001
From: a
Date: Fri, 1 Mar 2024 16:06:05 +0000
Subject: [PATCH 04/11] demo_loadmodel: add link for the model used
---
demo_loadmodel.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/demo_loadmodel.py b/demo_loadmodel.py
index 112e035..27588a7 100644
--- a/demo_loadmodel.py
+++ b/demo_loadmodel.py
@@ -14,7 +14,7 @@
outDir = './out'
# path to SD 1.4/1.5 based model safetensors file
-modelPath = '/stor/download2/anything_inkBase.safetensors'
+modelPath = '/stor/download2/anything_inkBase.safetensors' # download from https://civitai.com/models/9409?modelVersionId=90854
# if true, use float16, otherwise float32
useHalfPrecision = True
From 20f7f972fd949e06569833dfe61e883e21c65bcf Mon Sep 17 00:00:00 2001
From: a
Date: Mon, 4 Mar 2024 07:22:53 +0000
Subject: [PATCH 05/11] move checkpoint_loader to convert_from_sdmodel and
model_loader; update example
---
.gitignore | 1 +
demo_loadmodel.py | 49 -------------
demo_sdmodel_generate_image.py | 46 ++++++++++++
...oint_loader.py => convert_from_sdmodel.py} | 36 +---------
stable_diffusion_pytorch/model_loader.py | 72 +++++++++++++++----
5 files changed, 107 insertions(+), 97 deletions(-)
delete mode 100644 demo_loadmodel.py
create mode 100644 demo_sdmodel_generate_image.py
rename stable_diffusion_pytorch/{checkpoint_loader.py => convert_from_sdmodel.py} (98%)
diff --git a/.gitignore b/.gitignore
index 7c63b63..f4104ec 100644
--- a/.gitignore
+++ b/.gitignore
@@ -202,4 +202,5 @@ data/
ddata/
migrators/
data.*.tar
+sd-v1-5.safetensors
out/
diff --git a/demo_loadmodel.py b/demo_loadmodel.py
deleted file mode 100644
index 27588a7..0000000
--- a/demo_loadmodel.py
+++ /dev/null
@@ -1,49 +0,0 @@
-# python3
-# A demo of loading a third party model (everythingV5) meant for the original stable diffusion code, and generating images.
-import os
-import safetensors.torch
-import torch
-from stable_diffusion_pytorch import pipeline, checkpoint_loader
-
-# Runs on intel gpu by default, to run on cuda remove the following import and change 'xpu' to 'cuda'.
-import intel_extension_for_pytorch as ipex
-device = 'xpu'
-
-
-# where to store generated images
-outDir = './out'
-
-# path to SD 1.4/1.5 based model safetensors file
-modelPath = '/stor/download2/anything_inkBase.safetensors' # download from https://civitai.com/models/9409?modelVersionId=90854
-
-# if true, use float16, otherwise float32
-useHalfPrecision = True
-
-
-
-os.makedirs(outDir, exist_ok=True)
-
-# load the checkpoint file and convert to half precision if needed
-state_dict = safetensors.torch.load_file(modelPath)
-if useHalfPrecision:
- for x in state_dict:
- state_dict[x] = state_dict[x].half()
-
-# convert to the state_dicts format that our library expects
-state_dicts = checkpoint_loader.split_state_dict(state_dict)
-
-# create the model objects, and apply the weights in state_dicts
-models = checkpoint_loader.load_models(state_dicts, device, useHalfPrecision)
-
-steps = 40
-seed = 12345
-prompt = '1girl,living room,silver leotard,navel,bunny girl,cute,silver hairs,black eyes,cleavage,(leaning_forward:1.6),cafe,black stocking,papilla,all fours'
-negativePrompt = 'bad anatomy,bad hands,missing fingers,extra fingers,three hands,three legs,bad arms,missing legs,missing arms,poorly drawn face,bad face,fused face,cloned face,three crus,fused feet,fused thigh,extra crus,ugly fingers,horn,realistic photo,huge eyes,worst face,2girl,long fingers,disconnected limbs,worst quality,normal quality,low quality,low res,blurry,text,watermark,logo,banner,extra digits,cropped,jpeg artifacts,signature,username,error,sketch ,duplicate,ugly,monochrome,horror,geometry,mutation,disgusting'
-
-fileName = prompt.replace(' ', '_').replace('\\', '_').replace(':', '⦂').replace(',', '-') + '.' + str(seed) + '.png'
-
-images = pipeline.generate([prompt], uncond_prompts=[negativePrompt],
- models=models, n_inference_steps=steps, seed=seed, device=device,
- height=768, width=512)
-
-images[0].save(outDir + '/' + fileName)
diff --git a/demo_sdmodel_generate_image.py b/demo_sdmodel_generate_image.py
new file mode 100644
index 0000000..f408d56
--- /dev/null
+++ b/demo_sdmodel_generate_image.py
@@ -0,0 +1,46 @@
+# python3
+# A demo of loading a checkpoint in the original stable diffusion format (e.g. sd-v1-5) and generating images.
+import os
+import safetensors.torch
+import torch
+from stable_diffusion_pytorch import pipeline, convert_from_sdmodel, model_loader
+
+# Runs on intel gpu by default, to run on cuda remove the following import and change 'xpu' to 'cuda'.
+import intel_extension_for_pytorch as ipex
+device = 'xpu'
+
+
+# where to store generated images
+outDir = './out'
+
+# path to SD 1.4/1.5 based model safetensors file
+modelPath = './sd-v1-5.safetensors' # download from https://huggingface.co/tabtap/sd-v1-5.safetensor/tree/main
+
+# either float16 or float32
+dtype = torch.float16
+
+
+
+os.makedirs(outDir, exist_ok=True)
+
+# load the checkpoint file
+state_dict = safetensors.torch.load_file(modelPath)
+
+# convert to the state_dicts format that our library expects
+state_dicts = convert_from_sdmodel.split_state_dict(state_dict)
+
+# create the model objects, and apply the weights in state_dicts
+models = model_loader.load_models(state_dicts, device, dtype)
+
+steps = 40
+seed = 12345
+prompt = '1girl,cirno,anime style'
+negativePrompt = 'bad anatomy,bad hands,missing fingers,extra fingers'
+
+fileName = prompt.replace(' ', '_').replace('\\', '_').replace(':', '⦂').replace(',', '-') + '.' + str(seed) + '.png'
+
+images = pipeline.generate([prompt], uncond_prompts=[negativePrompt],
+ models=models, n_inference_steps=steps, seed=seed, device=device,
+ height=768, width=512)
+
+images[0].save(outDir + '/' + fileName)
diff --git a/stable_diffusion_pytorch/checkpoint_loader.py b/stable_diffusion_pytorch/convert_from_sdmodel.py
similarity index 98%
rename from stable_diffusion_pytorch/checkpoint_loader.py
rename to stable_diffusion_pytorch/convert_from_sdmodel.py
index c21d27a..3f09b1b 100644
--- a/stable_diffusion_pytorch/checkpoint_loader.py
+++ b/stable_diffusion_pytorch/convert_from_sdmodel.py
@@ -2,43 +2,11 @@
import safetensors.torch
from . import Tokenizer, CLIP, Encoder, Decoder, Diffusion
-r"""
- Create the 4 models that the pipeline expects and load the weights from state_dicts (not an original stable diffusion state_dict!).
- Args:
- state_dicts (`Dict[str, str]`):
- A dict with 4 keys: clip, encoder, decoder, diffusion; each key's value is a dict of weights for that model.
- You can pass in the dict returned by split_state_dict().
- device (`str`):
- The device to run the models on, passed to model.to()
- useHalfPrecision (`bool`, *optional*):
- If true, use float16, otherwise float32.
- Returns:
- `Dict[str, torch.nn.Module]`:
- The loaded models to be passed to pipeline.generate()
- """
-def load_models(state_dicts, device, useHalfPrecision=False):
- models = {}
- if useHalfPrecision:
- models['clip'] = CLIP().to(device).half()
- models['encoder'] = Encoder().to(device).half()
- models['decoder'] = Decoder().to(device).half()
- models['diffusion'] = Diffusion().to(device).half()
- else:
- models['clip'] = CLIP().to(device)
- models['encoder'] = Encoder().to(device)
- models['decoder'] = Decoder().to(device)
- models['diffusion'] = Diffusion().to(device)
-
- models['clip'].load_state_dict(state_dicts['clip'])
- models['encoder'].load_state_dict(state_dicts['encoder'])
- models['decoder'].load_state_dict(state_dicts['decoder'])
- models['diffusion'].load_state_dict(state_dicts['diffusion'])
- return models
-
r"""
Our library model implementation is laid out differently from the original stable diffusion, so
original SD state_dict can not be directly loaded. This function converts an original SD
- state_dict to the 4 state_dicts (clip, encoder, decoder, diffusion) that our models expect.
+ state_dict to the 4 state_dicts (clip, encoder, decoder, diffusion) that can be passed to
+ model_loader.load_models()
Args:
state_dict (`Dict[str, str]`):
The original state_dict to convert.
diff --git a/stable_diffusion_pytorch/model_loader.py b/stable_diffusion_pytorch/model_loader.py
index 9e5bd89..a87dc4c 100644
--- a/stable_diffusion_pytorch/model_loader.py
+++ b/stable_diffusion_pytorch/model_loader.py
@@ -31,42 +31,86 @@ def make_compatible(state_dict):
return state_dict
-def load_clip(device):
+# The following functions load the default models, used when the user supplied `models` is missing
+# one or more of (clip, encoder, decoder, diffusion).
+
+def load_clip(device, dtype):
state_dict = torch.load(util.get_file_path('ckpt/clip.pt'))
state_dict = make_compatible(state_dict)
- clip = CLIP().to(device)
+ clip = CLIP().to(dtype).to(device)
clip.load_state_dict(state_dict)
return clip
-def load_encoder(device):
+def load_encoder(device, dtype):
state_dict = torch.load(util.get_file_path('ckpt/encoder.pt'))
state_dict = make_compatible(state_dict)
- encoder = Encoder().to(device)
+ encoder = Encoder().to(dtype).to(device)
encoder.load_state_dict(state_dict)
return encoder
-def load_decoder(device):
+def load_decoder(device, dtype):
state_dict = torch.load(util.get_file_path('ckpt/decoder.pt'))
state_dict = make_compatible(state_dict)
- decoder = Decoder().to(device)
+ decoder = Decoder().to(dtype).to(device)
decoder.load_state_dict(state_dict)
return decoder
-def load_diffusion(device):
+def load_diffusion(device, dtype):
state_dict = torch.load(util.get_file_path('ckpt/diffusion.pt'))
state_dict = make_compatible(state_dict)
- diffusion = Diffusion().to(device)
+ diffusion = Diffusion().to(dtype).to(device)
diffusion.load_state_dict(state_dict)
return diffusion
-def preload_models(device):
+def preload_models(device, dtype=torch.float32):
+    r"""
+    Create and load the 4 default models (clip, encoder, decoder, diffusion).
+    Args:
+        device (`str`):
+            The device to run the models on, passed to model.to()
+        dtype (`torch.dtype`, *optional*):
+            The data type of the model to create. Note that this can be different
+            from the data type of the checkpoint file, and pytorch will auto convert.
+    Returns:
+        `Dict[str, torch.nn.Module]`:
+            The loaded models to be passed to pipeline.generate()
+    """
return {
- 'clip': load_clip(device),
- 'encoder': load_encoder(device),
- 'decoder': load_decoder(device),
- 'diffusion': load_diffusion(device),
- }
\ No newline at end of file
+ 'clip': load_clip(device, dtype),
+ 'encoder': load_encoder(device, dtype),
+ 'decoder': load_decoder(device, dtype),
+ 'diffusion': load_diffusion(device, dtype),
+ }
+
+def load_models(state_dicts, device, dtype=torch.float32):
+    r"""
+    Create the 4 models that the pipeline expects and load the weights from state_dicts
+    (not an original stable diffusion state_dict!).
+    Args:
+        state_dicts (`Dict[str, Dict[str, torch.Tensor]]`):
+            A dict with 4 keys: clip, encoder, decoder, diffusion; each key's value is a dict of weights
+            for that model. You can pass in the dict returned by convert_from_sdmodel.split_state_dict().
+        device (`str`):
+            The device to run the models on, passed to model.to()
+        dtype (`torch.dtype`, *optional*):
+            The data type of the model to create. Note that this can be different
+            from the data type of the checkpoint file, and pytorch will auto convert.
+    Returns:
+        `Dict[str, torch.nn.Module]`:
+            The loaded models to be passed to pipeline.generate()
+    """
+ models = {}
+ models['clip'] = CLIP().to(dtype).to(device)
+ models['encoder'] = Encoder().to(dtype).to(device)
+ models['decoder'] = Decoder().to(dtype).to(device)
+ models['diffusion'] = Diffusion().to(dtype).to(device)
+
+ models['clip'].load_state_dict(state_dicts['clip'])
+ models['encoder'].load_state_dict(state_dicts['encoder'])
+ models['decoder'].load_state_dict(state_dicts['decoder'])
+ models['diffusion'].load_state_dict(state_dicts['diffusion'])
+ return models
From 536e04ab0414ff50b45e32fe8164780c944184eb Mon Sep 17 00:00:00 2001
From: a
Date: Mon, 4 Mar 2024 08:02:36 +0000
Subject: [PATCH 06/11] demo_sdmodel_generate_image.py: convert tabs to spaces
---
demo_sdmodel_generate_image.py | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/demo_sdmodel_generate_image.py b/demo_sdmodel_generate_image.py
index f408d56..efaf0a2 100644
--- a/demo_sdmodel_generate_image.py
+++ b/demo_sdmodel_generate_image.py
@@ -40,7 +40,7 @@
 fileName = prompt.replace(' ', '_').replace('\\', '_').replace(':', '⦂').replace(',', '-') + '.' + str(seed) + '.png'
images = pipeline.generate([prompt], uncond_prompts=[negativePrompt],
- models=models, n_inference_steps=steps, seed=seed, device=device,
- height=768, width=512)
+ models=models, n_inference_steps=steps, seed=seed, device=device,
+ height=768, width=512)
images[0].save(outDir + '/' + fileName)
From 29222fcd7119ad844746b4c1f47000c7e3b55beb Mon Sep 17 00:00:00 2001
From: a
Date: Mon, 4 Mar 2024 08:02:46 +0000
Subject: [PATCH 07/11] add convert_model.py
---
convert_model.py | 28 ++++++++++++++++++++++++++++
1 file changed, 28 insertions(+)
create mode 100644 convert_model.py
diff --git a/convert_model.py b/convert_model.py
new file mode 100644
index 0000000..671239c
--- /dev/null
+++ b/convert_model.py
@@ -0,0 +1,28 @@
+import argparse
+import safetensors.torch
+from stable_diffusion_pytorch import convert_from_sdmodel
+
+parser = argparse.ArgumentParser()
+parser.add_argument(
+ "--sd_model", type=str, required=True,
+ help="Stable Diffusion model to load in safetensors format"
+)
+parser.add_argument(
+ "--save_to", type=str, required=True,
+ help="The prefix of the path to save to, for example \"./mymodel_\" " \
+ "will save 4 files, mymodel_clip.safetensors, mymodel_decoder.safetensors, " \
+ "mymodel_encoder.safetensors, and mymodel_diffusion.safetensors"
+)
+
+args = parser.parse_args()
+
+# load the checkpoint file
+state_dict = safetensors.torch.load_file(args.sd_model)
+
+# convert to the state_dicts format that our library expects
+state_dicts = convert_from_sdmodel.split_state_dict(state_dict)
+
+for key in state_dicts:
+ outPath = args.save_to + key + '.safetensors'
+ print('Writing', outPath)
+ safetensors.torch.save_file(state_dicts[key], outPath)
From bfb931ab72c60a28fe962882a110f4d704fc81f5 Mon Sep 17 00:00:00 2001
From: a
Date: Mon, 4 Mar 2024 08:06:45 +0000
Subject: [PATCH 08/11] default dtype float32
---
stable_diffusion_pytorch/model_loader.py | 8 ++++----
stable_diffusion_pytorch/pipeline.py | 2 ++
2 files changed, 6 insertions(+), 4 deletions(-)
diff --git a/stable_diffusion_pytorch/model_loader.py b/stable_diffusion_pytorch/model_loader.py
index a87dc4c..1756ad6 100644
--- a/stable_diffusion_pytorch/model_loader.py
+++ b/stable_diffusion_pytorch/model_loader.py
@@ -34,7 +34,7 @@ def make_compatible(state_dict):
# The following functions load the default models, used when the user supplied `models` is missing
# one or more of (clip, encoder, decoder, diffusion).
-def load_clip(device, dtype):
+def load_clip(device, dtype=torch.float32):
state_dict = torch.load(util.get_file_path('ckpt/clip.pt'))
state_dict = make_compatible(state_dict)
@@ -42,7 +42,7 @@ def load_clip(device, dtype):
clip.load_state_dict(state_dict)
return clip
-def load_encoder(device, dtype):
+def load_encoder(device, dtype=torch.float32):
state_dict = torch.load(util.get_file_path('ckpt/encoder.pt'))
state_dict = make_compatible(state_dict)
@@ -50,7 +50,7 @@ def load_encoder(device, dtype):
encoder.load_state_dict(state_dict)
return encoder
-def load_decoder(device, dtype):
+def load_decoder(device, dtype=torch.float32):
state_dict = torch.load(util.get_file_path('ckpt/decoder.pt'))
state_dict = make_compatible(state_dict)
@@ -58,7 +58,7 @@ def load_decoder(device, dtype):
decoder.load_state_dict(state_dict)
return decoder
-def load_diffusion(device, dtype):
+def load_diffusion(device, dtype=torch.float32):
state_dict = torch.load(util.get_file_path('ckpt/diffusion.pt'))
state_dict = make_compatible(state_dict)
diff --git a/stable_diffusion_pytorch/pipeline.py b/stable_diffusion_pytorch/pipeline.py
index b5d1868..3d16208 100644
--- a/stable_diffusion_pytorch/pipeline.py
+++ b/stable_diffusion_pytorch/pipeline.py
@@ -58,6 +58,8 @@ def generate(
expense of slower inference. This parameter will be modulated by `strength`.
models (`Dict[str, torch.nn.Module]`, *optional*):
Preloaded models. If some or all models are not provided, they will be loaded dynamically.
+ Note that auto-loaded models default to float32, so if you are using float16 models, you must
+ provide all 4 models.
seed (`int`, *optional*):
A seed to make generation deterministic.
device (`str` or `torch.device`, *optional*):
From 95b1e25b7879a1fa09103db7b9ebf373cbbd4de4 Mon Sep 17 00:00:00 2001
From: a
Date: Mon, 4 Mar 2024 08:16:49 +0000
Subject: [PATCH 09/11] convert_model.py: add help text
---
convert_model.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/convert_model.py b/convert_model.py
index 671239c..c72460f 100644
--- a/convert_model.py
+++ b/convert_model.py
@@ -2,7 +2,7 @@
import safetensors.torch
from stable_diffusion_pytorch import convert_from_sdmodel
-parser = argparse.ArgumentParser()
+parser = argparse.ArgumentParser(description='Convert an original stable diffusion checkpoint file in safetensor format to 4 separate safetensors for this library.')
parser.add_argument(
"--sd_model", type=str, required=True,
help="Stable Diffusion model to load in safetensors format"
From a02642de422544440719a96c858f84fc4b21e03e Mon Sep 17 00:00:00 2001
From: a
Date: Mon, 4 Mar 2024 08:25:13 +0000
Subject: [PATCH 10/11] README.md: add example of how to load original sd
checkpoint
---
README.md | 15 +++++++++++++++
1 file changed, 15 insertions(+)
diff --git a/README.md b/README.md
index 5abcb59..55e567d 100644
--- a/README.md
+++ b/README.md
@@ -95,6 +95,21 @@ prompts = ["a photograph of an astronaut riding a horse"]
images = pipeline.generate(prompts, models=models, device='cuda', idle_device='cpu')
```
+Load an original stable diffusion model in safetensors format:
+```py
+import safetensors.torch
+from stable_diffusion_pytorch import convert_from_sdmodel, model_loader
+
+# load the checkpoint file
+state_dict = safetensors.torch.load_file('/PATH/TO/sd-v1-5.safetensors')
+
+# split the state_dict into the 4 state_dicts for our models
+state_dicts = convert_from_sdmodel.split_state_dict(state_dict)
+
+# create the model objects, and apply the weights in state_dicts
+models = model_loader.load_models(state_dicts, 'cpu')
+```
+
Image-to-image generation:
```py
from PIL import Image
From 411fd8e02444448a114425a60d1c844698cbae6d Mon Sep 17 00:00:00 2001
From: a
Date: Mon, 4 Mar 2024 08:31:08 +0000
Subject: [PATCH 11/11] README.md: make sd model example more complete
---
README.md | 5 ++++-
1 file changed, 4 insertions(+), 1 deletion(-)
diff --git a/README.md b/README.md
index 55e567d..add3be1 100644
--- a/README.md
+++ b/README.md
@@ -98,7 +98,7 @@ images = pipeline.generate(prompts, models=models, device='cuda', idle_device='c
Load an original stable diffusion model in safetensors format:
```py
import safetensors.torch
-from stable_diffusion_pytorch import convert_from_sdmodel, model_loader
+from stable_diffusion_pytorch import pipeline, convert_from_sdmodel, model_loader
# load the checkpoint file
state_dict = safetensors.torch.load_file('/PATH/TO/sd-v1-5.safetensors')
@@ -108,6 +108,9 @@ state_dicts = convert_from_sdmodel.split_state_dict(state_dict)
# create the model objects, and apply the weights in state_dicts
models = model_loader.load_models(state_dicts, 'cpu')
+
+prompts = ["a photograph of an astronaut riding a horse"]
+images = pipeline.generate(prompts, models=models, device='cuda', idle_device='cpu')
```
Image-to-image generation: