sunmeng7
diff --git a/‎README.md
+34-6 b/‎README.md
+34-6
diff --git a/‎demo.py
+5-5 b/‎demo.py
+5-5
diff --git a/‎environment.yml
+1 b/‎environment.yml
+1
diff --git a/‎metrics/__init__.py b/‎metrics/__init__.py
diff --git a/‎metrics/attribute_consistency.py
+6-6 b/‎metrics/attribute_consistency.py
+6-6
diff --git a/‎metrics/eval_encoder.py
+7-7 b/‎metrics/eval_encoder.py
+7-7
diff --git a/‎metrics/fid.py
+4-4 b/‎metrics/fid.py
+4-4
diff --git a/‎metrics/ppl.py
+3-3 b/‎metrics/ppl.py
+3-3
diff --git a/‎model/dynamic_channel.py
-78 b/‎model/dynamic_channel.py
-78
diff --git a/‎model/__init__.py ‎models/__init__.py
+3-3 b/‎model/__init__.py ‎models/__init__.py
+3-3
diff --git a/‎model/anycost_gan.py ‎models/anycost_gan.py
+11-7 b/‎model/anycost_gan.py ‎models/anycost_gan.py
+11-7
@@ -94,17 +94,17 @@ You can find a video recording of the demo [here](https://youtu.be/_yEziPl9AkM?t
 To get the pre-trained generator, encoder, and editing directions, run:
 
 ```python
-import model
+import models
 
 pretrained_type = 'generator'  # choosing from ['generator', 'encoder', 'boundary']
 config_name = 'anycost-ffhq-config-f'  # replace the config name for other models
-model.get_pretrained(pretrained_type, config=config_name)
+models.get_pretrained(pretrained_type, config=config_name)
 ```
 
 We also provide the face attribute classifier (which is general for different generators) for computing the editing directions. You can get it by running:
 
 ```python
-model.get_pretrained('attribute-predictor')
+models.get_pretrained('attribute-predictor')
 ```
 
 The attribute classifier takes in the face images in FFHQ format.
@@ -114,9 +114,9 @@ The attribute classifier takes in the face images in FFHQ format.
 After loading the Anycost generator, we can run it at a wide range of computational costs. For example:
 
 ```python
-from model.dynamic_channel import set_uniform_channel_ratio, reset_generator
+from models.dynamic_channel import set_uniform_channel_ratio, reset_generator
 
-g = model.get_pretrained('generator', config='anycost-ffhq-config-f')  # anycost uniform
+g = models.get_pretrained('generator', config='anycost-ffhq-config-f')  # anycost uniform
 set_uniform_channel_ratio(g, 0.5)  # set channel
 g.target_res = 512  # set resolution
 out, _ = g(...)  # generate image
@@ -223,7 +223,35 @@ python metrics/eval_encoder.py \
 
 ### Training
 
-The training code will be updated shortly.
+We provide the scripts to train Anycost GAN on the FFHQ dataset.
+
+- Training the original StyleGAN2 on FFHQ
+
+```
+horovodrun -np 8 bash scripts/train_stylegan2_ffhq.sh
+```
+
+Training the original StyleGAN2 is time-consuming. We recommend downloading the converted checkpoints from [here](https://www.dropbox.com/sh/l8g9amoduz99kjh/AAAY9LYZk2CnsO43ywDrLZpEa?dl=0) and placing them under `checkpoint/`.
+
+- Training Anycost GAN: multi-resolution
+
+```
+horovodrun -np 8 bash scripts/train_stylegan2_multires_ffhq.sh
+```
+
+Note that after each epoch, we evaluate the FIDs of two resolutions (1024&512) to better monitor the training progress. We also apply distillation to accelerate the convergence, which is not used for the ablation in the paper.
+
+- Training Anycost GAN: adaptive-channel
+
+```
+horovodrun -np 8 bash scripts/train_stylegan2_multires_adach_ffhq.sh
+```
+
+Here we set a larger number of training epochs for a more stable reproduction, which might not be necessary (depending on the randomness).
+
+
+
+**Note**: We trained our models on Titan RTX GPUs with 24GB memory. For GPUs with smaller memory, you may need to reduce the resolution/model size/batch size/etc. and adjust other hyper-parameters accordingly.
 
 
 
 
@@ -2,8 +2,8 @@
 import numpy as np
 import os
 from PIL import Image
-from model.dynamic_channel import set_uniform_channel_ratio, reset_generator
-import model
+from models.dynamic_channel import set_uniform_channel_ratio, reset_generator
+import models
 import time
 
 import sys
@@ -109,7 +109,7 @@ def __init__(self):
             self.set_text_format(attr_label, 'right', 15)
             attr_label.move(520 - 110, 470 + i_slider * 40 + 2)
 
-        # build model sliders
+        # build models sliders
         base_h = 560
         channel_label = QLabel(self)
         channel_label.setText('channel:')
@@ -187,7 +187,7 @@ def load_assets(self):
         self.anycost_resolution = 1024
 
         # build the generator
-        self.generator = model.get_pretrained('generator', config).to(device)
+        self.generator = models.get_pretrained('generator', config).to(device)
         self.generator.eval()
         self.mean_latent = self.generator.mean_style(10000)
 
@@ -213,7 +213,7 @@ def load_assets(self):
             'mustache': '22_Mustache',
         }
 
-        boundaries = model.get_pretrained('boundary', config)
+        boundaries = models.get_pretrained('boundary', config)
         self.direction_dict = dict()
         for k, v in direction_map.items():
             self.direction_dict[k] = boundaries[v].view(1, 1, -1)
 
@@ -20,3 +20,4 @@ dependencies:
     - torchprofile==0.0.2
     - pyqt5==5.15.2
     - horovod==0.21.3
+    - tensorboard==2.4.1
@@ -5,13 +5,13 @@
 import math
 import torch
 from tqdm import tqdm
-import model
+import models
 import horovod.torch as hvd
-from utils import adaptive_resize
+from utils.torch_utils import adaptive_resize
 
 
 def compute_attribute_consistency(g, sub_g, n_sample, batch_size):
-    attr_pred = model.get_pretrained('attribute-predictor').to(device)
+    attr_pred = models.get_pretrained('attribute-predictor').to(device)
     attr_pred.eval()
 
     n_batch = math.ceil(n_sample * 1. / batch_size / hvd.size())
@@ -56,11 +56,11 @@ def compute_attribute_consistency(g, sub_g, n_sample, batch_size):
     hvd.init()
     torch.cuda.set_device(hvd.local_rank())
 
-    generator = model.get_pretrained('generator', args.config).to(device).eval()
+    generator = models.get_pretrained('generator', args.config).to(device).eval()
 
-    sub_generator = model.get_pretrained('generator', args.config).to(device).eval()
+    sub_generator = models.get_pretrained('generator', args.config).to(device).eval()
     if args.channel_ratio:
-        from model.dynamic_channel import set_uniform_channel_ratio
+        from models.dynamic_channel import set_uniform_channel_ratio
         set_uniform_channel_ratio(sub_generator, args.channel_ratio)
 
     if args.target_res is not None:
 
@@ -6,14 +6,14 @@
 import torch
 import torch.nn as nn
 from tqdm import tqdm
-import model
-from utils import adaptive_resize
+import models
+from utils.torch_utils import adaptive_resize
 from thirdparty.celeba_hq_split import get_celeba_hq_split
 from torchvision import transforms
 import lpips
-from utils import AverageMeter
-from model.dynamic_channel import set_uniform_channel_ratio, remove_sub_channel_config
-from utils import NativeDataset
+from utils.torch_utils import AverageMeter
+from models.dynamic_channel import set_uniform_channel_ratio, remove_sub_channel_config
+from utils.datasets import NativeDataset
 
 
 def validate():
@@ -89,10 +89,10 @@ def validate():
     args = parser.parse_args()
 
     # build models
-    generator = model.get_pretrained('generator', args.config).to(device)
+    generator = models.get_pretrained('generator', args.config).to(device)
     generator.eval()
 
-    encoder = model.get_pretrained('encoder', args.config).to(device)
+    encoder = models.get_pretrained('encoder', args.config).to(device)
     encoder.eval()
 
     # build test dataset
 
@@ -7,7 +7,7 @@
 import numpy as np
 from scipy import linalg
 from tqdm import tqdm
-import model
+import models
 
 
 def calc_fid(sample_mean, sample_cov, real_mean, real_cov, eps=1e-6):
@@ -79,12 +79,12 @@ def compute_fid():
     hvd.init()
     torch.cuda.set_device(hvd.local_rank())
 
-    generator = model.get_pretrained('generator', args.config).to(device)
+    generator = models.get_pretrained('generator', args.config).to(device)
     generator.eval()
 
     # set sub-generator
     if args.channel_ratio:
-        from model.dynamic_channel import set_uniform_channel_ratio, CHANNEL_CONFIGS
+        from models.dynamic_channel import set_uniform_channel_ratio, CHANNEL_CONFIGS
 
         assert args.channel_ratio in CHANNEL_CONFIGS
         set_uniform_channel_ratio(generator, args.channel_ratio)
@@ -103,7 +103,7 @@ def compute_fid():
         except:
             print(' * Profiling failed. Passed.')
 
-    inception = model.get_pretrained('inception').to(device)
+    inception = models.get_pretrained('inception').to(device)
     inception.eval()
 
     inception_features = extract_feature_from_samples()
 
@@ -8,7 +8,7 @@
 import numpy as np
 from tqdm import tqdm
 import lpips
-import model
+import models
 import horovod.torch as hvd
 
 
@@ -108,11 +108,11 @@ def compute_ppl(g, n_sample, batch_size, space='w', sampling='end', eps=1e-4, cr
     hvd.init()
     torch.cuda.set_device(hvd.local_rank())
 
-    generator = model.get_pretrained('generator', args.config).to(device)
+    generator = models.get_pretrained('generator', args.config).to(device)
     generator.eval()
 
     if args.channel_ratio:
-        from model.dynamic_channel import set_uniform_channel_ratio
+        from models.dynamic_channel import set_uniform_channel_ratio
         set_uniform_channel_ratio(generator, args.channel_ratio)
 
     if args.target_res is not None:
 
@@ -1,7 +1,7 @@
 from .anycost_gan import Generator
 import torch
 from torchvision import models
-from utils import safe_load_state_dict_from_url
+from utils.torch_utils import safe_load_state_dict_from_url
 
 URL_TEMPLATE = 'https://hanlab.mit.edu/projects/anycost-gan/files/{}_{}.pt'
 
@@ -44,7 +44,7 @@ def get_pretrained(model, config=None):
             style_dim = 512
         else:
             raise NotImplementedError
-        from model.encoder import ResNet50Encoder
+        from models.encoder import ResNet50Encoder
         model = ResNet50Encoder(n_style=n_style, style_dim=style_dim)
         model.load_state_dict(load_state_dict_from_url(url, 'state_dict'))
         return model
@@ -53,7 +53,7 @@ def get_pretrained(model, config=None):
         predictor.fc = torch.nn.Linear(predictor.fc.in_features, 40 * 2)
         predictor.load_state_dict(load_state_dict_from_url(url, 'state_dict'))
         return predictor
-    elif model == 'inception':  # inception model
+    elif model == 'inception':  # inception models
         from thirdparty.inception import InceptionV3
         return InceptionV3([3], normalize_input=False, resize_input=True)
     elif model == 'boundary':
 
@@ -4,7 +4,7 @@
 import torch
 from torch import nn
 
-from model.ops import *
+from models.ops import *
 
 G_CHANNEL_CONFIG = {
     4: 4096,
@@ -214,8 +214,8 @@ def __init__(self, resolution, channel_multiplier=2, channel_max=512, blur_kerne
             EqualLinear(channels[4], 1),
         )
 
-    def forward(self, input):
-        out = self.convs(input)
+    def forward(self, x):
+        out = self.convs(x)
 
         batch, channel, height, width = out.shape
         group = min(batch, self.stddev_group)
@@ -237,7 +237,7 @@ def forward(self, input):
 
 class DiscriminatorMultiRes(nn.Module):
     def __init__(self, resolution, channel_multiplier=2, channel_max=512, blur_kernel=(1, 3, 3, 1), act_func='lrelu',
-                 n_res=1):
+                 n_res=1, modulate=False):
         super().__init__()
 
         channels = {k: min(channel_max, int(v * channel_multiplier)) for k, v in D_CHANNEL_CONFIG.items()}
@@ -255,7 +255,11 @@ def __init__(self, resolution, channel_multiplier=2, channel_max=512, blur_kerne
         self.blocks = nn.ModuleList()
         for i in range(log_res, 2, -1):
             out_channel = channels[2 ** (i - 1)]  # the out channel corresponds to a lower resolution
-            self.blocks.append(ResBlock(in_channel, out_channel, blur_kernel, act_func=act_func))
+            self.blocks.append(
+                ResBlock(in_channel, out_channel, blur_kernel, act_func=act_func,
+                         modulate=modulate and i in list(range(log_res, 2, -1))[-2:],  # add g_arch modulation
+                         g_arch_len=4 * (log_res * 2 - 2))
+            )
             in_channel = out_channel
 
         self.stddev_group = 4
@@ -267,12 +271,12 @@ def __init__(self, resolution, channel_multiplier=2, channel_max=512, blur_kerne
             EqualLinear(channels[4], 1),
         )
 
-    def forward(self, x):
+    def forward(self, x, g_arch=None):
         res = x.shape[-1]
         idx = self.res2idx[res]
         out = self.convs[idx](x)
         for i in range(idx, len(self.blocks)):
-            out = self.blocks[i](out)
+            out = self.blocks[i](out, g_arch)
 
         out = self.minibatch_discrimination(out, self.stddev_group, self.stddev_feat)
         out = self.final_conv(out).view(out.shape[0], -1)