Commit CPU-Quantization
nehaprakriya committed Jul 29, 2022
1 parent b0374a2 commit acfca2d
Showing 11 changed files with 204 additions and 33 deletions.
Binary file added __pycache__/GradualWarmupScheduler.cpython-37.pyc
Binary file not shown.
Binary file added __pycache__/lazy_greedy.cpython-37.pyc
Binary file not shown.
Binary file added __pycache__/proxy_quantization.cpython-37.pyc
Binary file not shown.
Binary file added __pycache__/quantization.cpython-37.pyc
Binary file not shown.
Binary file added __pycache__/resnet.cpython-37.pyc
Binary file not shown.
Binary file added __pycache__/resnet_quant.cpython-37.pyc
Binary file not shown.
Binary file added __pycache__/target_quantization.cpython-37.pyc
Binary file not shown.
Binary file added __pycache__/util.cpython-37.pyc
Binary file not shown.
6 changes: 3 additions & 3 deletions resnet.py
@@ -54,7 +54,7 @@ def forward(self, x):
class BasicBlock(nn.Module):
    expansion = 1

-   def __init__(self, in_planes, planes, stride=1, option='A'):
+   def __init__(self, in_planes, planes, stride=1, option='B'):
        super(BasicBlock, self).__init__()
        self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(planes)
@@ -71,8 +71,8 @@ def __init__(self, in_planes, planes, stride=1, option='A'):
                    F.pad(x[:, :, ::2, ::2], (0, 0, 0, 0, planes//4, planes//4), "constant", 0))
            elif option == 'B':
                self.shortcut = nn.Sequential(
-                   nn.Conv2d(in_planes, self.expansion * planes, kernel_size=1, stride=stride, bias=False),
-                   nn.BatchNorm2d(self.expansion * planes)
+                   nn.Conv2d(in_planes, self.expansion * planes, kernel_size=1, stride=stride, bias=False)
+                   # nn.BatchNorm2d(self.expansion * planes)
                )

    def forward(self, x):
129 changes: 129 additions & 0 deletions resnet_quant.py
@@ -0,0 +1,129 @@
'''
Properly implemented ResNet-s for CIFAR10 as described in paper [1].
The implementation and structure of this file is hugely influenced by [2],
which is implemented for ImageNet and doesn't have option A for identity.
Moreover, most of the implementations on the web are copy-pasted from
torchvision's resnet and have the wrong number of params.
Proper ResNet-s for CIFAR10 (for fair comparison etc.) have the following
numbers of layers and parameters:
name      | layers | params
ResNet20  |   20   | 0.27M
ResNet32  |   32   | 0.46M
ResNet44  |   44   | 0.66M
ResNet56  |   56   | 0.85M
ResNet110 |  110   | 1.7M
ResNet1202|  1202  | 19.4M
which this implementation indeed has.
Reference:
[1] Kaiming He, Xiangyu Zhang, Shaoqing Ren, Jian Sun
    Deep Residual Learning for Image Recognition. arXiv:1512.03385
[2] https://github.com/pytorch/vision/blob/master/torchvision/models/resnet.py
If you use this implementation in your work, please don't forget to mention the
author, Yerlan Idelbayev.
'''
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.nn.init as init

from torch.autograd import Variable

__all__ = ['ResNet', 'resnet20', 'resnet32', 'resnet44', 'resnet56', 'resnet110', 'resnet1202']


def _weights_init(m):
    classname = m.__class__.__name__
    # print(classname)
    if isinstance(m, nn.Linear) or isinstance(m, nn.Conv2d):
        init.kaiming_normal_(m.weight)


class LambdaLayer(nn.Module):
    def __init__(self, lambd):
        super(LambdaLayer, self).__init__()
        self.lambd = lambd

    def forward(self, x):
        return self.lambd(x)


class BasicBlock(nn.Module):
    expansion = 1

    def __init__(self, in_planes, planes, stride=1, option='B'):
        super(BasicBlock, self).__init__()
        self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)

        self.shortcut = nn.Sequential()
        self.skip_add = nn.quantized.FloatFunctional()
        if stride != 1 or in_planes != planes:
            if option == 'A':
                """
                For CIFAR10 ResNet paper uses option A.
                """
                self.shortcut = LambdaLayer(lambda x:
                    F.pad(x[:, :, ::2, ::2], (0, 0, 0, 0, planes//4, planes//4), "constant", 0))
            elif option == 'B':
                self.shortcut = nn.Sequential(
                    nn.Conv2d(in_planes, self.expansion * planes, kernel_size=1, stride=stride, bias=False)
                    # nn.BatchNorm2d(self.expansion * planes)
                )

    def forward(self, x):
        # out = F.relu(self.bn1(self.conv1(x)))
        out = F.relu(self.conv1(x))
        # out = self.bn2(self.conv2(out))
        out = self.conv2(out)
        # out += self.shortcut(x)
        out = self.skip_add.add(out, self.shortcut(x))
        out = F.relu(out)
        return out


class ResNet(nn.Module):
    def __init__(self, block, num_blocks, num_classes=10):
        super(ResNet, self).__init__()
        self.in_planes = 16

        self.conv1 = nn.Conv2d(3, 16, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(16)
        self.layer1 = self._make_layer(block, 16, num_blocks[0], stride=1)
        self.layer2 = self._make_layer(block, 32, num_blocks[1], stride=2)
        self.layer3 = self._make_layer(block, 64, num_blocks[2], stride=2)
        self.linear = nn.Linear(64, num_classes)

        self.apply(_weights_init)

        self.quant = torch.quantization.QuantStub()
        self.dequant = torch.quantization.DeQuantStub()

    def _make_layer(self, block, planes, num_blocks, stride):
        strides = [stride] + [1]*(num_blocks-1)
        layers = []
        for stride in strides:
            layers.append(block(self.in_planes, planes, stride))
            self.in_planes = planes * block.expansion

        return nn.Sequential(*layers)

    def forward(self, x):
        x = self.quant(x)
        # out = F.relu(self.bn1(self.conv1(x)))
        out = F.relu(self.conv1(x))
        out = self.layer1(out)
        out = self.layer2(out)
        out = self.layer3(out)
        out = F.avg_pool2d(out, out.size()[3])
        out = out.view(out.size(0), -1)
        out = self.linear(out)
        out = self.dequant(out)
        return out


def resnet20():
    return ResNet(BasicBlock, [3, 3, 3])
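
For context: a minimal sketch (not part of this commit) of how a quantization-ready model like the one above is typically converted with PyTorch's eager-mode post-training static quantization. The QuantStub/DeQuantStub pair marks where tensors enter and leave the int8 region, and FloatFunctional lets the residual addition run as a quantized op. Note also that resnet.py's BasicBlock above was switched to the same option-'B', BatchNorm-free shortcut, apparently so the fp32 target's weights load directly into this quantizable twin (see train_resnet.py below). Random tensors stand in for a CIFAR-10 calibration loader, which is an assumption; the training script below calls convert() without an explicit calibration pass.

    # Sketch only: eager-mode static quantization of the model above.
    import torch
    from resnet_quant import resnet20

    q_model = resnet20()
    q_model.eval()                                                      # convert() expects eval mode
    q_model.qconfig = torch.quantization.get_default_qconfig('fbgemm')  # x86 CPU backend
    torch.quantization.prepare(q_model, inplace=True)                   # insert observers
    with torch.no_grad():
        for _ in range(10):                                             # stand-in calibration batches;
            q_model(torch.randn(32, 3, 32, 32))                         # real code would use CIFAR-10 data
    torch.quantization.convert(q_model, inplace=True)                   # swap modules for int8 versions
    out = q_model(torch.randn(1, 3, 32, 32))                            # int8 inference on the CPU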

102 changes: 72 additions & 30 deletions train_resnet.py
@@ -11,31 +11,23 @@
import torch.utils.data
import torchvision.transforms as transforms
import torchvision.datasets as datasets
-import resnet_icml as resnet
+# import resnet_icml as resnet

from torch.utils.data import Dataset, DataLoader
import util
from warnings import simplefilter
from GradualWarmupScheduler import *

+from resnet import resnet20 as target_resnet20
+from resnet_quant import resnet20 as quant_resnet20

# ignore all future warnings
simplefilter(action='ignore', category=FutureWarning)
np.seterr(all='ignore')

os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"  # see issue #152

-model_names = sorted(name for name in resnet.__dict__
-                     if name.islower() and not name.startswith("__")
-                     and name.startswith("resnet")
-                     and callable(resnet.__dict__[name]))
-
-print(model_names)

parser = argparse.ArgumentParser(description='Proper ResNets for CIFAR10 in pytorch')
parser.add_argument('--arch', '-a', metavar='ARCH', default='resnet20',  # 'resnet56', #
                    choices=model_names,
-                   help='model architecture: ' + ' | '.join(model_names) +
+                   help='model architecture: ' +
                    ' (default: resnet32)')
parser.add_argument('-j', '--workers', default=4, type=int, metavar='N',
                    help='number of data loading workers (default: 4)')
@@ -88,22 +80,24 @@
TRAIN_NUM = 50000
CLASS_NUM = 10


print("hello")
def main(subset_size=.1, greedy=0):

    print("hello")
    global args, best_prec1
    args = parser.parse_args()
-   os.environ["CUDA_VISIBLE_DEVICES"] = args.gpu
+   # os.environ["CUDA_VISIBLE_DEVICES"] = args.gpu

    print(f'--------- subset_size: {subset_size}, method: {args.ig}, moment: {args.momentum}, '
          f'lr_schedule: {args.lr_schedule}, greedy: {greedy}, stoch: {args.st_grd}, rs: {args.random_subset_size} ---------------')

    print(args.lr_schedule)
    # Check the save_dir exists or not
    if not os.path.exists(args.save_dir):
        os.makedirs(args.save_dir)

-   model = torch.nn.DataParallel(resnet.__dict__[args.arch]())
-   model.cuda()
+   model = target_resnet20()
+   device = 'cuda'
+   model.to(device)

    # optionally resume from a checkpoint
    if args.resume:
@@ -216,11 +210,11 @@ def __len__(self):
        order = order[:B]
        print(f'Random init subset size: {args.random_subset_size}% = {B}')

-   model = torch.nn.DataParallel(resnet.__dict__[args.arch]())
+   model = target_resnet20()
    model.cuda()

+   q_model = quant_resnet20()
+   q_model.to('cpu')
    best_prec1, best_loss = 0, 1e10

    if args.ig == 'adam':
        print('using adam')
        optimizer = torch.optim.Adam(model.parameters(), lr=1e-3, weight_decay=args.weight_decay)
@@ -238,9 +232,9 @@ def __len__(self):
    elif args.lr_schedule == 'step':
        lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=5, gamma=b)
    elif args.lr_schedule == 'mile':
-       milestones = np.array([100, 150])
+       milestones = np.array([60, 120, 160])
        lr_scheduler = torch.optim.lr_scheduler.MultiStepLR(
-           optimizer, milestones=milestones, last_epoch=args.start_epoch - 1, gamma=b)
+           optimizer, milestones=milestones, last_epoch=args.start_epoch - 1, gamma=0.2)
    elif args.lr_schedule == 'cosine':
        # lr_scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=20)
        lr_scheduler = torch.optim.lr_scheduler.CosineAnnealingWarmRestarts(optimizer, T_0=10, T_mult=2)
@@ -297,24 +291,45 @@ def __len__(self):
            preds, labels = np.reshape(data.data, (len(data.targets), -1)), data.targets
        else:
            print(f'Selecting {B} elements greedily from predictions')
-           preds, labels = predictions(indexed_loader, model)
+           torch.save(model.state_dict(), 'cifar10_target.pt')
+           print('Size (MB):', os.path.getsize("cifar10_target.pt")/1e6)
+           loaded_dict_enc = torch.load('cifar10_target.pt', map_location='cpu')
+           q_model = quant_resnet20()
+           q_model.to('cpu')
+           q_model.load_state_dict(loaded_dict_enc)
+           print("loaded state dict")
+           q_model.qconfig = torch.quantization.get_default_qconfig('fbgemm')
+           torch.quantization.prepare(q_model, inplace=True)
+           q_model.eval()
+           torch.quantization.convert(q_model, inplace=True)
+           torch.save(q_model.state_dict(), 'cifar10_target.pt')
+           print('Size (MB):', os.path.getsize("cifar10_target.pt")/1e6)
+           preds, labels = quantization_predictions(indexed_loader, q_model)
        preds -= np.eye(CLASS_NUM)[labels]

+       if epoch <= 60:
+           B = 50000
+       # elif 30<epoch and epoch<=75:
+       #     B = 25000
+       # elif 75<epoch and epoch<=100:
+       #     B = 10000
+       else:
+           B = 1000
+       print(B)
        fl_labels = np.zeros(np.shape(labels), dtype=int) if args.cluster_all else labels
        subset, subset_weight, _, _, ordering_time, similarity_time = util.get_orders_and_weights(
            B, preds, 'euclidean', smtk=args.smtk, no=0, y=fl_labels, stoch_greedy=args.st_grd,
            equal_num=True)

        weights = np.zeros(len(indexed_loader.dataset))
-       # weights[subset] = np.ones(len(subset))
+       weights[subset] = np.ones(len(subset))
        subset_weight = subset_weight / np.sum(subset_weight) * len(subset_weight)
        if args.save_subset:
            selected_ndx[run, epoch], selected_wgt[run, epoch] = subset, subset_weight

        weights[subset] = subset_weight
-       weight = torch.from_numpy(weights).float().cuda()
-       # weight = torch.tensor(weights).cuda()
-       # np.random.shuffle(subset)
+       weight = torch.tensor(weights).cuda()
+       np.random.shuffle(subset)
        print(f'FL time: {ordering_time:.3f}, Sim time: {similarity_time:.3f}')
        grd_time[run, epoch], sim_time[run, epoch] = ordering_time, similarity_time

@@ -379,7 +394,7 @@ def __len__(self):
    grd += f'_warm' if args.warm_start > 0 else ''
    grd += f'_feature' if args.cluster_features else ''
    grd += f'_ca' if args.cluster_all else ''
-   folder = f'/tmp/cifar10'
+   folder = f'/home/nehaprakriya/quant/resnet20/'

    if args.save_subset:
        print(
@@ -408,6 +423,8 @@ def __len__(self):
        np.min(not_selected, 1), np.mean(np.min(not_selected, 1)))




def train(train_loader, model, criterion, optimizer, epoch, weight=None):
"""
Run one train epoch
@@ -438,7 +455,7 @@ def train(train_loader, model, criterion, optimizer, epoch, weight=None):
        # compute output
        output = model(input_var)
        loss = criterion(output, target_var)
-       loss = (loss * weight[idx.long()]).mean()  # (Note)
+       loss = (loss).mean()  # (Note)

        # compute gradient and do SGD step
        optimizer.zero_grad()
@@ -542,6 +559,20 @@ def update(self, val, n=1):
        self.count += n
        self.avg = self.sum / self.count

+# add a function for quant predictions
+
+def quant_predictions(loader, model):
+    model.eval()
+    preds = np.zeros((TRAIN_NUM, CLASS_NUM))
+    labels = np.zeros(TRAIN_NUM, dtype=int)
+    with torch.no_grad():
+        for i, (input, target, idx) in enumerate(loader):
+            output = model(input)
+            preds[idx, :] = nn.Softmax(dim=1)(output)
+            labels[idx] = target.int()
+    return preds, labels




def predictions(loader, model):
"""
@@ -576,6 +607,17 @@ def predictions(loader, model):

    return preds.cpu().data.numpy(), labels.cpu().data.numpy()

+def quantization_predictions(loader, model):
+    model.to('cpu')
+    model.eval()
+    preds = np.zeros((TRAIN_NUM, CLASS_NUM))
+    labels = np.zeros(TRAIN_NUM, dtype=np.int32)
+    with torch.no_grad():
+        for i, (input, target, idx) in enumerate(loader):
+            preds[idx, :] = nn.Softmax(dim=1)(model(input))
+            labels[idx] = target.int()
+    return preds, labels


def accuracy(output, target, topk=(1,)):
"""Computes the precision@k for the specified values of k"""
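
Pulling the pieces of this diff together, the new selection step amounts to the following flow: the fp32 target model is snapshotted to disk, reloaded into the quantizable CPU copy, statically quantized, and its softmax errors drive the facility-location subset selection. This is a condensed sketch, not code from the commit: it assumes the names defined in train_resnet.py above, simplifies the get_orders_and_weights arguments shown there, and the helper name select_with_quant_proxy is hypothetical.

    # Condensed sketch of the selection branch in train_resnet.py (hypothetical helper).
    def select_with_quant_proxy(model, indexed_loader, B):
        # snapshot the fp32 target model and reload it on the CPU
        torch.save(model.state_dict(), 'cifar10_target.pt')
        state = torch.load('cifar10_target.pt', map_location='cpu')

        # build the quantizable twin and statically quantize it for x86
        q_model = quant_resnet20()
        q_model.load_state_dict(state)
        q_model.qconfig = torch.quantization.get_default_qconfig('fbgemm')
        torch.quantization.prepare(q_model, inplace=True)
        q_model.eval()
        torch.quantization.convert(q_model, inplace=True)

        # cheap int8 forward passes give per-example softmax errors
        preds, labels = quantization_predictions(indexed_loader, q_model)
        preds -= np.eye(CLASS_NUM)[labels]

        # facility-location selection over the error vectors
        subset, subset_weight, _, _, t_order, t_sim = util.get_orders_and_weights(
            B, preds, 'euclidean', smtk=0, no=0, y=labels, stoch_greedy=0, equal_num=True)
        return subset, subset_weight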
