Commit 83c1bd6

Author: Sara Elkerdawy
Commit message: FTWT demo (initial commit, 0 parents)


49 files changed: +3306 −0 lines

README.md

+19
# Fire Together Wire Together

Sample training code for CIFAR for dynamic pruning with a self-supervised mask.

## Environment
```
virtualenv .envpy36 -p python3.6  # Initialize environment
source .envpy36/bin/activate
pip install -r req.txt  # Install dependencies
```

## Train baseline
```
sh job_baseline.sh  # You can change the model at line 5
```

## Train dynamic
```
sh job_dynamic.sh  # You can change the model at line 5 and the threshold at line 40
```
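Both job scripts resume and evaluate from a checkpoint named `model_best.pth.tar` inside their checkpoint directory. The training driver `cifar.py` is not rendered in this commit view, so the snippet below is only a hedged sketch for inspecting such a file; the exact dictionary layout is defined in `cifar.py` and is an assumption here.

```python
# Hypothetical inspection of a saved checkpoint; the path follows job_dynamic.sh's
# pretrained() convention, and the dict layout is an assumption to be checked.
import torch

ckpt = torch.load('pretrained/cifar10/mobilenetv1/model_best.pth.tar', map_location='cpu')
print(ckpt.keys() if isinstance(ckpt, dict) else type(ckpt))
```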

cifar.py

+607
Large diffs are not rendered by default.

job_baseline.sh

+54
```bash
#!/bin/bash

datasetdir='./data'
dataset=cifar10
model=mobilenetv1 #vgg16_bn #resnet56 #mobilenetv2
mlr=1e-1
task=train
wd=5e-4


scratch(){
    initpath='None'
    init='scratch'
}

train(){
    task=train
    epochs=200
    lr=0.1
    extra='--tb'
    schedule='81 122 151'
    lr_scheduler_b='step'
    #lr_scheduler_b='cosine' #for MbnetV2
}

evaluate(){
    task=evaluate
}

scratch
train
#evaluate

bs=128
extra='--baseline'
echo $initpath
chkpnt='pretrained/'$dataset'/'$model'/'

if [ $task != evaluate ]
then
    python cifar.py -a $model --dataset $dataset -p $datasetdir \
        --gpu-id 0,1,2,3 \
        --checkpoint $chkpnt --init $initpath \
        --epochs $epochs --lr $lr --mlr $mlr --wd $wd \
        --train-batch $bs --test-batch $bs \
        --schedule $schedule --lr_scheduler_b $lr_scheduler_b \
        $extra
else
    modelbest=$chkpnt'/model_best.pth.tar'
    python cifar.py -a $model --dataset $dataset -p $datasetdir --checkpoint $chkpnt \
        --evaluate --test-batch 100 \
        --init $initpath --resume $modelbest --tb \
        $extra
fi
```
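The `--schedule '81 122 151'` and `--lr_scheduler_b step` flags request a step learning-rate decay at those epochs. Since `cifar.py` is not rendered in this commit view, the sketch below only illustrates what such a schedule typically looks like in PyTorch; the decay factor (0.1) and the SGD momentum are assumptions, not values taken from this commit.

```python
# Hypothetical sketch of the 'step' LR schedule requested by job_baseline.sh.
# gamma=0.1 and momentum=0.9 are assumed; the real values live in cifar.py.
import torch
import torch.nn as nn

model = nn.Linear(10, 10)  # stand-in model
optimizer = torch.optim.SGD(model.parameters(), lr=0.1,       # lr=0.1 as in the script
                            momentum=0.9, weight_decay=5e-4)  # wd=5e-4 as in the script
scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=[81, 122, 151], gamma=0.1)

for epoch in range(200):  # epochs=200 as in the script
    # ... train one epoch ...
    scheduler.step()
# lr: 0.1 until epoch 81, then 0.01, then 0.001 at 122, then 0.0001 at 151
```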

job_dynamic.sh

+63
```bash
#!/bin/bash

datasetdir='./data'
dataset=cifar10
model=mobilenetv1 #vgg16_bn #resnet56
mlr=1e-1
task=train
wd=5e-4

pretrained(){
    initpath='pretrained/'$dataset'/'$model'/model_best.pth.tar'
    init='pretrained'
}
trainwithpred(){
    task=train
    epochs=200
    lr=1e-2
    extra='--tb'
    schedule='81 122 151'
    lr_scheduler_b='step'
    #lr_scheduler_b='cosine' #for MbnetV2
}
finetunewithpred(){
    task=finetune
    epochs=50
    lr=1e-3
    extra='--tb'
    schedule='29 39 49'
    lr_scheduler_b='step'
}
evaluate(){
    task=evaluate
}

pretrained
trainwithpred

bs=128
mode='decoupled' #Or joint
gttype='mass' #Or uniform
mthresh=1.0 #Keep top {mthresh}% of heatmap mass in case of gttype=mass, or top {mthresh}% filters (uniform pruning) in case of gttype=uniform

echo $initpath
chkpnt='dynamic-ftwt/'$dataset'/'$task'_'$model'_lr'$lr'_mthresh'$mthresh'_'$mode'_'$gttype'_'$lr_scheduler_b

if [ $task != evaluate ] #Train or finetune
then
    python cifar.py -a $model --dataset $dataset -p $datasetdir \
        --gpu-id 0,1,2,3 \
        --checkpoint $chkpnt --init $initpath \
        --epochs $epochs --lr $lr --mlr $mlr --wd $wd \
        --train-batch $bs --test-batch $bs \
        --schedule $schedule --lr_scheduler_b $lr_scheduler_b \
        --mthresh $mthresh --mode $mode --gt-type $gttype \
        $extra
else
    modelbest=$chkpnt'/model_best.pth.tar'
    python cifar.py -a $model --dataset $dataset -p $datasetdir --checkpoint $chkpnt \
        --evaluate --test-batch 100 \
        --init $initpath --resume $modelbest --tb \
        $extra
fi
```
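The `--gt-type`/`--mthresh` pair controls how many filters the self-supervised mask keeps per layer. The actual logic lives in code not shown in this commit view (e.g. `utils/layers.py`), so the snippet below is only an illustration of the two settings described in the script comment above, under the assumption that `mthresh` is the fraction of heatmap mass (or of the filter count) to keep; `keep_mask` is a hypothetical helper, not the repo's implementation.

```python
# Illustration only, not the repo's code: how 'mass' vs 'uniform' keep rules could differ.
import math
import torch

def keep_mask(heatmap: torch.Tensor, mthresh: float, gt_type: str) -> torch.Tensor:
    """Return a boolean keep-mask over a layer's filters, given a per-filter heatmap [C]."""
    C = heatmap.numel()
    vals, order = torch.sort(heatmap, descending=True)
    if gt_type == 'uniform':
        # Keep the top mthresh fraction of filters outright.
        k = max(1, math.ceil(mthresh * C))
    else:  # 'mass'
        # Keep the smallest prefix of filters covering mthresh of the total heatmap mass.
        csum = torch.cumsum(vals, dim=0)
        k = int((csum < mthresh * heatmap.sum()).sum().item()) + 1
    k = min(k, C)
    mask = torch.zeros(C, dtype=torch.bool)
    mask[order[:k]] = True
    return mask

# With mthresh=1.0 (the value set above) both modes keep every filter.
print(keep_mask(torch.rand(8), mthresh=1.0, gt_type='mass').sum().item())  # 8
```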

models/__init__.py

Whitespace-only changes.

models/cifar/__init__.py

+67
```python
from __future__ import absolute_import

"""The models subpackage contains definitions for the following model
architectures for CIFAR10/CIFAR100:

-  `AlexNet`_
-  `VGG`_
-  `ResNet`_
-  `SqueezeNet`_
-  `DenseNet`_

You can construct a model with random weights by calling its constructor:

.. code:: python

    import torchvision.models as models
    resnet18 = models.resnet18()
    alexnet = models.alexnet()
    squeezenet = models.squeezenet1_0()
    densenet = models.densenet_161()

We provide pre-trained models for the ResNet variants and AlexNet, using the
PyTorch :mod:`torch.utils.model_zoo`. These can be constructed by passing
``pretrained=True``:

.. code:: python

    import torchvision.models as models
    resnet18 = models.resnet18(pretrained=True)
    alexnet = models.alexnet(pretrained=True)

ImageNet 1-crop error rates (224x224)

======================== ============= =============
Network                  Top-1 error   Top-5 error
======================== ============= =============
ResNet-18                30.24         10.92
ResNet-34                26.70         8.58
ResNet-50                23.85         7.13
ResNet-101               22.63         6.44
ResNet-152               21.69         5.94
Inception v3             22.55         6.44
AlexNet                  43.45         20.91
VGG-11                   30.98         11.37
VGG-13                   30.07         10.75
VGG-16                   28.41         9.62
VGG-19                   27.62         9.12
SqueezeNet 1.0           41.90         19.58
SqueezeNet 1.1           41.81         19.38
Densenet-121             25.35         7.83
Densenet-169             24.00         7.00
Densenet-201             22.80         6.43
Densenet-161             22.35         6.20
======================== ============= =============

.. _AlexNet: https://arxiv.org/abs/1404.5997
.. _VGG: https://arxiv.org/abs/1409.1556
.. _ResNet: https://arxiv.org/abs/1512.03385
.. _SqueezeNet: https://arxiv.org/abs/1602.07360
.. _DenseNet: https://arxiv.org/abs/1608.06993
"""

from .vgg import *
from .resnet import *
from .mobilenet import *
from .mobilenetv2 import *
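```

The docstring above is inherited from torchvision and describes ImageNet models; in this repo the subpackage actually exposes the CIFAR constructors imported at the bottom of the file. A minimal usage sketch, assuming the repo root is on `PYTHONPATH` and its `utils` package is importable:

```python
# Sketch only: mobilenetv1's signature is shown in mobilenet.py below; the vgg, resnet
# and mobilenetv2 modules are part of this commit but not reproduced in this view.
import models.cifar as models

net = models.mobilenetv1(num_classes=10)        # CIFAR-10 head
small = models.mobilenetv1_50(num_classes=100)  # depth_multiplier=0.5 variant, CIFAR-100 head
print(sum(p.numel() for p in net.parameters()), 'parameters')
```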

models/cifar/mobilenet.py

+174
```python
import torch.nn as nn
import torch.nn.functional as F
import pdb
from utils.layers import ConvWithMask

__all__ = ['mobilenetv1', 'mobilenetv1_75', 'mobilenetv1_50']

pretrained_TF = False
relu_fn = None
import torch

class TFSamePad(nn.Module):
    def __init__(self, kernel_size, stride):
        super(TFSamePad, self).__init__()
        self.stride = stride
        if kernel_size != 3:
            raise NotImplementedError('only support kernel_size == 3')

    def forward(self, x):
        if self.stride == 2:
            return F.pad(x, (0, 1, 0, 1))
        elif self.stride == 1:
            return F.pad(x, (1, 1, 1, 1))
        else:
            raise NotImplementedError('only support stride == 1 or 2')

def relu(relu6):
    if relu6:
        return nn.ReLU6(inplace=True)
    else:
        return nn.ReLU(inplace=True)

class MobileNet(nn.Module):
    def __init__(self, num_classes=1000, dropout=False, from_TF=False, depth_multiplier=1.0):
        super(MobileNet, self).__init__()

        self.nmasked_layers = 0
        self.baseline = True  # For profile in thop
        self.d = depth_multiplier

        global pretrained_TF, relu_fn, cfg
        pretrained_TF = from_TF
        relu_fn = relu(from_TF)

        if num_classes == 1000:
            self.cfg = cfg['imagenet']
            self.pool_k = 7
        else:
            self.cfg = cfg['cifar']
            self.pool_k = 2

        self.model = self._make_layers(self.cfg, self.d)
        self.pool = nn.AvgPool2d(self.pool_k)
        self.dropout = nn.Dropout(0.2) if dropout else nn.Identity()
        last_layer = int(self.d * 1024)
        self.fc = nn.Linear(last_layer, num_classes)

    def _make_layers(self, cfg, d):

        conv_bn = self.conv_bn
        conv_dw = self.conv_dw

        layers = []
        in_planes = 3
        for i, x in enumerate(self.cfg):
            out_planes = x if isinstance(x, int) else x[0]
            stride = 1 if isinstance(x, int) else x[1]
            if i == 0:  # First layer is normal conv
                layers.append(conv_bn(in_planes, out_planes, stride, d))
            else:
                layers.append(conv_dw(in_planes, out_planes, stride, d))

            in_planes = out_planes

        return nn.Sequential(*layers)

    @staticmethod
    def conv_bn(inp, oup, stride, d):

        oup = int(d * oup)
        layers = []
        pad = 1

        # PyTorch BN defaults
        eps = 1e-5
        momentum = 0.1
        if pretrained_TF:
            layers += [TFSamePad(3, stride)]
            pad = 0
            # TF BN defaults
            eps = 1e-3
            momentum = 1e-3

        layers += [
            nn.Conv2d(inp, oup, 3, stride, pad, bias=False),
            nn.BatchNorm2d(oup, eps=eps, momentum=momentum),
            relu_fn]
        return nn.Sequential(*layers)

    @staticmethod
    def conv_dw(inp, oup, stride, d):
        inp = int(d * inp)
        oup = int(d * oup)
        layers = []
        pad = 1

        # PyTorch BN defaults
        eps = 1e-5
        momentum = 0.1
        if pretrained_TF:
            layers += [TFSamePad(3, stride)]
            pad = 0
            # TF BN defaults
            eps = 1e-3
            momentum = 1e-3

        layers += [
            nn.Conv2d(inp, inp, 3, stride, pad, groups=inp, bias=False),
            nn.BatchNorm2d(inp, eps=eps, momentum=momentum),
            relu_fn,

            nn.Conv2d(inp, oup, 1, 1, 0, bias=False),
            nn.BatchNorm2d(oup, eps=eps, momentum=momentum),
            relu_fn]

        return nn.Sequential(*layers)

    def forward_baseline(self, x):
        x = self.model(x)
        x = self.dropout(self.pool(x))
        x = x.view(x.shape[0], -1)
        x = self.fc(x)
        return x

    def forward_mask(self, x):
        all_mask_logits, all_gt_mask = [], []
        FLOPs = 0
        bs = x.shape[0]
        prev = torch.ones(bs) * x.shape[1]

        for i, m in enumerate(self.model.children()):
            if isinstance(m, ConvWithMask):
                x, all_mask_logits, prev, all_gt_mask, cur_flops = m(x, all_mask_logits, all_gt_mask, prev)
                FLOPs += cur_flops
            else:
                x = m(x)

        x = self.dropout(self.pool(x))
        x = x.view(x.shape[0], -1)
        x = self.fc(x)
        FLOPs = FLOPs.to(x.device)

        return x, all_mask_logits, all_gt_mask, FLOPs

    def forward(self, x):
        if self.baseline:
            return self.forward_baseline(x)
        else:
            return self.forward_mask(x)

cfg = {
    'cifar': [(32,1), 64, (128,2), 128, (256,2), 256, (512,2), 512, 512, 512, 512, 512, (1024,2), 1024],
    'imagenet': [(32,2), 64, (128,2), 128, (256,2), 256, (512,2), 512, 512, 512, 512, 512, (1024,2), 1024],
}


def mobilenetv1(num_classes, dropout=False, from_TF=False):
    return MobileNet(num_classes, dropout, from_TF, depth_multiplier=1.)

def mobilenetv1_75(num_classes, dropout=False, from_TF=False):
    return MobileNet(num_classes, dropout, from_TF, depth_multiplier=0.75)

def mobilenetv1_50(num_classes, dropout=False, from_TF=False):
    return MobileNet(num_classes, dropout, from_TF, depth_multiplier=0.50)
```
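As a quick sanity check of the baseline path defined above (a sketch; it assumes `utils.layers.ConvWithMask` is importable, since this module imports it at load time):

```python
# Minimal shape check of the baseline forward path on a CIFAR-sized input.
import torch
from models.cifar.mobilenet import mobilenetv1

net = mobilenetv1(num_classes=10)   # num_classes != 1000 -> CIFAR cfg, AvgPool2d(2)
x = torch.randn(2, 3, 32, 32)       # CIFAR-sized input
out = net(x)                        # baseline=True by default -> forward_baseline
# Four stride-2 stages: 32 -> 16 -> 8 -> 4 -> 2, then AvgPool2d(2) -> 1x1, then fc.
print(out.shape)                    # torch.Size([2, 10])
```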
