Commit CPU-Quantization
nehaprakriya committed Jul 29, 2022
1 parent b0374a2 commit acfca2d
Showing 11 changed files with 204 additions and 33 deletions.
Binary file added __pycache__/GradualWarmupScheduler.cpython-37.pyc
Binary file not shown.
Binary file added __pycache__/lazy_greedy.cpython-37.pyc
Binary file not shown.
Binary file added __pycache__/proxy_quantization.cpython-37.pyc
Binary file not shown.
Binary file added __pycache__/quantization.cpython-37.pyc
Binary file not shown.
Binary file added __pycache__/resnet.cpython-37.pyc
Binary file not shown.
Binary file added __pycache__/resnet_quant.cpython-37.pyc
Binary file not shown.
Binary file added __pycache__/target_quantization.cpython-37.pyc
Binary file not shown.
Binary file added __pycache__/util.cpython-37.pyc
Binary file not shown.
6 changes: 3 additions & 3 deletions resnet.py
@@ -54,7 +54,7 @@ def forward(self, x):
class BasicBlock(nn.Module):
    expansion = 1

-   def __init__(self, in_planes, planes, stride=1, option='A'):
+   def __init__(self, in_planes, planes, stride=1, option='B'):
        super(BasicBlock, self).__init__()
        self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(planes)
@@ -71,8 +71,8 @@ def __init__(self, in_planes, planes, stride=1, option='A'):
                    F.pad(x[:, :, ::2, ::2], (0, 0, 0, 0, planes//4, planes//4), "constant", 0))
            elif option == 'B':
                self.shortcut = nn.Sequential(
-                   nn.Conv2d(in_planes, self.expansion * planes, kernel_size=1, stride=stride, bias=False),
-                   nn.BatchNorm2d(self.expansion * planes)
+                   nn.Conv2d(in_planes, self.expansion * planes, kernel_size=1, stride=stride, bias=False)
+                   # nn.BatchNorm2d(self.expansion * planes)
                )

    def forward(self, x):
129 changes: 129 additions & 0 deletions resnet_quant.py
@@ -0,0 +1,129 @@
'''
Properly implemented ResNet-s for CIFAR10 as described in paper [1].
The implementation and structure of this file is hugely influenced by [2],
which is implemented for ImageNet and doesn't have option A for identity.
Moreover, most of the implementations on the web are copy-pasted from
torchvision's resnet and have the wrong number of params.
Proper ResNet-s for CIFAR10 (for fair comparison etc.) have the following
numbers of layers and parameters:
name      | layers | params
ResNet20  |   20   | 0.27M
ResNet32  |   32   | 0.46M
ResNet44  |   44   | 0.66M
ResNet56  |   56   | 0.85M
ResNet110 |  110   | 1.7M
ResNet1202|  1202  | 19.4M
which this implementation indeed has.
Reference:
[1] Kaiming He, Xiangyu Zhang, Shaoqing Ren, Jian Sun
    Deep Residual Learning for Image Recognition. arXiv:1512.03385
[2] https://github.com/pytorch/vision/blob/master/torchvision/models/resnet.py
If you use this implementation in your work, please don't forget to mention the
author, Yerlan Idelbayev.
'''
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.nn.init as init

from torch.autograd import Variable

__all__ = ['ResNet', 'resnet20', 'resnet32', 'resnet44', 'resnet56', 'resnet110', 'resnet1202']


def _weights_init(m):
    classname = m.__class__.__name__
    # print(classname)
    if isinstance(m, nn.Linear) or isinstance(m, nn.Conv2d):
        init.kaiming_normal_(m.weight)


class LambdaLayer(nn.Module):
    def __init__(self, lambd):
        super(LambdaLayer, self).__init__()
        self.lambd = lambd

    def forward(self, x):
        return self.lambd(x)


class BasicBlock(nn.Module):
    expansion = 1

    def __init__(self, in_planes, planes, stride=1, option='B'):
        super(BasicBlock, self).__init__()
        self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)

        self.shortcut = nn.Sequential()
        self.skip_add = nn.quantized.FloatFunctional()
        if stride != 1 or in_planes != planes:
            if option == 'A':
                """
                For CIFAR10 ResNet paper uses option A.
                """
                self.shortcut = LambdaLayer(lambda x:
                    F.pad(x[:, :, ::2, ::2], (0, 0, 0, 0, planes//4, planes//4), "constant", 0))
            elif option == 'B':
                self.shortcut = nn.Sequential(
                    nn.Conv2d(in_planes, self.expansion * planes, kernel_size=1, stride=stride, bias=False)
                    # nn.BatchNorm2d(self.expansion * planes)
                )

    def forward(self, x):
        # out = F.relu(self.bn1(self.conv1(x)))
        out = F.relu(self.conv1(x))
        # out = self.bn2(self.conv2(out))
        out = self.conv2(out)
        # out += self.shortcut(x)
        out = self.skip_add.add(out, self.shortcut(x))
        out = F.relu(out)
        return out


class ResNet(nn.Module):
    def __init__(self, block, num_blocks, num_classes=10):
        super(ResNet, self).__init__()
        self.in_planes = 16

        self.conv1 = nn.Conv2d(3, 16, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(16)
        self.layer1 = self._make_layer(block, 16, num_blocks[0], stride=1)
        self.layer2 = self._make_layer(block, 32, num_blocks[1], stride=2)
        self.layer3 = self._make_layer(block, 64, num_blocks[2], stride=2)
        self.linear = nn.Linear(64, num_classes)

        self.apply(_weights_init)

        self.quant = torch.quantization.QuantStub()
        self.dequant = torch.quantization.DeQuantStub()

    def _make_layer(self, block, planes, num_blocks, stride):
        strides = [stride] + [1]*(num_blocks-1)
        layers = []
        for stride in strides:
            layers.append(block(self.in_planes, planes, stride))
            self.in_planes = planes * block.expansion

        return nn.Sequential(*layers)

    def forward(self, x):
        x = self.quant(x)
        # out = F.relu(self.bn1(self.conv1(x)))
        out = F.relu(self.conv1(x))
        out = self.layer1(out)
        out = self.layer2(out)
        out = self.layer3(out)
        out = F.avg_pool2d(out, out.size()[3])
        out = out.view(out.size(0), -1)
        out = self.linear(out)
        out = self.dequant(out)
        return out


def resnet20():
    return ResNet(BasicBlock, [3, 3, 3])
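
For context: a minimal sketch (not part of this commit) of how a quantization-ready model like the one above is typically converted with PyTorch's eager-mode post-training static quantization. The QuantStub/DeQuantStub pair marks where tensors enter and leave the int8 region, and FloatFunctional lets the residual addition run as a quantized op. Note also that resnet.py's BasicBlock above was switched to the same option-'B', BatchNorm-free shortcut, apparently so the fp32 target's weights load directly into this quantizable twin (see train_resnet.py below). Random tensors stand in for a CIFAR-10 calibration loader, which is an assumption; the training script below calls convert() without an explicit calibration pass.

    # Sketch only: eager-mode static quantization of the model above.
    import torch
    from resnet_quant import resnet20

    q_model = resnet20()
    q_model.eval()                                                      # convert() expects eval mode
    q_model.qconfig = torch.quantization.get_default_qconfig('fbgemm')  # x86 CPU backend
    torch.quantization.prepare(q_model, inplace=True)                   # insert observers
    with torch.no_grad():
        for _ in range(10):                                             # stand-in calibration batches;
            q_model(torch.randn(32, 3, 32, 32))                         # real code would use CIFAR-10 data
    torch.quantization.convert(q_model, inplace=True)                   # swap modules for int8 versions
    out = q_model(torch.randn(1, 3, 32, 32))                            # int8 inference on the CPU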

102 changes: 72 additions & 30 deletions train_resnet.py
@@ -11,31 +11,23 @@
import torch.utils.data
import torchvision.transforms as transforms
import torchvision.datasets as datasets
-import resnet_icml as resnet
+# import resnet_icml as resnet

from torch.utils.data import Dataset, DataLoader
import util
from warnings import simplefilter
from GradualWarmupScheduler import *

+from resnet import resnet20 as target_resnet20
+from resnet_quant import resnet20 as quant_resnet20

# ignore all future warnings
simplefilter(action='ignore', category=FutureWarning)
np.seterr(all='ignore')

os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"  # see issue #152

-model_names = sorted(name for name in resnet.__dict__
-                     if name.islower() and not name.startswith("__")
-                     and name.startswith("resnet")
-                     and callable(resnet.__dict__[name]))
-
-print(model_names)

parser = argparse.ArgumentParser(description='Proper ResNets for CIFAR10 in pytorch')
parser.add_argument('--arch', '-a', metavar='ARCH', default='resnet20',  # 'resnet56', #
                    choices=model_names,
-                   help='model architecture: ' + ' | '.join(model_names) +
+                   help='model architecture: ' +
                    ' (default: resnet32)')
parser.add_argument('-j', '--workers', default=4, type=int, metavar='N',
                    help='number of data loading workers (default: 4)')
@@ -88,22 +80,24 @@
TRAIN_NUM = 50000
CLASS_NUM = 10


print("hello")
def main(subset_size=.1, greedy=0):

    print("hello")
    global args, best_prec1
    args = parser.parse_args()
-   os.environ["CUDA_VISIBLE_DEVICES"] = args.gpu
+   # os.environ["CUDA_VISIBLE_DEVICES"] = args.gpu

    print(f'--------- subset_size: {subset_size}, method: {args.ig}, moment: {args.momentum}, '
          f'lr_schedule: {args.lr_schedule}, greedy: {greedy}, stoch: {args.st_grd}, rs: {args.random_subset_size} ---------------')

    print(args.lr_schedule)
    # Check the save_dir exists or not
    if not os.path.exists(args.save_dir):
        os.makedirs(args.save_dir)

-   model = torch.nn.DataParallel(resnet.__dict__[args.arch]())
-   model.cuda()
+   model = target_resnet20()
+   device = 'cuda'
+   model.to(device)

    # optionally resume from a checkpoint
    if args.resume:
@@ -216,11 +210,11 @@ def __len__(self):
        order = order[:B]
        print(f'Random init subset size: {args.random_subset_size}% = {B}')

-   model = torch.nn.DataParallel(resnet.__dict__[args.arch]())
+   model = target_resnet20()
    model.cuda()

+   q_model = quant_resnet20()
+   q_model.to('cpu')
    best_prec1, best_loss = 0, 1e10

    if args.ig == 'adam':
        print('using adam')
        optimizer = torch.optim.Adam(model.parameters(), lr=1e-3, weight_decay=args.weight_decay)
@@ -238,9 +232,9 @@ def __len__(self):
    elif args.lr_schedule == 'step':
        lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=5, gamma=b)
    elif args.lr_schedule == 'mile':
-       milestones = np.array([100, 150])
+       milestones = np.array([60, 120, 160])
        lr_scheduler = torch.optim.lr_scheduler.MultiStepLR(
-           optimizer, milestones=milestones, last_epoch=args.start_epoch - 1, gamma=b)
+           optimizer, milestones=milestones, last_epoch=args.start_epoch - 1, gamma=0.2)
    elif args.lr_schedule == 'cosine':
        # lr_scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=20)
        lr_scheduler = torch.optim.lr_scheduler.CosineAnnealingWarmRestarts(optimizer, T_0=10, T_mult=2)
@@ -297,24 +291,45 @@ def __len__(self):
            preds, labels = np.reshape(data.data, (len(data.targets), -1)), data.targets
        else:
            print(f'Selecting {B} elements greedily from predictions')
-           preds, labels = predictions(indexed_loader, model)
+           torch.save(model.state_dict(), 'cifar10_target.pt')
+           print('Size (MB):', os.path.getsize("cifar10_target.pt")/1e6)
+           loaded_dict_enc = torch.load('cifar10_target.pt', map_location='cpu')
+           q_model = quant_resnet20()
+           q_model.to('cpu')
+           q_model.load_state_dict(loaded_dict_enc)
+           print("loaded state dict")
+           q_model.qconfig = torch.quantization.get_default_qconfig('fbgemm')
+           torch.quantization.prepare(q_model, inplace=True)
+           q_model.eval()
+           torch.quantization.convert(q_model, inplace=True)
+           torch.save(q_model.state_dict(), 'cifar10_target.pt')
+           print('Size (MB):', os.path.getsize("cifar10_target.pt")/1e6)
+           preds, labels = quantization_predictions(indexed_loader, q_model)
        preds -= np.eye(CLASS_NUM)[labels]

+       if epoch <= 60:
+           B = 50000
+       # elif 30<epoch and epoch<=75:
+       #     B = 25000
+       # elif 75<epoch and epoch<=100:
+       #     B = 10000
+       else:
+           B = 1000
+       print(B)
        fl_labels = np.zeros(np.shape(labels), dtype=int) if args.cluster_all else labels
        subset, subset_weight, _, _, ordering_time, similarity_time = util.get_orders_and_weights(
            B, preds, 'euclidean', smtk=args.smtk, no=0, y=fl_labels, stoch_greedy=args.st_grd,
            equal_num=True)

        weights = np.zeros(len(indexed_loader.dataset))
-       # weights[subset] = np.ones(len(subset))
+       weights[subset] = np.ones(len(subset))
        subset_weight = subset_weight / np.sum(subset_weight) * len(subset_weight)
        if args.save_subset:
            selected_ndx[run, epoch], selected_wgt[run, epoch] = subset, subset_weight

        weights[subset] = subset_weight
-       weight = torch.from_numpy(weights).float().cuda()
-       # weight = torch.tensor(weights).cuda()
-       # np.random.shuffle(subset)
+       weight = torch.tensor(weights).cuda()
+       np.random.shuffle(subset)
        print(f'FL time: {ordering_time:.3f}, Sim time: {similarity_time:.3f}')
        grd_time[run, epoch], sim_time[run, epoch] = ordering_time, similarity_time

@@ -379,7 +394,7 @@ def __len__(self):
    grd += f'_warm' if args.warm_start > 0 else ''
    grd += f'_feature' if args.cluster_features else ''
    grd += f'_ca' if args.cluster_all else ''
-   folder = f'/tmp/cifar10'
+   folder = f'/home/nehaprakriya/quant/resnet20/'

    if args.save_subset:
        print(
@@ -408,6 +423,8 @@ def __len__(self):
        np.min(not_selected, 1), np.mean(np.min(not_selected, 1)))




def train(train_loader, model, criterion, optimizer, epoch, weight=None):
"""
Run one train epoch
@@ -438,7 +455,7 @@ def train(train_loader, model, criterion, optimizer, epoch, weight=None):
        # compute output
        output = model(input_var)
        loss = criterion(output, target_var)
-       loss = (loss * weight[idx.long()]).mean()  # (Note)
+       loss = (loss).mean()  # (Note)

        # compute gradient and do SGD step
        optimizer.zero_grad()
@@ -542,6 +559,20 @@ def update(self, val, n=1):
        self.count += n
        self.avg = self.sum / self.count

+# add a function for quant predictions
+
+def quant_predictions(loader, model):
+    model.eval()
+    preds = np.zeros((TRAIN_NUM, CLASS_NUM))
+    labels = np.zeros(TRAIN_NUM, dtype=int)
+    with torch.no_grad():
+        for i, (input, target, idx) in enumerate(loader):
+            output = model(input)
+            preds[idx, :] = nn.Softmax(dim=1)(output)
+            labels[idx] = target.int()
+    return preds, labels




def predictions(loader, model):
"""
@@ -576,6 +607,17 @@ def predictions(loader, model):

    return preds.cpu().data.numpy(), labels.cpu().data.numpy()

+def quantization_predictions(loader, model):
+    model.to('cpu')
+    model.eval()
+    preds = np.zeros((TRAIN_NUM, CLASS_NUM))
+    labels = np.zeros(TRAIN_NUM, dtype=np.int32)
+    with torch.no_grad():
+        for i, (input, target, idx) in enumerate(loader):
+            preds[idx, :] = nn.Softmax(dim=1)(model(input))
+            labels[idx] = target.int()
+    return preds, labels


def accuracy(output, target, topk=(1,)):
"""Computes the precision@k for the specified values of k"""
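
Pulling the pieces of this diff together, the new selection step amounts to the following flow: the fp32 target model is snapshotted to disk, reloaded into the quantizable CPU copy, statically quantized, and its softmax errors drive the facility-location subset selection. This is a condensed sketch, not code from the commit: it assumes the names defined in train_resnet.py above, simplifies the get_orders_and_weights arguments shown there, and the helper name select_with_quant_proxy is hypothetical.

    # Condensed sketch of the selection branch in train_resnet.py (hypothetical helper).
    def select_with_quant_proxy(model, indexed_loader, B):
        # snapshot the fp32 target model and reload it on the CPU
        torch.save(model.state_dict(), 'cifar10_target.pt')
        state = torch.load('cifar10_target.pt', map_location='cpu')

        # build the quantizable twin and statically quantize it for x86
        q_model = quant_resnet20()
        q_model.load_state_dict(state)
        q_model.qconfig = torch.quantization.get_default_qconfig('fbgemm')
        torch.quantization.prepare(q_model, inplace=True)
        q_model.eval()
        torch.quantization.convert(q_model, inplace=True)

        # cheap int8 forward passes give per-example softmax errors
        preds, labels = quantization_predictions(indexed_loader, q_model)
        preds -= np.eye(CLASS_NUM)[labels]

        # facility-location selection over the error vectors
        subset, subset_weight, _, _, t_order, t_sim = util.get_orders_and_weights(
            B, preds, 'euclidean', smtk=0, no=0, y=labels, stoch_greedy=0, equal_num=True)
        return subset, subset_weight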
