Commit f2186be: Initial commit
philkr committed Dec 29, 2015
Showing 5 changed files with 353 additions and 0 deletions.
4 changes: 4 additions & 0 deletions .gitignore
@@ -0,0 +1,4 @@
**/__pycache__
**/*.pyc
caffe
*.kdev4
26 changes: 26 additions & 0 deletions LICENSE
@@ -0,0 +1,26 @@
Copyright (c) 2016, Philipp Krähenbühl
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

1. Redistributions of source code must retain the above copyright notice, this
list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

The views and conclusions contained in the software and documentation are those
of the authors and should not be interpreted as representing official policies,
either expressed or implied, of the FreeBSD Project.
35 changes: 35 additions & 0 deletions README.md
@@ -0,0 +1,35 @@
# Data-dependent initialization of convolutional neural networks

Created by Philipp Krähenbühl.

### Introduction

This code implements the initialization presented in our [arXiv tech report](http://arxiv.org/abs/1511.06856), which is under submission at ICLR 2016.

*This is a reimplementation and currently work in progress. Use at your own risk.*

### License

This code is released under the BSD License (refer to the LICENSE file for details).

### Citing

If you find our initialization useful in your research, please consider citing:

    @article{krahenbuhl2015data,
      title={Data-dependent Initializations of Convolutional Neural Networks},
      author={Kr{\"a}henb{\"u}hl, Philipp and Doersch, Carl and Donahue, Jeff and Darrell, Trevor},
      journal={arXiv preprint arXiv:1511.06856},
      year={2015}
    }

### Setup

Check out the project and create a symlink to caffe in the `magic_init` directory:
```Shell
ln -s path/to/caffe caffe
```

### Examples

*Will follow soon*
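In the meantime, a minimal invocation sketch (paths are hypothetical; assumes a compiled pycaffe is importable, and note that only `-t elwise` is actually implemented in this commit):
```Shell
python magic_init.py deploy.prototxt init.caffemodel -d 'images/*.jpg' -t elwise -nit 10
```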
61 changes: 61 additions & 0 deletions load.py
@@ -0,0 +1,61 @@
import caffe

def parseProtoString(s):
    from google.protobuf import text_format
    from caffe.proto import caffe_pb2 as pb
    proto_net = pb.NetParameter()
    text_format.Merge(s, proto_net)
    return proto_net


def get_param(l, exclude=set(['top', 'bottom', 'name', 'type'])):
    # Recursively convert a protobuf message into a dict of keyword arguments,
    # skipping the fields in `exclude`; repeated fields become lists
    if not hasattr(l, 'ListFields'):
        if hasattr(l, '__delitem__'):
            return list(l)
        return l
    r = dict()
    for f, v in l.ListFields():
        if f.name not in exclude:
            r[f.name] = get_param(v, [])
    return r

class ProtoDesc:
    def __init__(self, prototxt):
        self.prototxt = prototxt
        self.parsed_proto = parseProtoString(open(self.prototxt, 'r').read())
        # Guess the input dimension
        self.input_dim = (3, 227, 227)
        net = self.parsed_proto
        if len(net.input_dim) > 0:
            self.input_dim = net.input_dim[1:]
        else:
            # Fall back to the crop size of the first transform_param found
            lrs = net.layer
            cs = [l.transform_param.crop_size for l in lrs
                  if l.HasField('transform_param')]
            if len(cs):
                self.input_dim = (3, cs[0], cs[0])

    def __call__(self, clip=None, **inputs):
        from caffe import layers as L
        from collections import OrderedDict
        net = self.parsed_proto
        blobs = OrderedDict(inputs)
        for l in net.layer:
            if l.name not in inputs:
                in_place = l.top == l.bottom
                param = get_param(l)
                assert all([b in blobs for b in l.bottom]), \
                    "Some bottoms not found: " + ', '.join([b for b in l.bottom if b not in blobs])
                tops = getattr(L, l.type)(*[blobs[b] for b in l.bottom],
                                          ntop=len(l.top), in_place=in_place,
                                          name=l.name, **param)
                if len(l.top) <= 1:
                    tops = [tops]
                for i, t in enumerate(l.top):
                    blobs[t] = tops[i]
            if l.name == clip:
                break
        return list(blobs.values())[-1]
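# Usage sketch (hypothetical file name): ProtoDesc replays a prototxt into a
# NetSpec graph, optionally rebinding input blobs by layer name:
#   import load
#   from caffe import NetSpec
#   model = load.ProtoDesc('deploy.prototxt')
#   net = NetSpec()
#   net.out = model()  # or model(data=net.data, label=net.label)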


227 changes: 227 additions & 0 deletions magic_init.py
@@ -0,0 +1,227 @@

INPUT_LAYERS = ['Data', 'ImageData']
PARAMETER_LAYERS = ['Convolution', 'InnerProduct']
SUPPORTED_LAYERS = ['ReLU', 'Sigmoid', 'LRN', 'Pooling']
# Use 'Dropout' at your own risk
# Unless Jon merges #2865, 'Split' cannot be supported
UNSUPPORTED_LAYERS = ['Split']

def forward(net, i, NIT, data, output_names):
    # Create the top data if needed
    output = {t: [None]*NIT for t in output_names}
    for it in range(NIT):
        for b in data:
            net.blobs[b].data[...] = data[b][it]
        net._forward(i, i)
        for t in output_names:
            output[t][it] = 1*net.blobs[t].data  # 1* forces a copy
    return output
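# Usage sketch (hypothetical blob names, shapes, and layer index): `data` maps
# each bottom blob to a list of NIT batches; the result has the same layout
# per requested top:
#   data = {'data': [np.random.randn(16, 3, 227, 227) for _ in range(10)]}
#   conv1 = forward(net, 1, 10, data, ['conv1'])['conv1']  # list of 10 arrays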

def flattenData(data):
    # Merge a list of (N, C, ...) batches into one (C, N*...) matrix, one row
    # per channel
    import numpy as np
    return np.concatenate([d.swapaxes(0, 1).reshape((d.shape[1], -1)) for d in data], axis=1)

def gatherInputData(net, layer_id, bottom_data, top_name):
    # This function gathers all input data.
    # In order not to replicate all the internal functionality of convolutions
    # (e.g. padding, ...), we gather the data in the output space and use
    # random Gaussian weights. The output of this function is W and D, where
    # the input data is I = D * W^-1 [with some abuse of tensor notation].
    # If we now compute an initialization A for D, we simply multiply A by W
    # to obtain the proper initialization in the input space.
    import numpy as np
    l = net.layers[layer_id]
    NIT = len(list(bottom_data.values())[0])
    # How many times do we need to over-sample to get a full basis
    # (out of random projections)?
    OS = int(np.ceil(np.prod(l.blobs[0].data.shape[1:]) / l.blobs[0].data.shape[0]))
    # Note: this could cause some memory issues in the FC layers
    W, D = [], []
    for i in range(OS):
        d = l.blobs[0].data
        d[...] = np.random.normal(0, 1, d.shape)
        W.append(1*d)
        D.append(np.concatenate(forward(net, layer_id, NIT, bottom_data, [top_name])[top_name], axis=0))
    return np.concatenate(W, axis=0), np.concatenate(D, axis=1)
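# Worked example (hypothetical conv layer): for weights of shape (64, 64, 3, 3)
# the input space has 64*3*3 = 576 dimensions, but a single random draw only
# projects it onto 64 outputs, so OS = ceil(576/64) = 9 random weight draws
# are stacked to span a (roughly) full basis.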

def initializeWeight(D, type, N_OUT):
    import numpy as np  # needed here: this module has no top-level numpy import
    # TODO: Compute the initialization using D [pca, zca, kmeans, ...]
    return np.random.normal(0, 1, (N_OUT, D.shape[1]))


def initializeLayer(net, layer_id, bottom_data, top_name, bias=0, type='elwise'):
    import numpy as np
    l = net.layers[layer_id]
    NIT = len(list(bottom_data.values())[0])

    for p in l.blobs:
        p.data[...] = 0
    # Initialize the weights [k-means, ...]
    if type == 'elwise':
        d = l.blobs[0].data
        d[...] = np.random.normal(0, 1, d.shape)
    else:  # Use the input data
        # Gather the input data
        T, D = gatherInputData(net, layer_id, bottom_data, top_name)

        # Figure out the output dimensionality of d
        d = l.blobs[0].data

        # Prepare the data: (NIT*N, OS*N_out, ...) -> (samples, OS*N_out)
        D = D.swapaxes(0, 1).reshape((D.shape[1], -1)).T

        # Compute the weights
        W = initializeWeight(D, type, N_OUT=d.shape[0])

        # Multiply the weights by the random basis
        # NOTE: This matrix multiplication is a bit large; if it's too slow,
        # reduce the oversampling in gatherInputData
        d[...] = np.dot(W, T.reshape((T.shape[0], -1))).reshape(d.shape)

    # Scale the mean and initialize the bias
    top_data = forward(net, layer_id, NIT, bottom_data, [top_name])[top_name]
    flat_data = flattenData(top_data)
    mu = flat_data.mean(axis=1)
    std = flat_data.std(axis=1)
    l.blobs[0].data[...] /= std.reshape((-1,)+(1,)*(len(l.blobs[0].data.shape)-1))
    for b in l.blobs[1:]:
        b.data[...] = -mu / std + bias
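# Why the rescaling above works (sketch): Convolution/InnerProduct are linear,
# so dividing the weights by the per-channel std and setting the bias to
# -mu/std + bias maps an output channel with statistics (mu_j, std_j) to one
# with mean ~= bias and std ~= 1 over the sampled batches.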

def magicInitialize(net, bias=0, NIT=10, type='elwise', bottom_names={}, top_names={}):
    # Visit layers in prototxt order; initialize each parameter layer from the
    # activations produced by the (already initialized) layers below it, and
    # keep activations alive only while a later layer still consumes them
    import numpy as np
    # In which layer was a certain blob first produced
    first_produced = {}
    # When was a blob last used
    last_used = {}
    # Make sure all layers are supported, and compute the range each blob is used in
    for i, (n, l) in enumerate(zip(net._layer_names, net.layers)):
        if l.type in UNSUPPORTED_LAYERS:
            print("WARNING: Layer type '%s' not supported! Things might go very wrong..." % l.type)
        elif l.type not in SUPPORTED_LAYERS + PARAMETER_LAYERS + INPUT_LAYERS:
            print("Unknown layer type '%s'. Double-check that it is supported." % l.type)
        for t in top_names[n]:
            if t not in first_produced:
                first_produced[t] = i
        for b in bottom_names[n]:
            last_used[b] = i

    active_data = {}
    # Read all the input data
    for i, (n, l) in enumerate(zip(net._layer_names, net.layers)):
        # Initialize the layer
        if len(l.blobs) > 0:
            assert l.type in PARAMETER_LAYERS, "Unsupported parameter layer"
            assert len(top_names[n]) == 1, "Exactly one output supported"
            if np.sum(np.abs(l.blobs[0].data)) <= 1e-10:
                # Fill the parameters
                initializeLayer(net, i, {b: active_data[b] for b in bottom_names[n]},
                                top_names[n][0], bias, type)

        # TODO: Estimate and rescale the values [TODO: Record and undo this scaling above]

        # Run the network forward
        new_data = forward(net, i, NIT, {b: active_data[b] for b in bottom_names[n]}, top_names[n])
        active_data.update(new_data)

        # Delete all unused data
        for k in list(active_data):
            if k not in last_used or last_used[k] == i:
                del active_data[k]

        print('%-3d %-10s\t%-10s' % (i, n, l.type), '\t\t', ', '.join(list(active_data)))
        print([np.mean(np.abs(d)) for d in active_data.values()])



def netFromString(s, t=None):
    import caffe
    from tempfile import NamedTemporaryFile
    if t is None:
        t = caffe.TEST
    f = NamedTemporaryFile('w')
    f.write(s)
    f.flush()
    r = caffe.Net(f.name, t)
    f.close()
    return r

def layerTypes(net_proto):
    return {l.name: l.type for l in net_proto.layer}

def layerTops(net_proto):
    return {l.name: list(l.top) for l in net_proto.layer}

def layerBottoms(net_proto):
    return {l.name: list(l.bottom) for l in net_proto.layer}

def getFileList(f):
    from glob import glob
    from os import path
    return [p for p in glob(f) if path.isfile(p)]

def main():
    from argparse import ArgumentParser
    from os import path

    parser = ArgumentParser()
    parser.add_argument('prototxt')
    parser.add_argument('output_caffemodel')
    parser.add_argument('-l', '--load', help='Load a pretrained model and rescale it [bias and type are not supported]')
    parser.add_argument('-d', '--data', default=None, help='Image list to use [default prototxt data]')
    parser.add_argument('-b', '--bias', type=float, default=0.1, help='Bias')
    parser.add_argument('-t', '--type', default='elwise', help='Type: elwise, pca, zca, kmeans, rand (random input patches)')
    parser.add_argument('-z', action='store_true', help='Zero all weights and reinitialize')
    parser.add_argument('-cs', action='store_true', help='Correct for scaling')
    parser.add_argument('-q', action='store_true', help='Quiet execution')
    parser.add_argument('-s', type=float, default=1.0, help='Scale the input [only custom data "-d"]')
    parser.add_argument('-bs', type=int, default=16, help='Batch size [only custom data "-d"]')
    parser.add_argument('-nit', type=int, default=10, help='Number of iterations')
    parser.add_argument('--gpu', type=int, default=0, help='Which GPU to run on')
    args = parser.parse_args()

    if args.q:
        from os import environ
        environ['GLOG_minloglevel'] = '2'
    import caffe, load
    from caffe import NetSpec, layers as L

    caffe.set_mode_gpu()
    if args.gpu is not None:
        caffe.set_device(args.gpu)

    model = load.ProtoDesc(args.prototxt)
    net = NetSpec()
    if args.data is not None:
        fl = getFileList(args.data)
        if len(fl) == 0:
            print("Unknown data type for '%s'" % args.data)
            exit(1)
        from tempfile import NamedTemporaryFile
        f = NamedTemporaryFile('w')
        f.write('\n'.join([path.abspath(i) + ' 0' for i in fl]))
        f.flush()
        net.data, net.label = L.ImageData(source=f.name, batch_size=args.bs,
                                          new_width=model.input_dim[-1],
                                          new_height=model.input_dim[-1],
                                          transform_param=dict(mean_value=[104, 117, 123], scale=args.s),
                                          ntop=2)
        net.out = model(data=net.data, label=net.label)
    else:
        net.out = model()

    net_proto = net.to_proto()
    n = netFromString('force_backward:true\n' + str(net_proto), caffe.TRAIN)
    layer_top = layerTops(net_proto)
    layer_bottoms = layerBottoms(net_proto)

    if args.load is not None:
        n.copy_from(args.load)
        # TODO: Rescale the existing layers? (magicFix is not implemented yet)

    if args.z:
        # Zero out all layers
        for l in n.layers:
            for b in l.blobs:
                b.data[...] = 0

    magicInitialize(n, args.bias, NIT=args.nit, type=args.type, top_names=layer_top, bottom_names=layer_bottoms)
    if args.cs:
        # NOTE: calibrateGradientRatio is not defined anywhere in this commit,
        # so passing -cs will raise a NameError
        calibrateGradientRatio(n)
    n.save(args.output_caffemodel)

if __name__ == "__main__":
    main()
