Commit f2186be (initial commit, 0 parents)
Showing 5 changed files with 353 additions and 0 deletions.
.gitignore (new file, 4 lines)
**/__pycache__
**/*.pyc
caffe
*.kdev4
LICENSE (new file, 26 lines)
Copyright (c) 2016, Philipp Krähenbühl
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

1. Redistributions of source code must retain the above copyright notice, this
   list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright notice,
   this list of conditions and the following disclaimer in the documentation
   and/or other materials provided with the distribution.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

The views and conclusions contained in the software and documentation are those
of the authors and should not be interpreted as representing official policies,
either expressed or implied, of the FreeBSD Project.
README.md (new file, 35 lines)
# Data-dependent initialization of convolutional neural networks

Created by Philipp Krähenbühl.

### Introduction

This code implements the initialization presented in our [arXiv tech report](http://arxiv.org/abs/1511.06856), which is under submission at ICLR 2016.

*This is a reimplementation and currently work in progress. Use at your own risk.*

### License

This code is released under the BSD License (refer to the LICENSE file for details).

### Citing

If you find our initialization useful in your research, please consider citing:

    @article{krahenbuhl2015data,
      title={Data-dependent Initializations of Convolutional Neural Networks},
      author={Kr{\"a}henb{\"u}hl, Philipp and Doersch, Carl and Donahue, Jeff and Darrell, Trevor},
      journal={arXiv preprint arXiv:1511.06856},
      year={2015}
    }

### Setup

Check out the project and create a symlink to caffe in the `magic_init` directory:
```Shell
ln -s path/to/caffe caffe
```

### Examples

*Will follow soon*
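Until then, the argument parser in the initialization script below suggests an invocation along these lines (the prototxt, output model, and image glob are placeholders): `python magic_init.py deploy.prototxt init.caffemodel -d 'images/*.jpg' -t elwise -nit 10`.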
load.py (new file, 61 lines)
import caffe

def parseProtoString(s):
    # Parse a prototxt string into a NetParameter protobuf message
    from google.protobuf import text_format
    from caffe.proto import caffe_pb2 as pb
    proto_net = pb.NetParameter()
    text_format.Merge(s, proto_net)
    return proto_net


def get_param(l, exclude=set(['top', 'bottom', 'name', 'type'])):
    # Recursively convert a protobuf message into a dict of keyword arguments,
    # skipping the fields in `exclude`; repeated fields become lists, scalars
    # are returned as-is
    if not hasattr(l, 'ListFields'):
        if hasattr(l, '__delitem__'):
            return list(l)
        return l
    r = dict()
    for f, v in l.ListFields():
        if f.name not in exclude:
            r[f.name] = get_param(v, [])
    return r
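As a quick illustration of `get_param`, here is a hypothetical `InnerProduct` layer built in memory; only the non-excluded fields survive:

```Python
from caffe.proto import caffe_pb2 as pb
from google.protobuf import text_format

# Build a small LayerParameter by hand and convert it to kwargs
l = pb.LayerParameter()
text_format.Merge('name: "fc" type: "InnerProduct" '
                  'inner_product_param { num_output: 10 }', l)
print(get_param(l))  # {'inner_product_param': {'num_output': 10}}
```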

class ProtoDesc:
    def __init__(self, prototxt):
        self.prototxt = prototxt
        with open(self.prototxt, 'r') as f:
            self.parsed_proto = parseProtoString(f.read())
        # Guess the input dimension
        self.input_dim = (3, 227, 227)
        net = self.parsed_proto
        if len(net.input_dim) > 0:
            self.input_dim = net.input_dim[1:]
        else:
            lrs = net.layer
            cs = [l.transform_param.crop_size for l in lrs
                  if l.HasField('transform_param')]
            if len(cs):
                self.input_dim = (3, cs[0], cs[0])

    def __call__(self, clip=None, **inputs):
        # Re-emit the parsed network as a NetSpec graph, substituting the
        # given `inputs` for layers of the same name; stop after layer `clip`
        from caffe import layers as L
        from collections import OrderedDict
        net = self.parsed_proto
        blobs = OrderedDict(inputs)
        for l in net.layer:
            if l.name not in inputs:
                in_place = l.top == l.bottom
                param = get_param(l)
                assert all([b in blobs for b in l.bottom]), \
                    "Some bottoms not found: " + ', '.join([b for b in l.bottom if b not in blobs])
                tops = getattr(L, l.type)(*[blobs[b] for b in l.bottom],
                                          ntop=len(l.top), in_place=in_place,
                                          name=l.name,
                                          **param)
                if len(l.top) <= 1:
                    tops = [tops]
                for i, t in enumerate(l.top):
                    blobs[t] = tops[i]
            if l.name == clip:
                break
        return list(blobs.values())[-1]
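A minimal sketch of how `ProtoDesc` is meant to be used, mirroring `main()` in the script below (`deploy.prototxt` is a placeholder path):

```Python
import caffe
from caffe import NetSpec
from load import ProtoDesc

model = ProtoDesc('deploy.prototxt')  # placeholder prototxt
print(model.input_dim)                # guessed (C, H, W), e.g. (3, 227, 227)

net = NetSpec()
net.out = model()                     # re-emit every layer as a NetSpec graph
print(net.to_proto())                 # prototxt reconstructed from the parse
```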
magic_init.py (new file, 227 lines)

INPUT_LAYERS = ['Data', 'ImageData']
PARAMETER_LAYERS = ['Convolution', 'InnerProduct']
SUPPORTED_LAYERS = ['ReLU', 'Sigmoid', 'LRN', 'Pooling']
# Use 'Dropout' at your own risk
# Unless Jon merges #2865, 'Split' cannot be supported
UNSUPPORTED_LAYERS = ['Split']

def forward(net, i, NIT, data, output_names):
    # Run layer i of `net` NIT times, feeding `data` and collecting the
    # blobs named in `output_names` after each pass
    # Create the top data if needed
    output = {t: [None]*NIT for t in output_names}
    for it in range(NIT):
        for b in data:
            net.blobs[b].data[...] = data[b][it]
        net._forward(i, i)
        for t in output_names:
            output[t][it] = 1*net.blobs[t].data  # 1* forces a copy
    return output

def flattenData(data):
    # Flatten a list of (N, C, H, W) batches into a single (C, -1) matrix
    import numpy as np
    return np.concatenate([d.swapaxes(0, 1).reshape((d.shape[1], -1)) for d in data], axis=1)
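For example (shapes only, with made-up sizes): three batches of shape `(4, 16, 8, 8)` flatten into a single `(16, 768)` matrix, one row of samples per channel:

```Python
import numpy as np
batches = [np.zeros((4, 16, 8, 8)) for _ in range(3)]  # NIT=3 batches of (N, C, H, W)
print(flattenData(batches).shape)  # (16, 768) == (C, NIT*N*H*W)
```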

def gatherInputData(net, layer_id, bottom_data, top_name):
    # This function gathers all input data.
    # In order to not replicate all the internal functionality of convolutions
    # (e.g. padding, ...) we gather the data in the output space and use random
    # gaussian weights. The output of this function is W and D, where the input
    # data I = D * W^-1 [with some abuse of tensor notation].
    # If we now compute an initialization A for D, we then simply multiply A by
    # W to obtain the proper initialization in the input space.
    import numpy as np
    l = net.layers[layer_id]
    NIT = len(list(bottom_data.values())[0])
    # How many times do we need to over-sample to get a full basis
    # (out of random projections)
    OS = int(np.ceil(np.prod(l.blobs[0].data.shape[1:]) / l.blobs[0].data.shape[0]))
    # Note this could cause some memory issues in the FC layers
    W, D = [], []
    for i in range(OS):
        d = l.blobs[0].data
        d[...] = np.random.normal(0, 1, d.shape)
        W.append(1*d)  # copy the random weights
        D.append(np.concatenate(forward(net, layer_id, NIT, bottom_data, [top_name])[top_name], axis=0))
    return np.concatenate(W, axis=0), np.concatenate(D, axis=1)
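A small numpy check of the identity this relies on, using a plain matrix multiply instead of a convolution (all names here are made up): responses computed with a weight `A` on the gathered output-space data match responses computed with `A.dot(T)` on the original inputs:

```Python
import numpy as np

rng = np.random.RandomState(0)
I = rng.randn(32, 10)   # input data (n_samples, n_in)
T = rng.randn(10, 10)   # random gaussian "gathering" weights
D = I.dot(T.T)          # data gathered in the output space
A = rng.randn(5, 10)    # some initialization computed from D
# D . A^T == I . (A T)^T, so multiplying A by T moves it back to input space
np.testing.assert_allclose(D.dot(A.T), I.dot(A.dot(T).T))
```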

def initializeWeight(D, type, N_OUT):
    import numpy as np
    # TODO: Compute the initialization using D
    return np.random.normal(0, 1, (N_OUT, D.shape[1]))
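The TODO above is where the data-dependent variants (`pca`, `zca`, `kmeans` per the `-t` flag) would plug in. A hedged sketch of what a PCA-based choice might look like, assuming `D` is `(n_samples, n_features)` with `n_features >= N_OUT` (this is our own sketch, not the released implementation):

```Python
import numpy as np

def pcaWeight(D, N_OUT):
    # Top principal directions of the gathered data, scaled so each
    # projection has roughly unit variance
    D = D - D.mean(axis=0)
    C = D.T.dot(D) / D.shape[0]          # (n_features, n_features) covariance
    w, V = np.linalg.eigh(C)             # ascending eigenvalues
    keep = np.argsort(w)[::-1][:N_OUT]   # indices of the N_OUT largest
    return (V[:, keep] / np.sqrt(w[keep] + 1e-8)).T   # (N_OUT, n_features)
```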


def initializeLayer(net, layer_id, bottom_data, top_name, bias=0, type='elwise'):
    import numpy as np
    l = net.layers[layer_id]
    NIT = len(list(bottom_data.values())[0])

    for p in l.blobs: p.data[...] = 0
    # Initialize the weights [k-means, ...]
    if type == 'elwise':
        d = l.blobs[0].data
        d[...] = np.random.normal(0, 1, d.shape)
    else:  # Use the input data
        # Gather the input data
        T, D = gatherInputData(net, layer_id, bottom_data, top_name)

        # Figure out the output dimensionality of d
        d = l.blobs[0].data

        # Prepare the data
        D = D.swapaxes(0, 1).reshape((D.shape[1], -1)).T

        # Compute the weights
        W = initializeWeight(D, type, N_OUT=d.shape[0])

        # Multiply the weights by the random basis
        # NOTE: This matrix multiplication is a bit large; if it's too slow,
        # reduce the oversampling in gatherInputData
        d[...] = np.dot(W, T.reshape((T.shape[0], -1))).reshape(d.shape)

    # Scale the mean and initialize the bias
    top_data = forward(net, layer_id, NIT, bottom_data, [top_name])[top_name]
    flat_data = flattenData(top_data)
    mu = flat_data.mean(axis=1)
    std = flat_data.std(axis=1)
    l.blobs[0].data[...] /= std.reshape((-1,)+(1,)*(len(l.blobs[0].data.shape)-1))
    for b in l.blobs[1:]:
        b.data[...] = -mu / std + bias
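The rescaling step above is the heart of the method: dividing the weights by the per-channel output std and setting the bias to `-mu/std + bias` leaves every unit with unit-variance activations whose mean equals `bias`. A quick check on a plain affine layer (made-up sizes):

```Python
import numpy as np

rng = np.random.RandomState(0)
X = rng.randn(1000, 20) * 3 + 1              # inputs with arbitrary statistics
W = rng.randn(8, 20)
out = X.dot(W.T)
mu, std = out.mean(axis=0), out.std(axis=0)
W2, b2 = W / std[:, None], -mu / std + 0.1   # bias=0.1, the script's default
out2 = X.dot(W2.T) + b2
print(out2.mean(axis=0).round(2))            # ~0.1 everywhere
print(out2.std(axis=0).round(2))             # ~1.0 everywhere
```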

def magicInitialize(net, bias=0, NIT=10, type='elwise', bottom_names={}, top_names={}):
    import numpy as np
    # Which layer first produced a certain blob
    first_produced = {}
    # When was a blob last used
    last_used = {}
    # Make sure all layers are supported, and compute the range each blob is used in
    for i, (n, l) in enumerate(zip(net._layer_names, net.layers)):
        if l.type in UNSUPPORTED_LAYERS:
            print("WARNING: Layer type '%s' not supported! Things might go very wrong..." % l.type)
        elif l.type not in SUPPORTED_LAYERS+PARAMETER_LAYERS+INPUT_LAYERS:
            print("Unknown layer type '%s'. Double-check if it is supported" % l.type)
        for t in top_names[n]:
            if t not in first_produced:
                first_produced[t] = i
        for b in bottom_names[n]:
            last_used[b] = i

    active_data = {}
    # Read all the input data
    for i, (n, l) in enumerate(zip(net._layer_names, net.layers)):
        # Initialize the layer
        if len(l.blobs) > 0:
            assert l.type in PARAMETER_LAYERS, "Unsupported parameter layer"
            assert len(top_names[n]) == 1, "Exactly one output supported"
            if np.sum(np.abs(l.blobs[0].data)) <= 1e-10:
                # Fill the parameters
                initializeLayer(net, i, {b: active_data[b] for b in bottom_names[n]}, top_names[n][0], bias, type)

        # TODO: Estimate and rescale the values [TODO: Record and undo this scaling above]

        # Run the network forward
        new_data = forward(net, i, NIT, {b: active_data[b] for b in bottom_names[n]}, top_names[n])
        active_data.update(new_data)

        # Delete all unused data
        for k in list(active_data):
            if k not in last_used or last_used[k] == i:
                del active_data[k]

        print('%-3d %-10s\t%-10s' % (i, n, l.type), '\t\t', ', '.join(list(active_data)))
        print([np.mean(np.abs(d)) for d in active_data.values()])


def netFromString(s, t=None):
    # Build a caffe.Net from an in-memory prototxt string via a temp file
    import caffe
    from tempfile import NamedTemporaryFile
    if t is None: t = caffe.TEST
    f = NamedTemporaryFile('w')
    f.write(s)
    f.flush()
    r = caffe.Net(f.name, t)
    f.close()
    return r
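For instance, with a minimal, hypothetical old-style prototxt that declares just an input blob:

```Python
import caffe

s = 'input: "data"\n' + '\n'.join('input_dim: %d' % d for d in (1, 3, 8, 8))
net = netFromString(s)   # phase defaults to caffe.TEST
print(list(net.blobs))   # ['data']
```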

def layerTypes(net_proto):
    return {l.name: l.type for l in net_proto.layer}

def layerTops(net_proto):
    return {l.name: list(l.top) for l in net_proto.layer}

def layerBottoms(net_proto):
    return {l.name: list(l.bottom) for l in net_proto.layer}

def getFileList(f):
    # Expand a glob pattern and keep only regular files
    from glob import glob
    from os import path
    return [f for f in glob(f) if path.isfile(f)]

def main():
    from argparse import ArgumentParser
    from os import path

    parser = ArgumentParser()
    parser.add_argument('prototxt')
    parser.add_argument('output_caffemodel')
    parser.add_argument('-l', '--load', help='Load a pretrained model and rescale it [bias and type are not supported]')
    parser.add_argument('-d', '--data', default=None, help='Image list to use [default prototxt data]')
    parser.add_argument('-b', '--bias', type=float, default=0.1, help='Bias')
    parser.add_argument('-t', '--type', default='elwise', help='Type: elwise, pca, zca, kmeans, rand (random input patches)')
    parser.add_argument('-z', action='store_true', help='Zero all weights and reinitialize')
    parser.add_argument('-cs', action='store_true', help='Correct for scaling')
    parser.add_argument('-q', action='store_true', help='Quiet execution')
    parser.add_argument('-s', type=float, default=1.0, help='Scale the input [only custom data "-d"]')
    parser.add_argument('-bs', type=int, default=16, help='Batch size [only custom data "-d"]')
    parser.add_argument('-nit', type=int, default=10, help='Number of iterations')
    parser.add_argument('--fix', action='store_true', help='Rescale a loaded model [used with "-l"]')
    parser.add_argument('--gpu', type=int, default=0, help='What gpu to run it on?')
    args = parser.parse_args()

    if args.q:
        from os import environ
        environ['GLOG_minloglevel'] = '2'  # silence glog before caffe is imported
    import caffe, load
    from caffe import NetSpec, layers as L

    caffe.set_mode_gpu()
    if args.gpu is not None:
        caffe.set_device(args.gpu)

    model = load.ProtoDesc(args.prototxt)
    net = NetSpec()
    if args.data is not None:
        fl = getFileList(args.data)
        if len(fl) == 0:
            print("Unknown data type for '%s'" % args.data)
            exit(1)
        from tempfile import NamedTemporaryFile
        f = NamedTemporaryFile('w')
        f.write('\n'.join([path.abspath(i)+' 0' for i in fl]))
        f.flush()
        net.data, net.label = L.ImageData(source=f.name, batch_size=args.bs,
                                          new_width=model.input_dim[-1],
                                          new_height=model.input_dim[-1],
                                          transform_param=dict(mean_value=[104, 117, 123], scale=args.s),
                                          ntop=2)
        net.out = model(data=net.data, label=net.label)
    else:
        net.out = model()

    net_proto = net.to_proto()
    n = netFromString('force_backward:true\n'+str(net_proto), caffe.TRAIN)
    layer_top = layerTops(net_proto)
    layer_bottoms = layerBottoms(net_proto)

    if args.load is not None:
        n.copy_from(args.load)
        # Rescale existing layers? (magicFix is not defined in this file yet)
        if args.fix:
            magicFix(n, args.nit)

    if args.z:
        # Zero out all layers
        for l in n.layers:
            for b in l.blobs:
                b.data[...] = 0

    magicInitialize(n, args.bias, NIT=args.nit, type=args.type, top_names=layer_top, bottom_names=layer_bottoms)
    if args.cs:
        # calibrateGradientRatio is not defined in this file yet
        calibrateGradientRatio(n)
    n.save(args.output_caffemodel)

if __name__ == "__main__":
    main()