import numpy
import theano
import cPickle
import theano.tensor as tt

class Layer(object):
    """One fully-connected layer: outputs = activation(inputs . W + b)."""

    def __init__(self, name, inputs, nbInputs, nbOutputs, activation=None):
        "A generic fully-connected layer; see reset() for the parameters."
        self.reset(name, inputs, nbInputs, nbOutputs, activation)

    def reset(self, name, inputs, nbInputs, nbOutputs, activation=None):
        """Resets everything to new parameters.

        name       -- label used to name the Theano shared variables
        inputs     -- symbolic input (matrix, or the previous layer's outputs)
        nbInputs   -- number of input units
        nbOutputs  -- number of output units
        activation -- optional elementwise function applied to the linear output
        """
        self.name = name
        self.inputs = inputs
        self.nbInputs = nbInputs
        self.nbOutputs = nbOutputs
        self.activation = activation

        # Random positive weights, normalized column-wise (builtin sum over a
        # 2-D array adds the rows, giving the per-column totals).
        w = numpy.random.random((nbInputs, nbOutputs))
        w = w / sum(w)
        w = numpy.asarray(w, dtype=theano.config.floatX)
        self.W = theano.shared(value=w, name=self.name + "_W")

        # Biases start at zero.
        zeros = numpy.zeros((nbOutputs,), dtype=theano.config.floatX)
        self.b = theano.shared(value=zeros, name=self.name + "_b")

        self.params = [self.W, self.b]

        # Linear response, optionally squashed by the activation function.
        linear = tt.dot(self.inputs, self.W) + self.b
        self.outputs = linear if self.activation is None else self.activation(linear)

    def serialize(self):
        "returns a dict {name, nbInputs, nbOutputs, activation, W, b}"
        return dict(
            name=self.name,
            nbInputs=self.nbInputs,
            nbOutputs=self.nbOutputs,
            activation=self.activation,
            W=self.W.get_value(borrow=True),
            b=self.b.get_value(borrow=True),
        )

    def __str__(self):
        "ASCII picture of the layer: one 'O' per output unit, abbreviated past 20."
        units = " O" * self.nbOutputs if self.nbOutputs < 21 else " O O O ... O O O"
        return "%s: [%s ](%s x %s)" % (self.name, units, self.nbInputs, self.nbOutputs)
class NeuralNet(object):
    """A feed-forward neural network trained by momentum gradient descent.

    Layers are stacked with stackLayer(); the Theano functions are (re)compiled
    lazily the first time train/test/propagate/predict is called after the
    topology changed.
    """

    def __init__(self, name, nbInputs, costFct, lr=0.01, momentum=0, l1=0., l2=0.):
        """A neural network.

        name     -- network name, used to prefix layer/variable names
        nbInputs -- number of input units
        costFct  -- callable costFct(targets, outputs) returning a symbolic cost
        lr       -- learning rate
        momentum -- momentum coefficient in [0, 1)
        l1, l2   -- L1/L2 regularization weights on the layer weight matrices
        """
        self.reset(name, nbInputs, costFct, lr, momentum, l1, l2)

    def reset(self, name, nbInputs, costFct, lr, momentum, l1, l2):
        "Resets the network to a fresh, layer-less state with the given hyper-parameters."
        self.name = name
        self.nbInputs = nbInputs
        self.costFct = costFct

        self.inputs = tt.matrix(name=self.name + "_X")
        self.y = tt.ivector(name=self.name + "_Y")
        self.layers = []
        # layersDct maps the *bare* layer name -> (layer, stack position).
        self.layersDct = {}
        self.params = []
        self.lr = lr
        self.momentum = momentum
        self.l1 = l1
        self.l2 = l2

        # True whenever the compiled Theano functions are stale.
        self._mustInitUpdates = True

    def stackLayer(self, name, nbOutputs, activation):
        "adds a layer to the stack and returns it"
        if name in self.layersDct:
            raise KeyError("There's already a layer by the name '%s'" % name)

        if len(self.layers) < 1:
            layer = Layer(self.name + "_" + name, self.inputs, self.nbInputs, nbOutputs, activation)
        else:
            priorLayer = self.layers[-1]
            layer = Layer(self.name + "_" + name, priorLayer.outputs, priorLayer.nbOutputs, nbOutputs, activation)

        self.layersDct[name] = (layer, len(self.layers))
        self.layers.append(layer)
        # BUG FIX: the compiled functions no longer match the topology
        # (popLayer already invalidated them; stackLayer did not).
        self._mustInitUpdates = True
        return layer

    def popLayer(self):
        "removes the last layer from the stack and returns it"
        layer = self.layers.pop()
        # BUG FIX: layersDct is keyed by the bare name, while layer.name
        # carries the "<netName>_" prefix; strip the prefix before deleting
        # (the old code always raised KeyError here).
        del self.layersDct[layer.name[len(self.name) + 1:]]

        self._mustInitUpdates = True

        return layer

    def _initUpdates(self):
        "Builds the update rules and compiles the Theano train/test/propagate/predict functions."
        self.outputs = self.layers[-1].outputs

        # BUG FIX: rebuild the parameter list from scratch; extending it on
        # every recompilation accumulated duplicates.
        self.params = []

        # BUG FIX: the regularization terms must be part of the cost that is
        # differentiated, otherwise L1/L2 never affect the gradients.
        L1 = self.l1 * sum([abs(l.W).sum() for l in self.layers])
        L2 = self.l2 * sum([(l.W ** 2).sum() for l in self.layers])
        cost = self.costFct(self.y, self.outputs) + L1 + L2

        self.updates = []
        for layer in self.layers:
            self.params.extend(layer.params)
            for param in layer.params:
                gparam = tt.grad(cost, param)
                # Momentum buffer: exponential moving average of the gradient,
                # initialized at zero with the parameter's shape.
                momentum_param = theano.shared(param.get_value() * 0., broadcastable=param.broadcastable)
                self.updates.append((momentum_param, self.momentum * momentum_param + (1 - self.momentum) * gparam))
                self.updates.append((param, param - self.lr * momentum_param))

        self.theano_train = theano.function(inputs=[self.inputs, self.y], outputs=[cost, self.outputs], updates=self.updates)
        self.theano_test = theano.function(inputs=[self.inputs, self.y], outputs=[cost, self.outputs])
        self.theano_propagate = theano.function(inputs=[self.inputs], outputs=self.outputs)
        self.theano_prediction = theano.function(inputs=[self.inputs], outputs=tt.argmax(self.outputs, axis=1))

        self._mustInitUpdates = False

    def _ensureCompiled(self):
        "Recompiles the Theano functions if the topology changed since the last compilation."
        if self._mustInitUpdates:
            self._initUpdates()

    def train(self, x, y):
        "Performs one gradient-descent update on batch (x, y); returns [cost, outputs]."
        self._ensureCompiled()
        return self.theano_train(x, y)

    def test(self, x, y):
        "same function for both test and validation"
        self._ensureCompiled()
        return self.theano_test(x, y)

    def propagate(self, x):
        "Returns the network outputs for inputs x (no parameter update)."
        self._ensureCompiled()
        return self.theano_propagate(x)

    def predict(self, x):
        "Returns the argmax class index per row of x."
        self._ensureCompiled()
        return self.theano_prediction(x)

    def save(self, filename):
        "save the whole model"
        model = {
            "name": self.name,
            "nbInputs": self.nbInputs,
            "costFct": self.costFct,
            "lr": self.lr,
            "momentum": self.momentum,
            "l1": self.l1,
            "l2": self.l2,
            "layers": [layer.serialize() for layer in self.layers]
        }

        fil = open(filename + '.mdl', 'wb')
        try:
            cPickle.dump(model, fil, -1)
        finally:
            fil.close()

    @classmethod
    def load(cls, filename):
        "load a previously saved model"
        # BUG FIX: pickles written with protocol -1 are binary; open in 'rb'.
        fil = open(filename, 'rb')
        try:
            model = cPickle.load(fil)
        finally:
            fil.close()

        nn = cls(model["name"], model["nbInputs"], model["costFct"], model["lr"], model["momentum"], model["l1"], model["l2"])
        # BUG FIX: model is a dict ("layers" key), not an object attribute.
        for layer in model["layers"]:
            # serialize() stored the prefixed name; stackLayer re-prefixes,
            # so strip the "<netName>_" prefix before restacking.
            bareName = layer["name"][len(model["name"]) + 1:]
            l = nn.stackLayer(bareName, layer["nbOutputs"], layer["activation"])
            # BUG FIX: assigning the raw arrays replaced the shared variables
            # and disconnected them from the symbolic graph; load values instead.
            l.W.set_value(layer["W"])
            l.b.set_value(layer["b"])
        return nn

    def __getitem__(self, layerName):
        "Returns the (layer, stack position) pair registered under layerName."
        return self.layersDct[layerName]

    def __str__(self):
        "ASCII sketch: one header line, then one centred row per layer."
        s = "<Net: %s (inputs: %s, cost: %s, lr: %s, momentum: %s, l1: %s, l2: %s)>" % (self.name, self.nbInputs, self.costFct.__name__, self.lr, self.momentum, self.l1, self.l2)
        ls = []
        for l in self.layers:
            strl = str(l)
            # '//' makes the Python-2 integer division explicit (and Python-3 safe).
            ls.append(' ' * (len(s) // 2) + 'X')
            ls.append(' ' * ((len(s) - len(strl)) // 2) + strl)

        s += "\n \n %s" % ('\n '.join(ls))
        return s