forked from odelalleau/PLearn
-
Notifications
You must be signed in to change notification settings - Fork 4
/
Copy pathDiscriminativeDeepBeliefNet.h
332 lines (252 loc) · 11.4 KB
/
DiscriminativeDeepBeliefNet.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
// -*- C++ -*-
// DiscriminativeDeepBeliefNet.h
//
// Copyright (C) 2007 Hugo Larochelle
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. The name of the authors may not be used to endorse or promote
// products derived from this software without specific prior written
// permission.
//
// THIS SOFTWARE IS PROVIDED BY THE AUTHORS ``AS IS'' AND ANY EXPRESS OR
// IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN
// NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
// TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// This file is part of the PLearn library. For more information on the PLearn
// library, go to the PLearn Web site at www.plearn.org
// Authors: Hugo Larochelle
/*! \file DiscriminativeDeepBeliefNet.h */
#ifndef DiscriminativeDeepBeliefNet_INC
#define DiscriminativeDeepBeliefNet_INC
#include <plearn/vmat/ClassSubsetVMatrix.h>
#include <plearn_learners/generic/PLearner.h>
#include <plearn_learners/online/GradNNetLayerModule.h>
#include <plearn_learners/online/OnlineLearningModule.h>
#include <plearn_learners/online/CostModule.h>
#include <plearn_learners/online/ModuleStackModule.h>
#include <plearn_learners/online/NLLCostModule.h>
#include <plearn_learners/online/ClassErrorCostModule.h>
#include <plearn_learners/online/CombiningCostsModule.h>
#include <plearn_learners/online/RBMClassificationModule.h>
#include <plearn_learners/online/RBMLayer.h>
#include <plearn_learners/online/RBMMixedLayer.h>
#include <plearn_learners/online/RBMConnection.h>
#include <plearn_learners/online/SoftmaxModule.h>
#include <plearn/misc/PTimer.h>
namespace PLearn {
/**
 * Deep Belief Net where the stacked RBMs also use a discriminative criteria.
 *
 * Layers are pre-trained greedily, one at a time, with a weighted
 * combination of the usual generative (contrastive divergence) criteria
 * and a discriminative criteria based on nearest neighbors from other
 * classes; a supervised fine-tuning phase follows (see train()).
 */
class DiscriminativeDeepBeliefNet : public PLearner
{
    typedef PLearner inherited;

public:
    //##### Public Build Options ############################################

    //! Contrastive divergence learning rate
    real cd_learning_rate;

    //! Contrastive divergence decrease constant
    real cd_decrease_ct;

    //! The learning rate used during the fine tuning gradient descent
    real fine_tuning_learning_rate;

    //! The decrease constant of the learning rate used during fine tuning
    //! gradient descent
    real fine_tuning_decrease_ct;

    //! Number of examples to use during each phase of greedy pre-training.
    //! The number of fine-tuning steps is defined by nstages.
    TVec<int> training_schedule;

    //! The layers of units in the network
    TVec< PP<RBMLayer> > layers;

    //! The weights of the connections between the layers
    TVec< PP<RBMConnection> > connections;

    //! Additional units for greedy unsupervised learning
    TVec< PP<RBMLayer> > unsupervised_layers;

    //! Additional connections for greedy unsupervised learning
    TVec< PP<RBMConnection> > unsupervised_connections;

    //! Number of good nearest neighbors to attract and
    //! bad nearest neighbors to repel.
    int k_neighbors;

    //! Number of classes
    int n_classes;

    //! Weight of the discriminative criteria
    real discriminative_criteria_weight;

    //! Output weights l1_penalty_factor
    real output_weights_l1_penalty_factor;

    //! Output weights l2_penalty_factor
    real output_weights_l2_penalty_factor;

    //! Indication that the discriminative criteria should use the joint
    //! over the input and the hidden units, instead of the conditional
    //! over the hidden units given the input units.
    bool compare_joint_in_discriminative_criteria;

    //! Indication that the generative criteria should not be used during learning
    //! (does not work with compare_joint_in_discriminative_criteria = true).
    bool do_not_use_generative_criteria;

//    //! Indication that the discriminative and generative criteria should cancel
//    //! their normalization terms. This is for the compare_joint_in_discriminative_criteria
//    //! option, and this option ignores the value of discriminative_criteria_weight.
//    bool cancel_normalization_terms;

    //##### Public Learnt Options ###########################################

    //! Number of layers
    int n_layers;

public:
    //##### Public Member Functions #########################################

    //! Default constructor
    DiscriminativeDeepBeliefNet();

    //##### PLearner Member Functions #######################################

    //! Returns the size of this learner's output, (which typically
    //! may depend on its inputsize(), targetsize() and set options).
    virtual int outputsize() const;

    //! (Re-)initializes the PLearner in its fresh state (that state may depend
    //! on the 'seed' option) and sets 'stage' back to 0 (this is the stage of
    //! a fresh learner!).
    virtual void forget();

    //! The role of the train method is to bring the learner up to
    //! stage==nstages, updating the train_stats collector with training costs
    //! measured on-line in the process.
    virtual void train();

    //! Computes the output from the input.
    virtual void computeOutput(const Vec& input, Vec& output) const;

    //! Computes the costs from already computed output.
    virtual void computeCostsFromOutputs(const Vec& input, const Vec& output,
                                         const Vec& target, Vec& costs) const;

    //! Returns the names of the costs computed by computeCostsFromOutputs (and
    //! thus the test method).
    virtual TVec<std::string> getTestCostNames() const;

    //! Returns the names of the objective costs that the train method computes
    //! and for which it updates the VecStatsCollector train_stats.
    virtual TVec<std::string> getTrainCostNames() const;

    /**
     * Declares the training set. Then calls build() and forget() if
     * necessary. Also sets this learner's inputsize_ targetsize_ weightsize_
     * from those of the training_set. Note: You shouldn't have to override
     * this in subclasses, except in maybe to forward the call to an
     * underlying learner.
     */
    virtual void setTrainingSet(VMat training_set, bool call_forget=true);

    //! Recomputes nearest_neighbors_indices from the current training set
    //! (presumably the "bad" neighbors from other classes — confirm in .cc).
    void updateNearestNeighbors();

    //! One greedy pre-training update for the layer at the given index,
    //! using a (dissimilar) example for the discriminative criteria.
    void greedyStep( const Vec& input, const Vec& target, int index,
                     Vec train_costs, int stage, Vec dissimilar_example);

    //! One supervised fine-tuning gradient step through the whole stack.
    void fineTuningStep( const Vec& input, const Vec& target,
                         Vec& train_costs);

    //! Computes the hidden representation of the input up to the given layer.
    void computeRepresentation( const Vec& input,
                                Vec& representation, int layer) const;

    //##### PLearn::Object Protocol #########################################

    // Declares other standard object methods.
    // ### If your class is not instantiatable (it has pure virtual methods)
    // ### you should replace this by PLEARN_DECLARE_ABSTRACT_OBJECT_METHODS
    PLEARN_DECLARE_OBJECT(DiscriminativeDeepBeliefNet);

    // Simply calls inherited::build() then build_()
    virtual void build();

    //! Transforms a shallow copy into a deep copy
    // (PLEASE IMPLEMENT IN .cc)
    virtual void makeDeepCopyFromShallowCopy(CopiesMap& copies);

protected:
    //##### Not Options #####################################################

    //! Indication that nearest_neighbors_indices is up to date
    bool nearest_neighbors_are_up_to_date;

    //! Stores the activations of the input and hidden layers
    //! (at the input of the layers)
    mutable TVec<Vec> activations;

    //! Stores the expectations of the input and hidden layers
    //! (at the output of the layers)
    mutable TVec<Vec> expectations;

    //! Stores the gradient of the cost wrt the activations of
    //! the input and hidden layers
    //! (at the input of the layers)
    mutable TVec<Vec> activation_gradients;

    //! Stores the gradient of the cost wrt the expectations of
    //! the input and hidden layers
    //! (at the output of the layers)
    mutable TVec<Vec> expectation_gradients;

    //! Layers used for greedy learning
    TVec< PP<RBMLayer> > greedy_layers;

    //! Connections used for greedy learning
    TVec< PP<RBMConnection> > greedy_connections;

    //! Dissimilar example representation
    Vec dissimilar_example_representation;

    //! Example representation
    mutable Vec input_representation;

    //! Positive down statistic (CD phase, visible side)
    Vec pos_down_val;

    //! Positive up statistic (CD phase, hidden side)
    Vec pos_up_val;

    //! Negative down statistic
    Vec neg_down_val;

    //! Negative up statistic
    Vec neg_up_val;

    //! First discriminative positive down statistic
    Vec disc_pos_down_val1;

    //! First discriminative positive up statistic
    Vec disc_pos_up_val1;

    //! Second discriminative positive down statistic
    Vec disc_pos_down_val2;

    //! Second discriminative positive up statistic
    Vec disc_pos_up_val2;

    //! Negative down statistic (discriminative criteria)
    Vec disc_neg_down_val;

    //! Negative up statistic (discriminative criteria)
    Vec disc_neg_up_val;

    //! Input of cost function
    mutable Vec final_cost_input;

    //! Cost value
    mutable Vec final_cost_value;

    //! Cost gradient on output layer
    mutable Vec final_cost_gradient;

    //! Datasets for each class
    TVec< PP<ClassSubsetVMatrix> > other_class_datasets;

    //! Nearest neighbors for each training example
    TMat<int> nearest_neighbors_indices;

    //! Stages of the different greedy phases
    TVec<int> greedy_stages;

    //! Currently trained layer (1 means the first hidden layer,
    //! n_layers means the output layer)
    int currently_trained_layer;

    //! Output layer of neural net
    PP<OnlineLearningModule> final_module;

    //! Cost on output layer of neural net
    PP<CostModule> final_cost;

protected:
    //##### Protected Member Functions ######################################

    //! Declares the class options.
    static void declareOptions(OptionList& ol);

private:
    //##### Private Member Functions ########################################

    //! This does the actual building.
    void build_();

    //! Builds the RBM layers and their connections from the options.
    void build_layers_and_connections();

    //! Builds final_module and final_cost for supervised fine-tuning.
    void build_output_layer_and_cost();

    //! Sets the given learning rate on the trainable sub-modules.
    void setLearningRate( real the_learning_rate );

private:
    //##### Private Data Members ############################################

    // The rest of the private stuff goes here
};
// Declares a few other classes and functions related to this class
DECLARE_OBJECT_PTR(DiscriminativeDeepBeliefNet);
} // end of namespace PLearn
#endif
/*
Local Variables:
mode:c++
c-basic-offset:4
c-file-style:"stroustrup"
c-file-offsets:((innamespace . 0)(inline-open . 0))
indent-tabs-mode:nil
fill-column:79
End:
*/
// vim: filetype=cpp:expandtab:shiftwidth=4:tabstop=8:softtabstop=4:encoding=utf-8:textwidth=79 :