From 6d8858d34c885917c91bada1f243388eb3e8515c Mon Sep 17 00:00:00 2001 From: Nicholas Leonard Date: Wed, 24 May 2017 14:27:31 -0400 Subject: [PATCH] move dpnn modules --- AbstractRecurrent.lua | 4 + AbstractSequencerCriterion.lua | 6 + BatchNormalization.lua | 10 - BinaryLogisticRegression.lua | 91 --- CAddTensorTable.lua | 43 -- CMakeLists.txt | 43 +- Clip.lua | 35 - Collapse.lua | 26 - Constant.lua | 36 - Convert.lua | 244 ------ Criterion.lua | 4 + Dictionary.lua | 6 - FireModule.lua | 47 -- Inception.lua | 192 ----- Kmeans.lua | 207 ------ LinearNoBias.lua | 65 -- MaskZeroCriterion.lua | 8 +- Module.lua | 128 ---- ModuleCriterion.lua | 44 -- NCECriterion.lua | 61 +- NCEModule.lua | 85 +-- NormStabilizer.lua | 1 - OneHot.lua | 65 -- PCAColorTransform.lua | 117 --- Padding.lua | 52 -- README.md | 554 +------------- RecGRU.lua | 1 - RecLSTM.lua | 1 - Recurrence.lua | 1 - Recursor.lua | 2 - Sequencer.lua | 25 +- Serial.lua | 52 -- SimpleColorTransform.lua | 90 --- SpatialBatchNormalization.lua | 12 - SpatialBinaryConvolution.lua | 173 ----- SpatialBinaryLogisticRegression.lua | 80 -- SpatialConvolution.lua | 9 - SpatialConvolutionMM.lua | 3 - SpatialFeatNormalization.lua | 73 -- SpatialMaxPooling.lua | 6 - SpatialRegionDropout.lua | 80 -- SpatialUniformCrop.lua | 121 --- WhiteNoise.lua | 38 - ZeroGrad.lua | 34 - ZipTable.lua | 34 - ZipTableOneToMany.lua | 37 - deprecated/FastLSTM.lua | 10 +- deprecated/GRU.lua | 9 +- deprecated/LSTM.lua | 5 +- examples/README.md | 1 - examples/multigpu-nce-rnnlm.lua | 3 +- examples/noise-contrastive-estimate.lua | 10 +- examples/recurrent-language-model.lua | 22 +- examples/recurrent-visual-attention.lua | 3 +- init.lua | 41 +- scripts/evaluate-rnnlm.lua | 8 +- test/bigtest.lua | 80 +- test/test.lua | 700 +----------------- test/test_firemodule.lua | 40 - tutorials/ladder.md | 107 --- tutorials/ladder_network/ladder.lua | 444 ----------- .../ladder_network/ladder_help_funcs.lua | 220 ------ tutorials/lena.jpg | Bin 6600 -> 0 bytes tutorials/srd1.jpg | Bin 6504 -> 0 bytes tutorials/srd2.jpg | Bin 6478 -> 0 bytes utils.lua | 2 +- 66 files changed, 155 insertions(+), 4596 deletions(-) delete mode 100644 BinaryLogisticRegression.lua delete mode 100644 CAddTensorTable.lua delete mode 100644 Clip.lua delete mode 100644 Collapse.lua delete mode 100644 Constant.lua delete mode 100644 Convert.lua delete mode 100644 Dictionary.lua delete mode 100644 FireModule.lua delete mode 100644 Inception.lua delete mode 100644 Kmeans.lua delete mode 100644 LinearNoBias.lua delete mode 100644 ModuleCriterion.lua delete mode 100644 OneHot.lua delete mode 100644 PCAColorTransform.lua delete mode 100644 Padding.lua delete mode 100644 Serial.lua delete mode 100644 SimpleColorTransform.lua delete mode 100644 SpatialBatchNormalization.lua delete mode 100644 SpatialBinaryConvolution.lua delete mode 100644 SpatialBinaryLogisticRegression.lua delete mode 100644 SpatialConvolution.lua delete mode 100644 SpatialConvolutionMM.lua delete mode 100644 SpatialFeatNormalization.lua delete mode 100644 SpatialMaxPooling.lua delete mode 100644 SpatialRegionDropout.lua delete mode 100644 SpatialUniformCrop.lua delete mode 100644 WhiteNoise.lua delete mode 100644 ZeroGrad.lua delete mode 100644 ZipTable.lua delete mode 100644 ZipTableOneToMany.lua delete mode 100644 test/test_firemodule.lua delete mode 100644 tutorials/ladder.md delete mode 100644 tutorials/ladder_network/ladder.lua delete mode 100644 tutorials/ladder_network/ladder_help_funcs.lua delete mode 100644 tutorials/lena.jpg delete mode 
100644 tutorials/srd1.jpg delete mode 100644 tutorials/srd2.jpg diff --git a/AbstractRecurrent.lua b/AbstractRecurrent.lua index 3338185..3bbac87 100644 --- a/AbstractRecurrent.lua +++ b/AbstractRecurrent.lua @@ -33,6 +33,9 @@ function AbstractRecurrent:getStepModule(step) end function AbstractRecurrent:updateOutput(input) + if self.train ~= false then + self:recycle() + end if self.zeroMask then -- where zeroMask = 1, the past is forgotten, that is, the output/gradOutput is zeroed local stepmodule = (self.train==false) and self.modules[1] or self:getStepModule(self.step) @@ -189,6 +192,7 @@ end function AbstractRecurrent:maskZero(v1) if not self.maskzero then + assert(not torch.isTypeOf(self.modules[1], 'nn.AbstractRecurrent'), "Doesn't support zero-masking on nested AbstractRecurrent instances") self.maskzero = true local stepmodule = nn.MaskZero(self.modules[1], v1) self.sharedClones = {stepmodule} diff --git a/AbstractSequencerCriterion.lua b/AbstractSequencerCriterion.lua index ebac701..b62272a 100644 --- a/AbstractSequencerCriterion.lua +++ b/AbstractSequencerCriterion.lua @@ -46,3 +46,9 @@ function AbstractSequencerCriterion:setZeroMask(zeroMask) end end +function AbstractSequencerCriterion:type(type, typecache) + for key, clone in pairs(self.clones) do + clone:type(type, typecache) + end + return parent.type(self, type, typecache) +end diff --git a/BatchNormalization.lua b/BatchNormalization.lua index 2ffad94..82f5783 100644 --- a/BatchNormalization.lua +++ b/BatchNormalization.lua @@ -1,16 +1,6 @@ local _ = require 'moses' local BN, parent = nn.BatchNormalization, nn.Module -local empty = _.clone(parent.dpnn_mediumEmpty) -table.insert(empty, 'buffer') -table.insert(empty, 'buffer2') -table.insert(empty, 'centered') -table.insert(empty, 'std') -table.insert(empty, 'normalized') -table.insert(empty, 'output') -table.insert(empty, 'gradInput') -BN.dpnn_mediumEmpty = empty - -- for sharedClone local params = _.clone(parent.dpnn_parameters) table.insert(params, 'running_mean') diff --git a/BinaryLogisticRegression.lua b/BinaryLogisticRegression.lua deleted file mode 100644 index 02ccaab..0000000 --- a/BinaryLogisticRegression.lua +++ /dev/null @@ -1,91 +0,0 @@ ------------------------------------------------------------------------- ---[[ BinaryLogisticRegression ]]-- --- Takes an image of size batchSize x 1 or just batchSize as input. --- Computes Binary Logistic Regression Cost. --- Useful for 2 class classification. 
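The cost described above is sum_i log(1 + exp(-y_i * x_i)) over targets y in {-1, 1}. A minimal migration sketch, assuming stock nn's `SoftMarginCriterion` (which computes this same size-averaged cost), for code that loses `nn.BinaryLogisticRegression` with this commit:

```lua
-- Hedged sketch: nn.SoftMarginCriterion computes the same size-averaged
-- cost, sum_i log(1 + exp(-y_i * x_i)) / n, over targets in {-1, 1}.
require 'nn'

local crit = nn.SoftMarginCriterion()   -- sizeAverage = true by default
local input = torch.randn(8)            -- batchSize scores
local target = torch.Tensor(8):random(0, 1):mul(2):add(-1) -- targets in {-1, 1}

local loss = crit:forward(input, target)
local gradInput = crit:backward(input, target)
print(loss, gradInput:size())
```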
------------------------------------------------------------------------- - -local BinaryLogisticRegression, parent = torch.class('nn.BinaryLogisticRegression', 'nn.Criterion') - -function BinaryLogisticRegression:__init(sizeAverage) - parent.__init(self) - if sizeAverage ~= nil then - self.sizeAverage = sizeAverage - else - self.sizeAverage = true - end -end - -function BinaryLogisticRegression:updateOutput(input, target) - local inputDim = input:nDimension() - local targetDim = target:nDimension() - - -- Check dimensions of input and target - assert(inputDim == 1 or inputDim == 2, - "Input:Expecting batchSize or batchSize x 1") - assert(targetDim == 1 or targetDim == 2, - "Target:Expecting batchSize or batchSize x 1") - if inputDim == 2 then - assert(input:size(1)==1 or input:size(2)==1, - "Input: Expecting batchSize x 1.") - end - if targetDim == 2 then - assert(target:size(1)==1 or target:size(2)==1, - "Target: Expecting batchSize x 1.") - end - - local inputElements = input:nElement() - local targetElements = target:nElement() - - assert(inputElements == targetElements, - "No of input and target elements should be same.") - - self._k = inputElements - local input = input:view(-1) - local target = target:view(-1) - - self._baseExponents = self._baseExponents or input.new() - self._coeff = self._coeff or input.new() - self._logCoeff = self._logCoeff or input.new() - - --Compute exponent = -target*input - self._baseExponents:resize(input:size()):copy(input) - self._baseExponents:cmul(target) - self._baseExponents:mul(-1) - -- Compute exp(exponent) - self._baseExponents:exp() - - self._coeff:resize(input:size()):copy(self._baseExponents) - self._coeff:add(1) - - self._logCoeff:resize(input:size()):copy(self._coeff) - self._logCoeff:log() - - if self.sizeAverage then - return self._logCoeff:sum()/(self._k) - else - return self._logCoeff:sum() - end -end - -function BinaryLogisticRegression:updateGradInput(input, target) - self.gradInput = self.gradInput or input.new() - local gradInput = self.gradInput - gradInput:resize(input:size()):copy(target) - gradInput:mul(-1) - gradInput:cmul(self._baseExponents) - gradInput:cdiv(self._coeff) - if self.sizeAverage then - gradInput:div(self._k) - end - return gradInput -end - -function BinaryLogisticRegression:type(type, tensorCache) - if type then - self._baseExponents = nil - self._coeff = nil - self._logCoeff = nil - end - return parent.type(self, type, tensorCache) -end diff --git a/CAddTensorTable.lua b/CAddTensorTable.lua deleted file mode 100644 index 16efe44..0000000 --- a/CAddTensorTable.lua +++ /dev/null @@ -1,43 +0,0 @@ - -local CAddTensorTable, parent = torch.class('nn.CAddTensorTable', 'nn.Module') - -function CAddTensorTable:__init() - parent.__init(self) - self.gradInput = {} -end - --- input is a table with 2 entries. input[1] is the vector to be added. 
--- input[2] is the table to which we add the vector -function CAddTensorTable:updateOutput(input) - local currentOutput = {} - for i=1,#input[2] do - currentOutput[i] = currentOutput[i] or input[1].new() - currentOutput[i]:resizeAs(input[1]) - currentOutput[i]:copy(input[2][i]) - currentOutput[i]:add(input[1]) - end - for i = #input[2]+1, #currentOutput do - currentOutput[i] = nil - end - self.output = currentOutput - return self.output -end - -function CAddTensorTable:updateGradInput(input, gradOutput) - self.gradInput[1] = self.gradInput[1] or input[1].new() - self.gradInput[1]:resizeAs(input[1]) - self.gradInput[1]:copy(gradOutput[1]) - for i=2, #input[2] do - self.gradInput[1]:add(gradOutput[i]) - end - self.gradInput[2] = self.gradInput[2] or {} - for i=1,#input[2] do - self.gradInput[2][i] = self.gradInput[2][i] or input[1].new() - self.gradInput[2][i]:resizeAs(input[1]) - self.gradInput[2][i]:copy(gradOutput[i]) - end - for i=#input[2]+1, #self.gradInput[2] do - self.gradInput[2][i] = nil - end - return self.gradInput -end \ No newline at end of file diff --git a/CMakeLists.txt b/CMakeLists.txt index ec6dbbb..74efbc0 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -17,21 +17,15 @@ SET(luasrc AbstractSequencer.lua AbstractSequencerCriterion.lua BiSequencer.lua - deprecated/BiSequencerLM.lua CopyGrad.lua Dropout.lua ExpandAs.lua - deprecated/FastLSTM.lua - deprecated/GRU.lua - LinearNoBias.lua LookupTableMaskZero.lua - deprecated/LSTM.lua MaskZero.lua MaskZeroCriterion.lua Module.lua Mufuru.lua NormStabilizer.lua - Padding.lua Recurrence.lua RecurrentAttention.lua Recursor.lua @@ -42,11 +36,8 @@ SET(luasrc SeqBLSTM.lua SeqGRU.lua SeqLSTM.lua - deprecated/SeqLSTMP.lua - deprecated/SeqReverseSequence.lua Sequencer.lua SequencerCriterion.lua - ZeroGrad.lua test/bigtest.lua test/test.lua VariableLength.lua @@ -60,27 +51,13 @@ SET(luasrc ArgMax.lua BatchNormalization.lua BinaryClassReward.lua - BinaryLogisticRegression.lua - CAddTensorTable.lua CategoricalEntropy.lua - Clip.lua - Collapse.lua - Constant.lua Container.lua - Convert.lua Criterion.lua - Dictionary.lua - FireModule.lua - Inception.lua - Kmeans.lua LookupTable.lua - ModuleCriterion.lua NCECriterion.lua NCEModule.lua - OneHot.lua - PCAColorTransform.lua ParallelTable.lua - PrintSize.lua Reinforce.lua ReinforceBernoulli.lua ReinforceCategorical.lua @@ -88,24 +65,16 @@ SET(luasrc ReinforceNormal.lua ReverseSequence.lua Sequential.lua - Serial.lua - SimpleColorTransform.lua - SpatialBatchNormalization.lua - SpatialBinaryConvolution.lua - SpatialBinaryLogisticRegression.lua - SpatialConvolution.lua - SpatialConvolutionMM.lua - SpatialFeatNormalization.lua SpatialGlimpse.lua - SpatialMaxPooling.lua - SpatialRegionDropout.lua - SpatialUniformCrop.lua TotalDropout.lua VRClassReward.lua - WhiteNoise.lua - ZipTable.lua - ZipTableOneToMany.lua ReverseUnreverse.lua + deprecated/SeqLSTMP.lua + deprecated/SeqReverseSequence.lua + deprecated/BiSequencerLM.lua + deprecated/FastLSTM.lua + deprecated/GRU.lua + deprecated/LSTM.lua ) ADD_TORCH_PACKAGE(rnn "${src}" "${luasrc}" "An RNN library for Torch") diff --git a/Clip.lua b/Clip.lua deleted file mode 100644 index fdd04de..0000000 --- a/Clip.lua +++ /dev/null @@ -1,35 +0,0 @@ ------------------------------------------------------------------------- ---[[ Clip ]]-- --- clips values within minval and maxval ------------------------------------------------------------------------- -local Clip, parent = torch.class("nn.Clip", "nn.Module") - -function Clip:__init(minval, maxval) - 
assert(torch.type(minval) == 'number') - assert(torch.type(maxval) == 'number') - self.minval = minval - self.maxval = maxval - parent.__init(self) -end - -function Clip:updateOutput(input) - -- bound results within height and width - self._mask = self._mask or input.new() - self._byte = self._byte or torch.ByteTensor() - self.output:resizeAs(input):copy(input) - self._mask:gt(self.output, self.maxval) - local byte = torch.type(self.output) == 'torch.CudaTensor' and self._mask - or self._byte:resize(self._mask:size()):copy(self._mask) - self.output[byte] = self.maxval - self._mask:lt(self.output, self.minval) - byte = torch.type(self.output) == 'torch.CudaTensor' and self._mask - or self._byte:resize(self._mask:size()):copy(self._mask) - self.output[byte] = self.minval - return self.output -end - -function Clip:updateGradInput(input, gradOutput) - self.gradInput:set(gradOutput) - return self.gradInput -end - diff --git a/Collapse.lua b/Collapse.lua deleted file mode 100644 index 95fb98e..0000000 --- a/Collapse.lua +++ /dev/null @@ -1,26 +0,0 @@ -local Collapse, parent = torch.class('nn.Collapse', 'nn.Module') - --- collapses non-batch dims -function Collapse:__init(nInputDim) - parent.__init(self) - self.nInputDim = nInputDim -end - -function Collapse:updateOutput(input) - if not input:isContiguous() then - self._input = self._input or input.new() - self._input:resize(input:size()):copy(input) - input = self._input - end - if input:dim() > self.nInputDim then - self.output:view(input,input:size(1),-1) - else - self.output:view(input,-1) - end - return self.output -end - -function Collapse:updateGradInput(input, gradOutput) - self.gradInput:view(gradOutput, input:size()) - return self.gradInput -end diff --git a/Constant.lua b/Constant.lua deleted file mode 100644 index fdfdff4..0000000 --- a/Constant.lua +++ /dev/null @@ -1,36 +0,0 @@ ------------------------------------------------------------------------- ---[[ Constant ]]-- --- Outputs a constant value given an input. --- If nInputDim is specified, uses the input to determine the size of --- the batch. The value is then replicated over the batch. --- You can use this with nn.ConcatTable() to append constant inputs to --- an input : nn.ConcatTable():add(nn.Constant(v)):add(nn.Identity()) . 
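A minimal usage sketch of the ConcatTable pattern mentioned in the header above, assuming the `nn.Constant` API as documented in this deleted file:

```lua
-- Sketch: append a constant tensor alongside the unchanged input,
-- per the pattern documented in the nn.Constant header above.
require 'nn'

local v = torch.Tensor{1, 2, 3}
local concat = nn.ConcatTable()
concat:add(nn.Constant(v))  -- always outputs v (replicated over a batch)
concat:add(nn.Identity())   -- passes the input through unchanged

local output = concat:forward(torch.randn(3)) -- {v, input}
```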
------------------------------------------------------------------------- -local Constant, parent = torch.class("nn.Constant", "nn.Module") - -function Constant:__init(value, nInputDim) - self.value = value - if torch.type(self.value) == 'number' then - self.value = torch.Tensor{self.value} - end - assert(torch.isTensor(self.value), "Expecting number or tensor at arg 1") - self.nInputDim = nInputDim - parent.__init(self) -end - -function Constant:updateOutput(input) - if self.nInputDim and input:dim() > self.nInputDim then - local vsize = self.value:size():totable() - self.output:resize(input:size(1), table.unpack(vsize)) - local value = self.value:view(1, table.unpack(vsize)) - self.output:copy(value:expand(self.output:size())) - else - self.output:resize(self.value:size()):copy(self.value) - end - return self.output -end - -function Constant:updateGradInput(input, gradOutput) - self.gradInput:resizeAs(input):zero() - return self.gradInput -end diff --git a/Convert.lua b/Convert.lua deleted file mode 100644 index 76d20ef..0000000 --- a/Convert.lua +++ /dev/null @@ -1,244 +0,0 @@ ------------------------------------------------------------------------- ---[ nn.Convert ]-- --- Module to convert between different data formats --- nn.Convert('bchw', 'bf') or nn.Convert('chw', 'f') --- Automatically converts input to same type as self.output --- Simplest use is for automatic input type converions : nn.Convert() ------------------------------------------------------------------------- -local _ = require 'moses' -local Convert, parent = torch.class("nn.Convert", "nn.Container") - -function Convert:__init(inputShape, outputShape) - if outputShape and not inputShape then - error"Expecting non-nil arg 1 when arg 2 is provided" - end - inputShape = inputShape or 'b*' - outputShape = outputShape or inputShape - self.inputShape = inputShape:find('b') and inputShape or ('b'..inputShape) - self.outputShape = outputShape:find('b') and outputShape or ('b'..outputShape) - self.inputBatchDim = self.inputShape:find('b') - self.outputBatchDim = self.outputShape:find('b') - if self.inputShape == 'b*' or self.outputShape == 'b*' then - assert(self.inputShape == 'b*' and self.outputShape == 'b*', 'Both or neither shapes must be b*') - self.nInputDim = -1 - self.nOutputDim = -1 - self.transposition = true - else - -- number of dims in batch mode - self.nInputDim = #self.inputShape - self.nOutputDim = #self.outputShape - -- is the outputShape just a transposition of the inputShape? 
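A minimal sketch of the shape conversions described in the header above, assuming the `nn.Convert` API as documented (the module is removed by this commit):

```lua
-- Sketch: collapse a bchw batch into a bf feature matrix, per the
-- nn.Convert header above. Sizes here are arbitrary illustrations.
require 'nn'

local convert = nn.Convert('bchw', 'bf')
local input = torch.randn(8, 3, 4, 4)   -- batch x channels x height x width
local output = convert:forward(input)   -- 8 x 48 (3*4*4 features per sample)

-- With no arguments, nn.Convert() only casts the input to the module's type.
local cast = nn.Convert()
```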
- if self.nInputDim == self.nOutputDim then - self.transposition = true - for i=1,self.nInputDim do - if not self.outputShape:find(self.inputShape:sub(i,i)) then - self.transposition = false - break - end - end - end - end - parent.__init(self) -end - --- post-initialization -function Convert:buildConverter(input) - if self.transposition then - self.converter = self:transpose(self.outputShape) - else - if (torch.type(self[self.outputShape]) ~= 'function') then - error(string.format("Unrecognized conversion of shape %s to %s", self.inputShape, self.outputShape)) - end - self.converter = self[self.outputShape](self, input) - end - assert(torch.isTensor(self.output), "Expecting Tensor output") - - self.converter:type(torch.type(self.output)) - self.converter:serialMode(self.dpnn_serialEmpty, self.dpnn_serialType) - - self.modules[1] = self.converter -end - -function Convert:updateOutput(input) - assert(torch.isTensor(input), "expecting Tensor") - if not torch.isTypeOf(input, torch.type(self.output)) then - -- handle different input type - self._input = self._input or self.output.new() - self._input:resize(input:size()):copy(input) - input = self._input - end - self.batchMode = true - if input:dim() < self.nInputDim then - -- handle non-batch mode - local inputSize = input:size():totable() - table.insert(inputSize, self.inputBatchDim, 1) - self.__input = self.__input or input.new() - self.__input:set(input):resize(unpack(inputSize)) - input = self.__input - self.batchMode = false - end - if not self.converter then - self:buildConverter(input) - end - - self.output = self.converter:updateOutput(input) - - if not self.batchMode then - local outputSize = self.output:size():totable() - table.remove(outputSize, self.outputBatchDim) - self.__output = self.__output or self.output.new() - self.__output:set(self.output):resize(unpack(outputSize)) - self.output = self.__output - end - return self.output -end - -function Convert:updateGradInput(input, gradOutput) - local input_ = input - input = self._input or input - if not self.batchMode then - input = self.__input - self.__gradOutput = self.__gradOutput or gradOutput.new() - self.__gradOutput:set(gradOutput):resize(self.converter.output:size()) - gradOutput = self.__gradOutput - end - - local gradInput = self.converter:updateGradInput(input, gradOutput) - - if not self.batchMode then - self.__gradInput = self.__gradInput or gradInput.new() - self.__gradInput:set(gradInput):resize(input_:size()) - gradInput = self.__gradInput - end - if self._input then - self._gradInput = self._gradInput or input.new() - self._gradInput:resize(input:size()):copy(gradInput) - self.gradInput = self._gradInput - else - self.gradInput = gradInput - end - - return self.gradInput -end - -function Convert:accGradParameters(input, gradOutput, scale) - input = self.batchMode and self.__input or self._input or input - gradOutput = self.batchMode and self.__gradOutput or gradOutput - self.converter:accGradParameters(input, gradOutput, scale) -end - -function Convert:accUpdateGradParameters(input, gradOutput, lr) - input = self.batchMode and self.__input or self._input or input - gradOutput = self.batchMode and self.__gradOutput or gradOutput - self.converter:accUpdateGradParameters(input, gradOutput, lr) -end - --- batch feature -function Convert:bf(input) - local b_pos = self:findAxis('b', self.inputShape) - local dim = #self.inputShape - if self.inputShape == 'bt' then - error"Conversion of shape bt to bf not supported: open an issue on github" - end - -- was b - if dim == 1 
then - return nn.Reshape(1) - end - -- was b... - local modula - if b_pos ~= 1 then - modula = nn.Transpose({1, b_pos}) - end - if dim > 2 then - local transpose = modula - local sampleSize = input:select(self:findAxis('b'),1):nElement() - local reshape = nn.Reshape(sampleSize) - if transpose then - modula = nn.Sequential() - modula:add(transpose) - modula:add(reshape) - else - modula = reshape - end - end - return modula or nn.Identity() -end - --- each example is a scalar; batch is a vector -function Convert:b(input) - local b_pos = self:findAxis('b') - if self.inputShape == 'bt' or self.inputShape == 'tb' then - local t_pos = self:findAxis('t') - -- select first set of classes - return nn.Select(t_pos, 1) - elseif self.inputShape == 'bf' or self.inputShape == 'fb' then - -- this wont work as expected with size(f) > 1 - local f_pos = self:findAxis('f') - if input:size(f_pos) > 1 then - error("Cannot convert shape "..self.inputShape.." to b when feature > 1") - end - return nn.Select(f_pos, 1) - else - error("Cannot convert shape "..self.inputShape.." to shape b") - end -end - --- returns the current shape of the data -function Convert:default() - return nn.Identity() -end - --- multi-class (batch target) -function Convert:bt() - local b_pos = self:findAxis('b') - local modula - if self.inputShape == 'b' then - modula = nn.Reshape(1) - else - error("cannot convert shape '"..self.inputShape.."' to bt") - end - return modula -end - --- a generic function for transposing shape axes -function Convert:transpose(newShape) - if newShape == self.inputShape then - return nn.Identity() - end - local inputShape = {} - for i=1,#self.inputShape do - table.insert(inputShape, self.inputShape:sub(i,i)) - end - local transpositions = {} - for i=1,#newShape do - local j = _.indexOf(inputShape, newShape:sub(i,i)) - if i ~= j then - local char = inputShape[i] - inputShape[i] = inputShape[j] - inputShape[j] = char - table.insert(transpositions, {j, i}) - end - end - return nn.Transpose(unpack(transpositions)) -end - -function Convert:findAxis(axis_char, shape, silent) - shape = shape or self.inputShape - local axis_pos = shape:find(axis_char) - if (not silent) and (not axis_pos) then - error("Provided shape '"..shape.."' has no axis '"..axis_char.."'", 2) - end - return axis_pos -end - -function Convert:type(type) - if not torch.isTypeOf(self.output, type) then - self._input = nil - self._gradInput = nil - self.__input = nil - self.__output = nil - self.__gradInput = nil - self.__gradOutput = nil - end - return parent.type(self, type) -end diff --git a/Criterion.lua b/Criterion.lua index 7f21a8a..11735d5 100644 --- a/Criterion.lua +++ b/Criterion.lua @@ -13,4 +13,8 @@ function Criterion:setZeroMask(zeroMask) if self.criterion then self.criterion:setZeroMask(zeroMask) end +end + +function Criterion:clearState() + return nn.utils.clear(self, 'gradInput') end \ No newline at end of file diff --git a/Dictionary.lua b/Dictionary.lua deleted file mode 100644 index 238283c..0000000 --- a/Dictionary.lua +++ /dev/null @@ -1,6 +0,0 @@ -local Dictionary, parent = torch.class("nn.Dictionary", "nn.LookupTable") - --- don't use this with optim (useless), use nn.LookupTable instead -function Dictionary:__init(dictSize, embeddingSize, accUpdate) - error"DEPRECATED Jan 14, 2016" -end diff --git a/FireModule.lua b/FireModule.lua deleted file mode 100644 index f4e583e..0000000 --- a/FireModule.lua +++ /dev/null @@ -1,47 +0,0 @@ ---[[ - Fire module as explained in SqueezeNet http://arxiv.org/pdf/1602.07360v1.pdf. 
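For reference, a hedged construction sketch of the module defined below, using the fire2 sizes from the SqueezeNet paper (squeeze s1x1=16, expand e1x1=e3x3=64):

```lua
-- Sketch, assuming the (deleted) nn.FireModule constructor below:
-- FireModule(nInputPlane, s1x1, e1x1, e3x3 [, activation]).
require 'nn'

local fire = nn.FireModule(96, 16, 64, 64)  -- fire2-style sizes
local input = torch.randn(2, 96, 55, 55)    -- batch x channels x h x w
local output = fire:forward(input)          -- 2 x (64+64) x 55 x 55
```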
---]] ---FIXME works only for batches. - -local FireModule, Parent = torch.class('nn.FireModule', 'nn.Decorator') - -function FireModule:__init(nInputPlane, s1x1, e1x1, e3x3, activation) - self.nInputPlane = nInputPlane - self.s1x1 = s1x1 - self.e1x1 = e1x1 - self.e3x3 = e3x3 - self.activation = activation or 'ReLU' - - if self.s1x1 > (self.e1x1 + self.e3x3) then - print('Warning: s1x1 is recommended to be smaller'.. - ' than e1x1+e3x3') - end - - self.module = nn.Sequential() - self.squeeze = nn.SpatialConvolution(nInputPlane, s1x1, 1, 1) - self.expand = nn.Concat(2) - self.expand:add(nn.SpatialConvolution(s1x1, e1x1, 1, 1)) - self.expand:add(nn.SpatialConvolution(s1x1, e3x3, 3, 3, 1, 1, 1, 1)) - - -- Fire Module - self.module:add(self.squeeze) - self.module:add(nn[self.activation]()) - self.module:add(self.expand) - self.module:add(nn[self.activation]()) - - Parent.__init(self, self.module) -end - ---[[ -function FireModule:type(type, tensorCache) - assert(type, 'Module: must provide a type to convert to') - self.module = nn.utils.recursiveType(self.module, type, tensorCache) -end ---]] - -function FireModule:__tostring__() - return string.format('%s inputPlanes: %d -> Squeeze Planes: %d -> '.. - 'Expand: %d(1x1) + %d(3x3), activation: %s', - torch.type(self), self.nInputPlane, self.s1x1, - self.e1x1, self.e3x3, self.activation) -end diff --git a/Inception.lua b/Inception.lua deleted file mode 100644 index 7d57c25..0000000 --- a/Inception.lua +++ /dev/null @@ -1,192 +0,0 @@ ------------------------------------------------------------------------- --- [[ Inception ]]-- --- Uses n+2 parallel "columns". The original paper uses 2+2 where --- the first two are (but there could be more than two): --- 1x1 conv (reduce) -> relu -> 5x5 conv -> relu --- 1x1 conv (reduce) -> relu -> 3x3 conv -> relu --- and where the other two are : --- 3x3 maxpool -> 1x1 conv (reduce/project) -> relu --- 1x1 conv (reduce) -> relu. --- This model allows the first group of columns to be of any --- number while the last group consists of exactly two columns. --- The 1x1 convs are used to reduce the number of input channels --- (or filters) such that the capacity of the network doesn't --- explode. We refer to these here as "reduce". Since each --- column seems to have one and only one reduce, their initial --- configuration options are specified in lists of n+2 elements. ------------------------------------------------------------------------- -local Inception, parent = torch.class("nn.Inception", "nn.Decorator") - -function Inception:__init(config) - --[[ Required Arguments ]]-- - -- Number of input channels or colors - self.inputSize = config.inputSize - -- Number of filters in the non-1x1 convolution kernel sizes, e.g. {32,48} - self.outputSize = config.outputSize - -- Number of filters in the 1x1 convolutions (reduction) - -- used in each column, e.g. {48,64,32,32}. The last 2 are - -- used respectively for the max pooling (projection) column - -- (the last column in the paper) and the column that has - -- nothing but a 1x1 conv (the first column in the paper). - -- This table should have two elements more than the outputSize - self.reduceSize = config.reduceSize - - --[[ Optional Arguments ]]-- - -- The strides of the 1x1 (reduction) convolutions. Defaults to {1,1,...} - self.reduceStride = config.reduceStride or {} - -- A transfer function like nn.Tanh, nn.Sigmoid, nn.ReLU, nn.Identity, etc. 
- -- It is used after each reduction (1x1 convolution) and convolution - self.transfer = config.transfer or nn.ReLU() - -- batch normalization can be awesome - self.batchNorm = config.batchNorm - -- Adding padding to the input of the convolutions such that - -- input width and height are same as that of output. - self.padding = true - if config.padding ~= nil then - self.padding = config.padding - end - -- The size (height=width) of the non-1x1 convolution kernels. - self.kernelSize = config.kernelSize or {5,3} - -- The stride (height=width) of the convolution. - self.kernelStride = config.kernelStride or {1,1} - -- The size (height=width) of the spatial max pooling used - -- in the next-to-last column. - self.poolSize = config.poolSize or 3 - -- The stride (height=width) of the spatial max pooling. - self.poolStride = config.poolStride or 1 - -- The pooling layer. - self.pool = config.pool or nn.SpatialMaxPooling(self.poolSize, self.poolSize, self.poolStride, self.poolStride) - - - -- Variables checking that all of the output sizes are the same for a sample input. - local iWidth, iHeight = 100, 200 - local oWidth, oHeight - - -- [[ Module Construction ]]-- - local depthConcat = nn.DepthConcat(2) -- concat on 'c' dimension - -- 1x1 conv (reduce) -> 3x3 conv - -- 1x1 conv (reduce) -> 5x5 conv - -- ... - for i=1,#self.kernelSize do - local mlp = nn.Sequential() - -- 1x1 conv - local reduce = nn.SpatialConvolution( - self.inputSize, self.reduceSize[i], 1, 1, - self.reduceStride[i] or 1, self.reduceStride[i] or 1 - ) - mlp:add(reduce) - if self.batchNorm then - mlp:add(nn.SpatialBatchNormalization(self.reduceSize[i])) - end - mlp:add(self.transfer:clone()) - - -- nxn conv - local pad = self.padding and math.floor(self.kernelSize[i]/2) or 0 - local conv = nn.SpatialConvolution( - self.reduceSize[i], self.outputSize[i], - self.kernelSize[i], self.kernelSize[i], - self.kernelStride[i], self.kernelStride[i], - pad - ) - mlp:add(conv) - if self.batchNorm then - mlp:add(nn.SpatialBatchNormalization(self.outputSize[i])) - end - mlp:add(self.transfer:clone()) - depthConcat:add(mlp) - - -- Check the output sizes. - local oWidth_i = torch.floor( - (iWidth + 2*pad - self.kernelSize[i])/self.kernelStride[i] + 1) - local oHeight_i = torch.floor( - (iHeight + 2*pad - self.kernelSize[i])/self.kernelStride[i] + 1) - if oWidth == nil then - oWidth = oWidth_i - oHeight = oHeight_i - else - if oWidth ~= oWidth_i or oHeight ~= oHeight_i then - print("dpnn.Inception: Warning: Inconsistent output sizes.") - end - end - end - - -- pool -> 1x1 conv - local mlp = nn.Sequential() - mlp:add(self.pool) - -- not sure if transfer should go here? mlp:add(transfer:clone()) - local i = #(self.kernelSize) + 1 - if self.reduceSize[i] then - local reduce = nn.SpatialConvolution( - self.inputSize, self.reduceSize[i], 1, 1, - self.reduceStride[i] or 1, self.reduceStride[i] or 1 - ) - mlp:add(reduce) - if self.batchNorm then - mlp:add(nn.SpatialBatchNormalization(self.reduceSize[i])) - end - mlp:add(self.transfer:clone()) - end - depthConcat:add(mlp) - - -- Check the output sizes. Infer the operation of the pooling layer. 
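Putting the required arguments above together, a hedged sketch assuming the `nn.Inception` constructor documented here, where the two extra `reduceSize` entries feed the pool-projection and 1x1-only columns:

```lua
-- Minimal sketch, assuming the dpnn-era nn.Inception constructor:
-- two conv columns (5x5 and 3x3 by default) plus the pool and 1x1 columns.
require 'nn'

local inception = nn.Inception{
   inputSize  = 128,             -- input channels
   outputSize = {32, 48},        -- filters for the 5x5 and 3x3 columns
   reduceSize = {48, 64, 32, 32} -- 1x1 reductions; the last two feed the
                                 -- pool-projection and 1x1-only columns
}

local input = torch.randn(2, 128, 32, 32) -- batch x channels x height x width
local output = inception:forward(input)   -- 2 x (32+48+32+32) x 32 x 32
print(output:size())
```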
- if self.pool.kW ~= nil and self.pool.dW ~= nil and self.pool.padW ~= nil then - assert(oWidth ~= nil) - assert(oHeight ~= nil) - local oWidth_pool = torch.floor( - (iWidth + 2*self.pool.padW - self.pool.kW)/self.pool.dW + 1) - local oHeight_pool = torch.floor( - (iHeight + 2*self.pool.padH - self.pool.kH)/self.pool.dH + 1) - if oWidth ~= oWidth_pool or oHeight ~= oHeight_pool then - print("dpnn.Inception: Warning: Inconsistent output sizes in pooling.") - end - end - - -- reduce: 1x1 conv (channel-wise pooling) - i = i + 1 - if self.reduceSize[i] then - local mlp = nn.Sequential() - local reduce = nn.SpatialConvolution( - self.inputSize, self.reduceSize[i], 1, 1, - self.reduceStride[i] or 1, self.reduceStride[i] or 1 - ) - mlp:add(reduce) - if self.batchNorm then - mlp:add(nn.SpatialBatchNormalization(self.reduceSize[i])) - end - mlp:add(self.transfer:clone()) - depthConcat:add(mlp) - - -- Check the output sizes. - local oWidth_conv = torch.floor((iWidth - 1)/(self.reduceStride[i] or 1) + 1) - local oHeight_conv = torch.floor((iHeight - 1)/(self.reduceStride[i] or 1) + 1) - if oWidth ~= oWidth_conv or oHeight ~= oHeight_conv then - print("dpnn.Inception: Warning: Inconsistent output sizes in 1x1 conv.") - end - end - - parent.__init(self, depthConcat) -end - -function Inception:updateOutput(input) - local input = self:toBatch(input, 3) - local output = self.modules[1]:updateOutput(input) - self.output = self:fromBatch(output, 3) - return self.output -end - -function Inception:updateGradInput(input, gradOutput) - local input, gradOutput = self:toBatch(input, 3), self:toBatch(gradOutput, 3) - local gradInput = self.modules[1]:updateGradInput(input, gradOutput) - self.gradInput = self:fromBatch(gradInput, 3) - return self.gradInput -end - -function Inception:accGradParameters(input, gradOutput, scale) - local input, gradOutput = self:toBatch(input, 3), self:toBatch(gradOutput, 3) - self.modules[1]:accGradParameters(input, gradOutput, scale) -end - -function Inception:accUpdateGradParameters(input, gradOutput, lr) - local input, gradOutput = self:toBatch(input, 3), self:toBatch(gradOutput, 3) - self.modules[1]:accUpdateGradParameters(input, gradOutput, lr) -end diff --git a/Kmeans.lua b/Kmeans.lua deleted file mode 100644 index b67401f..0000000 --- a/Kmeans.lua +++ /dev/null @@ -1,207 +0,0 @@ --- Online (Hard) Kmeans layer. -local Kmeans, parent = torch.class('nn.Kmeans', 'nn.Module') - -function Kmeans:__init(k, dim, scale) - parent.__init(self) - self.k = k - self.dim = dim - - -- scale for online kmean update - self.scale = scale - - assert(k > 0, "Clusters cannot be 0 or negative.") - assert(dim > 0, "Dimensionality cannot be 0 or negative.") - - -- Kmeans centers -> self.weight - self.weight = torch.Tensor(self.k, self.dim) - - self.gradWeight = torch.Tensor(self.weight:size()) - self.loss = 0 -- within cluster error of the last forward - - self.clusterSampleCount = torch.Tensor(self.k) - - self:reset() -end - --- Reset -function Kmeans:reset(stdev) - stdev = stdev or 1 - self.weight:uniform(-stdev, stdev) -end - --- Initialize Kmeans weight with random samples from input. -function Kmeans:initRandom(input) - local inputDim = input:nDimension() - assert(inputDim == 2, "Incorrect input dimensionality. 
Expecting 2D.") - - local noOfSamples = input:size(1) - local dim = input:size(2) - assert(dim == self.dim, "Dimensionality of input and weight don't match.") - assert(noOfSamples >= self.k, "Need atleast k samples for initialization.") - - local indices = torch.zeros(self.k) - indices:random(1, noOfSamples) - - for i=1, self.k do - self.weight[i]:copy(input[indices[i]]) - end -end - --- Initialize using Kmeans++ -function Kmeans:initKmeansPlus(input, p) - self.p = p or self.p or 0.95 - assert(self.p>=0 and self.p<=1, "P value should be between 0-1.") - - local inputDim = input:nDimension() - assert(inputDim == 2, "Incorrect input dimensionality. Expecting 2D.") - local noOfSamples = input:size(1) - - local pcount = math.ceil((1-self.p)*noOfSamples) - if pcount <= 0 then pcount = 1 end - - local initializedK = 1 - self.weight[initializedK]:copy(input[torch.random(noOfSamples)]) - initializedK = initializedK + 1 - - local clusters = self.weight.new() - local clusterDistances = self.weight.new() - local temp = self.weight.new() - local expandedSample = self.weight.new() - local distances = self.weight.new() - distances:resize(noOfSamples):fill(math.huge) - local maxScores = self.weight.new() - local maxIndx = self.weight.new() - - for k=initializedK, self.k do - clusters = self.weight[{{initializedK-1, initializedK-1}}] - for i=1, noOfSamples do - temp:expand(input[{{i}}], 1, self.dim) - expandedSample:resize(temp:size()):copy(temp) - - -- Squared Euclidean distance - expandedSample:add(-1, clusters) - clusterDistances:norm(expandedSample, 2, 2) - clusterDistances:pow(2) - distances[i] = math.min(clusterDistances:min(), distances[i]) - end - maxScores, maxIndx = distances:sort(true) - local tempIndx = torch.random(pcount) - local indx = maxIndx[tempIndx] - self.weight[initializedK]:copy(input[indx]) - initializedK = initializedK + 1 - end -end - --- Kmeans updateOutput (forward) -function Kmeans:updateOutput(input) - local inputDim = input:nDimension() - assert(inputDim == 2, "Incorrect input dimensionality. 
Expecting 2D.") - - local batchSize = input:size(1) - local dim = input:size(2) - assert(dim == self.dim, "Dimensionality of input and weight don't match.") - - assert(input:isContiguous(), "Input is not contiguous.") - - -- a sample copied k times to compute distance between sample and weight - self._expandedSamples = self._expandedSamples or self.weight.new() - - -- distance between a sample and weight - self._clusterDistances = self._clusterDistances or self.weight.new() - - self._temp = self._temp or input.new() - self._tempExpanded = self._tempExpanded or input.new() - - -- Expanding inputs - self._temp:view(input, 1, batchSize, self.dim) - self._tempExpanded:expand(self._temp, self.k, batchSize, self.dim) - self._expandedSamples:resize(self.k, batchSize, self.dim) - :copy(self._tempExpanded) - - -- Expanding weights - self._tempWeight = self._tempWeight or self.weight.new() - self._tempWeightExp = self._tempWeightExp or self.weight.new() - self._expandedWeight = self._expanedWeight or self.weight.new() - self._tempWeight:view(self.weight, self.k, 1, self.dim) - self._tempWeightExp:expand(self._tempWeight, self._expandedSamples:size()) - self._expandedWeight:resize(self.k, batchSize, self.dim) - :copy(self._tempWeightExp) - - -- x-c - self._expandedSamples:add(-1, self._expandedWeight) - -- Squared Euclidean distance - self._clusterDistances:norm(self._expandedSamples, 2, 3) - self._clusterDistances:pow(2) - self._clusterDistances:resize(self.k, batchSize) - - self._minScore = self._minScore or self.weight.new() - self._minIndx = self._minIndx or (torch.isCudaTensor(input) and torch.CudaLongTensor() or torch.LongTensor()) - self._minScore:min(self._minIndx, self._clusterDistances, 1) - self._minIndx:resize(batchSize) - - self.output:resize(batchSize):copy(self._minIndx) - self.loss = self._minScore:sum() - - return self.output -end - --- Kmeans has its own criterion hence gradInput are zeros -function Kmeans:updateGradInput(input, gradOuput) - self.gradInput:resize(input:size()):zero() - - return self.gradInput -end - --- We define kmeans update rule as c -> c + scale * 1/n * sum_i (x-c). --- n is no. of x's belonging to c. --- With this update rule and gradient descent will be negative the gradWeights. 
-function Kmeans:accGradParameters(input, gradOutput, scale) - local scale = self.scale or scale or 1 - assert(scale > 0 , " Scale has to be positive.") - - -- Update cluster sample count - local batchSize = input:size(1) - self._cscAdder = self._cscAdder or self.weight.new() - self._cscAdder:resize(batchSize):fill(1) - self.clusterSampleCount:zero() - self.clusterSampleCount:indexAdd(1, self._minIndx, self._cscAdder) - - -- scale * (x[k]-c[k]) where k is nearest cluster to x - self._gradWeight = self._gradWeight or self.gradWeight.new() - self._gradWeight:index(self.weight, 1, self._minIndx) - self._gradWeight:mul(-1) - self._gradWeight:add(input) - self._gradWeight:mul(-scale) - - self._gradWeight2 = self._gradWeight2 or self.gradWeight.new() - self._gradWeight2:resizeAs(self.gradWeight):zero() - self._gradWeight2:indexAdd(1, self._minIndx, self._gradWeight) - - -- scale/n * sum_i (x-c) - self._ccounts = self._ccounts or self.clusterSampleCount.new() - self._ccounts:resize(self.k):copy(self.clusterSampleCount) - self._ccounts:add(0.0000001) -- prevent division by zero errors - - self._gradWeight2:cdiv(self._ccounts:view(self.k,1):expandAs(self.gradWeight)) - - self.gradWeight:add(self._gradWeight2) -end - -function Kmeans:clearState() - -- prevent premature memory allocations - self._expandedSamples = nil - self._clusterDistances = nil - self._temp = nil - self._tempExpanded = nil - self._tempWeight = nil - self._tempWeightExp = nil - self._expandedWeight = nil - self._minScore = nil - self._minIndx = nil - self._cscAdder = nil -end - -function Kmeans:type(type, tensorCache) - self:clearState() - return parent.type(self, type, tensorCache) -end diff --git a/LinearNoBias.lua b/LinearNoBias.lua deleted file mode 100644 index 3b4fd27..0000000 --- a/LinearNoBias.lua +++ /dev/null @@ -1,65 +0,0 @@ ------------------------------------------------------------------------- ---[[ LinearNoBias ]]-- --- Subclass of nn.Linear with no bias term ------------------------------------------------------------------------- -nn = require 'nn' -local LinearNoBias, Linear = torch.class('nn.LinearNoBias', 'nn.Linear') - -function LinearNoBias:__init(inputSize, outputSize) - nn.Module.__init(self) - - self.weight = torch.Tensor(outputSize, inputSize) - self.gradWeight = torch.Tensor(outputSize, inputSize) - - self:reset() -end - -function LinearNoBias:reset(stdv) - if stdv then - stdv = stdv * math.sqrt(3) - else - stdv = 1./math.sqrt(self.weight:size(2)) - end - if nn.oldSeed then - for i=1,self.weight:size(1) do - self.weight:select(1, i):apply(function() - return torch.uniform(-stdv, stdv) - end) - end - else - self.weight:uniform(-stdv, stdv) - end - - return self -end - -function LinearNoBias:updateOutput(input) - if input:dim() == 1 then - self.output:resize(self.weight:size(1)) - self.output:mv(self.weight, input) - elseif input:dim() == 2 then - local nframe = input:size(1) - local nElement = self.output:nElement() - self.output:resize(nframe, self.weight:size(1)) - if self.output:nElement() ~= nElement then - self.output:zero() - end - if not self.addBuffer or self.addBuffer:nElement() ~= nframe then - self.addBuffer = input.new(nframe):fill(1) - end - self.output:addmm(0, self.output, 1, input, self.weight:t()) - else - error('input must be vector or matrix') - end - - return self.output -end - -function LinearNoBias:accGradParameters(input, gradOutput, scale) - scale = scale or 1 - if input:dim() == 1 then - self.gradWeight:addr(scale, gradOutput, input) - elseif input:dim() == 2 then - 
self.gradWeight:addmm(scale, gradOutput:t(), input) - end -end diff --git a/MaskZeroCriterion.lua b/MaskZeroCriterion.lua index bc81f71..617d940 100644 --- a/MaskZeroCriterion.lua +++ b/MaskZeroCriterion.lua @@ -7,7 +7,7 @@ local MaskZeroCriterion, parent = torch.class("nn.MaskZeroCriterion", "nn.Criter function MaskZeroCriterion:__init(criterion, v1) parent.__init(self) - self.criterion = criterion + self.criterion = assert(criterion) assert(torch.isTypeOf(criterion, 'nn.Criterion')) self.v2 = not v1 end @@ -28,11 +28,12 @@ function MaskZeroCriterion:updateOutput(input, target) if self.isEmptyBatch then self.output = 0 else + local first = nn.utils.recursiveGetFirst(input) -- e.g. 0,1,0 -> 1,0,1 self._oneMask = self._oneMask or self.zeroMask.new() self._oneMask:lt(self.zeroMask, 1) -- 1,0,1 -> 1,3 - self._indices = self._indices or torch.isCudaTensor(input) and torch.CudaLongTensor() or torch.LongTensor() + self._indices = self._indices or torch.isCudaTensor(first) and torch.CudaLongTensor() or torch.LongTensor() self._range = self._range or self._indices.new() self._range:range(1,self._oneMask:nElement()) self._indices:maskedSelect(self._range, self._oneMask) @@ -75,10 +76,13 @@ function MaskZeroCriterion:clearState() self.output = nil self.gradInput = nil self._gradInput = nil + self.criterion:clearState() + return parent.clearState(self) end function MaskZeroCriterion:type(type, ...) self:clearState() + self.criterion:type(type, ...) return parent.type(self, type, ...) end diff --git a/Module.lua b/Module.lua index 58d43ff..c613959 100644 --- a/Module.lua +++ b/Module.lua @@ -266,134 +266,6 @@ function Module:getParameters() return Module.flatten(parameters), Module.flatten(gradParameters) end ------------------ serialization (see nn.Serial) ------------------- - -Module.dpnn_mediumEmpty = {'output', 'gradInput', 'momGradParams', 'dpnn_input'} -Module.dpnn_lightEmpty = Module.dpnn_gradParameters --- defaults to heavy serialization -Module.dpnn_serialEmpty = {} - --- sets the serialization behavior of the entire module structure -function Module:serialMode(empty) - assert(torch.type(empty) == 'table', "Expecting table at arg 1") - self.dpnn_serialEmpty = empty - -- set the serial of all encapsulated modules - local function recursiveSerial(tbl) - for k,v in pairs(tbl) do - if torch.isTypeOf(v, 'nn.Module') then - v:serialMode(empty) - elseif torch.type(v) == 'table' then - recursiveSerial(v) - end - end - end - recursiveSerial(self) - return self -end - --- serialMode : serialize everything -function Module:heavySerial() - return self:serialMode({}) -end - --- serialMode : serialize everything except dpnn_mediumEmpty attributes -function Module:mediumSerial() - - self.dpnn_serialEmpty = self.dpnn_mediumEmpty - - -- set the serial of all encapsulated modules - local function recursiveSerial(tbl) - for k,v in pairs(tbl) do - if torch.isTypeOf(v, 'nn.Module') then - v:mediumSerial() - elseif torch.type(v) == 'table' then - recursiveSerial(v) - end - end - end - recursiveSerial(self) - return self -end - --- serialMode : serialize everything except dpnn_mediumEmpty and dpnn_lightEmpty attributes -function Module:lightSerial() - - self.dpnn_serialEmpty = _.clone(self.dpnn_mediumEmpty) - for k,v in ipairs(self.dpnn_lightEmpty) do - table.insert(self.dpnn_serialEmpty, v) - end - - -- set the serial of all encapsulated modules - local function recursiveSerial(tbl) - for k,v in pairs(tbl) do - if torch.isTypeOf(v, 'nn.Module') then - v:lightSerial() - elseif torch.type(v) == 'table' then - 
recursiveSerial(v) - end - end - end - recursiveSerial(self) - - return self -end - -function Module:getSerialState(states) - states = states or {} - - -- dont get the serial state of the same module twice (reuse existing) - if states[self] then - return states[self] - end - - local _ = require 'moses' - -- returns the object structure as tables (i.e. without metatables) - local function recursiveState(tbl) - local state = _.map(tbl, - function(k,v) - if torch.isTypeOf(tbl, 'nn.Module') and _.contains(tbl.dpnn_serialEmpty, k) then - -- "empties" module attributes found in empty - if torch.type(v) == 'table' then - -- empty table - return {} - elseif torch.isTensor(v) then - -- empty tensor - return v.new() - else - -- not table nor tensor? then serialize as is - return v - end - elseif torch.isTypeOf(v, 'nn.Module') then - -- recursive, yet can be overwritten - return v:getSerialState(states) - elseif torch.type(v) == 'table' then - -- in case it is a table of modules - if not states[v] then - states[v] = recursiveState(v) - end - return states[v] - else - return v - end - end - ) - return state - end - - local state = recursiveState(self) - - -- include typename so that module can be reconstructed from the state - state.dpnn_typename = torch.type(self) - states[self] = state - - return state -end - --- decorates self with nn.Serial -function Module:Serial(tensortype) - return nn.Serial(self, tensortype) -end - ----------------------- for training ----------------------------- -- useful to get the output size diff --git a/ModuleCriterion.lua b/ModuleCriterion.lua deleted file mode 100644 index bfc79ef..0000000 --- a/ModuleCriterion.lua +++ /dev/null @@ -1,44 +0,0 @@ -local ModuleCriterion, parent = torch.class("nn.ModuleCriterion", "nn.Criterion") - -function ModuleCriterion:__init(criterion, inputModule, targetModule, castTarget) - self.inputModule = inputModule - self.targetModule = targetModule - self.castTarget = (castTarget == nil) and true or castTarget - if self.inputModule then - local params = self.inputModule:parameters() - if params and #params > 0 then - print"Warning: nn.ModuleCriterion doesn't support parameter updates" - end - end - self.criterion = criterion -end - -function ModuleCriterion:updateOutput(input, target) - if self.inputModule then - self.input = self.inputModule:forward(input) - end - if self.targetModule then - self.target = self.targetModule:forward(target) - end - self.output = self.criterion:forward(self.input or input, self.target or target) - return self.output -end - -function ModuleCriterion:updateGradInput(input, target) - self.gradInput = self.criterion:backward(self.input or input, self.target or target) - if self.inputModule then - self.gradInput = self.inputModule:backward(input, self.gradInput) - end - return self.gradInput -end - -function ModuleCriterion:type(type, typecache) - if self.inputModule then - self.inputModule:type(type, typecache) - end - if self.castTarget and self.targetModule then - self.targetModule:type(type, typecache) - end - self.criterion:type(type, typecache) - return parent.type(self, type, typecache) -end diff --git a/NCECriterion.lua b/NCECriterion.lua index 1a6b935..d4cde60 100644 --- a/NCECriterion.lua +++ b/NCECriterion.lua @@ -7,32 +7,31 @@ local NCECriterion, parent = torch.class("nn.NCECriterion", "nn.Criterion") local eps = 0.0000001 function NCECriterion:__init() - parent.__init(self) + parent.__init(self) self.sizeAverage = true - - self.gradInput = {torch.Tensor(), torch.Tensor(), torch.Tensor(), 
torch.Tensor()} + self.gradInput = {torch.Tensor(), torch.Tensor(), torch.Tensor(), torch.Tensor()} end function NCECriterion:updateOutput(inputTable, target) -- P_model(target), P_model(sample), P_noise(target), P_noise(sample) local Pmt, Pms, Pnt, Pns = unpack(inputTable) local k = Pms:size(2) - + assert(Pmt:dim() == 1) assert(Pms:dim() == 2) assert(Pnt:dim() == 1) assert(Pns:dim() == 2) - + -- equation 5 in ref. A - - -- eq 5.1 : P(origin=model) = Pmt / (Pmt + k*Pnt) + + -- eq 5.1 : P(origin=model) = Pmt / (Pmt + k*Pnt) self._Pom = self._Pom or Pmt.new() self._Pom:resizeAs(Pmt):copy(Pmt) self._Pomdiv = self._Pomdiv or Pmt.new() self._Pomdiv:resizeAs(Pmt):copy(Pmt) self._Pomdiv:add(k, Pnt):add(eps) self._Pom:cdiv(self._Pomdiv) - + -- eq 5.2 : P(origin=noise) = k*Pns / (Pms + k*Pns) self._Pon = self._Pon or Pns.new() self._Pon:resizeAs(Pns):copy(Pns):mul(k) @@ -40,34 +39,35 @@ function NCECriterion:updateOutput(inputTable, target) self._Pondiv:resizeAs(Pms):copy(Pms) self._Pondiv:add(k, Pns):add(eps) self._Pon:cdiv(self._Pondiv) - + -- equation 6 in ref. A - + self._lnPom = self._lnPom or self._Pom.new() self._lnPom:log(self._Pom) - + self._lnPon = self._lnPon or self._Pon.new() self._lnPon:log(self._Pon) - + local lnPomsum = self._lnPom:sum() local lnPonsum = self._lnPon:sum() - + self.output = - (lnPomsum + lnPonsum) - + if self.sizeAverage then self.output = self.output / Pmt:size(1) end - + return self.output end function NCECriterion:updateGradInput(inputTable, target) + self.gradInput = self.gradInput or nn.utils.recursiveNew(inputTable) assert(#self.gradInput == 4) local Pmt, Pms, Pnt, Pns = unpack(inputTable) local k = Pms:size(2) - + -- equation 7 in ref. A - + -- d ln(Pom) / d input = -k*Pnt / ( Pmt * (Pmt + k*Pnt) ) local dlnPom = self.gradInput[1] dlnPom = dlnPom or Pnt.new() @@ -76,7 +76,7 @@ function NCECriterion:updateGradInput(inputTable, target) Pmt:add(eps) dlnPom:cdiv(Pmt) -- d ln(Pmt) / d Pmt = 1 / d Pmt Pmt:add(-eps) - + -- d ln(Pon) / d input = Pms / ( Pms * (Pms + k*Pns) ) local dlnPon = self.gradInput[2] dlnPon = dlnPon or Pms.new() @@ -85,18 +85,35 @@ function NCECriterion:updateGradInput(inputTable, target) Pms:add(eps) dlnPon:cdiv(Pms) -- d ln(Pms) / d Pms = 1 / d Pms Pms:add(-eps) - + if self.gradInput[3]:nElement() ~= Pnt:nElement() then self.gradInput[3]:resizeAs(Pnt):zero() end if self.gradInput[4]:nElement() ~= Pns:nElement() then self.gradInput[4]:resizeAs(Pns):zero() end - + if self.sizeAverage then dlnPom:div(Pmt:size(1)) dlnPon:div(Pmt:size(1)) end - - return self.gradInput + + return self.gradInput +end + +function NCECriterion:clearState() + self._Pom = nil + self._Pomdiv = nil + self._Pon = nil + self._Pondiv = nil + self._lnPon = nil + self._lnPom = nil + self.gradInput = nil + parent.clearState(self) + return self +end + +function NCECriterion:type(...) + self:clearState() + return parent.type(self, ...) 
end diff --git a/NCEModule.lua b/NCEModule.lua index a3df10c..3172a15 100644 --- a/NCEModule.lua +++ b/NCEModule.lua @@ -4,19 +4,7 @@ ------------------------------------------------------------------------ local _ = require 'moses' local NCEModule, parent = torch.class("nn.NCEModule", "nn.Linear") -NCEModule.version = 6 -- better bias init - --- for efficient serialization using nn.Serial -local empty = _.clone(parent.dpnn_mediumEmpty) -table.insert(empty, 'sampleidx') -table.insert(empty, 'sampleprob') -table.insert(empty, '_noiseidx') -table.insert(empty, '_noiseprob') -table.insert(empty, '_weight') -table.insert(empty, '_gradWeight') -table.insert(empty, '_gradOutput') -table.insert(empty, '_tgradOutput') -NCEModule.dpnn_mediumEmpty = empty +NCEModule.version = 7 -- remove support for nn.Serial; use clearState() -- for sharedClone local params = _.clone(parent.dpnn_parameters) @@ -333,44 +321,6 @@ function NCEModule:accGradParameters(inputTable, gradOutput, scale) end end -function NCEModule:type(type, cache) - if type then - self.sampleidx = nil - self.sampleprob = nil - self._noiseidx = nil - self._noiseprob = nil - self._metaidx = nil - self._gradOutput = nil - self._tgradOutput = nil - self._gradWeight = nil - self._weight = nil - end - local unigrams = self.unigrams - self.unigrams = nil - local am = self.aliasmultinomial - - local rtn - if type and torch.type(self.weight) == 'torch.MultiCudaTensor' then - assert(type == 'torch.CudaTensor', "Cannot convert a multicuda NCEModule to anything other than cuda") - local weight = self.weight - local gradWeight = self.gradWeight - self.weight = nil - self.gradWeight = nil - - rtn = parent.type(self, type, cache) - - assert(torch.type(self.aliasmultinomial.J) ~= 'torch.CudaTensor') - self.weight = weight - self.gradWeight = gradWeight - else - rtn = parent.type(self, type, cache) - end - - self.unigrams = unigrams - self.aliasmultinomial = am - return rtn -end - function NCEModule:noiseProb(sampleprob, sampleidx) assert(sampleprob) assert(sampleidx) @@ -404,6 +354,9 @@ function NCEModule:clearState() self._noiseprob = nil self._tgradOutput = nil self._gradOutput = nil + self._gradWeight = nil + self._weight = nil + self._metaidx = nil if torch.isTensor(self.output) then self.output:set() else @@ -416,6 +369,36 @@ function NCEModule:clearState() end end +function NCEModule:type(type, cache) + if type then + self:clearState() + end + local unigrams = self.unigrams + self.unigrams = nil + local am = self.aliasmultinomial + + local rtn + if type and torch.type(self.weight) == 'torch.MultiCudaTensor' then + assert(type == 'torch.CudaTensor', "Cannot convert a multicuda NCEModule to anything other than cuda") + local weight = self.weight + local gradWeight = self.gradWeight + self.weight = nil + self.gradWeight = nil + + rtn = parent.type(self, type, cache) + + assert(torch.type(self.aliasmultinomial.J) ~= 'torch.CudaTensor') + self.weight = weight + self.gradWeight = gradWeight + else + rtn = parent.type(self, type, cache) + end + + self.unigrams = unigrams + self.aliasmultinomial = am + return rtn +end + function NCEModule:multicuda(device1, device2) assert(device1 and device2, "specify two devices as arguments") require 'torchx' diff --git a/NormStabilizer.lua b/NormStabilizer.lua index 3e11f8f..7b56a60 100644 --- a/NormStabilizer.lua +++ b/NormStabilizer.lua @@ -21,7 +21,6 @@ function NS:_updateOutput(input) assert(input:dim() == 2) local output if self.train ~= false then - self:recycle() local rm = self:getStepModule(self.step) output = 
rm:updateOutput(input) -- in training mode, we also calculate norm of hidden state diff --git a/OneHot.lua b/OneHot.lua deleted file mode 100644 index 702e162..0000000 --- a/OneHot.lua +++ /dev/null @@ -1,65 +0,0 @@ -local OneHot, parent = torch.class('nn.OneHot', 'nn.Module') - --- adapted from https://github.com/karpathy/char-rnn --- and https://github.com/hughperkins/char-lstm - -function OneHot:__init(outputSize) - parent.__init(self) - self.outputSize = outputSize -end - -function OneHot:updateOutput(input) - local size - if type(input) == 'number' then - if self:type() == 'torch.CudaTensor' then - self._single = self._single or torch.CudaTensor():resize(1); - else - self._single = self._single or torch.LongTensor():resize(1); - end - self._single[1] = input - input = self._single; - size = {} - else - size = input:size():totable() - end - table.insert(size, self.outputSize) - - self.output:resize(unpack(size)):zero() - - size[#size] = 1 - local input_ = input:view(unpack(size)) - - if torch.type(input) == 'torch.CudaTensor' or torch.type(input) == 'torch.ClTensor' then - self.output:scatter(self.output:dim(), input_, 1) - else - if torch.type(self.output) == 'torch.CudaTensor' then - -- input is not cuda, module is, cast input to cuda - self._input = self._input or torch.CudaTensor() - self._input:resize(input_:size()):copy(input_) - input_ = self._input - elseif torch.type(input) ~= 'torch.LongTensor' then - -- input is not long, module isnot cuda, cast input to long - self._input = self._input or torch.LongTensor() - self._input:resize(input_:size()):copy(input_) - input_ = self._input - end - self.output:scatter(self.output:dim(), input_, 1) - end - - return self.output -end - -function OneHot:updateGradInput(input, gradOutput) - if type(input) == 'number' then - return 0 - else - self.gradInput:resize(input:size()):zero() - return self.gradInput - end -end - -function OneHot:type(type, typecache) - self._single = nil - self._input = nil - return parent.type(self, type, typecache) -end diff --git a/PCAColorTransform.lua b/PCAColorTransform.lua deleted file mode 100644 index 69f16d1..0000000 --- a/PCAColorTransform.lua +++ /dev/null @@ -1,117 +0,0 @@ ---[[ - Color transformation module: Commonly used data augmentation technique. - Random color noise is added to input image/images based on the Principal - Component Analysis (PCA) of pixel values. - - Arguments - -> eigenVectors: Each row represent an eigen vector. - -> eigenValues: Corresponding eigen values. - -> std: std of gaussian distribution for augmentation (default 0.1). 
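A hedged construction sketch for the arguments listed above, with placeholder eigen pairs (in practice they come from a PCA over training-set pixel values):

```lua
-- Hedged sketch, assuming the (deleted) nn.PCAColorTransform API.
-- The eigen pairs here are placeholders, not real PCA output.
require 'nn'

local eigenVectors = torch.eye(3)             -- 3 x 3, one eigenvector per row
local eigenValues  = torch.Tensor{0.2, 0.1, 0.05}
local pca = nn.PCAColorTransform(3, eigenVectors, eigenValues, 0.1)

local images = torch.rand(4, 3, 32, 32)  -- batch x channels x height x width
local augmented = pca:forward(images)    -- adds PCA-aligned color noise (train mode)
```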
---]] - -local PCAColorTransform, Parent = torch.class('nn.PCAColorTransform', 'nn.Module') - -function PCAColorTransform:__init(inputChannels, eigenVectors, eigenValues, std) - Parent.__init(self) - - self.train = true - self.inputChannels = inputChannels - assert(inputChannels == eigenVectors:size(1), - "Number of input channels do not match number of eigen vectors.") - assert(eigenVectors:size(2) == eigenVectors:size(1), - "Invalid dimensionality: eigen vectors.") - assert(inputChannels == eigenValues:nElement(), - "Number of input channels do not match number of eigen values.") - - self.eigenVectors = eigenVectors - self.eigenValues = eigenValues - self.std = std or 0.1 -end - -function PCAColorTransform:updateOutput(input) - self.output:resizeAs(input):copy(input) - if self.train then - self.noise = self.noise or self.output.new() - self.alphas = self.alphas or self.output.new() - self._tempNoise = self._tempNoise or self.output.new() - self._tempNoiseExpanded = self._tempNoiseExpanded or self.output.new() - self._tempNoiseSamples = self._tempNoiseSamples or self.output.new() - self._tempLambda = self._tempLambda or self.output.new() - self._tempLambdaExpanded = self._tempLambdaExpanded or self.output.new() - - if self.output:nDimension() == 4 then - local batchSize = self.output:size(1) - local channels = self.output:size(2) - local height = self.output:size(3) - local width = self.output:size(4) - assert(channels == self.inputChannels) - - -- Randomly sample noise for each channel and scale by eigen values - self.alphas:resize(channels, batchSize) - self.alphas:normal(0, self.std) - self._tempLambda = self.eigenValues:view(self.inputChannels, 1) - self._tempLambdaExpanded = self._tempLambda:expand(channels, batchSize) - self.alphas:cmul(self._tempLambdaExpanded) - - -- Scale by eigen vectors - self.noise:resize(batchSize, self.inputChannels):zero() - self.noise:t():addmm(self.eigenVectors, self.alphas) - - -- Add noise to the input - self._tempNoise = self.noise:view(batchSize, self.inputChannels, 1, 1) - self._tempNoiseExpanded:expand(self._tempNoise, batchSize, - channels, height, width) - self.output:add(self._tempNoiseExpanded) - - elseif self.output:nDimension() == 3 then - local channels = self.output:size(1) - local height = self.output:size(2) - local width = self.output:size(3) - assert(channels == self.inputChannels) - - -- Randomly sample noise for each channel and scale by eigen values - self.alphas:resize(channels, 1) - self.alphas:normal(0, self.std) - self._tempLambda = self.eigenValues:view(self.inputChannels, 1) - self._tempLambdaExpanded = self._tempLambda:expand(channels, 1) - self.alphas:cmul(self._tempLambdaExpanded) - - -- Scale by eigen vectors - self.noise:resize(1, self.inputChannels):zero() - self.noise:t():addmm(self.eigenVectors, self.alphas) - - -- Add noise to the input - self._tempNoise = self.noise:view(self.inputChannels, 1, 1) - self._tempNoiseExpanded:expand(self._tempNoise, channels, - height, width) - self.output:add(self._tempNoiseExpanded) - else - error("Invalid input dimensionality.") - end - end - return self.output -end - -function PCAColorTransform:updateGradInput(input, gradOutput) - if self.train then - self.gradInput:resizeAs(gradOutput):copy(gradOutput) - else - error('backprop only defined while training') - end - return self.gradInput -end - -function PCAColorTransform:type(type, tensorCache) - self.noise = nil - self.alphas = nil - self._tempLambda = nil - self._tempLambdaExpanded = nil - self._tempNoise = nil - 
self._tempNoiseExpanded = nil
-   Parent.type(self, type, tensorCache)
-end
-
-function PCAColorTransform:__tostring__()
-   return string.format('%s channels: %d, std: %f', torch.type(self),
-                        self.inputChannels, self.std)
-end
diff --git a/Padding.lua b/Padding.lua
deleted file mode 100644
index b3b8f03..0000000
--- a/Padding.lua
+++ /dev/null
@@ -1,52 +0,0 @@
-local Padding, parent
-if nn.Padding then -- prevent name conflicts with nnx
-   Padding, parent = nn.Padding, nn.Module
-else
-   Padding, parent = torch.class('nn.Padding', 'nn.Module')
-end
-
--- pad can be positive (right) or negative (left)
-function Padding:__init(dim, pad, nInputDim, value)
-   self.dim = dim
-   self.pad = pad
-   self.nInputDim = nInputDim
-   self.value = value or 0
-   self.outputSize = torch.LongStorage()
-   parent.__init(self)
-end
-
-function Padding:updateOutput(input)
-   self.outputSize:resize(input:dim())
-   self.outputSize:copy(input:size())
-   local dim = self.dim
-   if self.nInputDim and input:dim() ~= self.nInputDim then
-      dim = dim + 1
-   end
-   self.outputSize[dim] = self.outputSize[dim] + math.abs(self.pad)
-   self.output:resize(self.outputSize)
-   self.output:fill(self.value)
-   local outputWindow
-   if self.pad > 0 then
-      outputWindow = self.output:narrow(dim, 1, input:size(dim))
-   else
-      outputWindow = self.output:narrow(dim, 1 - self.pad, input:size(dim))
-   end
-   outputWindow:copy(input)
-   return self.output
-end
-
-function Padding:updateGradInput(input, gradOutput)
-   self.gradInput:resizeAs(input)
-   local dim = self.dim
-   if self.nInputDim and input:dim() ~= self.nInputDim then
-      dim = dim + 1
-   end
-   local gradOutputWindow
-   if self.pad > 0 then
-      gradOutputWindow = gradOutput:narrow(dim, 1, input:size(dim))
-   else
-      gradOutputWindow = gradOutput:narrow(dim, 1 - self.pad, input:size(dim))
-   end
-   -- the gradient w.r.t. the input is simply the matching window of gradOutput
-   self.gradInput:copy(gradOutputWindow)
-   return self.gradInput
-end
diff --git a/README.md b/README.md
index 63123fb..bea88cf 100644
--- a/README.md
+++ b/README.md
@@ -7,8 +7,8 @@ This library includes documentation for the following objects:
 Modules that consider successive calls to `forward` as different time-steps in a sequence :
  * [AbstractRecurrent](#rnn.AbstractRecurrent) : an abstract class inherited by `Recurrence` and `RecLSTM`;
  * [Recurrence](#rnn.Recurrence) : decorates a module that outputs `output(t)` given `{input(t), output(t-1)}`;
- * [LookupRNN](#rnn.LookupRNN): implements a simple RNN where the input layer is a `LookupTable`;
- * [LinearRNN](#rnn.LinearRNN): implements a simple RNN where the input layer is a `Linear`;
+ * [LookupRNN](#rnn.LookupRNN): implements a simple RNN where the input layer is a `LookupTable`;
+ * [LinearRNN](#rnn.LinearRNN): implements a simple RNN where the input layer is a `Linear`;
  * [RecLSTM](#rnn.RecLSTM) : an LSTM that can be used for real-time RNNs;
  * [RecGRU](#rnn.RecGRU) : a GRU that can be used for real-time RNNs;
  * [Recursor](#rnn.Recursor) : decorates a module to make it conform to the [AbstractRecurrent](#rnn.AbstractRecurrent) interface;
@@ -21,8 +21,8 @@ Modules that `forward` entire sequences through a decorated `AbstractRecurrent`
  * [SeqLSTM](#rnn.SeqLSTM) : a faster version of `nn.Sequencer(nn.RecLSTM)` where the `input` and `output` are tensors;
  * [SeqGRU](#rnn.SeqGRU) : a faster version of `nn.Sequencer(nn.RecGRU)` where the `input` and `output` are tensors;
  * [BiSequencer](#rnn.BiSequencer) : used for implementing Bidirectional RNNs;
- * [SeqBLSTM](#rnn.SeqBLSTM) : bidirectional LSTM that uses two `SeqLSTMs` internally;
- * [SeqBGRU](#rnn.SeqBGRU) : bidirectional GRU that uses two `SeqGRUs` internally;
+ * [SeqBLSTM](#rnn.SeqBLSTM) : bidirectional LSTM that uses two `SeqLSTMs` internally;
+ * [SeqBGRU](#rnn.SeqBGRU) : bidirectional GRU that uses two `SeqGRUs` internally;
  * [Repeater](#rnn.Repeater) : repeatedly applies the same input to an `AbstractRecurrent` instance;
  * [RecurrentAttention](#rnn.RecurrentAttention) : a generalized attention model for [REINFORCE modules](https://github.com/nicholas-leonard/dpnn#nn.Reinforce);
@@ -34,6 +34,11 @@ Miscellaneous modules and criterions :
  * [MaskZeroCriterion](#rnn.MaskZeroCriterion) : zeros the `gradInput` and `loss` rows of the decorated criterion for commensurate
    * `input` rows which are tensors of zeros (version 1);
    * `zeroMask` elements which are 1 (version 2);
+ * [ReverseSequence](#nn.ReverseSequence) : reverses the order of elements in a sequence (table or tensor);
+ * [ReverseUnreverse](#nn.ReverseUnreverse) : used internally by `nn.BiSequencer` for decorating the `bwd` RNN;
+ * [SpatialGlimpse](#nn.SpatialGlimpse) : takes a foveated glimpse of an image at a given location;
+ * [NCEModule](#nn.NCEModule) : optimized placeholder for a `Linear` + `SoftMax` using [noise-contrastive estimation](https://www.cs.toronto.edu/~amnih/papers/ncelm.pdf) (a minimal usage sketch follows these lists);
+ * [NCECriterion](#nn.NCECriterion) : criterion exclusively used with [NCEModule](#nn.NCEModule);
  * [VariableLength](#rnn.VariableLength): decorates a `Sequencer` to accept and produce a table of variable length inputs and outputs;
 
 Criterions used for handling sequential inputs and targets :
@@ -41,39 +46,6 @@ Criterions used for handling sequential inputs and targets :
  * [SequencerCriterion](#rnn.SequencerCriterion) : sequentially applies the same criterion to a sequence of inputs and targets;
  * [RepeaterCriterion](#rnn.RepeaterCriterion) : repeatedly applies the same criterion with the same target on a sequence.
-
-This package also provides many useful features that aren't part of the main nn package.
-These include [sharedClone](#nn.Module.sharedClone), which allows you to clone a module and share
-parameters or gradParameters with the original module, without incurring any memory overhead.
-We also redefined [type](#nn.Module.type) such that the type-cast preserves Tensor sharing within a structure of modules.
-
-The package provides the following Modules:
-
- * [Serial](#nn.Serial) : decorates a module to make its serialized output more compact;
- * [Inception](#nn.Inception) : implements the Inception module of the GoogleLeNet article;
- * [Collapse](#nn.Collapse) : just like `nn.View(-1)`;
- * [Convert](#nn.Convert) : convert between different tensor types or shapes;
- * [ZipTable](#nn.ZipTable) : zip a table of tables into a table of tables;
- * [ZipTableOneToMany](#nn.ZipTableOneToMany) : zip a table of element `el` and table of elements into a table of pairs of element `el` and table elements;
- * [CAddTensorTable](#nn.CAddTensorTable) : adds a tensor to a table of tensors of the same size;
- * [ReverseSequence](#nn.ReverseSequence) : reverse the order of elements in a sequence (table or tensor);
- * [ReverseUnreverse](#nn.ReverseUnreverse) : used internally by `nn.BiSequencer` for decorating `bwd` RNN.
- * [PrintSize](#nn.PrintSize) : prints the size of inputs and gradOutputs (useful for debugging);
- * [Clip](#nn.Clip) : clips the inputs to a min and max value;
- * [Constant](#nn.Constant) : outputs a constant value given an input (which is ignored);
- * [SpatialUniformCrop](#nn.SpatialUniformCrop) : uniformly crops patches from an input;
- * [SpatialGlimpse](#nn.SpatialGlimpse) : takes a foveated glimpse of an image at a given location;
- * [WhiteNoise](#nn.WhiteNoise) : adds isotropic Gaussian noise to the signal when in training mode;
- * [OneHot](#nn.OneHot) : transforms a tensor of indices into [one-hot](https://en.wikipedia.org/wiki/One-hot) encoding;
- * [Kmeans](#nn.Kmeans) : [Kmeans](https://en.wikipedia.org/wiki/K-means_clustering) clustering layer. Forward computes distances with respect to centroids and returns index of closest centroid. Centroids can be updated using gradient descent. Centroids could be initialized randomly or by using the [kmeans++](https://en.wikipedia.org/wiki/K-means%2B%2B) algorithm;
- * [SpatialRegionDropout](#nn.SpatialRegionDropout) : randomly drops out a region (top, bottom, leftmost, rightmost) of the input image. Works with batch and any number of channels;
- * [FireModule](#nn.FireModule) : FireModule as mentioned in the [SqueezeNet](http://arxiv.org/pdf/1602.07360v1.pdf);
- * [NCEModule](#nn.NCEModule) : optimized placeholder for a `Linear` + `SoftMax` using [noise-contrastive estimation](https://www.cs.toronto.edu/~amnih/papers/ncelm.pdf).
- * [SpatialFeatNormalization](#nn.SpatialFeatNormalization) : Module for widely used preprocessing step of mean zeroing and standardization for images.
- * [SpatialBinaryConvolution](#nn.SpatialBinaryConvolution) : Module for binary spatial convolution (binary weights) as mentioned in [XNOR-Net](http://arxiv.org/pdf/1603.05279v2.pdf).
- * [SimpleColorTransform](#nn.SimpleColorTransform) : Module for adding independent random noise to input image channels.
- * [PCAColorTransform](#nn.PCAColorTransform) : Module for adding noise to input image using Principal Components Analysis.
-
 The following modules and criterions can be used to implement the REINFORCE algorithm :
 
 * [Reinforce](#nn.Reinforce) : abstract class for REINFORCE modules;
@@ -84,14 +56,6 @@ The following modules and criterions can be used to implement the REINFORCE algo
 * [VRClassReward](#nn.VRClassReward) : criterion for variance-reduced classification-based reward;
 * [BinaryClassReward](#nn.BinaryClassReward) : criterion for variance-reduced binary classification reward (like `VRClassReward`, but for binary classes);
-
-Additional differentiable criterions
- * [BinaryLogisticRegression](#nn.BLR) : criterion for binary logistic regression;
- * [SpatialBinaryLogisticRegression](#nn.SpatialBLR) : criterion for pixel-wise binary logistic regression;
- * [NCECriterion](#nn.NCECriterion) : criterion exclusively used with [NCEModule](#nn.NCEModule);
- * [ModuleCriterion](#nn.ModuleCriterion) : adds an optional `inputModule` and `targetModule` before a decorated criterion;
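+As a quick orientation for the [NCEModule](#nn.NCEModule) and [NCECriterion](#nn.NCECriterion)
+pair listed above, here is a minimal, illustrative sketch. The sizes, the uniform
+`unigrams` tensor and the number of noise samples `k` are assumptions made for the
+example, not values taken from this patch:
+
+```lua
+local inputsize, nclass, k = 200, 10000, 25          -- assumed sizes
+local unigrams = torch.Tensor(nclass):fill(1/nclass) -- assumed noise distribution
+local nce = nn.NCEModule(inputsize, nclass, k, unigrams)
+local crit = nn.NCECriterion()
+
+local input = torch.randn(8, inputsize)              -- e.g. an RNN's hidden state
+local target = torch.LongTensor(8):random(nclass)
+
+-- in training mode, NCEModule takes {input, target} and outputs the table of
+-- model/noise probabilities that NCECriterion consumes
+local output = nce:forward{input, target}
+local loss = crit:forward(output, target)
+```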
-
 ## Examples ##
 
@@ -122,7 +86,6 @@ If you are using CUDA :
 ```bash
 luarocks install cutorch
 luarocks install cunn
-luarocks install cunnx
 ```
 
 And don't forget to update this package :
@@ -130,7 +93,7 @@ And don't forget to update this package :
 luarocks install rnn
 ```
 
-If that doesn't fix it, open and issue on github.
+If that doesn't fix it, open an issue on github.
 
 ## AbstractRecurrent ##
@@ -1544,224 +1507,6 @@ i.e. each example in a batch has its own scalar reward.
 Refer to [this example](https://github.com/Element-Research/rnn/blob/master/examples/recurrent-visual-attention.lua)
 for a complete training script making use of the REINFORCE interface.
-
-## Serial ##
-
-```lua
-dmodule = nn.Serial(module, [tensortype])
-dmodule:[light,medium,heavy]Serial()
-```
-
-This module is a decorator that can be used to control the serialization/deserialization
-behavior of the encapsulated module. Basically, it makes the resulting string or
-file heavy (the default), medium or light in terms of size.
-
-Furthermore, when specified, the `tensortype` attribute (e.g. *torch.FloatTensor*, *torch.DoubleTensor* and so on)
-determines what type the module will be cast to during serialization.
-Note that this will also be the type of the deserialized object.
-The default serialization `tensortype` is `nil`, i.e. the module is serialized as is.
-
-The `heavySerial()` has the serialization process serialize every attribute in the module graph,
-which is the default behavior of nn.
-
-The `mediumSerial()` has the serialization process serialize
-everything except the attributes specified in each module's `dpnn_mediumEmpty`
-table, which has a default value of `{'output', 'gradInput', 'momGradParams', 'dpnn_input'}`.
-During serialization, whether they be tables or Tensors, these attributes are emptied (no storage).
-Some modules overwrite the default `Module.dpnn_mediumEmpty` static attribute with their own.
-
-The `lightSerial()` has the serialization process empty
-everything a call to `mediumSerial(type)` would (so it uses `dpnn_mediumEmpty`).
-But it also empties all the parameter gradients specified by the
-attribute `dpnn_gradParameters`, which defaults to `{gradWeight, gradBias}`.
-
-We recommend using `mediumSerial()` for training, and `lightSerial()` for
-production (feed-forward-only models).
-
-
-## Inception ##
-References :
-
- * A. [Going Deeper with Convolutions](http://arxiv.org/abs/1409.4842)
- * B. [GoogleLeNet](http://image-net.org/challenges/LSVRC/2014/slides/GoogLeNet.pptx)
-
-```lua
-module = nn.Inception(config)
-```
-
-This module uses `n`+2 parallel "columns".
-The original paper uses 2+2 where the first two are (but there could be more than two):
-
- * 1x1 conv (reduce) -> relu -> 5x5 conv -> relu
- * 1x1 conv (reduce) -> relu -> 3x3 conv -> relu
-
-and where the other two are :
-
- * 3x3 maxpool -> 1x1 conv (reduce/project) -> relu
- * 1x1 conv (reduce) -> relu.
-
-This module allows the first group of columns to be of any
-number, while the last group consists of exactly two columns.
-The 1x1 convolutions are used to reduce the number of input channels
-(or filters) such that the capacity of the network doesn't explode.
-We refer to these here as *reduce*.
-Since each column has one and only one reduce, their initial
-configuration options are specified in lists of n+2 elements.
-
-The sole argument `config` is a table taking the following key-values :
-
- * Required Arguments :
-  * `inputSize` : number of input channels or colors, e.g. 3;
-  * `outputSize` : numbers of filters in the non-1x1 convolution kernel sizes, e.g. `{32,48}`
-  * `reduceSize` : numbers of filters in the 1x1 convolutions (reduction) used in each column, e.g. `{48,64,32,32}`. The last 2 are used respectively for the max pooling (projection) column (the last column in the paper) and the column that has nothing but a 1x1 conv (the first column in the paper). This table should have two more elements than `outputSize`
- * Optional Arguments :
-  * `reduceStride` : strides of the 1x1 (reduction) convolutions. Defaults to `{1,1,...}`.
-  * `transfer` : transfer function like `nn.Tanh`, `nn.Sigmoid`, `nn.ReLU`, `nn.Identity`, etc. It is used after each reduction (1x1 convolution) and convolution. Defaults to `nn.ReLU`.
-  * `batchNorm` : set this to `true` to use batch normalization. Defaults to `false`. Note that batch normalization can substantially improve convergence.
-  * `padding` : set this to `true` to add padding to the input of the convolutions such that output width and height are the same as that of the original non-padded `input`. Defaults to `true`.
-  * `kernelSize` : size (`height = width`) of the non-1x1 convolution kernels. Defaults to `{5,3}`.
-  * `kernelStride` : stride of the kernels (`height = width`) of the convolution. Defaults to `{1,1}`.
-  * `poolSize`: size (`height = width`) of the spatial max pooling used in the next-to-last column. Defaults to 3.
-  * `poolStride` : stride (`height = width`) of the spatial max pooling. Defaults to 1.
-
-
-For a complete example using this module, refer to the following :
- * [deep inception training script](https://github.com/nicholas-leonard/dp/blob/master/examples/deepinception.lua) ;
- * [openface facial recognition](https://github.com/cmusatyalab/openface) (the model definition is [here](https://github.com/cmusatyalab/openface/blob/master/models/openface/nn4.def.lua)).
-
-
-## Collapse ##
-
-```lua
-module = nn.Collapse(nInputDim)
-```
-
-This module is the equivalent of:
-```
-view = nn.View(-1)
-view:setNumInputDim(nInputDim)
-```
-It collapses all non-batch dimensions. This is useful for converting
-a spatial feature map to the single dimension required by a dense
-hidden layer like Linear.
-
-
-## Convert ##
-
-```lua
-module = nn.Convert([inputShape, outputShape])
-```
-Module to convert between different data formats.
-For example, we can flatten images by using :
-```lua
-module = nn.Convert('bchw', 'bf')
-```
-or equivalently
-```lua
-module = nn.Convert('chw', 'f')
-```
-Let's try it with an input:
-```lua
-print(module:forward(torch.randn(3,2,3,1)))
- 0.5692 -0.0190  0.5243  0.7530  0.4230  1.2483
--0.9142  0.6013  0.5608 -1.0417 -1.4014  1.0177
--1.5207 -0.1641 -0.4166  1.4810 -1.1725 -1.0037
-[torch.DoubleTensor of size 3x6]
-```
-You could also try:
-
-```lua
-module = nn.Convert('chw', 'hwc')
-input = torch.randn(1,2,3,2)
-input:select(2,1):fill(1)
-input:select(2,2):fill(2)
-print(input)
-(1,1,.,.) =
-  1  1
-  1  1
-  1  1
-(1,2,.,.) =
-  2  2
-  2  2
-  2  2
-[torch.DoubleTensor of size 1x2x3x2]
-print(module:forward(input))
-(1,1,.,.) =
-  1  2
-  1  2
-
-(1,2,.,.) =
-  1  2
-  1  2
-
-(1,3,.,.) =
-  1  2
-  1  2
-[torch.DoubleTensor of size 1x3x2x2]
-```
-
-
-Furthermore, it automatically converts the `input` to have the same type as `self.output`
-(i.e. the type of the module).
-So you can also just use it for automatic input type conversions:
-```lua
-module = nn.Convert()
-print(module.output) -- type of module
-[torch.DoubleTensor with no dimension]
-input = torch.FloatTensor{1,2,3}
-print(module:forward(input))
- 1
- 2
- 3
-[torch.DoubleTensor of size 3]
-```
-
-
-## ZipTable ##
-
-```lua
-module = nn.ZipTable()
-```
-
-Zips a table of tables into a table of tables.
-
-Example:
-```lua
-print(module:forward{ {'a1','a2'}, {'b1','b2'}, {'c1','c2'} })
-{ {'a1','b1','c1'}, {'a2','b2','c2'} }
-```
-
-
-## ZipTableOneToMany ##
-
-```lua
-module = nn.ZipTableOneToMany()
-```
-
-Zips a table of element `el` and a table of elements `tab` into a table of tables, where the i-th table contains the element `el` and the i-th element in table `tab`.
-
-Example:
-```lua
-print(module:forward{ 'el', {'a','b','c'} })
-{ {'el','a'}, {'el','b'}, {'el','c'} }
-```
-
-
-## CAddTensorTable ##
-
-```lua
-module = nn.CAddTensorTable()
-```
-
-Adds the first element `el` of the input table `tab` to each tensor contained in the second element of `tab`, which is itself a table.
-
-Example:
-```lua
-print(module:forward{ (0,1,1), {(0,0,0),(1,1,1)} })
-{ (0,1,1), (1,2,2) }
-```
-
-
 ## ReverseSequence ##
@@ -1805,67 +1550,6 @@ Then the `input` sequences are forwarded (in reverse order) through the `sequenc
 The resulting `sequencer.output` sequences are reversed with respect to the `input`.
 Before being returned to the caller, these are unreversed using another `ReverseSequence`.
-
-## PrintSize ##
-
-```lua
-module = nn.PrintSize(name)
-```
-
-This module is useful for debugging complicated module composites.
-It prints the size of the `input` and `gradOutput` during `forward`
-and `backward` propagation respectively.
-The `name` is a string used to identify the module alongside the printed size.
-
-
-## Clip ##
-
-```lua
-module = nn.Clip(minval, maxval)
-```
-
-This module clips `input` values such that the output is between `minval` and `maxval`.
-
-
-## Constant ##
-
-```lua
-module = nn.Constant(value, nInputDim)
-```
-
-This module outputs a constant value given an input.
-If `nInputDim` is specified, it uses the input to determine the size of the batch.
-The `value` is then replicated over the batch.
-Otherwise, the `value` Tensor is output as is.
-During `backward`, the returned `gradInput` is a zero Tensor of the same size as the `input`.
-This module has no trainable parameters.
-
-You can use this with nn.ConcatTable() to append constant inputs to an input :
-
-```lua
-nn.ConcatTable():add(nn.Constant(v)):add(nn.Identity())
-```
-
-This is useful when you want to output a value that is independent of the
-input to the neural network (see [this example](https://github.com/Element-Research/rnn/blob/master/examples/recurrent-visual-attention.lua)).
-
-
-## SpatialUniformCrop ##
-
-```lua
-module = nn.SpatialUniformCrop(oheight, owidth)
-```
-
-During training, this module will output a cropped patch of size `oheight, owidth`
-within the boundaries of the `input` image.
-For each example, a location is sampled from a uniform distribution
-such that each possible patch has an equal probability of being sampled.
-
-During evaluation, the center patch is cropped and output.
-
-This module is commonly used at the input layer to artificially
-augment the size of the dataset to prevent overfitting.
-
 ## SpatialGlimpse ##
 
 Ref. A. [Recurrent Model for Visual Attention](http://papers.nips.cc/paper/5542-recurrent-models-of-visual-attention.pdf)
@@ -1892,187 +1576,6 @@ on a region of the input `image`.
 It is commonly used with the [RecurrentAttention](https://github.com/Element-Research/rnn#rnn.RecurrentAttention)
 module (see [this example](https://github.com/Element-Research/rnn/blob/master/examples/recurrent-visual-attention.lua)).
-
-
-## WhiteNoise ##
-
-```lua
-module = nn.WhiteNoise([mean, stdev])
-```
-
-Useful in training [Denoising Autoencoders](http://arxiv.org/pdf/1507.02672v1.pdf).
-Takes `mean` and `stdev` of the normal distribution as input.
-Default values for mean and standard deviation are 0 and 0.1 respectively.
-With `module:training()`, noise is added during forward.
-During `backward`, gradients are passed through as-is.
-With `module:evaluate()`, the mean is added to the input.
-
-
-## SpatialRegionDropout ##
-
-```lua
-module = nn.SpatialRegionDropout(p)
-```
-Following is an example of `SpatialRegionDropout` outputs on the famous lena image.
-
-**Input**
-
-![Lena](tutorials/lena.jpg)
-
-**Outputs**
-
-![Lena](tutorials/srd1.jpg) ![Lena](tutorials/srd2.jpg)
-
-
-## FireModule ##
-Ref: http://arxiv.org/pdf/1602.07360v1.pdf
-```lua
-module = nn.FireModule(nInputPlane, s1x1, e1x1, e3x3, activation)
-```
-FireModule is composed of two submodules: 1) a *squeeze* convolution module comprised of `1x1` filters, followed by 2) an *expand* module that is comprised of a mix of `1x1` and `3x3` convolution filters.
-Arguments: `s1x1`: number of `1x1` filters in the squeeze submodule, `e1x1`: number of `1x1` filters in the expand submodule, `e3x3`: number of `3x3` filters in the expand submodule. It is recommended that `s1x1` be less than `(e1x1+e3x3)` if you want to limit the number of input channels to the `3x3` filters in the expand submodule.
-FireModule works only with batches; for a single sample, convert the sample to a batch of size 1.
-
-
-## SpatialFeatNormalization ##
-```lua
-module = nn.SpatialFeatNormalization(mean, std)
-```
-This module normalizes each feature channel of the input image based on its corresponding mean and standard deviation scalar values. This module does not learn the `mean` and `std`; they are provided as arguments.
-
-
-## SpatialBinaryConvolution ##
-
-```lua
-module = nn.SpatialBinaryConvolution(nInputPlane, nOutputPlane, kW, kH)
-```
-The functioning of SpatialBinaryConvolution is similar to nn/SpatialConvolution. The only difference is that binary weights are used for forward/backward, while floating point weights are used for weight updates. Check the **Binary-Weight-Network** section of [XNOR-net](http://arxiv.org/pdf/1603.05279v2.pdf).
-
-
-## SimpleColorTransform ##
-
-```lua
-range = torch.rand(inputChannels) -- Typically range is specified by the user.
-module = nn.SimpleColorTransform(inputChannels, range)
-```
-This module performs a simple data augmentation technique. The SimpleColorTransform module adds random noise to each color channel independently. In a more advanced data augmentation technique, noise is added using the principal components of the color channels. For that, please check **PCAColorTransform**.
-
-
-## PCAColorTransform ##
-
-```lua
-eigenVectors = torch.rand(inputChannels, inputChannels) -- Eigen Vectors
-eigenValues = torch.rand(inputChannels) -- Eigen Values
-std = 0.1 -- Std deviation of normal distribution with mean zero for noise.
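--- illustrative values: in practice, `eigenVectors` and `eigenValues` come from
--- a PCA of the training set's pixel values (as in the AlexNet paper cited below).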
-module = nn.PCAColorTransform(inputChannels, eigenVectors, eigenValues, std)
-```
-This module performs data augmentation using Principal Component Analysis of pixel values. When in training mode, multiples of principal components are added to input image pixels. The magnitude of the value added (noise) is dependent upon the corresponding eigen value and a random value sampled from a Gaussian distribution with mean zero and `std` (default 0.1) standard deviation. This technique was used in the famous [AlexNet](https://papers.nips.cc/paper/4824-imagenet-classification-with-deep-convolutional-neural-networks.pdf) paper.
-
-
-## OneHot ##
-
-```lua
-module = nn.OneHot(outputSize)
-```
-
-Transforms a tensor of `input` indices having integer values between 1 and `outputSize` into
-a tensor of one-hot vectors of size `outputSize`.
-
-Forward an index to get a one-hot vector :
-
-```lua
-> module = nn.OneHot(5) -- 5 classes
-> module:forward(torch.LongTensor{3})
- 0  0  1  0  0
-[torch.DoubleTensor of size 1x5]
-```
-
-Forward a batch of 3 indices. Notice that these need not be stored as `torch.LongTensor` :
-
-```lua
-> module:forward(torch.Tensor{3,2,1})
- 0  0  1  0  0
- 0  1  0  0  0
- 1  0  0  0  0
-[torch.DoubleTensor of size 3x5]
-```
-
-Forward a batch of `2 x 3` indices :
-
-```lua
-oh:forward(torch.Tensor{{3,2,1},{1,2,3}})
-(1,.,.) =
-  0  0  1  0  0
-  0  1  0  0  0
-  1  0  0  0  0
-
-(2,.,.) =
-  1  0  0  0  0
-  0  1  0  0  0
-  0  0  1  0  0
-[torch.DoubleTensor of size 2x3x5]
-```
-
-
-## Kmeans ##
-
-```lua
-km = nn.Kmeans(k, dim)
-```
-
-`k` is the number of centroids and `dim` is the dimensionality of samples.
-You can initialize centroids either randomly from input samples or by using the *kmeans++* algorithm.
-
-```lua
-km:initRandom(samples) -- Randomly initialize centroids from input samples.
-km:initKmeansPlus(samples) -- Use Kmeans++ to initialize centroids.
-```
-
-Example showing how to use the Kmeans module to do standard Kmeans clustering.
-
-```lua
-attempts = 10
-iter = 100 -- Number of iterations
-bestKm = nil
-bestLoss = math.huge
-learningRate = 1
-for j=1, attempts do
-   local km = nn.Kmeans(k, dim)
-   km:initKmeansPlus(samples)
-   for i=1, iter do
-      km:zeroGradParameters()
-      km:forward(samples) -- sets km.loss
-      km:backward(samples, gradOutput) -- gradOutput is ignored
-
-      -- Gradient Descent weight/centroids update
-      km:updateParameters(learningRate)
-   end
-
-   if km.loss < bestLoss then
-      bestLoss = km.loss
-      bestKm = km:clone()
-   end
-end
-```
-The `nn.Kmeans()` module maintains the loss only for the latest forward. If you want to maintain the loss over the whole dataset, you would need to do it yourself by accumulating the module loss after every forward.
-
-You can also use `nn.Kmeans()` as an auxiliary layer in your network.
-A call to `forward` will generate an `output` containing the index of the nearest cluster for each sample in the batch.
-The `gradInput` generated by `updateGradInput` will be zero.
-
-
-## ModuleCriterion ##
-
-```lua
-criterion = nn.ModuleCriterion(criterion [, inputModule, targetModule, castTarget])
-```
-
-This criterion decorates a `criterion` by allowing the `input` and `target` to be
-fed through an optional `inputModule` and `targetModule` before being passed to the
-`criterion`. The `inputModule` must not contain parameters as these would not be updated.
-
-When `castTarget = true` (the default), the `targetModule` is cast along with the `inputModule` and
-`criterion`. Otherwise, the `targetModule` isn't.
-
 ## NCEModule
 
 Ref. A [RNNLM training with NCE for Speech Recognition](https://www.cs.toronto.edu/~amnih/papers/ncelm.pdf)
@@ -2350,40 +1853,3 @@ So basically, the `input` is still a table of two tensors.
 The first input tensor is of size `batchsize` containing Bernoulli probabilities.
 The second input tensor is the baseline prediction described in `VRClassReward`.
 The targets contain zeros and ones.
-
-
-## BinaryLogisticRegression ##
-Ref A. [Learning to Segment Object Candidates](http://arxiv.org/pdf/1506.06204v2.pdf)
-This criterion implements the score criterion mentioned in (ref. A).
-
-```lua
-criterion = nn.BinaryLogisticRegression()
-```
-
-BinaryLogisticRegression implements the following cost function for binary classification.
-
-```
- log( 1 + exp( -y_k * score(x_k) ) )
-
-```
-where `y_k` is the binary target and `score(x_k)` is the corresponding prediction. `y_k` has value `{-1, +1}` and `score(x_k)` has value in `[-1, +1]`.
-
-
-## SpatialBinaryLogisticRegression ##
-Ref A. [Learning to Segment Object Candidates](http://arxiv.org/pdf/1506.06204v2.pdf)
-
-This criterion implements the spatial component of the criterion mentioned in (ref. A).
-
-```lua
-criterion = nn.SpatialBinaryLogisticRegression()
-```
-
-SpatialBinaryLogisticRegression implements the following cost function for binary pixel classification.
-```
- ( 1 / (2*w*h) ) * sum_ij [ log( 1 + exp( -m_ij * f_ij ) ) ]
-```
-where `m_ij` is the target binary image and `f_ij` is the corresponding prediction. `m_ij` has value `{-1, +1}` and `f_ij` has value in `[-1, +1]`.
-
diff --git a/RecGRU.lua b/RecGRU.lua
index 76ba8b5..f25baa6 100644
--- a/RecGRU.lua
+++ b/RecGRU.lua
@@ -27,7 +27,6 @@ function RecGRU:_updateOutput(input)
    -- output(t) = gru{input(t), output(t-1)}
    local output
    if self.train ~= false then
-      self:recycle()
       local stepmodule = self:getStepModule(self.step)
       output = stepmodule:updateOutput({input, prevOutput})
    else
diff --git a/RecLSTM.lua b/RecLSTM.lua
index cf1303c..17cbc02 100644
--- a/RecLSTM.lua
+++ b/RecLSTM.lua
@@ -33,7 +33,6 @@ function RecLSTM:_updateOutput(input)
    -- output(t), cell(t) = lstm{input(t), output(t-1), cell(t-1)}
    local output, cell
    if self.train ~= false then
-      self:recycle()
       local stepmodule = self:getStepModule(self.step)
       -- the actual forward propagation
       output, cell = unpack(stepmodule:updateOutput{input, prevOutput, prevCell})
diff --git a/Recurrence.lua b/Recurrence.lua
index 71dc4fd..dbf0bd8 100644
--- a/Recurrence.lua
+++ b/Recurrence.lua
@@ -81,7 +81,6 @@ function Recurrence:_updateOutput(input)
    -- output(t) = stepmodule:forward{input(t), output(t-1)}
    local output
    if self.train ~= false then
-      self:recycle()
       local stepmodule = self:getStepModule(self.step)
       -- the actual forward propagation
       output = stepmodule:updateOutput{input, prevOutput}
diff --git a/Recursor.lua b/Recursor.lua
index ad59c7f..bfe8278 100644
--- a/Recursor.lua
+++ b/Recursor.lua
@@ -9,8 +9,6 @@ local Recursor, parent = torch.class('nn.Recursor', 'nn.AbstractRecurrent')
 function Recursor:_updateOutput(input)
    local output
    if self.train ~= false then -- if self.train or self.train == nil then
-      -- set/save the output states
-      self:recycle()
       local stepmodule = self:getStepModule(self.step)
       output = stepmodule:updateOutput(input)
    else
diff --git a/Sequencer.lua b/Sequencer.lua
index faacfd1..7bb93ad 100644
--- a/Sequencer.lua
+++ b/Sequencer.lua
@@ -18,9 +18,8 @@ function Sequencer:__init(module)
    end
 
    -- we can decorate the module with a Recursor to make it AbstractRecurrent
-   self.module = (not torch.isTypeOf(module, 'nn.AbstractRecurrent')) and 
nn.Recursor(module) or module -- backprop through time (BPTT) will be done online (in reverse order of forward) - self.modules = {self.module} + self.modules = {(not torch.isTypeOf(module, 'nn.AbstractRecurrent')) and nn.Recursor(module) or module} self.output = {} self.tableoutput = {} @@ -28,10 +27,6 @@ function Sequencer:__init(module) -- table of buffers used for evaluation self._output = {} - -- so that these buffers aren't serialized : - local _ = require 'moses' - self.dpnn_mediumEmpty = _.clone(self.dpnn_mediumEmpty) - table.insert(self.dpnn_mediumEmpty, '_output') -- default is to forget previous inputs before each forward() self._remember = 'neither' end @@ -46,16 +41,16 @@ function Sequencer:updateOutput(input) end -- Note that the Sequencer hijacks the seqlen attribute of the rnn - self.module:maxBPTTstep(nStep) + self.modules[1]:maxBPTTstep(nStep) if self.train ~= false then -- TRAINING if not (self._remember == 'train' or self._remember == 'both') then - self.module:forget() + self.modules[1]:forget() end self.tableoutput = {} for step=1,nStep do - self.tableoutput[step] = self.module:updateOutput(input[step]) + self.tableoutput[step] = self.modules[1]:updateOutput(input[step]) end if torch.isTensor(input) then @@ -70,13 +65,13 @@ function Sequencer:updateOutput(input) else -- EVALUATION if not (self._remember == 'eval' or self._remember == 'both') then - self.module:forget() + self.modules[1]:forget() end -- during evaluation, recurrent modules reuse memory (i.e. outputs) -- so we need to copy each output into our own table or tensor if torch.isTensor(input) then for step=1,nStep do - local output = self.module:updateOutput(input[step]) + local output = self.modules[1]:updateOutput(input[step]) if step == 1 then self.output = torch.isTensor(self.output) and self.output or output.new() self.output:resize(nStep, unpack(output:size():totable())) @@ -87,7 +82,7 @@ function Sequencer:updateOutput(input) for step=1,nStep do self.tableoutput[step] = nn.utils.recursiveCopy( self.tableoutput[step] or table.remove(self._output, 1), - self.module:updateOutput(input[step]) + self.modules[1]:updateOutput(input[step]) ) end -- remove extra output tensors (save for later) @@ -117,7 +112,7 @@ function Sequencer:updateGradInput(input, gradOutput) -- back-propagate through time self.tablegradinput = {} for step=nStep,1,-1 do - self.tablegradinput[step] = self.module:updateGradInput(input[step], gradOutput[step]) + self.tablegradinput[step] = self.modules[1]:updateGradInput(input[step], gradOutput[step]) end if torch.isTensor(input) then @@ -147,7 +142,7 @@ function Sequencer:accGradParameters(input, gradOutput, scale) -- back-propagate through time for step=nStep,1,-1 do - self.module:accGradParameters(input[step], gradOutput[step], scale) + self.modules[1]:accGradParameters(input[step], gradOutput[step], scale) end end @@ -189,7 +184,7 @@ function Sequencer:clearState() self._output = {} self.tableoutput = {} self.tablegradinput = {} - self.module:clearState() + self.modules[1]:clearState() end Sequencer.__tostring__ = nn.Decorator.__tostring__ diff --git a/Serial.lua b/Serial.lua deleted file mode 100644 index b597de9..0000000 --- a/Serial.lua +++ /dev/null @@ -1,52 +0,0 @@ ------------------------------------------------------------------------- ---[[ Serial ]]-- --- Decorator that modifies the serialization/deserialization --- behaviour of encapsulated module. 
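--- usage (from the README notes): dmodule = nn.Serial(module, [tensortype]);
--- dmodule:mediumSerial() then empties the buffers listed in each module's
--- dpnn_mediumEmpty table when the module is written to disk.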
------------------------------------------------------------------------
-local _ = require 'moses'
-local Serial, parent = torch.class("nn.Serial", "nn.Decorator")
-
-function Serial:__init(module, tensortype)
-   parent.__init(self, module)
-   self.tensortype = tensortype
-   if self.tensortype then
-      assert(tensortype:find('torch.*Tensor'), "Expecting tensortype (e.g. torch.LongTensor) at arg1")
-   end
-end
-
-function Serial:write(file)
-   local state = self:getSerialState()
-
-   local function recursiveSetMetaTable(state)
-      for k,v in pairs(state) do
-         if torch.type(v) == 'table' then
-            recursiveSetMetaTable(v)
-         end
-      end
-
-      if state.dpnn_typename then
-         torch.setmetatable(state, state.dpnn_typename)
-      end
-   end
-
-   -- typecast before serialization (useful for cuda)
-   recursiveSetMetaTable(state)
-
-   if self.tensortype then
-      state:type(self.tensortype)
-   end
-
-   -- removes self's metatable
-   state = _.map(state, function(k,v) return v end)
-
-   file:writeObject(state)
-end
-
-function Serial:read(file)
-   local state = file:readObject()
-   for k,v in pairs(state) do
-      self[k] = v
-   end
-end
-
-
diff --git a/SimpleColorTransform.lua b/SimpleColorTransform.lua
deleted file mode 100644
index 97b83ea..0000000
--- a/SimpleColorTransform.lua
+++ /dev/null
@@ -1,90 +0,0 @@
---[[
-   Simple Color transformation module: This module implements a simple data
-   augmentation technique of changing the pixel values of the input image by
-   adding independently sampled small quantities to each channel. Works only
-   in training mode.
---]]
-
-local SimpleColorTransform, Parent = torch.class('nn.SimpleColorTransform', 'nn.Module')
-
-function SimpleColorTransform:__init(inputChannels, range)
-   Parent.__init(self)
-
-   self.train = true
-   self.inputChannels = inputChannels
-   assert(inputChannels == range:nElement(),
-          "Number of input channels and number of range values don't match.")
-   self.range = range
-end
-
-function SimpleColorTransform:updateOutput(input)
-   self.output:resizeAs(input):copy(input)
-   if self.train then
-      self.noise = self.noise or self.output.new()
-      self._tempNoise = self._tempNoise or self.output.new()
-      self._tempNoiseExpanded = self._tempNoiseExpanded or self.output.new()
-      self._tempNoiseSamples = self._tempNoiseSamples or self.output.new()
-
-      if self.output:nDimension() == 4 then
-         local batchSize = self.output:size(1)
-         local channels = self.output:size(2)
-         local height = self.output:size(3)
-         local width = self.output:size(4)
-         assert(channels == self.inputChannels)
-
-         -- Randomly sample noise for each channel
-         self.noise:resize(batchSize, channels)
-         for i=1, channels do
-            self.noise[{{}, {i}}]:uniform(-self.range[i], self.range[i])
-         end
-         self._tempNoise = self.noise:view(batchSize, self.inputChannels, 1, 1)
-         self._tempNoiseExpanded:expand(self._tempNoise, batchSize,
-                                        channels, height, width)
-         self._tempNoiseSamples:resizeAs(self._tempNoiseExpanded)
-                               :copy(self._tempNoiseExpanded)
-         self.output:add(self._tempNoiseSamples)
-
-      elseif self.output:nDimension() == 3 then
-         local channels = self.output:size(1)
-         local height = self.output:size(2)
-         local width = self.output:size(3)
-         assert(channels == self.inputChannels)
-
-         -- Randomly sample noise for each channel
-         self.noise:resize(channels)
-         for i=1, channels do
-            self.noise[i] = torch.uniform(-self.range[i], self.range[i])
-         end
-         self._tempNoise = self.noise:view(self.inputChannels, 1, 1)
-         self._tempNoiseExpanded:expand(self._tempNoise, channels,
-                                        height, width)
-         self._tempNoiseSamples:resizeAs(self._tempNoiseExpanded)
-                               :copy(self._tempNoiseExpanded)
- 
self.output:add(self._tempNoiseSamples)
-      else
-         error("Invalid input dimensionality.")
-      end
-   end
-   return self.output
-end
-
-function SimpleColorTransform:updateGradInput(input, gradOutput)
-   if self.train then
-      self.gradInput:resizeAs(gradOutput):copy(gradOutput)
-   else
-      error('backprop only defined while training')
-   end
-   return self.gradInput
-end
-
-function SimpleColorTransform:type(type, tensorCache)
-   self.noise = nil
-   self._tempNoise = nil
-   self._tempNoiseExpanded = nil
-   self._tempNoiseSamples = nil
-   Parent.type(self, type, tensorCache)
-end
-
-function SimpleColorTransform:__tostring__()
-   return string.format('%s', torch.type(self))
-end
diff --git a/SpatialBatchNormalization.lua b/SpatialBatchNormalization.lua
deleted file mode 100644
index 1b2fdf8..0000000
--- a/SpatialBatchNormalization.lua
+++ /dev/null
@@ -1,12 +0,0 @@
-local BN, parent = nn.SpatialBatchNormalization, nn.Module
-local _ = require 'moses'
-
-local empty = _.clone(parent.dpnn_mediumEmpty)
-table.insert(empty, 'buffer')
-table.insert(empty, 'buffer2')
-table.insert(empty, 'centered')
-table.insert(empty, 'std')
-table.insert(empty, 'normalized')
-table.insert(empty, 'output')
-table.insert(empty, 'gradInput')
-BN.dpnn_mediumEmpty = empty
diff --git a/SpatialBinaryConvolution.lua b/SpatialBinaryConvolution.lua
deleted file mode 100644
index 6365f8e..0000000
--- a/SpatialBinaryConvolution.lua
+++ /dev/null
@@ -1,173 +0,0 @@
--- Reference: http://arxiv.org/abs/1603.05279
--- We use floating point Matrix-Matrix multiplication as in SpatialConvolution.
--- Filters are made binary {-1, +1} using Sign.
--- Convolution output is scaled by the L1-norm of the filters.
-
--- Inherits from nn/SpatialConvolution.
-
-local SpatialBinaryConvolution, parent = torch.class('nn.SpatialBinaryConvolution', 'nn.SpatialConvolution')
-
-function SpatialBinaryConvolution:__init(nInputPlane, nOutputPlane, kW, kH, dW, dH, padW, padH)
-   parent.__init(self, nInputPlane, nOutputPlane, kW, kH, dW, dH, padW, padH)
-   parent.noBias(self)
-
-   self.iwh = self.nInputPlane * self.kW * self.kH
-   self.owh = self.nOutputPlane * self.kW * self.kH
-   self.train = true
-end
-
-function SpatialBinaryConvolution:training()
-   self.train = true
-end
-
-function SpatialBinaryConvolution:evaluate()
-   self.train = false
-end
-
--- Function to binarize weights and compute L1 norms
-function SpatialBinaryConvolution:binarizeWeight()
-   self.tempWeight = self.tempWeight or self.weight.new()
-
-   -- Grad Input alphas
-   self.gradInputAlphas = self.gradInputAlphas or self.weight.new()
-   self.gradInputAlphas:resize(self.nInputPlane)
-
-   local temp = self.weight:transpose(1,2)
-   self.tempWeight:resizeAs(temp):copy(temp)
-   self.gradInputAlphas:norm(self.tempWeight:view(self.nInputPlane, -1), 1, 2)
-   self.gradInputAlphas:div(self.owh) -- 1/owh
-
-   -- alphas
-   self.tempWeight:resizeAs(self.weight):copy(self.weight)
-   self.alphas = self.alphas or self.weight.new()
-   self.alphas:resize(self.nOutputPlane)
-   self.alphas:norm(self.weight:view(self.nOutputPlane, -1), 1, 2)
-   self.alphas:div(self.iwh) -- 1/iwh
-
-   -- Binarize weights
-   if not self.wmask then
-      if torch.type(self.weight) == 'torch.CudaTensor' then
-         self.wmask = torch.CudaTensor()
-      else
-         self.wmask = torch.ByteTensor()
-      end
-   end
-
-   -- Binarizing weights
-   self.weight.ge(self.wmask, self.weight, 0)
-   self.weight[self.wmask] = 1
-   self.weight.lt(self.wmask, self.weight, 0)
-   self.weight[self.wmask] = -1
-end
-
-function SpatialBinaryConvolution:updateOutput(input)
-   -- Binarize Weights
- 
self.binarizeWeight(self) - - -- Convolution - self.output = parent.updateOutput(self, input) - - -- Scale output by alphas - self._tempAlphas = self._tempAlphas or self.output.new() - self._tempAlphasExpanded = self._tempAlphasExpanded or self.output.new() - self._tempAlphasSamples = self._tempAlphasSamples or self.output.new() - if self.output:nDimension() == 4 then - local batchSize = self.output:size(1) - local height = self.output:size(3) - local width = self.output:size(4) - - self._tempAlphas = self.alphas:view(1, self.nOutputPlane, 1, 1) - self._tempAlphasExpanded:expand(self._tempAlphas, batchSize, - self.nOutputPlane, height, width) - self._tempAlphasSamples:resizeAs(self._tempAlphasExpanded) - :copy(self._tempAlphasExpanded) - self.output:cmul(self._tempAlphasSamples) - else - local height = self.output:size(2) - local width = self.output:size(3) - - self._tempAlphas = self.alphas:view(self.nOutputPlane, 1, 1) - self._tempAlphasExpanded:expand(self._tempAlphas, self.nOutputPlane, - height, width) - self._tempAlphasSamples:resizeAs(self._tempAlphasExpanded) - :copy(self._tempAlphasExpanded) - self.output:cmul(self._tempAlphasSamples) - end - - -- In evaluate mode. - if not self.train then self.weight:copy(self.tempWeight) end - - return self.output -end - -function SpatialBinaryConvolution:updateGradInput(input, gradOutput) - self.gradInput = parent.updateGradInput(self, input, gradOutput) - - -- Scale gradInput by gradAlphas - self._tempGradAlphas = self._temp or self.gradInput.new() - self._tempGradAlphasExpanded = self._temp or self.gradInput.new() - self._tempGradAlphasSamples = self._temp or self.gradInput.new() - if self.gradInput:nDimension() == 4 then - local batchSize = self.gradInput:size(1) - local height = self.gradInput:size(3) - local width = self.gradInput:size(4) - - self._tempGradAlphas = self.gradInputAlphas:view(1, self.nInputPlane, - 1, 1) - self._tempGradAlphasExpanded:expand(self._tempGradAlphas, - batchSize, self.nInputPlane, - height, width) - self._tempGradAlphasSamples:resizeAs(self._tempGradAlphasExpanded) - :copy(self._tempGradAlphasExpanded) - - self.gradInput:cmul(self._tempGradAlphasSamples) - else - local height = self.gradInput:size(2) - local width = self.gradInput:size(3) - - self._tempGradAlphas = self.gradInputAlphas:view(self.nInputPlane, - 1, 1) - self._tempGradAlphasExpanded:expand(self._tempGradAlphas, - self.nInputPlane, - height, width) - self._tempGradAlphasSamples:resizeAs(self._tempGradAlphasExpanded) - :copy(self._tempGradAlphasExpanded) - - self.gradInput:cmul(self._tempGradAlphasSamples) - end - return self.gradInput -end - -function SpatialBinaryConvolution:accGradParameters(input, gradOutput, scale) - - parent.accGradParameters(self, input, gradOutput, scale) - - --[[ - Copy back floating point weights for weight update. - This could be done individually after forward and backward, but to avoid - additional copy is done at the end of backward. 
- --]] - - self.weight:copy(self.tempWeight) -end - -function SpatialBinaryConvolution:type(type, tensorCache) - self.tempWeight = nil - self.alphas = nil - self.gradInputAlphas = nil - self.wmask = nil - - self._tempAlphas = nil - self._tempAlphasExpanded = nil - self._tempAlphasSamples = nil - - self._tempGradAlphas = nil - self._tempGradAlphasExpanded = nil - self._tempGradAlphasSamples = nil - - parent.type(self, type, tensorCache) -end - -function SpatialBinaryConvolution:__tostring__() - return "Binary Convolution: "..parent.__tostring__(self) -end diff --git a/SpatialBinaryLogisticRegression.lua b/SpatialBinaryLogisticRegression.lua deleted file mode 100644 index 85fba99..0000000 --- a/SpatialBinaryLogisticRegression.lua +++ /dev/null @@ -1,80 +0,0 @@ ------------------------------------------------------------------------- ---[[ SpatialBinaryLogisticRegression ]]-- --- Takes an image of size batchSize x nChannel x width x height as input. --- Computes Binary Logistic Regression Cost. --- Useful for 2 class pixel classification. ------------------------------------------------------------------------- - -local SpatialBinaryLogisticRegression, parent = torch.class('nn.SpatialBinaryLogisticRegression', 'nn.Criterion') - -function SpatialBinaryLogisticRegression:__init() - parent.__init(self) - self.sizeAverage = true -end - -function SpatialBinaryLogisticRegression:updateOutput(input, target) - local inputDim = input:nDimension() - local targetDim = target:nDimension() - - -- Check dimensions of input and target - assert(inputDim == targetDim, "nDimension of input and target don't match.") - assert(inputDim == 4 or inputDim == 3, "Expecting image or batch on images") - - for i=1,inputDim do - assert(input:size(i) == target:size(i), - "Input and target dimensions don't match.") - end - - -- Check batch or single image - if inputDim == 4 then - self._isBatch = true - assert(input:size(2) == 1, "No. of channels should be 1.") - self._k = input:size(1) - self._h = input:size(3) - self._w = input:size(4) - else - self._isBatch = false - assert(input:size(1) == 1, "No. 
of channels should be 1.") - self._k = 1 - self._h = input:size(2) - self._w = input:size(3) - end - - self._baseExponents = self._baseExponents or input.new() - self._coeff = self._coeff or input.new() - self._logCoeff = self._logCoeff or input.new() - - --Compute exponent = -target*input - self._baseExponents:resize(input:size()):copy(input) - self._baseExponents:cmul(target) - self._baseExponents:mul(-1) - -- Compute exp(exponent) - self._baseExponents:exp() - - self._coeff:resize(input:size()):copy(self._baseExponents) - self._coeff:add(1) - - self._logCoeff:resize(input:size()):copy(self._coeff) - self._logCoeff:log() - - if self.sizeAverage then - return self._logCoeff:sum()/(2 * self._k * self._h * self._w) - else - return self._logCoeff:sum()/(2 * self._h * self._w) - end -end - -function SpatialBinaryLogisticRegression:updateGradInput(input, target) - self.gradInput = self.gradInput or input.new() - local gradInput = self.gradInput - gradInput:resize(target:size()):copy(target) - gradInput:mul(-1) - gradInput:cmul(self._baseExponents) - gradInput:cdiv(self._coeff) - if self.sizeAverage then - gradInput:div(2 * self._k * self._h * self._w) - else - gradInput:div(2 * self._h * self._w) - end - return gradInput -end diff --git a/SpatialConvolution.lua b/SpatialConvolution.lua deleted file mode 100644 index a3144eb..0000000 --- a/SpatialConvolution.lua +++ /dev/null @@ -1,9 +0,0 @@ -local SpatialConvolution, parent = nn.SpatialConvolution, nn.Module -local _ = require 'moses' - -local empty = _.clone(parent.dpnn_mediumEmpty) -table.insert(empty, 'finput') -table.insert(empty, 'fgradinput') -table.insert(empty, '_input') -table.insert(empty, '_gradOutput') -SpatialConvolution.dpnn_mediumEmpty = empty diff --git a/SpatialConvolutionMM.lua b/SpatialConvolutionMM.lua deleted file mode 100644 index 4b50658..0000000 --- a/SpatialConvolutionMM.lua +++ /dev/null @@ -1,3 +0,0 @@ -local SpatialConvolutionMM, parent = nn.SpatialConvolutionMM, nn.Module - -SpatialConvolutionMM.dpnn_mediumEmpty = nn.SpatialConvolution.dpnn_mediumEmpty diff --git a/SpatialFeatNormalization.lua b/SpatialFeatNormalization.lua deleted file mode 100644 index 1aca767..0000000 --- a/SpatialFeatNormalization.lua +++ /dev/null @@ -1,73 +0,0 @@ ---[[ - Color normalization (mean zeroing and dividing by standard deviation). - Basic preprocessing step widely used in training classifier with images. ---]] - -local SpatialFeatNormalization, Parent = torch.class('nn.SpatialFeatNormalization', 'nn.Module') - -function SpatialFeatNormalization:__init(mean, std) - Parent.__init(self) - if mean:dim() ~= 1 then - error(' Mean/Std should be 1D.') - end - self.mean = torch.Tensor() - self.mean:resizeAs(mean):copy(mean) - self.std = torch.Tensor() - self.std:resizeAs(mean) - if std ~= nil then self.std:copy(std) else self.std:fill(1) end - self.noOfFeats = mean:size(1) -end - -function SpatialFeatNormalization:updateOutput(input) - self.output:resizeAs(input):copy(input) - if input:dim() == 4 then - -- Batch of image/s - if input:size(2) ~= self.noOfFeats then - error(' No. of Feats dont match.') - else - for i=1, self.noOfFeats do - self.output[{{}, i, {}, {}}]:add(-self.mean[i]) - self.output[{{}, i, {}, {}}]:div(self.std[i]) - end - end - elseif input:dim() == 3 then - -- single image - if input:size(1) ~= self.noOfFeats then - error(' No. 
of Feats dont match.') - else - for i=1, self.noOfFeats do - self.output[{i, {}, {}}]:add(-self.mean[i]) - self.output[{i, {}, {}}]:div(self.std[i]) - end - end - else - error(' invalid input dims.') - end - return self.output -end - -function SpatialFeatNormalization:updateGradInput(input, gradOutput) - self.gradInput:resizeAs(gradOutput):copy(gradOutput) - if self.gradInput:dim() == 4 then - -- Batch of image/s - if self.gradInput:size(2) ~= self.noOfFeats then - error(' No. of Feats dont match.') - else - for i=1, self.noOfFeats do - self.gradInput[{{}, i, {}, {}}]:div(self.std[i]) - end - end - elseif self.gradInput:dim() == 3 then - -- single image - if self.gradInput:size(1) ~= self.noOfFeats then - error(' No. of Feats dont match.') - else - for i=1, self.noOfFeats do - self.gradInput[{i, {}, {}}]:div(self.std[i]) - end - end - else - error(' invalid self.gradInput dims.') - end - return self.gradInput -end diff --git a/SpatialMaxPooling.lua b/SpatialMaxPooling.lua deleted file mode 100644 index 1d6669c..0000000 --- a/SpatialMaxPooling.lua +++ /dev/null @@ -1,6 +0,0 @@ -local SpatialMaxPooling, parent = nn.SpatialMaxPooling, nn.Module -local _ = require 'moses' - -local empty = _.clone(parent.dpnn_mediumEmpty) -table.insert(empty, 'indices') -SpatialMaxPooling.dpnn_mediumEmpty = empty diff --git a/SpatialRegionDropout.lua b/SpatialRegionDropout.lua deleted file mode 100644 index 78c4a39..0000000 --- a/SpatialRegionDropout.lua +++ /dev/null @@ -1,80 +0,0 @@ ---[[ - Dropout edges rows or columns to simulate imperfect bounding boxes. ---]] - -local SpatialRegionDropout, Parent = torch.class('nn.SpatialRegionDropout', 'nn.Module') - -function SpatialRegionDropout:__init(p) - Parent.__init(self) - self.p = p or 0.2 -- ratio of total number of rows or cols - self.train = true - self.noise = torch.Tensor() - if self.p >= 1 or self.p < 0 then - error(' illegal percentage, must be 0 <= p < 1') - end -end - -function SpatialRegionDropout:setp(p) - self.p = p -end - --- Region Types --- 1: Dropout p ratio of top rows --- 2: Dropout p ratio of bottom rows --- 3: Dropout p ratio of leftmost cols --- 4: Dropout p ratio of rightmost cols -function SpatialRegionDropout:updateOutput(input) - self.output:resizeAs(input):copy(input) - if self.train then - self.noise:resizeAs(input):fill(1) - self.regionType = torch.random(4) - if input:dim() == 4 then - local height = input:size(3) - local width = input:size(4) - if self.regionType == 1 then - self.noise[{{}, {}, {1, math.floor(height*self.p)}}]:fill(0) - elseif self.regionType == 2 then - self.noise[{{}, {}, - {height-math.floor(height*self.p)+1, height}}]:fill(0) - elseif self.regionType == 3 then - self.noise[{{}, {}, {}, {1, math.floor(width*self.p)}}]:fill(0) - elseif self.regionType == 4 then - self.noise[{{}, {}, {}, - {width-math.floor(width*self.p)+1, width}}]:fill(0) - end - elseif input:dim() == 3 then - local height = input:size(2) - local width = input:size(3) - if self.regionType == 1 then - self.noise[{{}, {1, math.floor(height*self.p)}}]:fill(0) - elseif self.regionType == 2 then - self.noise[{{}, - {height-math.floor(height*self.p)+1, height}}]:fill(0) - elseif self.regionType == 3 then - self.noise[{{}, {}, {1, math.floor(width*self.p)}}]:fill(0) - elseif self.regionType == 4 then - self.noise[{{}, {}, - {width-math.floor(width*self.p)+1, width}}]:fill(0) - end - else - error('Input must be 4D (nbatch, nfeat, h, w) or 3D (nfeat, h, w)') - end - self.noise:div(1-self.p) - self.output:cmul(self.noise) - end - return self.output -end - 
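--- note: updateOutput divides the kept mask by (1-p) (inverted dropout), so the
--- expected magnitude of the output matches evaluation mode; updateGradInput
--- below reuses the same rescaled mask on the gradients.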
-function SpatialRegionDropout:updateGradInput(input, gradOutput)
-   if self.train then
-      self.gradInput:resizeAs(gradOutput):copy(gradOutput)
-      self.gradInput:cmul(self.noise)
-   else
-      error('Backpropagation is only defined for training.')
-   end
-   return self.gradInput
-end
-
-function SpatialRegionDropout:__tostring__()
-   return string.format('%s p: %f', torch.type(self), self.p)
-end
diff --git a/SpatialUniformCrop.lua b/SpatialUniformCrop.lua
deleted file mode 100644
index ba81119..0000000
--- a/SpatialUniformCrop.lua
+++ /dev/null
@@ -1,121 +0,0 @@
-local SpatialUniformCrop, parent = torch.class("nn.SpatialUniformCrop", "nn.Module")
-
-function SpatialUniformCrop:__init(oheight, owidth, scale)
-   nn.require('nnx')
-   parent.__init(self)
-   self.scale = scale or nil
-   if self.scale ~= nil then
-      assert(torch.type(scale)=='table')
-      self.scaler = nn.SpatialReSampling{owidth=owidth, oheight=oheight}
-   end
-   self.oheight = oheight
-   self.owidth = owidth or oheight
-end
-
-function SpatialUniformCrop:updateOutput(input)
-   nn.require('nnx')
-   input = self:toBatch(input, 3)
-
-   self.output:resize(input:size(1), input:size(2), self.oheight, self.owidth)
-   self.coord = self.coord or torch.IntTensor()
-   self.coord:resize(input:size(1), 2)
-
-   if self.scale ~= nil then
-      self.scales = self.scales or torch.FloatTensor()
-      self.scales:resize(input:size(1))
-   end
-
-   local iH, iW = input:size(3), input:size(4)
-   if self.train ~= false then
-      if self.scale ~= nil then
-         for i=1,input:size(1) do
-            -- do random crop
-            local s = torch.uniform(self.scale['min'] or self.scale[1], self.scale['max'] or self.scale[2])
-            local soheight = math.ceil(s*self.oheight)
-            local sowidth = math.ceil(s*self.owidth)
-
-            local h = math.ceil(torch.uniform(1e-2, iH-soheight))
-            local w = math.ceil(torch.uniform(1e-2, iW-sowidth))
-
-            local ch = math.ceil(iH/2 - (iH-soheight)/2 + h)
-            local cw = math.ceil(iW/2 - (iW-sowidth)/2 + w)
-
-            local h1 = ch - math.ceil(soheight/2)
-            local w1 = cw - math.ceil(sowidth/2)
-            if h1 < 1 then h1 = 1 end
-            if w1 < 1 then w1 = 1 end
-
-            local crop = input[i]:narrow(2, h1, soheight):narrow(3, w1, sowidth)
-
-            self.output[i]:copy(self.scaler:forward(crop))
-            -- save crop coordinates and scale for backward
-            self.scales[i] = s
-            self.coord[{i,1}] = h
-            self.coord[{i,2}] = w
-         end
-      else
-         for i=1,input:size(1) do
-            -- do random crop
-            local h1 = math.ceil(torch.uniform(1e-2, iH-self.oheight))
-            local w1 = math.ceil(torch.uniform(1e-2, iW-self.owidth))
-            local crop = input[i]:narrow(2,h1,self.oheight):narrow(3,w1,self.owidth)
-            self.output[i]:copy(crop)
-            -- save crop coordinates for backward
-            self.coord[{i,1}] = h1
-            self.coord[{i,2}] = w1
-         end
-      end
-   else
-      -- use center crop
-      local h1 = math.ceil((iH-self.oheight)/2)
-      local w1 = math.ceil((iW-self.owidth)/2)
-      local crop = input:narrow(3,h1,self.oheight):narrow(4,w1,self.owidth)
-      self.output:copy(crop)
-   end
-
-   self.output = self:fromBatch(self.output, 1)
-   return self.output
-end
-
-function SpatialUniformCrop:updateGradInput(input, gradOutput)
-   input = self:toBatch(input, 3)
-   gradOutput = self:toBatch(gradOutput, 3)
-
-   self.gradInput:resizeAs(input):zero()
-   if self.scale ~= nil then
-      local iH, iW = input:size(3), input:size(4)
-      for i=1,input:size(1) do
-         local s = self.scales[i]
-         local soheight = math.ceil(s*self.oheight)
-         local sowidth = math.ceil(s*self.owidth)
-
-         local h, w = self.coord[{i,1}], self.coord[{i,2}]
-
-         local ch = math.ceil(iH/2 - (iH-soheight)/2 + h)
-         local cw = math.ceil(iW/2 - (iW-sowidth)/2 + w)
-
-         local h1 = ch - math.ceil(soheight/2)
-         local w1 = cw - math.ceil(sowidth/2)
-         if h1 < 1 then h1 = 1 end
-         if w1 < 1 then w1 = 1 end
-
-         local crop = input[i]:narrow(2, h1, soheight):narrow(3, w1, sowidth)
-         local samplerGradInput = self.scaler:updateGradInput(crop, gradOutput[i])
-
-         self.gradInput[i]:narrow(2, h1, soheight):narrow(3, w1, sowidth):copy(samplerGradInput)
-      end
-   else
-      for i=1,input:size(1) do
-         local h1, w1 = self.coord[{i,1}], self.coord[{i,2}]
-         self.gradInput[i]:narrow(2,h1,self.oheight):narrow(3,w1,self.owidth):copy(gradOutput[i])
-      end
-   end
-
-   self.gradInput = self:fromBatch(self.gradInput, 1)
-   return self.gradInput
-end
-
-function SpatialUniformCrop:type(type, cache)
-   self.coord = nil
-   return parent.type(self, type, cache)
-end
diff --git a/WhiteNoise.lua b/WhiteNoise.lua
deleted file mode 100644
index 518e749..0000000
--- a/WhiteNoise.lua
+++ /dev/null
@@ -1,38 +0,0 @@
-local WhiteNoise, Parent = torch.class('nn.WhiteNoise', 'nn.Module')
-
-function WhiteNoise:__init(mean, std)
-   Parent.__init(self)
-   -- the default std corresponds to 50% of the MNIST training data std.
-   self.mean = mean or 0
-   self.std = std or 0.1
-   self.noise = torch.Tensor()
-end
-
-function WhiteNoise:updateOutput(input)
-   self.output:resizeAs(input):copy(input)
-   if self.train ~= false then
-      self.noise:resizeAs(input)
-      self.noise:normal(self.mean, self.std)
-      self.output:add(self.noise)
-   else
-      if self.mean ~= 0 then
-         self.output:add(self.mean)
-      end
-   end
-   return self.output
-end
-
-function WhiteNoise:updateGradInput(input, gradOutput)
-   if self.train ~= false then
-      -- Simply return the gradients.
-      self.gradInput:resizeAs(gradOutput):copy(gradOutput)
-   else
-      error('backprop only defined while training')
-   end
-   return self.gradInput
-end
-
-function WhiteNoise:__tostring__()
-   return string.format('%s mean: %f, std: %f',
-      torch.type(self), self.mean, self.std)
-end
diff --git a/ZeroGrad.lua b/ZeroGrad.lua
deleted file mode 100644
index 24286a4..0000000
--- a/ZeroGrad.lua
+++ /dev/null
@@ -1,34 +0,0 @@
-local ZeroGrad, parent
-if nn.ZeroGrad then -- prevent name conflicts with nnx
-   ZeroGrad, parent = nn.ZeroGrad, nn.Module
-else
-   ZeroGrad, parent = torch.class('nn.ZeroGrad', 'nn.Module')
-end
-
-local function recursiveZero(t1,t2)
-   if torch.type(t2) == 'table' then
-      t1 = (torch.type(t1) == 'table') and t1 or {t1}
-      for key,_ in pairs(t2) do
-         t1[key], t2[key] = recursiveZero(t1[key], t2[key])
-      end
-   elseif torch.isTensor(t2) then
-      t1 = torch.isTensor(t1) and t1 or t2.new()
-      t1:resizeAs(t2):zero()
-   else
-      error("expecting nested tensors or tables. Got "..
-         torch.type(t1).." and "..torch.type(t2).." instead")
-   end
-   return t1, t2
-end
-
-function ZeroGrad:updateOutput(input)
-   self.output:set(input)
-   return self.output
-end
-
--- the gradient is simply zeroed.
--- useful when you don't want to backpropagate through certain paths.
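-- For instance, a minimal usage sketch (hypothetical module names and sizes,
-- for illustration only): placing nn.ZeroGrad() after a pretrained encoder
-- lets the layers above it train while the encoder receives zero gradient,
-- so its parameters are left untouched by gradient descent:
--    model = nn.Sequential()
--       :add(pretrainedEncoder) -- assumed given
--       :add(nn.ZeroGrad())
--       :add(nn.Linear(512, 10))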
-function ZeroGrad:updateGradInput(input, gradOutput) - self.gradInput = recursiveZero(self.gradInput, gradOutput) - return self.gradInput -end diff --git a/ZipTable.lua b/ZipTable.lua deleted file mode 100644 index 55b261d..0000000 --- a/ZipTable.lua +++ /dev/null @@ -1,34 +0,0 @@ -local ZipTable, parent = torch.class('nn.ZipTable', 'nn.Container') - --- input : { {a1,a2}, {b1,b2}, {c1,c2} } --- output : { {a1,b1,c1}, {a2,b2,c2} } -function ZipTable:__init() - parent.__init(self) - self.output = {} - self.gradInput = {} -end - -function ZipTable:updateOutput(inputTable) - self.output = {} - for i,inTable in ipairs(inputTable) do - for j,input in ipairs(inTable) do - local output = self.output[j] or {} - output[i] = input - self.output[j] = output - end - end - return self.output -end - -function ZipTable:updateGradInput(inputTable, gradOutputTable) - self.gradInput = {} - for i,gradOutTable in ipairs(gradOutputTable) do - for j,gradOutput in ipairs(gradOutTable) do - local gradInput = self.gradInput[j] or {} - gradInput[i] = gradOutput - self.gradInput[j] = gradInput - end - end - return self.gradInput -end - diff --git a/ZipTableOneToMany.lua b/ZipTableOneToMany.lua deleted file mode 100644 index fe8b385..0000000 --- a/ZipTableOneToMany.lua +++ /dev/null @@ -1,37 +0,0 @@ -local ZipTableOneToMany, parent = torch.class('nn.ZipTableOneToMany', 'nn.Container') - --- based on ZipTable in dpnn - --- input : { v, {a, b, c} } --- output : { {v,a}, {v,b}, {v,c} } -function ZipTableOneToMany:__init() - parent.__init(self) - self.output = {} - self.gradInput = {} - -- make buffer to update during forward/backward - self.gradInputEl = torch.Tensor() -end - -function ZipTableOneToMany:updateOutput(input) - assert(#input == 2, "input must be table of element and table") - local inputEl, inputTable = input[1], input[2] - self.output = {} - for i,v in ipairs(inputTable) do - self.output[i] = {inputEl, v} - end - return self.output -end - -function ZipTableOneToMany:updateGradInput(input, gradOutput) - assert(#input == 2, "input must be table of element and table") - local inputEl, inputTable = input[1], input[2] - self.gradInputEl:resizeAs(inputEl):zero() - local gradInputTable = {} - for i,gradV in ipairs(gradOutput) do - self.gradInputEl:add(gradV[1]) - gradInputTable[i] = gradV[2] - end - self.gradInput = {self.gradInputEl, gradInputTable} - return self.gradInput -end - diff --git a/deprecated/FastLSTM.lua b/deprecated/FastLSTM.lua index 1995a34..18e1be9 100644 --- a/deprecated/FastLSTM.lua +++ b/deprecated/FastLSTM.lua @@ -60,14 +60,14 @@ function FastLSTM:buildModel() :add(nn.Dropout(self.p,false,false,true,self.mono)) :add(nn.Dropout(self.p,false,false,true,self.mono))) :add(nn.ParallelTable() - :add(nn.LinearNoBias(self.outputSize, self.outputSize)) - :add(nn.LinearNoBias(self.outputSize, self.outputSize)) - :add(nn.LinearNoBias(self.outputSize, self.outputSize)) - :add(nn.LinearNoBias(self.outputSize, self.outputSize))) + :add(nn.Linear(self.outputSize, self.outputSize):noBias()) + :add(nn.Linear(self.outputSize, self.outputSize):noBias()) + :add(nn.Linear(self.outputSize, self.outputSize):noBias()) + :add(nn.Linear(self.outputSize, self.outputSize):noBias())) :add(nn.JoinTable(2)) else self.i2g = nn.Linear(self.inputSize, 4*self.outputSize) - self.o2g = nn.LinearNoBias(self.outputSize, 4*self.outputSize) + self.o2g = nn.Linear(self.outputSize, 4*self.outputSize):noBias() end if self.usenngraph or self.bn then diff --git a/deprecated/GRU.lua b/deprecated/GRU.lua index 460b24d..318d2f5 100644 --- 
a/deprecated/GRU.lua
+++ b/deprecated/GRU.lua
@@ -54,12 +54,12 @@ function GRU:buildModel()
          :add(nn.Dropout(self.p,false,false,true,self.mono))
          :add(nn.Dropout(self.p,false,false,true,self.mono)))
       :add(nn.ParallelTable()
-         :add(nn.LinearNoBias(self.outputSize, self.outputSize))
-         :add(nn.LinearNoBias(self.outputSize, self.outputSize)))
+         :add(nn.Linear(self.outputSize, self.outputSize):noBias())
+         :add(nn.Linear(self.outputSize, self.outputSize):noBias()))
       :add(nn.JoinTable(2))
    else
       self.i2g = nn.Linear(self.inputSize, 2*self.outputSize)
-      self.o2g = nn.LinearNoBias(self.outputSize, 2*self.outputSize)
+      self.o2g = nn.Linear(self.outputSize, 2*self.outputSize):noBias()
    end
 
    local para = nn.ParallelTable():add(self.i2g):add(self.o2g)
@@ -97,7 +97,7 @@ function GRU:buildModel()
       t2:add(nn.Dropout(self.p,false,false,true,self.mono))
    end
    t1:add(nn.Linear(self.inputSize, self.outputSize))
-   t2:add(nn.LinearNoBias(self.outputSize, self.outputSize))
+   t2:add(nn.Linear(self.outputSize, self.outputSize):noBias())
    concat:add(t1):add(t2)
 
    hidden:add(concat):add(nn.CAddTable()):add(nn.Tanh())
@@ -132,7 +132,6 @@ function GRU:_updateOutput(input)
    -- output(t) = gru{input(t), output(t-1)}
    local output
    if self.train ~= false then
-      self:recycle()
       local stepmodule = self:getStepModule(self.step)
       -- the actual forward propagation
       output = stepmodule:updateOutput{input, prevOutput}
diff --git a/deprecated/LSTM.lua b/deprecated/LSTM.lua
index 5c4560c..13784b4 100644
--- a/deprecated/LSTM.lua
+++ b/deprecated/LSTM.lua
@@ -49,7 +49,7 @@ function LSTM:buildGate()
       :add(nn.Linear(self.inputSize, self.outputSize))
    local output2gate = nn.Sequential()
      :add(nn.Dropout(self.p,false,false,true,self.mono))
-     :add(nn.LinearNoBias(self.outputSize, self.outputSize))
+     :add(nn.Linear(self.outputSize, self.outputSize):noBias())
    local para = nn.ParallelTable()
    para:add(input2gate):add(output2gate)
    if self.cell2gate then
@@ -80,7 +80,7 @@ function LSTM:buildHidden()
      :add(nn.Linear(self.inputSize, self.outputSize))
    local output2hidden = nn.Sequential()
      :add(nn.Dropout(self.p,false,false,true,self.mono))
-     :add(nn.LinearNoBias(self.outputSize, self.outputSize))
+     :add(nn.Linear(self.outputSize, self.outputSize):noBias())
    local para = nn.ParallelTable()
    para:add(input2hidden):add(output2hidden)
    hidden:add(para)
@@ -190,7 +190,6 @@ function LSTM:updateOutput(input)
    -- output(t), cell(t) = lstm{input(t), output(t-1), cell(t-1)}
    local output, cell
    if self.train ~= false then
-      self:recycle()
      local stepmodule = self:getStepModule(self.step)
      -- the actual forward propagation
      output, cell = unpack(stepmodule:updateOutput{input, prevOutput, prevCell})
diff --git a/examples/README.md b/examples/README.md
index e68be30..294db77 100644
--- a/examples/README.md
+++ b/examples/README.md
@@ -30,4 +30,3 @@ These scripts showcase the fundamental principles of the package.
 * [rnn-benchmarks](https://github.com/glample/rnn-benchmarks) : benchmarks comparing Torch (using this library), Theano and TensorFlow.
 * [dataload](https://github.com/Element-Research/dataload) : a collection of torch dataset loaders;
 * A brief (1 hour) overview of Torch7, which includes some details about the __rnn__ package (at the end), is available via this [NVIDIA GTC Webinar video](http://on-demand.gputechconf.com/gtc/2015/webinar/torch7-applied-deep-learning-for-vision-natural-language.mp4).
In any case, this presentation gives a nice overview of Logistic Regression, Multi-Layer Perceptrons, Convolutional Neural Networks and Recurrent Neural Networks using Torch7; - * [Sagar Waghmare](https://github.com/sagarwaghmare69) wrote a nice [tutorial](tutorials/ladder.md) on how to use rnn with nngraph to reproduce the [Lateral Connections in Denoising Autoencoders Support Supervised Learning](http://arxiv.org/pdf/1504.08215.pdf). diff --git a/examples/multigpu-nce-rnnlm.lua b/examples/multigpu-nce-rnnlm.lua index dad3371..055b1f5 100644 --- a/examples/multigpu-nce-rnnlm.lua +++ b/examples/multigpu-nce-rnnlm.lua @@ -189,8 +189,7 @@ if not xplog then xplog.dataset = 'GoogleBillionWords' xplog.vocab = trainset.vocab -- will only serialize params - xplog.model = nn.Serial(lm) - xplog.model:mediumSerial() + xplog.model = lm:sharedClone() xplog.criterion = criterion xplog.targetmodule = targetmodule -- keep a log of NLL for each epoch diff --git a/examples/noise-contrastive-estimate.lua b/examples/noise-contrastive-estimate.lua index b425214..bfbb012 100644 --- a/examples/noise-contrastive-estimate.lua +++ b/examples/noise-contrastive-estimate.lua @@ -129,7 +129,8 @@ if not lm then :add(nn.ZipTable()) -- {{x1,x2,...}, {t1,t2,...}} -> {{x1,t1},{x2,t2},...} -- encapsulate stepmodule into a Sequencer - lm:add(nn.Sequencer(nn.MaskZero(ncemodule))) + local nce = nn.Sequencer(nn.MaskZero(ncemodule)) + lm:add(nce) -- remember previous state between batches lm:remember() @@ -184,8 +185,7 @@ if not xplog then xplog.dataset = 'GoogleBillionWords' xplog.vocab = trainset.vocab -- will only serialize params - xplog.model = nn.Serial(lm) - xplog.model:mediumSerial() + xplog.model = lm:sharedClone() xplog.criterion = criterion xplog.targetmodule = targetmodule -- keep a log of NLL for each epoch @@ -217,7 +217,7 @@ while opt.maxepoch <= 0 or epoch <= opt.maxepoch do inputs = {inputs, targets} -- zero-mask zeroMask = nn.utils.getZeroMaskSequence(inputs[1], zeroMask) - nn.utils.setZeroMask({lm, criterion}, zeroMask, opt.cuda) + nn.utils.setZeroMask({criterion, lm}, zeroMask, opt.cuda) -- forward local outputs = lm:forward(inputs) local err = criterion:forward(outputs, targets) @@ -278,7 +278,7 @@ while opt.maxepoch <= 0 or epoch <= opt.maxepoch do targets = targetmodule:forward(targets) -- zero-mask zeroMask = nn.utils.getZeroMaskSequence(inputs, zeroMask) - nn.utils.setZeroMask({lm, criterion}, zeroMask, opt.cuda) + nn.utils.setZeroMask({criterion, lm}, zeroMask, opt.cuda) -- forward local outputs = lm:forward{inputs, targets} local err = criterion:forward(outputs, targets) diff --git a/examples/recurrent-language-model.lua b/examples/recurrent-language-model.lua index 95e2745..a8fefe0 100644 --- a/examples/recurrent-language-model.lua +++ b/examples/recurrent-language-model.lua @@ -82,7 +82,6 @@ lm:add(lookup) -- input is seqlen x batchsize if opt.dropout > 0 then lm:add(nn.Dropout(opt.dropout)) end -lm:add(nn.SplitTable(1)) -- tensor to table of tensors -- rnn layers local stepmodule = nn.Sequential() -- applied at each time-step @@ -140,17 +139,10 @@ end --[[ loss function ]]-- -local crit = nn.ClassNLLCriterion() - -- target is also seqlen x batchsize. 
-local targetmodule = nn.SplitTable(1) -if opt.cuda then - targetmodule = nn.Sequential() - :add(nn.Convert()) - :add(targetmodule) -end - -local criterion = nn.SequencerCriterion(crit) +local targetmodule = opt.cuda and nn.Convert() or nn.Identity() +-- NLL is applied to each time-step +local criterion = nn.SequencerCriterion(nn.ClassNLLCriterion()) --[[ CUDA ]]-- @@ -160,6 +152,9 @@ if opt.cuda then targetmodule:cuda() end +-- make sure to call getParameters before sharedClone +local params, grad_params = lm:getParameters() + --[[ experiment log ]]-- -- is saved to file every time a new validation minima is found @@ -168,8 +163,7 @@ xplog.opt = opt -- save all hyper-parameters and such xplog.dataset = 'PennTreeBank' xplog.vocab = trainset.vocab -- will only serialize params -xplog.model = nn.Serial(lm) -xplog.model:mediumSerial() +xplog.model = lm:sharedClone() xplog.criterion = criterion xplog.targetmodule = targetmodule -- keep a log of NLL for each epoch @@ -179,8 +173,6 @@ xplog.valppl = {} xplog.minvalppl = 99999999 xplog.epoch = 0 -local params, grad_params = lm:getParameters() - local adamconfig = { beta1 = opt.adamconfig[1], beta2 = opt.adamconfig[2], diff --git a/examples/recurrent-visual-attention.lua b/examples/recurrent-visual-attention.lua index b9bff24..15f5bc5 100644 --- a/examples/recurrent-visual-attention.lua +++ b/examples/recurrent-visual-attention.lua @@ -181,8 +181,7 @@ end local xplog = {} xplog.opt = opt -- save all hyper-parameters and such -- will only serialize params -xplog.model = nn.Serial(agent) -xplog.model:mediumSerial() +xplog.model = agent:sharedClone() xplog.criterion = criterion xplog.targetmodule = targetmodule -- keep a log of NLL for each epoch diff --git a/init.lua b/init.lua index e14443a..dd222d1 100644 --- a/init.lua +++ b/init.lua @@ -33,56 +33,24 @@ require('rnn.Sequential') require('rnn.ParallelTable') require('rnn.LookupTable') require('rnn.Dropout') - --- extensions to existing criterions -require('rnn.Criterion') - --- decorator modules -require('rnn.Serial') - --- extensions to make serialization more efficient -require('rnn.SpatialMaxPooling') -require('rnn.SpatialConvolution') -require('rnn.SpatialConvolutionMM') -require('rnn.SpatialBatchNormalization') require('rnn.BatchNormalization') +-- extensions to existing nn.Criterion +require('rnn.Criterion') -- modules require('rnn.LookupTableMaskZero') require('rnn.MaskZero') -require('rnn.PrintSize') -require('rnn.Convert') -require('rnn.Constant') -require('rnn.Collapse') -require('rnn.ZipTable') -require('rnn.ZipTableOneToMany') -require('rnn.CAddTensorTable') require('rnn.ReverseSequence') -require('rnn.Dictionary') -require('rnn.Inception') -require('rnn.Clip') -require('rnn.SpatialUniformCrop') require('rnn.SpatialGlimpse') -require('rnn.WhiteNoise') require('rnn.ArgMax') require('rnn.CategoricalEntropy') require('rnn.TotalDropout') -require('rnn.Kmeans') -require('rnn.OneHot') -require('rnn.SpatialRegionDropout') -require('rnn.FireModule') -require('rnn.SpatialFeatNormalization') -require('rnn.ZeroGrad') -require('rnn.LinearNoBias') require('rnn.SAdd') require('rnn.CopyGrad') require('rnn.VariableLength') require('rnn.StepLSTM') require('rnn.StepGRU') -require('rnn.SpatialBinaryConvolution') -require('rnn.SimpleColorTransform') -require('rnn.PCAColorTransform') require('rnn.ReverseUnreverse') -- Noise Contrastive Estimation @@ -100,11 +68,6 @@ require('rnn.ReinforceCategorical') require('rnn.VRClassReward') require('rnn.BinaryClassReward') --- criterions 
-require('rnn.ModuleCriterion') -require('rnn.BinaryLogisticRegression') -require('rnn.SpatialBinaryLogisticRegression') - -- for testing: require('rnn.test') require('rnn.bigtest') diff --git a/scripts/evaluate-rnnlm.lua b/scripts/evaluate-rnnlm.lua index 5c8dba7..865ea2e 100644 --- a/scripts/evaluate-rnnlm.lua +++ b/scripts/evaluate-rnnlm.lua @@ -44,14 +44,14 @@ if opt.dumpcsv then local csvfile = opt.xplogpath:match('([^/]+)[.]t7$')..'.csv' paths.mkdir('learningcurves') csvpath = paths.concat('learningcurves', csvfile) - + local file = io.open(csvpath, 'w') file:write("epoch,trainerr,validerr\n") for i=1,#trainerr do file:write(string.format('%d,%f,%f\n', i, trainerr[i], validerr[i])) end file:close() - + print("CSV file saved to "..csvpath) os.exit() end @@ -134,7 +134,7 @@ if opt.nsample > 0 then end else local sumErr, count = 0, 0 - + for i, inputs, targets in testset:subiter(xplog.opt.seqlen or 100) do inputs:apply(function(x) if x > 0 then @@ -147,7 +147,7 @@ else local err = criterion:forward(outputs, targets) sumErr = sumErr + err end - + if count ~= testset:size() then local meanseqlen = testset:size()/(testset:size() - count) print("mean sequence length : "..meanseqlen) diff --git a/test/bigtest.lua b/test/bigtest.lua index 5facc30..72fd913 100644 --- a/test/bigtest.lua +++ b/test/bigtest.lua @@ -121,7 +121,7 @@ function rnnbigtest.LSTM_char_rnn() end -- evaluate the input sums at once for efficiency local i2h = nn.Linear(input_size_L, 4 * rnn_size)(x):annotate{name='i2h_'..L} - local h2h = nn.LinearNoBias(rnn_size, 4 * rnn_size)(prev_h):annotate{name='h2h_'..L} + local h2h = nn.Linear(rnn_size, 4 * rnn_size):noBias()(prev_h):annotate{name='h2h_'..L} local all_input_sums = nn.CAddTable()({i2h, h2h}) local reshaped = nn.Reshape(4, rnn_size)(all_input_sums) @@ -480,7 +480,7 @@ function rnnbigtest.Reinforce() mlp:add(nn.Linear(inputs:size(2),hiddenSize)) mlp:add(nn.Tanh()) mlp:add(nn.ReinforceNormal(stdev)) - mlp:add(nn.Clip(-1,1)) + mlp:add(nn.Clamp(-1,1)) mlp:add(nn.Linear(hiddenSize, inputs:size(2))) mlp:add(nn.SoftMax()) @@ -548,82 +548,6 @@ function rnnbigtest.Reinforce() train(concat, cost, N, 'ReinforceCategorical') end --- Unit Test Kmeans layer -function rnnbigtest.Kmeans() - local k = 10 - local dim = 5 - local batchSize = 1000 - local input = torch.Tensor(batchSize, dim) - for i=1, batchSize do - input[i]:fill(torch.random(1, k)) - end - - local verbose = false - - local attempts = 10 - local iter = 100 - local bestLoss = 100000000 - local bestKm = nil - local tempLoss = 0 - local learningRate = 1 - - local initTypes = {'random', 'kmeans++'} - local useCudas = {false} - if pcall(function() require 'cunn' end) then - useCudas[2] = true - end - for _, initType in pairs(initTypes) do - for _, useCuda in pairs(useCudas) do - - if useCuda then - input = input:cuda() - else - input = input:double() - end - - sys.tic() - for j=1, attempts do - local km = nn.Kmeans(k, dim) - if useCuda then km:cuda() end - - if initType == 'kmeans++' then - km:initKmeansPlus(input) - else - km:initRandom(input) - end - - for i=1, iter do - km:zeroGradParameters() - - km:forward(input) - km:backward(input, gradOutput) - - -- Gradient descent - km.weight:add(-learningRate, km.gradWeight) - tempLoss = km.loss - end - if verbose then print("Attempt Loss " .. j ..": " .. 
tempLoss) end - if tempLoss < bestLoss then - bestLoss = tempLoss - end - if (initType == 'kmeans++' and bestLoss < 0.00001) or (initType == 'random' and bestLoss < 500) then - break - end - end - if verbose then - print("InitType: " .. initType .. " useCuda: " .. tostring(useCuda)) - print("Best Loss: " .. bestLoss) - print("Total time: " .. sys.toc()) - end - if initType == 'kmeans++' then - mytester:assert(bestLoss < 0.00001, "Kmeans++ error ("..(useCuda and 'cuda' or 'double')..")") - else - mytester:assert(bestLoss < 500, "Kmeans error ("..(useCuda and 'cuda' or 'double')..")") - end - end - end -end - function rnnbigtest.NCE_benchmark() pcall(function() require 'cunn' end) -- make sure to import cunn before initializing large tensors, else weird segfault... diff --git a/test/test.lua b/test/test.lua index 734b1e3..660883e 100644 --- a/test/test.lua +++ b/test/test.lua @@ -148,7 +148,6 @@ function rnntest.RecurrentAttention() glimpseSensor:add(nn.ReLU()) local glimpse = nn.Sequential() - --glimpse:add(nn.PrintSize("preglimpse")) glimpse:add(nn.ConcatTable():add(locationSensor):add(glimpseSensor)) glimpse:add(nn.JoinTable(1,1)) glimpse:add(nn.Linear(opt.glimpseHiddenSize+opt.locatorHiddenSize, opt.imageHiddenSize)) @@ -2311,8 +2310,8 @@ function rnntest.issue129() output = model:forward(input):clone() end - mytester:assertTensorEq(model1:get(1).running_mean, model:get(2).module.sharedClones[1].modules[1].running_mean, 0.000001) - mytester:assertTensorEq(model:get(2).module.sharedClones[1].modules[1].running_mean, model:get(2).module.modules[1].modules[1].running_mean, 0.0000001) + mytester:assertTensorEq(model1:get(1).running_mean, model:get(2).modules[1].sharedClones[1].modules[1].running_mean, 0.000001) + mytester:assertTensorEq(model:get(2).modules[1].sharedClones[1].modules[1].running_mean, model:get(2).modules[1].modules[1].modules[1].running_mean, 0.0000001) model:evaluate() local output2 = model:forward(input):clone() @@ -4794,188 +4793,6 @@ function rnntest.Module_getParameters() end end -function rnntest.Serial() - function test(mlp, name) - local input = torch.randn(4,3) - local gradOutput = torch.randn(4,7) - local mlp2 = mlp:clone():Serial() - - local output = mlp:forward(input):clone() - local gradInput = mlp:backward(input, gradOutput):clone() - - local output2 = mlp2:forward(input) - local gradInput2 = mlp2:backward(input, gradOutput) - - mytester:assertTensorEq(output, output2, 0.000001, name.." serial forward error") - mytester:assertTensorEq(gradInput, gradInput2, 0.00001, name.." serial backward error") - - mlp2:mediumSerial() - mlp2.tensortype = 'torch.FloatTensor' - local mlp3 = mlp2:clone() - - mytester:assert(mlp3.modules[1].output:nElement() == 0, name.." serial medium empty err") - mytester:assert(torch.type(mlp3.modules[1].output) == 'torch.FloatTensor', name.." serial medium type err") - - mlp:zeroGradParameters() - local output = mlp:forward(input) - local gradInput = mlp:backward(input, gradOutput) - - mlp3:zeroGradParameters() - local output2 = mlp3:forward(input:float()) - local gradInput2 = mlp3:backward(input:float(), gradOutput:float()) - - mytester:assertTensorEq(output:float(), output2, 0.000001, name.." serial forward error") - mytester:assertTensorEq(gradInput:float(), gradInput2, 0.00001, name.." 
serial backward error") - - local params, gradParams = mlp:parameters() - local params2, gradParams2 = mlp3:parameters() - mytester:assert(#params == #params2) - for i,param in ipairs(params) do - mytester:assertTensorEq(param:float(), params2[i], 0.00001, name.." params err "..i) - mytester:assertTensorEq(gradParams[i]:float(), gradParams2[i], 0.00001, name.." gradParams err "..i) - end - end - - local mlp = nn.Sequential():extend( - nn.Linear(3,4), - nn.Tanh(), - nn.Linear(4,5), - nn.Sequential():extend( - nn.Linear(5,6), - nn.Tanh(), - nn.Linear(6,7) - ) - ) - - test(mlp, 'mlp') - - local seq = nn.Sequential() - seq:add(nn.Repeater(nn.LinearRNN(3,2), 3)) - seq:add(nn.Sequencer(nn.Linear(2,7))) - seq:add(nn.SelectTable(-1)) - test(seq, 'rnn2') -end - -function rnntest.Convert() - -- batch mode - local c = nn.Convert('bchw', 'chwb') - local input = torch.randn(8,3,5,5) - local output = c:forward(input) - local output2 = input:transpose(1,4):transpose(1,3):transpose(1,2) - mytester:assertTensorEq(output, output2, 0.000001, "Convert fwd bchw->chwb") - local gradInput = c:backward(input, output) - mytester:assertTensorEq(gradInput, input, 0.000001, "Convert bwd bchw->chwb") - local c = nn.Convert('bchw', 'bf') - local output = c:forward(input) - local output2 = input:view(8,-1) - mytester:assertTensorEq(output, output2, 0.000001, "Convert fwd bchw->bf") - c:float() - local output = c:forward(input:float()) - mytester:assertTensorEq(output, output2:float(), 0.000001, "Convert:type()") - local output = c:forward(input) - mytester:assertTensorEq(output, output2:float(), 0.000001, "Convert:type() double->float") - -- non-batch mode - local c = nn.Convert('chw', 'hwc') - local input = torch.randn(3,5,5) - local output = c:forward(input) - local output2 = input:transpose(1,3):transpose(1,2) - mytester:assertTensorEq(output, output2, 0.000001, "Convert fwd chw->hwc non-batch") - local gradInput = c:backward(input, output) - mytester:assertTensorEq(gradInput, input, 0.000001, "Convert bwd chw->hwc non-batch") - local c = nn.Convert('chw', 'f') - local output = c:forward(input) - local output2 = input:view(-1) - mytester:assertTensorEq(output, output2, 0.000001, "Convert fwd chw->bf non-batch") - c:float() - local output = c:forward(input:float()) - mytester:assertTensorEq(output, output2:float(), 0.000001, "Convert:type() non-batch") - local output = c:forward(input) - mytester:assertTensorEq(output, output2:float(), 0.000001, "Convert:type() double->float non-batch") -end - -function rnntest.Collapse() - local c = nn.Collapse(3) - local input = torch.randn(8,3,4,5) - local output = c:forward(input) - mytester:assertTensorEq(input:view(8,-1), output, 0.000001, "Collapse:forward") - local gradInput = c:backward(input, output) - mytester:assertTensorEq(gradInput, input, 0.000001, "Collapse:backward") - mytester:assertTableEq(gradInput:size():totable(), input:size():totable(), 0.000001, "Collapse:backward size") - local input2 = input:transpose(1,4) - local output2 = c:forward(input2) - mytester:assertTensorEq(input2:contiguous():view(5,-1), output2, 0.000001, "Collapse:forward non-contiguous") - local gradInput2 = c:backward(input2, output2) - mytester:assertTensorEq(gradInput2, input2, 0.000001, "Collapse:backward non-contiguous") - mytester:assertTableEq(gradInput2:size():totable(), input2:size():totable(), 0.000001, "Collapse:backward size non-contiguous") -end - -function rnntest.ZipTable() - -- input : { {a1,a2}, {b1,b2}, {c1,c2} } - -- output : { {a1,b1,c1}, {a2,b2,c2} } - local z = 
nn.ZipTable() - local input = { - {torch.randn(3,4), torch.randn(3,4)}, - {torch.randn(3,4), torch.randn(3,4)}, - {torch.randn(3,4), torch.randn(3,4)} - } - local output = z:forward(input) - mytester:assert(#output == 2, "ZipTable #output") - mytester:assert(#(output[1]) == 3, "ZipTable #output[1]") - mytester:assertTensorEq(input[1][1], output[1][1], 0.000001, "ZipTable input11") - mytester:assertTensorEq(input[1][2], output[2][1], 0.000001, "ZipTable input12") - mytester:assertTensorEq(input[3][2], output[2][3], 0.000001, "ZipTable input32") - local gradInput = z:backward(input, output) - mytester:assert(#gradInput == 3, "ZipTable #gradInput") - mytester:assert(#(gradInput[1]) == 2, "ZipTable #gradInput[1]") - mytester:assertTensorEq(input[1][1], gradInput[1][1], 0.000001, "ZipTable gradInput11") - mytester:assertTensorEq(input[1][2], gradInput[1][2], 0.000001, "ZipTable gradInput12") - mytester:assertTensorEq(input[3][2], gradInput[3][2], 0.000001, "ZipTable gradInput32") -end - -function rnntest.ZipTableOneToMany() - -- input : { v, {a,b,c} } - -- output : { {v,a}, {v,b}, {v,c} } - local z = nn.ZipTableOneToMany() - local input = { torch.randn(3), { torch.randn(4), torch.rand(4), torch.rand(4) } } - local output = z:forward(input) - mytester:assert(#output == 3, "ZipTableOneToMany #output") - mytester:assert(#(output[1]) == 2, "ZipTableOneToMany #output[1]") - mytester:assert(#(output[2]) == 2, "ZipTableOneToMany #output[2]") - mytester:assert(#(output[3]) == 2, "ZipTableOneToMany #output[3]") - mytester:assertTensorEq(input[1], output[1][1], 0.000001, "ZipTableOneToMany input1 output11") - mytester:assertTensorEq(input[1], output[2][1], 0.000001, "ZipTableOneToMany input1 output21") - mytester:assertTensorEq(input[1], output[3][1], 0.000001, "ZipTableOneToMany input1 output31") - mytester:assertTensorEq(input[2][1], output[1][2], 0.000001, "ZipTableOneToMany input21") - mytester:assertTensorEq(input[2][2], output[2][2], 0.000001, "ZipTableOneToMany input22") - mytester:assertTensorEq(input[2][3], output[3][2], 0.000001, "ZipTableOneToMany input23") - local gradInput = z:backward(input, output) - mytester:assert(#gradInput == 2, "ZipTableOneToMany #gradInput") - mytester:assert(#(gradInput[2]) == 3, "ZipTableOneToMany #gradInput[2]") - mytester:assertTensorEq(input[2][1], gradInput[2][1], 0.000001, "ZipTableOneToMany gradInput21") - mytester:assertTensorEq(input[2][2], gradInput[2][2], 0.000001, "ZipTableOneToMany gradInput22") - mytester:assertTensorEq(input[2][3], gradInput[2][3], 0.000001, "ZipTableOneToMany gradInput32") - mytester:assertTensorEq(torch.mul(input[1], 3), gradInput[1], 0.000001, "ZipTableOneToMany gradInput21") -end - -function rnntest.CAddTensorTable() - -- input : { v, {a,b,c} } - -- output : { v+a, v+b, v+c } - local z = nn.CAddTensorTable() - local input = { torch.randn(3), { torch.randn(3), torch.rand(3), torch.rand(3) } } - local output = z:forward(input) - mytester:assert(#output == 3, "CAddTensorTable #output") - mytester:assertTensorEq(input[1]+input[2][1], output[1], 0.00001, "CAddTensorTable input21 output1") - mytester:assertTensorEq(input[1]+input[2][2], output[2], 0.00001, "CAddTensorTable input22 output2") - mytester:assertTensorEq(input[1]+input[2][3], output[3], 0.00001, "CAddTensorTable input23 output3") - local gradInput = z:backward(input, output) - mytester:assert(#gradInput == 2, "CAddTensorTable #gradInput") - mytester:assert(#(gradInput[2]) == 3, "CAddTensorTable #gradInput[2]") - mytester:assertTensorEq(output[1], gradInput[2][1], 0.000001, 
"CAddTensorTable gradInput21") - mytester:assertTensorEq(output[2], gradInput[2][2], 0.000001, "CAddTensorTable gradInput22") - mytester:assertTensorEq(output[3], gradInput[2][3], 0.000001, "CAddTensorTable gradInput23") - mytester:assertTensorEq(output[1]+output[2]+output[3], gradInput[1], 0.000001, "CAddTensorTable gradInput1") -end - function rnntest.ReverseSequence() -- test table @@ -5018,79 +4835,6 @@ function rnntest.ReverseSequence() end end -function rnntest.Inception() - local size = {8,3,32,32} - local outputSize = {8,16+24+8+12,32,32} - local input = torch.rand(unpack(size)) - local gradOutput = torch.randn(unpack(outputSize)) - local incep = nn.Inception{inputSize=3, outputSize={16,24}, reduceSize={14,16,8,12}} - for i, param in ipairs(incep:parameters()) do - mytester:assert(_.isFinite(param:sum()), 'inception init error') - end - local output = incep:forward(input) - mytester:assertTableEq(output:size():totable(), outputSize, 0.00001) - mytester:assert(_.isFinite(output:sum())) - incep:zeroGradParameters() - local gradInput = incep:backward(input, gradOutput) - mytester:assertTableEq(gradInput:size():totable(), size, 0.00001) - mytester:assert(_.isFinite(gradInput:sum())) - incep:updateParameters(0.1) - for i, param in ipairs(incep:parameters()) do - mytester:assert(_.isFinite(param:sum()), 'inception update error') - end - incep:maxParamNorm(1) - for i, param in ipairs(incep:parameters()) do - mytester:assert(_.isFinite(param:sum()), 'inception maxNorm error') - end -end - -function rnntest.SpatialUniformCrop() - if not pcall(function() require "nnx" end) then return end -- needs the nnx package - local input = torch.Tensor(8,3,10,10):copy(torch.range(1,8):view(8,1,1,1):expand(8,3,10,10)) - local gradOutput = torch.Tensor(8,3,4,4):copy(torch.range(1,8):view(8,1,1,1):expand(8,3,4,4)) - local sc = nn.SpatialUniformCrop(4) - local output, gradInput - for i=1,100 do - output = sc:forward(input) - gradInput = sc:backward(input, gradOutput) - end - for i=1,8 do - mytester:assert(math.abs(output[i]:mean() - i) < 0.0001, "SpatialUniformCrop output err "..i) - mytester:assert(math.abs(gradInput[i]:mean() - ((i*4*4)/(10*10))) < 0.0001, "SpatialUniformCrop gradInput err"..i) - end - - local input = torch.zeros(1, 1, 120, 120) - local temp = input[1]:narrow(2, 30, 60):narrow(3, 30, 60) - temp:fill(1) - local scale = {} - scale['min'] = 0.8 - scale['max'] = 1.2 - - local layer = nn.SpatialUniformCrop(100, 100, scale) - local o = layer:forward(input) - gradInput = layer:backward(input, o) - mytester:assert(gradInput:max() ~= nil, "SpatialUniformCrop scaling error.") -end - -function rnntest.ModuleCriterion() - local input = torch.randn(8,4) - local target = torch.randn(8,4) - local inputModule = nn.Tanh() - local criterion = nn.MSECriterion() - local mc = nn.ModuleCriterion(criterion, inputModule) - - local err = mc:forward(input, target) - local gradInput = mc:backward(input, target) - - local output = inputModule:forward(input) - local err2 = criterion:forward(output, target) - local gradOutput = criterion:backward(output, target) - local gradInput2 = inputModule:backward(input, gradOutput) - - mytester:assert(err == err2, "ModuleCriterion backward err") - mytester:assertTensorEq(gradInput, gradInput2, 0.000001, "ModuleCriterion backward err") -end - function rnntest.ReinforceNormal() local input = torch.randn(500,1000) -- means local gradOutput = torch.Tensor() -- will be ignored @@ -5290,35 +5034,6 @@ function rnntest.BinaryClassReward() mytester:assertTensorEq(rf2.reward, rf.reward, 
0.0000001) end -function rnntest.Clip() - local input = torch.randn(200,300) - local gradOutput = torch.randn(200,300) - local minval, maxval = -0.05, 0.1 - local clip = nn.Clip(minval, maxval) - local output = clip:forward(input) - local output2 = input:clone() - local mask = input.new() - mask:gt(input, maxval) - output2[mask:type("torch.ByteTensor")] = maxval - mask:lt(input, minval) - output2[mask:type("torch.ByteTensor")] = minval - mytester:assertTensorEq(output, output2, 0.00001, "Clip forward err") - local gradInput = clip:backward(input, gradOutput) - mytester:assertTensorEq(gradInput, gradOutput, 0.00001, "Clip backward err") -end - -function rnntest.Constant() - local input = torch.randn(20,3,7) - local gradOutput = torch.randn(20,30,6) - local value = torch.randn(30,6) - local const = nn.Constant(value:clone(), 2) - local output = const:forward(input) - local gradInput = const:backward(input, output) - local output2 = value:view(1,30,6):expand(20,30,6) - mytester:assertTensorEq(output2, output, 0.000001, "Constant forward err") - mytester:assertTensorEq(gradInput, input:zero(), 0.000001, "Constant backward err") -end - function rnntest.SpatialGlimpse() if not pcall(function() require "image" end) then return end -- needs the image package if not pcall(function() require "nnx" end) then return end -- needs the nnx package @@ -6023,417 +5738,6 @@ function rnntest.TotalDropout() mytester:assert(nOne < 10 and nOne > 1, "TotalDropout bernoulli error") end - --- Unit Test WhiteNoise -function rnntest.WhiteNoise() - local input = torch.zeros(3, 28, 28) - local addNoise = nn.WhiteNoise() - local output = addNoise:forward(input) - local meanValue = output:mean() - local stdValue = output:std() - mytester:assert(meanValue > -0.01 and meanValue < 0.01) - mytester:assert(stdValue < 0.15 and stdValue >= 0) - - -- Evaluate - addNoise:evaluate() - output = addNoise:forward(input) - meanValue = output:mean() - stdValue = output:std() - mytester:assert(meanValue == 0) - mytester:assert(stdValue == 0) - - -- backprop - addNoise:training() - local gradOutput = torch.rand(3, 28, 28) - local gradInput = addNoise:updateGradInput(input, gradOutput) - mytester:assertTensorEq(gradOutput, gradInput, 0.000001, "WhiteNoise backward err") -end - --- Unit Test SpatialBinaryLogisticRegression criterion -function rnntest.SpatialBinaryLogisticRegression() - local crit = nn.SpatialBinaryLogisticRegression() - local k = 32 - local h = 28 - local w = 28 - - -- Working with batch of images - local input = torch.zeros(k, 1, h, w) - local target = torch.zeros(k, 1, h, w) - local inputs = {1, 0, -1} - local targets = {1, 0, -1} - for _,i in pairs(inputs) do - for _,t in pairs(targets) do - - input:fill(i) - target:fill(t) - -- Check forward - local loss = crit:updateOutput(input, target) - local myLoss = math.log(1+math.exp(-1*i*t))/2 - mytester:assert( loss >= myLoss-precision and loss <= myLoss+precision, - "SpatialBinaryLogisticRegression cost incorrect.") - - -- Check backward - local gradInput = crit:updateGradInput(input, target) - local g1 = gradInput[1][1][1][1] - local gi = (1/(1+math.exp(-1*i*t)))*math.exp(-1*i*t)*(-1*t)/(2*k*h*w) - mytester:assert( g1 >= gi-precision and g1 <= gi+precision, - "SpatialBinaryLogisticRegression gradInput error.") - end - end - - -- Working with single image - k = 1 - local input = torch.zeros(1, h, w) - local target = torch.zeros(1, h, w) - local inputs = {1, 0, -1} - local targets = {1, 0, -1} - for _,i in pairs(inputs) do - for _,t in pairs(targets) do - - input:fill(i) - 
target:fill(t) - -- Check forward - local loss = crit:updateOutput(input, target) - local myLoss = math.log(1+math.exp(-1*i*t))/2 - mytester:assert( loss >= myLoss-precision and loss <= myLoss+precision, - "SpatialBinaryLogisticRegression cost incorrect.") - - -- Check backward - local gradInput = crit:updateGradInput(input, target) - local g1 = gradInput[1][1][1] - local gi = (1/(1+math.exp(-1*i*t)))*math.exp(-1*i*t)*(-1*t)/(2*k*h*w) - mytester:assert( g1 >= gi-precision and g1 <= gi+precision, - "SpatialBinaryLogisticRegression gradInput error.") - end - end -end - --- Unit Test BinaryLogisticRegression criterion -function rnntest.BinaryLogisticRegression() - local crit = nn.BinaryLogisticRegression() - local k = 32 - - -- Working with batch of images - local input = torch.zeros(k, 1) - local target = torch.zeros(k, 1) - local inputs = {1, 0, -1} - local targets = {1, 0, -1} - for _,i in pairs(inputs) do - for _,t in pairs(targets) do - - input:fill(i) - target:fill(t) - -- Check forward - local loss = crit:updateOutput(input, target) - local myLoss = math.log(1+math.exp(-1*i*t)) - mytester:assert( loss >= myLoss-precision and loss <= myLoss+precision, - "BinaryLogisticRegression cost incorrect.") - - -- Check backward - local gradInput = crit:updateGradInput(input, target) - local g1 = gradInput[1][1] - local gi = (1/(1+math.exp(-1*i*t)))*math.exp(-1*i*t)*(-1*t)/(k) - mytester:assert( g1 >= gi-precision and g1 <= gi+precision, - "BinaryLogisticRegression gradInput error.") - end - end - - -- Working nElements not matching. - local input = torch.zeros(1, k) - local target = torch.zeros(k, 1) - local inputs = {1, 0, -1} - local targets = {1, 0, -1} - for _,i in pairs(inputs) do - for _,t in pairs(targets) do - - input:fill(i) - target:fill(t) - -- Check forward - local loss = crit:updateOutput(input, target) - local myLoss = math.log(1+math.exp(-1*i*t)) - mytester:assert( loss >= myLoss-precision and loss <= myLoss+precision, - "BinaryLogisticRegression cost incorrect.") - - -- Check backward - local gradInput = crit:updateGradInput(input, target) - local g1 = gradInput[1][1] - local gi = (1/(1+math.exp(-1*i*t)))*math.exp(-1*i*t)*(-1*t)/(k) - mytester:assert( g1 >= gi-precision and g1 <= gi+precision, - "BinaryLogisticRegression gradInput error.") - end - end -end - --- Unit Test SpatialRegionDropout -function rnntest.SpatialRegionDropout() - local hasCuda = pcall(function() require 'cunn' end) - local useCudas = {false, hasCuda} - local p = 0.2 - local value = 2 - local model = nn.SpatialRegionDropout(p) - local input = torch.zeros(3, 100, 100):fill(value) - - for _, useCuda in pairs(useCudas) do - if useCuda then - model:cuda() - input = input:cuda() - end - local output = model:forward(input) - mytester:assert( output:mean() >= value-precision and - output:mean() <= value+precision, - "SpatialRegionDropout forward mean value incorrect.") - - local gradInput = model:backward(input, input) - mytester:assert( gradInput:mean() >= value-precision and - gradInput:mean() <= value+precision, - "SpatialRegionDropout backward mean value incorrect.") - end -end - --- Unit Test SpatialBinaryConvolution -function rnntest.SpatialBinaryConvolution() - local hasCuda = pcall(function() require 'cunn' end) - local useCudas = {false, hasCuda} - local nInputPlane = 3 - local nOutputPlane = 16 - local kW = 3 - local kH = 3 - local height = 224 - local width = 224 - - local model = nn.SpatialBinaryConvolution(nInputPlane, nOutputPlane, - kW, kH) - local input = torch.rand(nInputPlane, height, width) - - for 
_, useCuda in pairs(useCudas) do - if useCuda then - model:cuda() - input = input:cuda() - end - model:zeroGradParameters() - local output = model:forward(input) - local gradInput = model:backward(input, output) - end -end - --- Unit Test SimpleColorTransform -function rnntest.SimpleColorTransform() - local hasCuda = pcall(function() require 'cunn' end) - local useCudas = {false, hasCuda} - local value = 10 - local rangeValue = 2 - local precision = rangeValue*0.1 - local range = torch.zeros(3):fill(rangeValue) - local model = nn.SimpleColorTransform(3, range) - local input = torch.zeros(32, 3, 100, 100):fill(value) - - for _, useCuda in pairs(useCudas) do - if useCuda then - model:cuda() - input = input:cuda() - end - local output = model:forward(input) - mytester:assert(output:std() <= rangeValue+precision, - "SimpleColorTransform output value incorrect.") - local gradInput = model:backward(input, input) - mytester:assert(gradInput:sum() == input:sum(), - "SimpleColorTransform gradInput value incorrect.") - end -end - --- Unit Test PCAColorTransform -function rnntest.PCAColorTransform() - local hasCuda = pcall(function() require 'cunn' end) - local useCudas = {false, hasCuda} - local std = 0.1 - local value = 145 - local rangeValue = 1800 - local precision = rangeValue * 3 * std - local eigenVectors = torch.Tensor({{ 0.58786434, 0.56388045, 0.58004685}, - {-0.65427388, -0.0902746 , 0.75085031}, - {-0.47575331, 0.82090763, -0.31586303}}) - local eigenValues = torch.Tensor({4491.21, 722.85, 68.07}) - local model = nn.PCAColorTransform(3, eigenVectors, eigenValues, std) - local input = torch.zeros(32, 3, 100, 100):fill(value) - - for _, useCuda in pairs(useCudas) do - if useCuda then - model:cuda() - input = input:cuda() - end - local output = model:forward(input) - mytester:assert(output:std() <= rangeValue+precision, - "PCAColorTransform output value incorrect.") - local gradInput = model:backward(input, input) - mytester:assert(gradInput:sum() == input:sum(), - "PCAColorTransform gradInput value incorrect.") - end -end - --- Unit Test FireModule -function rnntest.FireModule() - local hasCuda = pcall(function() require 'cunn' end) - local useCudas = {false, hasCuda} - local activations = {'ReLU', 'Tanh', 'Sigmoid'} - local nInputPlane = 3 - local width = 32 - local height = 32 - local s1x1 = 16 - local e1x1 = 16 - local e3x3 = 16 - for _, activation in pairs(activations) do - for _, useCuda in pairs(useCudas) do - local model = nn.FireModule(nInputPlane, s1x1, e1x1, e3x3) - local input = torch.rand(1, nInputPlane, height, width) - if useCuda then - model:cuda() - input = input:cuda() - end - local output = model:forward(input) - local gradInput = model:backward(input, output) - end - end -end - --- Unit Test SpatialFeatNormalization -function rnntest.SpatialFeatNormalization() - local hasCuda = pcall(function() require 'cunn' end) - local useCudas = {false, hasCuda} - local input = torch.zeros(3, 32, 32):fill(2) - local mean = torch.zeros(3):fill(1) - local std = torch.zeros(3):fill(0.5) - local outputValue = 2 - local gradValue = 4 - for _, useCuda in pairs(useCudas) do - local model = nn.SpatialFeatNormalization(mean, std) - if useCuda then - model:cuda() - input = input:cuda() - end - local output = model:forward(input) - local gradInput = model:backward(input, output) - mytester:assert( output:mean() == outputValue, - "SpatialFeatNormalization forward mean value incorrect.") - mytester:assert( gradInput:mean() == gradValue, - "SpatialFeatNormalization backward mean value incorrect.") 
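-- (sanity check on the constants above, assuming the usual per-channel
-- (x - mean) / std normalization: with x = 2, mean = 1, std = 0.5 the forward
-- output is (2 - 1) / 0.5 = 2 = outputValue, and the backward pass rescales
-- gradOutput (= output) by 1/std again, giving 2 / 0.5 = 4 = gradValue)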
- end -end - -function rnntest.OneHot() - local nClass = 10 - - -- batch mode - local batchSize = 3 - local input = torch.LongTensor(batchSize):random(1, nClass) - local gradOutput = torch.randn(batchSize, nClass) - - local oh = nn.OneHot(nClass) - - local output = oh:forward(input) - local output2 = torch.Tensor(batchSize, nClass):zero() - local eye = torch.eye(nClass) - output2:index(eye, 1, input) - mytester:assertTensorEq(output, output2, 0.000001, "OneHot forward batch err") - mytester:assert(output:dim() == 2) - - -- non-batch mode (number input) - local num = 3 - local output3 = torch.zeros(nClass) - output3[num] = 1.0 - mytester:assertTensorEq(oh:forward(num), output3, 0.000001, "OneHot forward number err") - - local gradInput = oh:backward(input, gradOutput) - mytester:assertTensorEq(gradInput, input:double():zero(), 0.000001, "OneHot backward batch err") - - if pcall(function() require 'cunn' end) then - oh:cuda() - - -- test with long input - local output = oh:forward(input) - mytester:assert(torch.type(output) == 'torch.CudaTensor') - mytester:assertTensorEq(output:double(), output2, 0.000001, "OneHot forward batch long-cuda err") - - -- test with cuda input - local input = input:cuda() - gradOutput = gradOutput:cuda() - - local output = oh:forward(input) - mytester:assert(torch.type(output) == 'torch.CudaTensor') - mytester:assertTensorEq(output:double(), output2, 0.000001, "OneHot forward batch cuda err") - - local gradInput2 = oh:backward(input, gradOutput) - mytester:assertTensorEq(gradInput, gradInput2:double(), 0.000001, "OneHot backward batch err") - cutorch.synchronize() - - -- non-batch mode (number input) - mytester:assertTensorEq(oh:forward(num), output3:cuda(), 0.000001, "OneHot forward number err") - end - - -- multi-dimensional input - local inputSize = 2 - local input = torch.LongTensor(batchSize, inputSize):random(1, nClass) - local gradOutput = torch.randn(batchSize, inputSize, nClass) - - local oh = nn.OneHot(nClass, 2) - - local output = oh:forward(input) - local output2 = torch.Tensor(batchSize*inputSize, nClass):zero() - local eye = torch.eye(nClass) - output2:index(eye, 1, input:view(-1)) - output2:resize(batchSize, inputSize, nClass) - mytester:assertTensorEq(output, output2, 0.000001, "OneHot 2d forward batch err") - mytester:assert(output:dim() == 3) - - local gradInput = oh:backward(input, gradOutput) - mytester:assertTensorEq(gradInput, input:double():zero(), 0.000001, "OneHot 2d backward batch err") - - if pcall(function() require 'cunn' end) then - oh:cuda() - - -- test with long input - local output = oh:forward(input) - mytester:assert(torch.type(output) == 'torch.CudaTensor') - mytester:assertTensorEq(output:double(), output2, 0.000001, "OneHot 2d forward batch long-cuda err") - - -- test with cuda input - local input = input:cuda() - gradOutput = gradOutput:cuda() - - local output = oh:forward(input) - mytester:assert(torch.type(output) == 'torch.CudaTensor') - mytester:assertTensorEq(output:double(), output2, 0.000001, "OneHot 2d forward batch cuda err") - - local gradInput2 = oh:backward(input, gradOutput) - mytester:assertTensorEq(gradInput, gradInput2:double(), 0.000001, "OneHot 2d backward batch err") - - local benchmark = false - if benchmark then - local input = torch.FloatTensor(50, 50):random(1,65):cuda() - - local oh = nn.OneHot(65):cuda() - - oh:forward(input) - cutorch.synchronize() - local a = torch.Timer() - for i=1,10 do - oh:forward(input) - end - cutorch.synchronize() - local gputime = a:time().real - - oh:float() - input = 
input:float()
-         oh:forward(input)
-         a = torch.Timer()
-         for i=1,10 do
-            oh:forward(input)
-         end
-         local cputime = a:time().real
-         print("Onehot GPU vs CPU time", gputime, cputime)
-      end
-   end
-end
-
 function rnntest.NCE_main()
    local batchsize = 4
    local k = 10
diff --git a/test/test_firemodule.lua b/test/test_firemodule.lua
deleted file mode 100644
index 9e36edb..0000000
--- a/test/test_firemodule.lua
+++ /dev/null
@@ -1,40 +0,0 @@
-require 'nn'
-require 'rnn'
-require 'cunn'
-require 'cutorch'
-
---torch.setdefaulttensortype('torch.FloatTensor')
-
--- FireModule issue 45
---[[
-m = nn.Sequential()
-m:add(nn.FireModule(1,1,1,1))
-_, p = m:getParameters()
-print(p:sum())
-
-m = m:cuda()
-_, p = m:getParameters()
-print(p:sum())
-
-m:zeroGradParameters()
-print(p:sum())--]]
-
-
--- Testing FireModule
-input = torch.rand(1, 3, 6, 6)
-model = nn.FireModule(3, 1, 1, 1, 'Tanh')
-print(model)
-print(model.module)
-parameters, gradParameters = model:getParameters()
-output = model:forward(input)
-grads = torch.rand(output:size())
-gi = model:backward(input, grads)
-print(gi:mean(), gi:std(), gi:min(), gi:max())
-
-cutorch.setDevice(1)
-model:cuda()
-print(model.module.modules[1].finput)
-cinput = input:cuda()
-output = model:forward(cinput)
-gi = model:backward(input:cuda(), grads:cuda())
-print(gi:mean(), gi:std(), gi:min(), gi:max())
diff --git a/tutorials/ladder.md b/tutorials/ladder.md
deleted file mode 100644
index 591a21b..0000000
--- a/tutorials/ladder.md
+++ /dev/null
@@ -1,107 +0,0 @@
-# Lateral Connections in Denoising Autoencoders Support Supervised Learning
-
-In this tutorial we will see how to implement the ladder network described in [[1](http://arxiv.org/pdf/1504.08215.pdf)]. In this paper the authors show how unsupervised learning, using a denoising autoencoder with lateral connections, helps improve classification accuracy in supervised learning.
-
-To reproduce the results mentioned in the paper, please run the following command (the best test error we got was **`0.6%`**). To run this script you will need the following torch packages: [`nn`](https://github.com/torch/nn), [`nngraph`](https://github.com/torch/nngraph), [`dp`](https://github.com/nicholas-leonard/dp), [`dpnn`](https://github.com/Element-Research/dpnn), [`optim`](https://github.com/torch/optim), and [`cunn`](https://github.com/torch/cunn) & [`cutorch`](https://github.com/torch/cutorch) if using cuda (```--useCuda``` flag).
-```
-  th tutorials/ladder.lua --verbose --eta 500 --epochs 100 --learningRate 0.002 --linearDecay --endLearningRate 0 --startEpoch 50 --useCuda --deviceId 1 --noiseSigma 0.3 --useBatchNorm --batchSize 100 --adam --noValidation --attempts 10
-```
-
-The unsupervised learning (denoising) task supplements the supervised learning task (classification in this case). As in autoencoders, this network has an encoder and a decoder. The output of the encoder is also used for classification. The output of the encoder is **`N`** dimensional, where **`N`** is the number of classes. This **`N`** dimensional vector is used for computing the classification cost and also feeds into the decoder.
-
-## Classification
-Encoder/classifier units are defined as
-```lua
-   Z = nn.BatchNormalization(hidden_units)(nn.Linear(inputDims, hidden_units)(previous_H))
-```
-where
-```lua
-   H = nn.ReLU()(nn.CMul()(nn.Add()(Z)))
-```
-For the first layer, **`previous_H`** is the corrupted input.
-```lua
-   input = nn.WhiteNoise(mean, sigma)
-```
-
-**`H`** for the last encoder unit is defined as
-```lua
-   H = nn.LogSoftMax()(nn.CMul()(nn.Add()(Z)))
-```
-The last **`H`** feeds into the negative log likelihood criterion.
-
-## Denoising
-Typically, in a denoising autoencoder the input samples are corrupted using Dropout [```nn.Dropout```](https://github.com/torch/nn/blob/master/Dropout.lua), but in this paper the authors use isotropic Gaussian noise [```nn.WhiteNoise```](https://github.com/Element-Research/dpnn/blob/master/WhiteNoise.lua) with zero mean.
-
-### Lateral Connections in Autoencoder
-**`Z`** units in the encoder are laterally connected to the corresponding units in the decoder. The output of the decoder unit for neuron `i` is defined by
-```
-   z^_i = a_i1 * z_i + a_i2 * sigmoid(a_i3 * z_i + a_i4) + a_i5
-```
-where
-```
-   a_ij = c_ij * u_i + d_ij
-```
-**`U`** is the output of the decoder unit's ```nn.Linear()```. For the topmost layer **`U`** is zero. **`Z`** is the output of the corresponding encoder unit (this is the lateral connection: the decoder takes output from its previous unit through **`U`** as well as from the corresponding encoder unit). For the lowest layer of the decoder, **`Z`** is the corrupted input signal. **`c_j`** and **`d_j`** are trainable weight vectors. This forms the crux of the ladder network. It can be easily implemented using **`nngraph`** as follows.
-
-For the topmost layer, **`U`**`= 0` and **`Z`** is the batch normalized output from the corresponding (in this case last) encoder/classifier unit. **`Z^`** for the topmost layer is defined as
-```lua
-   z_hat1 = nn.CMul(hidden_units)(Z)
-   z_hat2 = nn.CMul(hidden_units)(Z)
-   z_hat3 = nn.CMul(hidden_units)(Z)
-   z_hat34 = nn.Add(hidden_units)(z_hat3)
-   z_hatSigmoid34 = nn.Sigmoid()(z_hat34)
-   z_hat234 = nn.CMulTable()({z_hat2, z_hatSigmoid34})
-   z_hat5 = nn.CMul(hidden_units)(Z)
-
-   -- Z_hat = z^
-   Z_hat = nn.CAddTable()({z_hat1, z_hat234, z_hat5})
-```
-
-For lower decoder units, **`Z^`** is defined as
-```lua
-   u = nn.Linear()(previous_Z_hat)
-
-   cu1 = nn.CMul(hidden_units)(u)
-   du1 = nn.Add(hidden_units)(u)
-   a1 = nn.CAddTable()({cu1, du1})
-   cu2 = nn.CMul(hidden_units)(u)
-   du2 = nn.Add(hidden_units)(u)
-   a2 = nn.CAddTable()({cu2, du2})
-   cu3 = nn.CMul(hidden_units)(u)
-   du3 = nn.Add(hidden_units)(u)
-   a3 = nn.CAddTable()({cu3, du3})
-   cu4 = nn.CMul(hidden_units)(u)
-   du4 = nn.Add(hidden_units)(u)
-   a4 = nn.CAddTable()({cu4, du4})
-   cu5 = nn.CMul(hidden_units)(u)
-   du5 = nn.Add(hidden_units)(u)
-   a5 = nn.CAddTable()({cu5, du5})
-
-   z_hat1 = nn.CMulTable()({a1, z})
-   z_hat2 = nn.CMulTable()({a3, z})
-   z_hat3 = nn.Sigmoid()(nn.CAddTable()({z_hat2, a4}))
-   z_hat4 = nn.CMulTable()({a2, z_hat3})
-   Z_hat = nn.CAddTable()({z_hat1, z_hat4, a5})
-```
-`Z_hat` is `z^`. The final `Z_hat` is the output of the decoder and feeds into the mean squared error criterion.
-
-## Criterions
-The negative log likelihood criterion is used for the classification task.
-```lua
-   nll = nn.ClassNLLCriterion()
-```
-Mean squared error is used for the auxiliary task.
-```lua
-   mse = nn.MSECriterion()
-```
-These two training criterions are combined using `eta`, which determines the weight of the auxiliary task. If `eta` is zero then the model is trained for classification only.
-The combined criterion:
-```lua
-   criterions = nn.ParallelCriterion()
-   criterions:add(nll)
-   criterions:add(mse, eta)
-```
-
-## References
-[1] Rasmus, Antti, Harri Valpola, and Tapani Raiko. "Lateral Connections in Denoising Autoencoders Support Supervised Learning." arXiv preprint arXiv:1504.08215 (2015).
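For reference, the element-wise combinator that the deleted tutorial assembles from `nn.CMul`/`nn.Add` nodes can be written as one plain function. The sketch below is illustrative only (the function and argument names are not part of the patch); it assumes `z`, `u`, and the entries of `c` and `d` are tensors of equal size:

```lua
-- z^ = a1*z + a2*sigmoid(a3*z + a4) + a5, where a_j = c_j*u + d_j
-- (all products element-wise; c and d hold five trainable weight vectors)
local function combinator(z, u, c, d)
   local a = {}
   for j = 1, 5 do
      a[j] = torch.cmul(c[j], u) + d[j] -- a_j = c_j*u + d_j
   end
   local s = torch.sigmoid(torch.cmul(a[3], z) + a[4])
   return torch.cmul(a[1], z) + torch.cmul(a[2], s) + a[5]
end
```

For the topmost decoder unit `u` is zero, so each `a_j` degenerates to a plain trainable vector, which is why that unit is built from `nn.CMul` and `nn.Add` applied directly to `Z`.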
diff --git a/tutorials/ladder_network/ladder.lua b/tutorials/ladder_network/ladder.lua
deleted file mode 100644
index 5e556cf..0000000
--- a/tutorials/ladder_network/ladder.lua
+++ /dev/null
@@ -1,444 +0,0 @@
---[[!
-   Implementation of the ladder network as described in http://arxiv.org/pdf/1504.08215.pdf
---]]
-
-require 'nn'
-require 'dp'
-require 'dpnn'
-require 'math'
-require 'xlua'
-require 'optim'
-require 'nngraph'
-
--- Cuda
-require 'cutorch'
-require 'cunn'
-
--- Help functions
-require 'ladder_help_funcs'
-
-torch.setdefaulttensortype("torch.FloatTensor")
-op = xlua.OptionParser('%prog [options]')
-
--- Data
-op:option{'--noValidation', action='store_true', dest='noValidation',
-          help='Use validation data for training as well.', default=false}
-op:option{'--best', action='store_true', dest='best',
-          help='Use best training or validation model.', default=false}
-
--- Model parameters
-op:option{'--noOfClasses', action='store', dest='noOfClasses',
-          help='Number of classes.', default=10} -- MNIST data
-op:option{'--noiseSigma', action='store', dest='noiseSigma',
-          help='Stdev for noise for denoising autoencoder (Mean is zero).',
-          default=0}
-op:option{'--hiddens', action='store', dest='hiddens',
-          help='Hidden units.', default='{1000, 500, 250, 250, 250}'}
-op:option{'--useBatchNorm', action='store_true', dest='useBatchNorm',
-          help='Use batch normalization.', default=false}
-op:option{'--weightTied', action='store_true', dest='weightTied',
-          help='Tie weights of decoder with encoder.', default=false}
-
--- Criterion and learning
-op:option{'--attempts', action='store', dest='attempts',
-          help='Run attempts independent experiments.', default=1}
-op:option{'--eta', action='store', dest='eta',
-          help='If zero then only classifier cost is considered.', default=0}
-op:option{'--batchSize', action='store', dest='batchSize',
-          help='Batch Size.', default=32}
-op:option{'--epochs', action='store', dest='epochs',
-          help='Number of epochs.', default=100}
-op:option{'--maxTries', action='store', dest='maxTries',
-          help='Number of tries for early stopping.', default=0}
-op:option{'--learningRate', action='store', dest='learningRate',
-          help='Learning rate.', default=0.002}
-op:option{'--learningRateDecay', action='store', dest='learningRateDecay',
-          help='Learning rate decay.', default=1e-7}
-op:option{'--linearDecay', action='store_true', dest='linearDecay',
-          help='Linearly reduce learning rate.', default=false}
-op:option{'--startEpoch', action='store', dest='startEpoch',
-          help='Epoch number when to start linear decay.', default=1}
-op:option{'--endLearningRate', action='store', dest='endLearningRate',
-          help='Learning rate at last epoch.', default=0.0}
-op:option{'--momentum', action='store', dest='momentum',
-          help='Learning momentum.', default=0}
-op:option{'--loss', action='store_true', dest='loss',
-          help='If true use loss for early stopping else confusion matrix.',
-          default=false}
-op:option{'--adam', action='store_true', dest='adam',
-          help='Use adaptive moment estimation optimizer.', default=false}
-
--- Use Cuda
-op:option{'--useCuda', action='store_true', dest='useCuda', help='Use GPU',
-          default=false}
-op:option{'--deviceId', action='store', dest='deviceId', help='GPU device Id',
-          default=2}
-
--- Print debug messages
-op:option{'--verbose', action='store_true', dest='verbose',
-          help='Print appropriate debug messages.', default=false}
-
--- Command line arguments
-opt = op:parse()
-op:summarize()
-
--- Data
-noValidation = opt.noValidation
-best = opt.best
-verbose = opt.verbose
-
--- Cuda
-useCuda
-deviceId = tonumber(opt.deviceId)
-
--- MNIST Data source
-ds = dp.Mnist{}
-
-attempts = tonumber(opt.attempts)
-testAccus = torch.zeros(attempts)
-trData = {}
-tvData = {}
-tsData = {}
-for attempt=1,attempts do
-
-   local t1, t2
-
-   trData.data, t1, t2 = ds:get('train', 'input', 'bchw', 'float')
-   trData.labels, t1, t2 = ds:get('train', 'target')
-   trData.size = function() return trData.data:size()[1] end
-
-   tvData.data, t1, t2 = ds:get('valid', 'input', 'bchw', 'float')
-   tvData.labels, t1, t2 = ds:get('valid', 'target')
-   tvData.size = function() return tvData.data:size()[1] end
-
-   tsData.data, t1, t2 = ds:get('test', 'input', 'bchw', 'float')
-   tsData.labels, t1, t2 = ds:get('test', 'target')
-   tsData.size = function() return tsData.data:size()[1] end
-   collectgarbage()
-
-   local tempSample = trData.data[1]
-   local channels = tempSample:size(1)
-   local width = tempSample:size(2)
-   local height = tempSample:size(3)
-   local linFeats = channels * height * width
-
-   -- MNIST
-   local classes = {'1', '2', '3', '4', '5', '6', '7', '8', '9', '10'}
-   local confusion = optim.ConfusionMatrix(classes)
-
-   -- Model
-   local noOfClasses = tonumber(opt.noOfClasses)
-   local noiseSigma = tonumber(opt.noiseSigma)
-   local inputHiddens = dp.returnString(opt.hiddens)
-   local useBatchNorm = opt.useBatchNorm
-   local weightTied = opt.weightTied
-
-   hiddens = {linFeats}
-   for i=1,#inputHiddens do
-      hiddens[#hiddens+1] = inputHiddens[i]
-   end
-   hiddens[#hiddens+1] = noOfClasses
-
-   -- encoder input
-   local input = nil
-   if noiseSigma ~= 0 then
-      if verbose then print("Add noise to the samples.") end
-      input = nn.WhiteNoise(0, noiseSigma)()
-   else
-      input = nn.Identity()()
-   end
-
-   -- encoder model
-   local encoderLayers = {}
-   local Zs = {}
-   Zs[1] = input
-   local Hs = {}
-   Hs[1] = input
-   for i=2,#hiddens do
-      -- Zs
-      encoderLayers[i] = nn.Linear(hiddens[i-1], hiddens[i])
-      if useBatchNorm then
-         Zs[i] = nn.BatchNormalization(hiddens[i])(encoderLayers[i](Hs[i-1]))
-      else
-         Zs[i] = encoderLayers[i](Hs[i-1])
-      end
-
-      -- Hs
-      if i==#hiddens then
-         Hs[i] = nn.CMul(hiddens[i])(nn.Add(hiddens[i])(Zs[i]))
-      else
-         Hs[i] = nn.ReLU()(nn.CMul(hiddens[i])(nn.Add(hiddens[i])(Zs[i])))
-      end
-   end
-
-   -- classifier
-   local classifier = nn.LogSoftMax()(Hs[#Hs])
-
-   -- Decoder
-   local decoderLayers = {}
-   local Z_hats = {}
-   for i=#hiddens,1,-1 do
-
-      -- u = 0 hence no cij
-      if i==#hiddens then
-         z_hat1 = nn.CMul(hiddens[i])(Zs[i])
-         z_hat2 = nn.CMul(hiddens[i])(Zs[i])
-         z_hat3 = nn.CMul(hiddens[i])(Zs[i])
-         z_hat34 = nn.Add(hiddens[i])(z_hat3)
-         z_hatSigmoid34 = nn.Sigmoid()(z_hat34)
-         z_hat234 = nn.CMulTable()({z_hat2, z_hatSigmoid34})
-         z_hat5 = nn.CMul(hiddens[i])(Zs[i])
-         Z_hats[i] = nn.CAddTable()({z_hat1, z_hat234, z_hat5})
-      else
-         decoderLayers[i] = nn.Linear(hiddens[i+1], hiddens[i])
-         if weightTied then
-            if verbose then print("Tying encoder-decoder weights.") end
-            decoderLayers[i].weight:set(encoderLayers[i+1].weight:t())
-            decoderLayers[i].gradWeight:set(encoderLayers[i+1].gradWeight:t())
-         end
-
-         u = decoderLayers[i](Z_hats[i+1])
-
-         cu1 = nn.CMul(hiddens[i])(u)
-         du1 = nn.Add(hiddens[i])(u)
-         a1 = nn.CAddTable()({cu1, du1})
-         cu2 = nn.CMul(hiddens[i])(u)
-         du2 = nn.Add(hiddens[i])(u)
-         a2 = nn.CAddTable()({cu2, du2})
-         cu3 = nn.CMul(hiddens[i])(u)
-         du3 = nn.Add(hiddens[i])(u)
-         a3 = nn.CAddTable()({cu3, du3})
-         cu4 = nn.CMul(hiddens[i])(u)
-         du4 = nn.Add(hiddens[i])(u)
-         a4 = nn.CAddTable()({cu4, du4})
-         cu5 = nn.CMul(hiddens[i])(u)
-         du5 = nn.Add(hiddens[i])(u)
-         a5 = nn.CAddTable()({cu5, du5})
-
-         z_hat1 = nn.CMulTable()({a1, Zs[i]})
-         z_hat2 = nn.CMulTable()({a3, Zs[i]})
-         z_hat3 = nn.Sigmoid()(nn.CAddTable()({z_hat2, a4}))
-         z_hat4 = nn.CMulTable()({a2, z_hat3})
-         Z_hats[i] = nn.CAddTable()({z_hat1, z_hat4, a5})
-      end
-   end
-   local model = nn.gModule({input}, {classifier, Z_hats[1]--[[Decoder--]]})
-   if verbose then print(model) end
-
-   -- Criterion and learning
-   -- Criterion
-   local eta = tonumber(opt.eta)
-   local criterions = nn.ParallelCriterion()
-   local nll = nn.ClassNLLCriterion()
-   local mse = nn.MSECriterion()
-   criterions:add(nll)
-   criterions:add(mse, eta)
-
-   -- Learning
-   local batchSize = tonumber(opt.batchSize)
-   local epochs = tonumber(opt.epochs)
-   local maxTries = tonumber(opt.maxTries)
-   local learningRate = tonumber(opt.learningRate)
-   local learningRateDecay = tonumber(opt.learningRateDecay)
-   local linearDecay = opt.linearDecay
-   local startEpoch = tonumber(opt.startEpoch)
-   local endLearningRate = tonumber(opt.endLearningRate)
-   assert(epochs > startEpoch, "startEpoch should be smaller than epochs.")
-
-   if linearDecay then
-      if verbose then print("Using linear decay.") end
-      learningRates = torch.zeros(startEpoch):fill(learningRate)
-      local temp = torch.range(learningRate, endLearningRate,
-                               -learningRate/(epochs-startEpoch))
-      learningRates = torch.cat(learningRates, temp)
-   end
-
-   local momentum = tonumber(opt.momentum)
-   local loss = opt.loss
-   local adam = opt.adam
-
-   -- Optimizer
-   local optimState = {
-      coefL1 = 0,
-      coefL2 = 0,
-      learningRate = learningRate,
-      weightDecay = 0.0,
-      momentum = momentum,
-      learningRateDecay = learningRateDecay
-   }
-
-   -- If true use Adaptive moment estimation else SGD.
-   if adam then
-      if verbose then print("Using Adaptive moment estimation optimizer.") end
-      optimMethod = optim.adam
-   else
-      if verbose then print("Using Stochastic gradient descent optimizer.") end
-      optimMethod = optim.sgd
-   end
-   if verbose then
-      print(optimMethod)
-      print(optimState)
-   end
-
-   if useCuda then
-      if verbose then print("Using GPU: "..deviceId) end
-      cutorch.setDevice(deviceId)
-      if verbose then print("GPU set") end
-      model:cuda()
-      if verbose then print("Model copied to GPU.") end
-      criterions:cuda()
-      if verbose then print("Criterion copied to GPU.") end
-   else
-      if verbose then print("Not using GPU.") end
-   end
-
-   -- Retrieve parameters and gradients
-   parameters, gradParameters = model:getParameters()
-
-   -- Reshape samples from images to vectors
-   trData.data = trData.data:reshape(trData.size(1), linFeats)
-   tvData.data = tvData.data:reshape(tvData.size(1), linFeats)
-   tsData.data = tsData.data:reshape(tsData.size(1), linFeats)
-   collectgarbage()
-
-   if noValidation then
-      trData.data = torch.cat(trData.data, tvData.data, 1)
-      trData.labels = torch.cat(trData.labels, tvData.labels, 1)
-      tvData.data = nil
-      tvData.labels = nil
-      collectgarbage()
-   end
-
-   if verbose then
-      print(trData)
-      print(tvData)
-      print(tsData)
-   end
-
-   -- Training
-   local displayProgress = verbose
-   local classifierIndx = 1
-   local trainAccu = 0
-   local validAccu = 0
-   local bestTrainAccu = 0
-   local bestValidAccu = 0
-   local trainLoss = 0
-   local validLoss = 0
-   local bestTrainLoss = math.huge
-   local bestValidLoss = math.huge
-   local bestTrainModel = nn.Sequential()
-   local bestValidModel = nn.Sequential()
-   local earlyStopCount = 0
-   for i=1, epochs do
-      if linearDecay then
-         optimState.learningRate = learningRates[i]
-      end
-      -- Training
-      trainLoss = model_train_multi_criterion(model, criterions,
-                                              parameters, gradParameters, trData,
-                                              optimMethod, optimState, batchSize,
-                                              i, confusion, trainLogger,
-                                              useCuda, displayProgress,
-                                              classifierIndx)
-      confusion:updateValids()
-      if loss then
-         if verbose then
-            print("Current train loss: ".. trainLoss
-                  ..", best train loss: " .. bestTrainLoss)
-         end
-         if trainLoss < bestTrainLoss then
-            bestTrainLoss = trainLoss
-            bestTrainModel = model:clone()
-            print(confusion)
-         end
-      else -- Using classification accuracy for saving best train model
-         trainAccu = confusion.totalValid * 100
-         if bestTrainAccu < trainAccu then
-            bestTrainAccu = trainAccu
-            bestTrainModel = model:clone()
-            bestTrainLoss = trainLoss
-         end
-         if verbose then
-            print("Current train accu: ".. trainAccu
-                  ..", best train accu: " .. bestTrainAccu
-                  ..", best train loss: " .. bestTrainLoss)
-         end
-      end
-
-      -- Validating
-      if not noValidation then
-         validLoss = model_test_multi_criterion(model, criterions,
-                                                tvData, confusion,
-                                                useCuda, classifierIndx)
-         confusion:updateValids()
-         if loss then
-            if verbose then
-               print("Current valid loss: ".. validLoss
-                     ..", best valid loss: " .. bestValidLoss)
-            end
-            if validLoss < bestValidLoss then
-               earlyStopCount = 0
-               bestValidLoss = validLoss
-               bestValidModel = model:clone()
-               print(confusion)
-            else
-               earlyStopCount = earlyStopCount + 1
-            end
-         else
-            validAccu = confusion.totalValid * 100
-            if bestValidAccu < validAccu then
-               earlyStopCount = 0
-               bestValidAccu = validAccu
-               bestValidModel = model:clone()
-               bestValidLoss = validLoss
-            else
-               earlyStopCount = earlyStopCount + 1
-            end
-            if verbose then
-               print("Current valid accu: ".. validAccu
-                     ..", best valid accu: " .. bestValidAccu
-                     ..", best valid loss: " .. bestValidLoss)
-            end
-         end
-         if verbose then
-            print(noiseSigma, weightTied, useBatchNorm, eta, earlyStopCount)
-         end
-      end
-
-      if maxTries ~= 0 then
-         if earlyStopCount >= maxTries then
-            if verbose then print("Early stopping at epoch: " .. i) end
-            break
-         end
-      end
-   end
-
-   -- Testing
-   if best then
-      if noValidation then
-         testLoss = model_test_multi_criterion(bestTrainModel, criterions,
-                                               tsData, confusion,
-                                               useCuda, classifierIndx)
-      else
-         testLoss = model_test_multi_criterion(bestValidModel, criterions,
-                                               tsData, confusion,
-                                               useCuda, classifierIndx)
-      end
-   else
-      testLoss = model_test_multi_criterion(model, criterions,
-                                            tsData, confusion,
-                                            useCuda, classifierIndx)
-   end
-   confusion:updateValids()
-   testAccu = confusion.totalValid * 100
-   testAccus[attempt] = testAccu
-   if verbose then
-      print("Attempt: " .. tostring(attempt) .. " Test Accu: " .. testAccu)
-   end
-end
-print("Test accuracies.")
-print(testAccus)
-print("Min Test Error is: " .. tostring(100 - testAccus:max()) .. "%")
diff --git a/tutorials/ladder_network/ladder_help_funcs.lua b/tutorials/ladder_network/ladder_help_funcs.lua
deleted file mode 100644
index e6fe25e..0000000
--- a/tutorials/ladder_network/ladder_help_funcs.lua
+++ /dev/null
@@ -1,220 +0,0 @@
-require 'csvigo'
-require 'string'
-require 'xlua'
-require 'lfs'
-
--- Training function
--- Processes a batch in one go.
--- Has useCuda option to run on GPU [model and criterion expected in CUDA]
-local conTargets, conOutputs
-function model_train_multi_criterion(model, criterions, parameters,
-                                     gradParameters, trainData,
-                                     optimMethod, optimState, batchSize,
-                                     epoch, confusion, trainLogger,
-                                     useCuda, displayProgress, classifierIndx)
-
-   model:training()
-   confusion:zero()
-   local displayProgress = displayProgress or false
-   local classifierIndx = classifierIndx or 1
-
-   -- epoch tracker
-   local epoch = epoch or 1
-
-   local totalLoss = 0
-
-   -- shuffle at each epoch
-   local shuffle = torch.randperm(trainData.size())
-
-   local sampleSize = trainData.data[1]:size()
-   local isScalar = false
-   local labelSize
-   if trainData.labels:size():size() == 1 then
-      isScalar = true
-   else
-      labelSize = trainData.labels[1]:size()
-   end
-
-   print("Doing epoch on training data:")
-   print("Online epoch # " .. epoch .. " [batchSize = " .. batchSize .. "]")
-
-   -- local variables
-   local time = sys.clock()
-   local inputs
-   local targets
-   if isScalar then
-      targets = torch.Tensor(batchSize)
-   else
-      targets = torch.Tensor(batchSize, labelSize[1])
-   end
-
-   -- Samples
-   sizeLen = sampleSize:size()
-   if sizeLen == 1 then
-      inputs = torch.Tensor(batchSize, sampleSize[1])
-   elseif sizeLen == 2 then
-      inputs = torch.Tensor(batchSize, sampleSize[1], sampleSize[2])
-   elseif sizeLen == 3 then
-      inputs = torch.Tensor(batchSize, sampleSize[1], sampleSize[2],
-                            sampleSize[3])
-   else
-      print("Invalid Sample Size")
-   end
-
-   local trainInputs = useCuda and torch.CudaTensor() or torch.FloatTensor()
-   local trainTargets = useCuda and torch.CudaTensor() or torch.FloatTensor()
-   local criterionTargets
-
-   t = 1
-   while t <= trainData.size() do
-      if displayProgress then xlua.progress(t, trainData.size()) end
-      noOfSamples = math.min(t + batchSize -1, trainData.size())
-      -- create mini batch
-      indx = 1
-      for i=t,math.min(t+batchSize-1, trainData.size()) do
-         -- Load new sample
-         inputs[indx] = trainData.data[shuffle[i]]
-         targets[indx] = trainData.labels[shuffle[i]]
-         indx = indx + 1
-      end
-      indx = indx - 1
-
-      local inputs_ = inputs[{{1,indx}}]
-      trainInputs:resize(inputs_:size()):copy(inputs_)
-
-      local targets_ = targets[{{1,indx}}]
-      trainTargets:resize(targets_:size()):copy(targets_)
-
-      criterionTargets = {trainTargets, trainInputs}
-
-      t = t + batchSize
-
-      -- create closure to evaluate F(X) and df/dX
-      local feval = function(x)
-         -- Get new parameters
-         if x ~= parameters then
-            parameters:copy(x)
-         end
-
-         -- reset gradients
-         gradParameters:zero()
-
-         -- evaluate function for complete mini batch
-         local outputs = model:forward(trainInputs)
-         local f = criterions:forward(outputs, criterionTargets)
-         -- Total Loss
-         totalLoss = totalLoss + f
-
-         local df_do = criterions:backward(outputs, criterionTargets)
-         model:backward(trainInputs, df_do)
-
-         if useCuda then
-            conOutputs = outputs[classifierIndx]:float()
-            conTargets = trainTargets:float()
-         else
-            conOutputs = outputs[classifierIndx]
-            conTargets = trainTargets
-         end
-
-         confusion:batchAdd(conOutputs, conTargets)
-
-         -- Normalize gradients
-         gradParameters:div(trainInputs:size()[1])
-         f = f/trainInputs:size()[1]
-
-         -- L1/L2 Regularization
-         if optimState.coefL1 ~= 0 or optimState.coefL2 ~= 0 then
-            -- locals
-            local norm, sign = torch.norm, torch.sign
-
-            -- Update loss with regularizer
-            f = f + optimState.coefL1 * norm(parameters, 1)
-            f = f + optimState.coefL2 * norm(parameters, 2)^2/2
-
-            -- Gradients
-            gradParameters:add(sign(parameters):mul(optimState.coefL1)
-                               + parameters:clone():mul(optimState.coefL2))
-         end
-
-         -- return f and df/dX
-         return f, gradParameters
-      end
-
-      -- optimize on current mini batch using SGD/adam
-      optimMethod(feval, parameters, optimState)
-   end
-
-   -- time taken
-   time = sys.clock() - time
-   time = time/trainData.size()
-   print("\n==> time to learn 1 sample = " .. (time*1000) .. "ms")
-
-   -- Total loss
-   totalLoss = totalLoss/trainData.size()
-
-   -- update logger
-   if trainLogger ~= nil then
-      trainLogger:add{["% mean class accuracy (train set)"] =
-                      confusion.totalValid * 100}
-   end
-   return totalLoss
-end
-
-function model_test_multi_criterion(model, criterions, testData, confusion,
-                                    useCuda, classifierIndx)
-   local time = sys.clock()
-   model:evaluate()
-   confusion:zero()
-   local classifierIndx = classifierIndx or 1
-   local totalLoss = 0
-   local criterionTargets
-
-   if useCuda then
-      local batchSize = 64
-      local inputs = torch.CudaTensor()
-      local testInputs
-      local cpu_targets
-      local gpu_targets = torch.CudaTensor()
-      local gpu_preds
-      local cpu_preds
-      local i = 1
-      local j = 0
-      while i <= testData.size() do
-         j = math.min(i + batchSize -1, testData.size())
-         -- Copy input and targets to cuda
-         testInputs = testData.data[{{i, j}}]
-         inputs:resize(testInputs:size()):copy(testInputs)
-         cpu_targets = testData.labels[{{i, j}}]
-         gpu_targets:resize(cpu_targets:size()):copy(cpu_targets)
-         criterionTargets = {gpu_targets, inputs}
-
-         gpu_preds = model:forward(inputs)
-         totalLoss = totalLoss + criterions:forward(gpu_preds,
-                                                    criterionTargets)
-         cpu_preds = gpu_preds[classifierIndx]:float()
-         confusion:batchAdd(cpu_preds, cpu_targets)
-         i = i + batchSize
-      end
-   else
-      local trainInputs = testData.data
-      local trainTargets = testData.labels
-      criterionTargets = {trainTargets, trainInputs}
-
-      local outputs = model:forward(trainInputs)
-      totalLoss = criterions:forward(outputs, criterionTargets)
-
-      local conOutputs = outputs[classifierIndx]
-      local conTargets = trainTargets
-      confusion:batchAdd(conOutputs, conTargets)
-   end
-
-   -- time taken
-   time = sys.clock() - time
-   time = time/testData.size()
-   print("\n==> time to test 1 sample = " .. (time*1000) .. "ms")
-
-   -- Total loss
-   totalLoss = totalLoss/testData.size()
-
-   return totalLoss
-end
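Note: both deleted helpers pair the model's two outputs {classPredictions, reconstruction} with the criterion targets {labels, inputs}, scored by the nn.ParallelCriterion built in ladder.lua (NLL plus eta-weighted MSE). A minimal sketch of that pairing; the sizes, the 0.1 weight standing in for eta, and the random tensors standing in for real model outputs are illustrative only:

    require 'nn'

    -- one criterion per model output: NLL for the classifier head,
    -- MSE (weighted by eta) for the reconstruction head
    local criterions = nn.ParallelCriterion()
    criterions:add(nn.ClassNLLCriterion())
    criterions:add(nn.MSECriterion(), 0.1) -- 0.1 stands in for eta

    local inputs = torch.randn(32, 784)               -- batch of flattened images
    local labels = torch.LongTensor(32):random(1, 10) -- class targets

    -- stand-in for model:forward(inputs), which returns {log-probs, reconstruction}
    local outputs = {nn.LogSoftMax():forward(torch.randn(32, 10)), torch.randn(32, 784)}

    local loss = criterions:forward(outputs, {labels, inputs})
    local gradOutputs = criterions:backward(outputs, {labels, inputs})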
diff --git a/tutorials/lena.jpg b/tutorials/lena.jpg
deleted file mode 100644
index 9181d48eb6490d195793ba29427a91f8cbb65e20..0000000000000000000000000000000000000000
Binary files a/tutorials/lena.jpg and /dev/null differ
diff --git a/tutorials/srd1.jpg b/tutorials/srd1.jpg
deleted file mode 100644
index 76971ffb23ba037234e739bbb3d97143a457b631..0000000000000000000000000000000000000000
Binary files a/tutorials/srd1.jpg and /dev/null differ
z!e21=0wP_x(%wPUqYojGOF%c^u3ij4s^5>@56otk+v7ATP+7*2b@!`Qq(5P z_h_z}ywh=Xiz2oqZvhb<(V=q3X)d_j@pfaj?g?b~igvTsQ+tXxJ=BUXqPRe%hnPg> zLNG12uA5^>>&8;x6LA#tR$so^+Q2-n4hs*xpm>?a4`2pkVMkId8X>S!F3(>UPxlue zBrhw5%O^efXzwIplX9Of`wi_$$P)keB`KB-GbnSQHp4MQVq{wRwzv~#DlbCf%1#2S zY-#aXK`if;TZvyAkuxF8=OTIG$gwbgdDB)Jo1L`?22DePN}+Rp?9zT(n#nv2-zmf^vh*o} zW)7_pf54aUitQoaine=Q7LXLal6(Q^Pvx14OiohQ@@8GdyO&n0_wNtGo^YAE|1c4B zyort8X3Qsm&oc<1rI2aeWzgha?r(u@TjpT`j>57OlDgB^f>8M+sW0P@wwEuI`!OYfr7rLbTAZspemhl&C8ih+Z zFAA1)N=pax5%gT;{Ns;hv=@CPEz&iaT6!klCKXzVQ^<&60443~70Laa#;E%9~82?K| zAmM_7aFbg&*sklVb0fc5=FjmxRgJ`mbhEYeA+?Hs4s^ zC_c6O@hngtQ=8tg7xl?3@~ers_n*qQoPVExF&QLKGWL&}9k_7UmBDg1Ojmbd*L4G4 znjkVJ7RXMHkcr)9=Ka>2s0{&&h}>_=K`IYj?2q9sSjqHDy@{II&qk{vG=m&5C>#vq z6HefB$({36nKo^1eQ$vJUUzv||K4nTBC8$XPbbzENIH6!?=gwXWg`1g$c+7Y0;}zx z(~!yF^{-OG-D?6j$=yWWN&Z$^IkiI!qtw*oYztF7yT-gMxV46SY_giNpC3~zFtM~L zU8awCzgjmE?|2Blpu4J8?R6$?J>}~!7L961;gon@9aA%+vt23_FKOPy5?ayqXgw`tCt#NGwmkHNU!v#=x84<&IM~B_GM*kT`8{ z48A~5^vEgr)jVd#X$e6?8kW%EmVU2~U8}Xr_RmP{rK>WTcy$k$)%LiA0Zo6$4%o!* zX~8>=9=bFPMR3_x+f%&{1JzasOLFp=ZDt&~u#|~ahEefRQB`x4; zHPY)Rkmn!WIZwTTb6rHT&wX{kF$fzalwJD^kOLqkRmZ!n<=|*;^fneV-ma1kuNh>) zhX?rw=zbPcdd#tJmG_Wy>Rseyc=l7}1?Glbm%(7JOpV5>qpgX!=k$cN_9Pm~)?AO4S47 z%PV*Btwd>a7CwHE>A>~0z@~S<+;cN!dM!}u%#vUPc?TA#xc2S4MB&SEuzj8It_7df zalCZf-&N2MNOipIqQjPpB={nW)ECRv`+wtg(FtE^LKg$KB#I}uDU)yVDceD%@r?Sq zS#RbPK2znTX1<@cJ3@X%Eo7_gCND?`Epf!iPgk;1oUt}-&!+4Ip!w&&;E*V^k@@3N zlM+u;?Im9ZGh7txC~n?a52aZ@TQn17U)Dq6XXwsp^X2sEQ${x`PiPe)dKC-Z;m9$k z*IVN(Y*Zf=yS2dY4_G#j1>f+!V-q&=c)Ywoq$>Gj!hQXxCcKQ&)WG>r=3ZDe@;`;5>mCG&l`v%an>}q`6UR6_? z%z-H1<&s^0LrWi9FlG%QmF!cjNxyuD4!)%snA@>P1Tq9X6%?zy+K8so3&3wm;eCP9weMyUH`js0QPx9m&T56O)vtosXv;P*Kwl;?%cUo!G zz@IryoxGqGO0n7luIFVnc9C1q3TVB5U&+PKY(XoFCmJt}>dxEz_Br97L76)gGw~ih z1b@t2rN`o|n}iz$?e}oKp*_{Ykv@j_o68ZI@heXk;G@mpRE(_aj?fVHzMFD3pAUS~ zpD|2L)Oo9@BX{z?$Ub5l2Y-hfDcRS{+u2dOM>|8^I*u3Lkr~Ir6tX(YrtFqcGCKOFy$DAQq-aXPf z;Kz5zlkZXxJ*hx(wE1{jDMs^R>O_H}*3q!05fWLaO} zBPAxdiHA1{KOp_rPHq*Z{T;N7tX=)_Q#*(zQC)SYueKw!PiU8mG78Af6N)2AkV#aB z;;Cb7v(!?Xw2(=|_gc~XT^di^@#x#gPp#4N3)2xVa9V?f1fEM^X6lL;s zxi;7oYwRWEas0{_s$a<0t}}Js7_yKIrA0X-qdui~!9(XRf5Xgoo{4ceRt4Rntg9bS zqzwp&8K+%qm|+j*l(t#DH~a|h0KEEweL(uv{>e4bvH))uCZ5oC5zW;(}=)$MNx@mjBUPu{3Wt8OAXqXg!*rQL?)`e)-~ByylwA{Eij{Ve1on>4?G_b ze!>HPmfgT9VDJ)I(($ZhxTBrFU9wlKwnYAxp|N+l$c6-CQ;3_NrPM@+K_+0X;cz(s zPC8Vowd5tf9bc|7V}5L(qLK0|O|5zP#8A_WETpHR2tU69m+hQC;UcL?(592qx7}t7 zb~BCD_aCQWf=5eD^RdC=V_tiarX1X*9TNR4mTkD|D~#7pAhn+bBv+XtX8@6hmzpT3 z*iI$D3y%qZ5LiP;+_P`hmE4Gg$i_au!l_5NTsK1haH5UzeSxJj{(Fbi|T=;TT`c65!K#CJ`-o{`}zgUGL)R3D%?+ zFMzvBl47dmWuS0 z+JQvzODoX8c6Vw6=reQ%Gbo==nU9{jItdGL5&`qM8bhd02NBqPNidEbwpgyuz#J1; zAZ=3*LNEphG8^qR24~qd8IzprQ*N)-SG+)#ZM>nH5WHv+ngle2GsKAlZ?%+Va}7fT zBBT(I-*Qc?(r7wbg$y3y^rTRke#kdgL(9>(H@ZB(r|#&XI`@ko>57y&;^4GnM*-_z jH@_~XzcAffy&tFagb#E?c$ph_8dG4`;gNL=CQkk@ofMo~ diff --git a/tutorials/srd2.jpg b/tutorials/srd2.jpg deleted file mode 100644 index ffacaffbe5eb458e982373e08fc8e4b3700deac3..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 6478 zcmbW4cQhRD+y95fM(?6;5=5d#iyA8tL_|c1l8ClS5QIbxHi#0$hA2^DKOs6b#-=?wOIC%&-a|)@1N(7=XtI<_jP8@Tr=nWntSeR-a{TE&jBp=wRN=t3JL(A zINt#BB%ldUQv8j-neuN?f&OM{Dk=~a4K)qTzXMErfd)(qrlGk&e}R_nZ=CNj(9<#e zJ@|W*|6EE*4FXZqfoZ`1mi(_0xe;K!0N4ZeAc`vhB`XDpm4b``Am=)%|CM(x_CG;E z38JE=0iVmDKQE|cIhPLtor|VESA1R_bbcP7Vx?xgEOVQN-Ov_%#gjueB>w9Kp*ugC zIF0&o!g6+Ap|o^d7rA+OMXrioyDlcLa8pr9Sw-`%mbT74-TTHSk4??YEuO&a9UPsW zJG;Dk?d{|1=N}OEE<7SK>V0&=r^KY>l+USYS=rxma`W;F3d_nXDyyo0*3>q)U|QSS zJ34<43=R#CjE;>@%r7i1En`MKeJ&CM~mp82Q;a_~GpY@tv_s 
z@o(=a`6R~qeNepyL?KR0@)n*U8_xCdUS}s6Kuny3!ucN_8a-J%WDQCcdaS6*QMH-S zsz<3!@5bkYNMs#B-&l~4E&4LFSACX%SQDhgXh+i8PM)28IXx11X~6FCSs!AMW+70@ ztE4+M4ia2R8>%Hb;PdW(@c2dyD(Ga@XxZ$4|2Zt~j#GPp&Z z{xnD4;pfQyVZVa;R`UcgPMCEnkNZ*oK`IT!#2mb=BiMyBd-@}oCeY|K{!9cB0bzHH z@Y|6X+0YzygLyg^G-}MiBx6;S$5w+Kl+=o{6E)21VGsOgyV_XxLS`Wk;9~o(AAGLE{Z(VdKKimw;!%R9nJ>N$H?UpJ;$!CH zGAepNbjGJ>5fQ~M4Hxy$(uRH8Y2rmeQ)RI>Zvzhg1Y$)Q#VQMRrNh^QtRU>%{W-Mq zHOY|!&~oPX6KJ?KXLiLfmhr30SjBMm$IxhI`pe+H42Yd;BN>>>CIjWu$W2$g*9xZA z{t9#CnFw?K^OQ-r?{hu(gh`>=WNNTcZ1$I2RE(cv%~dlVis~8=K+!Au7Qi;#Ap@aN zWT5Gu`lYiWLGFGs;HVFMtQ*KYS2RF4bV<({+D=rnGYx*lDSU?7&^+2^b*Wd^~P^`jKZw~?AXB+nB!l72m&p#-=Y((;x)s{a;q zLVi|UQZ7xkBj-9vS3BF-3BzF3p zhQ?u4I?pgmvH0-0-B)eOsJQbd^--On9`E$FK|;?-`lezgB(cp=jC*V7?V^xqK4B_o zV{<2*0Df9-d~3X_BFfHnM9YvZt}v4fXxyOjg2wE{Zo63Iyv{6WCj0(oJPVf)@huv*a`f)+uxG#h+gos5~>{SWKF+{W-5_1 zYIasoB@T)g>Tz|1RQMaFI??%FNWp@zZuvZ;2S{_GrvpBCyTzJ_ za^>rCqSq3rV`yE9Di!B=OYQr}mxcBby`ltn*jW8x$QdO`_2*EYCh|Qvch5T@la_VqF(!mN)GCM-JLY1JWr1oHb?`u$*!9;0*O23B1TOOw?jVt zQaDoJ=d<61Uk+pPMm=t(v`%L)_rX zkv-xa4cRIww*8s@0&HmfO5|5(M-OFpv`TU!aNEXpv_Ue^7BP4BxolktX^4)q7JnGM zRD|Jtik)n{L~&LU#s33HBTdRSJw{<3A3Z**f$t}xH4l4Ox?vIXyz1k-ZTtie$Bgv8 zojPayPRs0tO^D&Jcb|05&(d^T+BGZ2k(kFBo zOlw;wALQz?yhz{>mX&4vQ`ezk#Jx*_0^<=FC>wFb^yd)C(ChFDfpOlXl)i1jSUAI; zwOZ-Oa}xRR^b6FX9!~f~OwV&Z6C7rnd{7kycVCa9z3Tw0G!&4f*Y0N{ipjOBnw&ml zmugxz*D)>mQjK2n<0g@T8$B}7aa?QGdLHW}ggcR90Y2GIC?Y07TF(0cnsxidKGA`` zU>#`_#pj3P`R(5Ik}m$T9nJDl+cJ26bgn&8O&OQ1S=RR*B{Fo7KOe37#dH;`7hytY zZ^x;<;7gj+_6;oJz9O?YOR3KK99hmBasUO0##_4iB?IF*8xQ^{VD8*n+UlWl#kv`O zseuiLDC|R;khJ6LOvjmZpq|bOP4R8>p+yu#=1`eOjp<-7HB*|=jHgpyL@?QOGl&KR zii@BgF1dPqbNXyCoH$JesF{g8C$cVvMALO;Z#3uh-D=veYmM&FH`j#slqPv`UNkF> zqR#xI-O$SFt&4b9OeT#%)NJKp+b!vqU~t^r)bkl?9MwB3SFe!p5XY1ipUaKDJa>)8 z>3@Qtq>Hoj$wj#J!6sw{bLf(cn%8N>uK5sUBg<;BwyZiq{qvgj7f9vyuL8eciw^>F z+VZ?T9YiDLYbx?rzGOUYl1wyyVYv_ddI_|7zPHAw6fk1yC^eM zO6RqVbrZcfajbfj>kt?~oLoRpxGoeDdrkV?_7CEECiVMXGs^I79(yMW+28DU`m`tG z2xDLMb7!|F7wHVsU$vPGcaF=d=5NDNTpn`3wv$9T9=jq0h`~D3o)S^S7UAT&XmM;% zEIJ7ZJ4l1~Ah5e3qiNSrW4&0-^;x1Qw3XfIuF}8z z%Tnn>iv~k{3PQ{hMzx5rAVz-oi@_W}9>pM8TW$?@y(aWLpy$;V(Gl&ss|wR%ST9sx zv(urXI~pXY3@dyQZ&JS*B&9g6c5JW6_P**JgH*dX$zsW=Du90}NE|mJV76c#p3|px zKUpO>RDL6M`#`65LbH|c-n-?t!_#pOX-1(V2be&S)B`~#BaF$69=v5=y&PoC6Wci0 z)G}{2n7g%cm|-0js9NCH-WK5djs$VLMJRy#FPXP4^KjjLA-lv-R_m?Ac=)Q{3vNzQ zoSUURYetrfphD{y=YD$c2)b0goYMMM7me_)DCdhw?(L-4tv7M7Xghcml9AU2DOLZ3 zBLf5{9OOM2aFdROI5os*z<>DZ>Ftd%x)96#Wi;axau#Ll-nz#r`Lbx*oPth-Nyd0Z zOvxR1G?INMnG8G|l@`>0qN8!{+$CJ<-hE)9(&87(7XU*zF&42$Qb`M6>e8Wla~a9p z(RF1>c_O~k;*$o(pKys}psIaKIM4tuhZ(J<`B1W2MHo%S>-tNc|7_jd{o@MKx|DF! zWUA3$pPNp_z4Q2lU{73IjA{V;mA4%aXXc@v5%pTI^KMVH6wTuJCGKs8Q6*oJ3u)|Z zDuJ?D6X_q~f_i@Bu2q#<^1Z^!wiUJjP9MQX%7u{9s`}hnzw~x1uF6POmrRD*oA~<_ zW-{6i#=R%+%)08HF$7;tGc>e%_AK@AVE6a9qnVZEw)KME-7hoI)69Q7Ceefsr+L9r z&dDiI0g}F(d_e4>tj@fjltqdbYeUD_yN|g~B|x&`xHl{Yi6posAG8iQzFi46b$(XL;iYH; ztG5kQz*VNS>_nuQg%_Jh_>BB`$Gu|rk4Z0yin)8pY|oXqs>ma2&2)L&Trl?z)PhtcE)Mx*bbG2X$3jq&;$>hZzxH? 
zHEYICWzw{+@zW!8O%>C?LyhU!I1U&PKquZ5s1p)fvcoDNpMmN|qf$wq`n5Pu)<|&hD$Q;c%A z*;D(Yuvk$Gt^IPlhA~sg#)2+B>=#JMj&FT-QjrjX-$J#xiO!|1mOo)v_|s60(!Amy z5PB4|+{)G_C1K8PB0mH zosM8?^|gIjz&j=54cKt0{#=Hwx#YI`No&EldGzyc^YEh=wQJCl?~piYR02G*gvpj>@7iS}pfWp(+@E^oH^Ksr*seh^lEwyqnUS;>6U9<~K$F+a7@LxoRHO}W9gzL7O zZU&<>7H@P>i)|Yux#pQ9&&tiiCq@?%0xgKAo}A&GZgSc+Cz3xzy*k!s>ulqgq1V)- zl(F^J1YTS(NkUSEnbBI?F|yf8&CTX6n102=O}p1d`2D@LF5V+VV}O_CMl!x)Op?EN zDRUB166A3H=HW+QBPg69EOocPPe=U&8Oxb<+)QzuQ6ijzD}&Bd5}F;1a`dvwpzMuy z=n6S_vFVAC+p%QBmipp`q?HEW@0h6OE$|=+c=J*`>ok!Bm5wF@&+F+LpLpD8RTyCU zYGDTK^-&IEi}0#V6PrwFj`eQWB@+=nEeOr9KX0r_Y`^8cpA843M18l1j2}b7o%T9U zvMd}wz(BWpM#(^f>w>4ea4Dy9Mc?rYQy4GpAD00iDD&s~@NQD98{oom)SV%nRW>UvdiZMj7d#&DJ;Y##1)S3`}ag_{=fiS*; z0x7NZx#MkzW(nq#M`hQJF4?}q7XA=&c4$5+u-yzE9a7`V4t$7{O3e<+<$J1cY=Y3A zei_?+9J6K4;YOYM80Pj?h|?$bWy0^WKh<1%pE_cG-A^&MvKBo|N7r7hN>i?Rc)0Q#J$$0mw(xep*Kkm17F~V+xSHUL=R`#8 zzZ-mgV0o`G*}d3cAC)>EvC#Q7$p`mS#K5|n3}l$)k*?z9llR&o>HQHVHj98e!Q3*>mi{?{t3z!hDHm3nGOw%5e7W{Mn#v9%p2;g#JmC> zaJZU2EdL{Y&qqtf?(BpM8y%=JGT?47u`t_$F0~XCPhxrxjnVi~Hn$IG22^REOu-+Q0e*U5)Iq>k)p;hp^53 z$qjk*997U_TQJblEZ8j7DPCEiAZBRnQzEh^$=nrvEeov)1#Te-dUp$0L34YT2fwar<_6Sw-&5CepCYToeq2X((h zd)N>^EIJL~;^r2v9)^lfc zCy?43@>wRo_j9MwyL9K8CUE?aU^|r;1t)Ya3kf+BA=#dOv!3^e_I@4Ha>mdQ1l1VW zeALc=O?bloMS|5Gpi4%*(NN3Phy}?xfHkYUNe0+|f4we|T!NfX6Ez#&`8t$a9`P*x cDLRSn+PukY$S6K5T73OV5`wYEcZxjzU$iZ--v9sr diff --git a/utils.lua b/utils.lua index 3a1be38..4c8eac1 100644 --- a/utils.lua +++ b/utils.lua @@ -282,7 +282,7 @@ function nn.utils.recursiveMaskedCopy(dst, mask, src) end function nn.utils.setZeroMask(modules, zeroMask, cuda) - if cuda then + if cuda and not torch.isCudaTensor(zeroMask) then cuZeroMask = torch.getBuffer('setZeroMask', 'cuZeroMask', 'torch.CudaByteTensor') cuZeroMask:resize(zeroMask:size()):copy(zeroMask) zeroMask = cuZeroMask