------------------------------------------------------------------------
--[[ CategoricalEntropy ]]--
-- Maximize the entropy of a categorical distribution (e.g. softmax).
-- H(X) = E[-log(p(X))] = -sum(p(X)log(p(X)))
-- where X = 1,...,N and N is the number of categories.
-- A batch with an entropy below minEntropy will be maximized.
-- d H(X=x)     p(x)
-- -------- = - ---- - log(p(x)) = -1 - log(p(x))
--  d p(x)      p(x)
------------------------------------------------------------------------
local CE, parent = torch.class("nn.CategoricalEntropy", "nn.Module")

--- Constructor.
-- @param scale       weight applied to the entropy gradient (default 1)
-- @param minEntropy  only maximize batches whose entropy is below this
--                    threshold; nil means always maximize
function CE:__init(scale, minEntropy)
   parent.__init(self)
   self.scale = scale or 1
   self.minEntropy = minEntropy

   -- Internal graph: estimate P(X) from the batch, then compute H(X).
   self.module = nn.Sequential()
   -- Summing categorical probabilities over the batch yields an
   -- unnormalized prior for P(X).
   self.module:add(nn.Sum(1))
   -- Normalization factor; its constant is set per-batch in
   -- updateGradInput so the summed probabilities sum to one.
   self._mul = nn.MulConstant(1)
   self.module:add(self._mul)

   -- Two parallel branches over the normalized distribution:
   -- the probabilities p(X) and their logs log(p(X)).
   local branches = nn.ConcatTable()
   branches:add(nn.Identity())
   local logBranch = nn.Sequential()
   logBranch:add(nn.AddConstant(0.000001)) -- guard against log(0) = nan
   logBranch:add(nn.Log())
   branches:add(logBranch)
   self.module:add(branches)
   self.module:add(nn.CMulTable())     -- p(x)log(p(x))
   self.module:add(nn.Sum())           -- sum(p(x)log(p(x)))
   self.module:add(nn.MulConstant(-1)) -- H(X)

   self.modules = {self.module}

   -- Constant gradOutput of -scale: descending this gradient maximizes
   -- the entropy.
   self.minusOne = torch.Tensor{-self.scale}
   self.sizeAverage = true
end
| 41 | + |
--- Forward pass: identity. The entropy term contributes only gradients,
-- so the input batch is exposed unchanged as the output.
function CE:updateOutput(input)
   assert(input:dim() == 2, "CategoricalEntropy only works with batches")
   -- Share storage with the input rather than copying it.
   return self.output:set(input)
end
| 47 | + |
--- Backward pass: start from gradOutput unchanged, then add the gradient
-- of -scale*H(X) w.r.t. the input (i.e. entropy maximization under
-- gradient descent), but only when the batch entropy is below
-- self.minEntropy (or unconditionally when minEntropy is nil).
-- NOTE(review): the third `scale` argument is forwarded to the inner
-- module's updateGradInput, which conventionally takes only
-- (input, gradOutput) — looks vestigial; confirm against nn.Sequential.
function CE:updateGradInput(input, gradOutput, scale)
   assert(input:dim() == 2, "CategoricalEntropy only works with batches")
   -- Identity part of the gradient: pass gradOutput straight through.
   self.gradInput:resizeAs(input):copy(gradOutput)

   -- Normalize the batch-summed probabilities so they sum to one
   -- (assumes input rows are probability vectors, e.g. softmax output,
   -- so input:sum() is the batch size — TODO confirm with callers).
   self._mul.constant_scalar = 1/input:sum() -- sum to one
   -- Forward the internal graph to obtain the scalar batch entropy.
   self.entropy = self.module:updateOutput(input)[1]
   if (not self.minEntropy) or (self.entropy < self.minEntropy) then
      -- Backprop the constant -scale through the entropy graph.
      local gradEntropy = self.module:updateGradInput(input, self.minusOne, scale)
      if self.sizeAverage then
         -- Average the entropy gradient over the batch.
         gradEntropy:div(input:size(1))
      end
      self.gradInput:add(gradEntropy)
   end

   return self.gradInput
end