inplace HardTanh, subclass ReLU6

szagoruyko · szagoruyko · commit ac74eef878e3 · 2016-06-18T17:30:28.000+02:00
diff --git a/HardTanh.lua b/HardTanh.lua
@@ -1,9 +1,13 @@
 local HardTanh, parent = torch.class('nn.HardTanh', 'nn.Module')
 
-function HardTanh:__init(min_value, max_value)
+function HardTanh:__init(min_value, max_value, inplace) -- inplace: optional boolean flag, stored as false when omitted
    parent.__init(self)
    self.min_val = min_value or -1
    self.max_val = max_value or 1
+   self.inplace = inplace or false
+   if (inplace and type(inplace) ~= 'boolean') then -- nil and false skip this check; any other non-boolean value is rejected
+      error('in-place flag must be boolean')
+   end
    assert(self.max_val>self.min_val, 'max_value must be larger than min_value')
 end
 
@@ -14,7 +18,8 @@ function HardTanh:updateOutput(input)
       input:cdata(),
       self.output:cdata(),
       self.min_val,
-      self.max_val
+      self.max_val,
+      self.inplace or false
    )
    return self.output
 end
@@ -25,7 +30,8 @@ function HardTanh:updateGradInput(input, gradOutput)
       gradOutput:cdata(),
       self.gradInput:cdata(),
       self.min_val,
-      self.max_val
+      self.max_val,
+      self.inplace or false
    )
    return self.gradInput
 end
diff --git a/ReLU6.lua b/ReLU6.lua
@@ -15,21 +15,18 @@ function ReLU6:__init(inplace)
 end
 
 function ReLU6:updateOutput(input)
-   input.THNN.ReLU6_updateOutput(
+   input.THNN.HardTanh_updateOutput( -- forward pass now goes through the HardTanh kernel
       input:cdata(),
       self.output:cdata(),
-      self.inplace
-   )
+      0, 6, self.inplace) -- min_val = 0, max_val = 6, followed by the in-place flag
    return self.output
 end
 
 function ReLU6:updateGradInput(input, gradOutput)
-   input.THNN.ReLU6_updateGradInput(
+   input.THNN.HardTanh_updateGradInput( -- backward pass now goes through the HardTanh kernel
       input:cdata(),
       gradOutput:cdata(),
       self.gradInput:cdata(),
-      self.inplace
-   )
+      0, 6, self.inplace) -- same min_val = 0, max_val = 6 bounds as the forward call
    return self.gradInput
 end
-
diff --git a/doc/transfer.md b/doc/transfer.md
@@ -15,7 +15,7 @@ thus outputting a Tensor of the same dimension.
   * `f(x)` = `x,` `otherwise.`
 
 The range of the linear region `[-1 1]` can be adjusted by specifying arguments in declaration, for example `nn.HardTanh(min_value, max_value)`.
-Otherwise, `[min_value max_value]` is set to `[-1 1]` by default.
+Otherwise, `[min_value max_value]` is set to `[-1 1]` by default. Pass `true` as an optional third argument, e.g. `nn.HardTanh(min_value, max_value, true)`, to perform the operation in-place; it defaults to `false`.
 
 
 ```lua
diff --git a/lib/THNN/generic/HardTanh.c b/lib/THNN/generic/HardTanh.c
@@ -7,37 +7,59 @@ void THNN_(HardTanh_updateOutput)(
           THTensor *input,
           THTensor *output,
           real min_val,
-          real max_val)
+          real max_val,
+          bool inplace)
 {
-  THTensor_(resizeAs)(output, input);
+  if (inplace)
+    THTensor_(set)(output, input);
+  else
+    THTensor_(resizeAs)(output, input);
   
   if (input->nDimension == 1 || !THTensor_(isContiguous)(input) || !THTensor_(isContiguous)(output))
   {
-    TH_TENSOR_APPLY2(real, output, real, input,
-      if (*input_data < min_val)
-        *output_data = min_val;
-      else if (*input_data <= max_val)
-        *output_data = *input_data;
-      else
-        *output_data = max_val;
-    );
+    if (inplace)
+      TH_TENSOR_APPLY(real, input,
+        if (*input_data < min_val)
+          *input_data = min_val;
+        else if (*input_data > max_val)
+          *input_data = max_val;
+      );
+      TH_TENSOR_APPLY2(real, output, real, input,
+        if (*input_data < min_val)
+          *output_data = min_val;
+        else if (*input_data <= max_val)
+          *output_data = *input_data;
+        else
+          *output_data = max_val;
+      );
   }
   else
   {
-    real* ptr_output = THTensor_(data)(output);
     real* ptr_input  = THTensor_(data)(input);
+    real* ptr_output = THTensor_(data)(output);
     long i;
+    long n = THTensor_(nElement)(input);
 
+    if (inplace)
 #pragma omp parallel for private(i)
-    for (i = 0; i < THTensor_(nElement)(input); i++)
-    {
-      if (ptr_input[i] < min_val)
-        ptr_output[i] = min_val;
-      else if (ptr_input[i] <= max_val)
-        ptr_output[i] = ptr_input[i];
-      else
-        ptr_output[i] = max_val;
-    }
+      for (i = 0; i < n; i++)
+      {
+        if (ptr_input[i] < min_val)
+          ptr_input[i] = min_val;
+        else if (ptr_input[i] > max_val)
+          ptr_input[i] = max_val;
+      }
+    else
+#pragma omp parallel for private(i)
+      for (i = 0; i < n; i++)
+      {
+        if (ptr_input[i] < min_val)
+          ptr_output[i] = min_val;
+        else if (ptr_input[i] <= max_val)
+          ptr_output[i] = ptr_input[i];
+        else
+          ptr_output[i] = max_val;
+      }
   }
 }
 
@@ -47,37 +69,58 @@ void THNN_(HardTanh_updateGradInput)(
           THTensor *gradOutput,
           THTensor *gradInput,
           real min_val,
-          real max_val)
+          real max_val,
+          bool inplace)
 {
-  THTensor_(resizeAs)(gradInput, input);
+  if (inplace)
+    THTensor_(set)(gradInput, gradOutput);
+  else
+    THTensor_(resizeAs)(gradInput, input);
 
   if (input->nDimension == 1 ||
     !THTensor_(isContiguous)(input) ||
     !THTensor_(isContiguous)(gradOutput) ||
     !THTensor_(isContiguous)(gradInput))
   {
-    TH_TENSOR_APPLY3(real, gradInput, real, gradOutput, real, input,
-      if (*input_data < min_val || *input_data > max_val)
-        *gradInput_data = 0;
-      else
-        *gradInput_data = *gradOutput_data;
-    );
+    if (inplace)
+    { /* the in-place forward pass left input clamped, so saturated entries equal a bound: test inclusively */
+      TH_TENSOR_APPLY2(real, gradOutput, real, input,
+        if (*input_data <= min_val || *input_data >= max_val)
+          *gradOutput_data = 0;
+      );
+    }
+    else
+      TH_TENSOR_APPLY3(real, gradInput, real, gradOutput, real, input,
+        if (*input_data < min_val || *input_data > max_val)
+          *gradInput_data = 0;
+        else
+          *gradInput_data = *gradOutput_data;
+      );
   }
   else
   {
     real* ptr_gradOutput = THTensor_(data)(gradOutput);
     real* ptr_gradInput  = THTensor_(data)(gradInput);
     real* ptr_input      = THTensor_(data)(input);
     long i;
+    long n = THTensor_(nElement)(input);
 
+    if (inplace)
 #pragma omp parallel for private(i)
-    for (i = 0; i < THTensor_(nElement)(input); i++)
-    {
-      if (ptr_input[i] < min_val || ptr_input[i] > max_val)
-        ptr_gradInput[i] = 0;
-      else
-        ptr_gradInput[i] = ptr_gradOutput[i];
-    }
+      for (i = 0; i < n; i++)
+      {
+        if (ptr_input[i] <= min_val || ptr_input[i] >= max_val)
+          ptr_gradInput[i] = 0;
+      }
+    else
+#pragma omp parallel for private(i)
+      for (i = 0; i < n; i++)
+      {
+        if (ptr_input[i] < min_val || ptr_input[i] > max_val)
+          ptr_gradInput[i] = 0;
+        else
+          ptr_gradInput[i] = ptr_gradOutput[i];
+      }
   }
 }
 
diff --git a/lib/THNN/generic/ReLU6.c b/lib/THNN/generic/ReLU6.c
diff --git a/lib/THNN/generic/THNN.h b/lib/THNN/generic/THNN.h
@@ -106,14 +106,16 @@ TH_API void THNN_(HardTanh_updateOutput)(
           THTensor *input,             // input tensor
           THTensor *output,            // [OUT] output tensor
           real min_val,                // lower threshold
-          real max_val);               // upper threshold
+          real max_val,                // upper threshold
+          bool inplace);               // if true, input is clamped in place and output is set to it
 TH_API void THNN_(HardTanh_updateGradInput)(
           THNNState *state,            // library's state
           THTensor *input,             // input tensor
           THTensor *gradOutput,        // gradient w.r.t. module's output
           THTensor *gradInput,         // [OUT] gradient w.r.t. the input
           real min_val,                // lower threshold
-          real max_val);               // upper threshold
+          real max_val,                // upper threshold
+          bool inplace);               // if true, gradOutput is masked in place and gradInput is set to it
 
 TH_API void THNN_(L1Cost_updateOutput)(
           THNNState *state,            // library's state
@@ -472,18 +474,6 @@ TH_API void THNN_(Threshold_updateGradInput)(
           real threshold,
           bool inplace);
 
-TH_API void THNN_(ReLU6_updateOutput)(
-          THNNState *state,
-          THTensor *input,
-          THTensor *output,
-          bool inplace);
-TH_API void THNN_(ReLU6_updateGradInput)(
-          THNNState *state,
-          THTensor *input,
-          THTensor *gradOutput,
-          THTensor *gradInput,
-          bool inplace);
-
 TH_API void THNN_(TemporalConvolution_updateOutput)(
           THNNState *state,
           THTensor *input,
diff --git a/lib/THNN/init.c b/lib/THNN/init.c
@@ -94,9 +94,6 @@
 #include "generic/Threshold.c"
 #include "THGenerateFloatTypes.h"
 
-#include "generic/ReLU6.c"
-#include "THGenerateFloatTypes.h"
-
 #include "generic/TemporalConvolution.c"
 #include "THGenerateFloatTypes.h"
 
diff --git a/test.lua b/test.lua
@@ -273,10 +273,10 @@ function nntest.ReLU6()
       local lt = input:clone():lt(input, 6)
       local output2 = gt:clone():cmul(lt):cmul(input)
       output2:add(6, input:clone():gt(input, 6))
-      mytester:assertTensorEq(output, output2, 0.000001, 'ReLU6 output')
+      mytester:assertTensorEq(output, output2, 0.000001, 'ReLU6 output'..(inplace and ' (inplace)' or '')) -- suffix only in in-place runs
       local gradInput = module:backward(input, gradOutput:clone())
       local gradInput2 = gt:clone():cmul(lt):cmul(gradOutput)
-      mytester:assertTensorEq(gradInput, gradInput2, 0.000001, 'ReLU gradInput')
+      mytester:assertTensorEq(gradInput, gradInput2, 0.000001, 'ReLU6 gradInput'..(inplace and ' (inplace)' or '')) -- label names ReLU6, matching the output check
    end
 end