Commit 23f0976

Combining dense, depthwise and groupwise convolutions through common interface `DenseConvDims`

1 parent: e4fc929

6 files changed, +395 -413 lines
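As a point of orientation (not part of the commit itself), the unified interface is meant to cover dense, grouped and depthwise convolutions with a single `DenseConvDims` type. The sketch below only exercises the channel bookkeeping introduced in the diffs that follow (the `groupcount` keyword and the `w_size[end-1] * groupcount` channel rule); the array sizes are invented, and actual grouped execution on the im2col/direct backends is explicitly left as follow-up work by the commit.

using NNlib

# Input in WHCN layout: 16×16 spatial, 4 channels, batch of 2.
x = rand(Float32, 16, 16, 4, 2)

# Dense convolution: groupcount defaults to 1, kernel is (kw, kh, C_in, C_out).
w_dense = rand(Float32, 3, 3, 4, 8)
cd_dense = DenseConvDims(x, w_dense)

# Grouped convolution: the kernel's next-to-last dim is C_in ÷ groupcount.
w_grouped = rand(Float32, 3, 3, 2, 8)              # 4 input channels in 2 groups
cd_grouped = DenseConvDims(x, w_grouped; groupcount = 2)

# Depthwise convolution is the groupcount == C_in special case.
w_dw = rand(Float32, 3, 3, 1, 4)
cd_dw = DenseConvDims(x, w_dw; groupcount = 4)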

src/conv.jl (+10 -28)
@@ -1,6 +1,4 @@
-export conv, conv!, ∇conv_data, ∇conv_data!, ∇conv_filter, ∇conv_filter!, depthwiseconv,
-       depthwiseconv!, ∇depthwiseconv_data, ∇depthwiseconv_data!, ∇depthwiseconv_filter,
-       ∇depthwiseconv_filter!
+export conv, conv!, ∇conv_data, ∇conv_data!, ∇conv_filter, ∇conv_filter!
 
 ## Convolution API
 #
@@ -36,9 +34,6 @@ for (front_name, backend) in (
         :conv => :im2col,
         :∇conv_data => :im2col,
         :∇conv_filter => :im2col,
-        :depthwiseconv => :im2col,
-        :∇depthwiseconv_data => :im2col,
-        :∇depthwiseconv_filter => :im2col,
     )
 
 # These are the GEMM types we will accelerate with `im2col`
@@ -58,8 +53,7 @@ end
 # Our strategy for 1d and 2d convolution is to reshape to 3d convolutions, which
 # makes things MUCH EASIER for us on the backend side, and is in general pretty fast,
 # since we can specialize on sizes.
-for front_name in (:conv, :∇conv_data, :∇conv_filter,
-                   :depthwiseconv, :∇depthwiseconv_data, :∇depthwiseconv_filter)
+for front_name in (:conv, :∇conv_data, :∇conv_filter)
     for backend in (Symbol(), :_direct, :_im2col)
         for N in (3, 4)
             @eval begin
@@ -87,8 +81,7 @@ end
 # We always support a fallback, non-accelerated path, where we use the direct, but
 # slow, implementations. These should not typically be used, hence the `@debug`,
 # but let's ggo ahead and define them first:
-for front_name in (:conv, :∇conv_data, :∇conv_filter,
-                   :depthwiseconv, :∇depthwiseconv_data, :∇depthwiseconv_filter)
+for front_name in (:conv, :∇conv_data, :∇conv_filter)
     @eval begin
         function $(Symbol("$(front_name)!"))(
                         y::AbstractArray{yT,N}, in1::AbstractArray{T1,N},
@@ -106,7 +99,7 @@ end
 # allocation. :P
 for backend in (Symbol(), :_direct, :_im2col)
     # First make auto-allocating versions of the conv()-like calls:
-    for name in (:conv, :depthwiseconv)
+    for name in (:conv,)
         @eval begin
             function $(Symbol("$(name)$(backend)"))(
                             x::AbstractArray{xT,N}, w::AbstractArray{wT,N},
@@ -118,7 +111,7 @@ for backend in (Symbol(), :_direct, :_im2col)
         end
     end
 
-    for name in (:∇conv_data, :∇depthwiseconv_data)
+    for name in (:∇conv_data,)
         @eval begin
             function $(Symbol("$(name)$(backend)"))(
                             dy::AbstractArray{yT,N}, w::AbstractArray{wT,N},
@@ -130,28 +123,17 @@ for backend in (Symbol(), :_direct, :_im2col)
         end
     end
 
-    # We do the conv/depthwiseconv filter backprops separately, as the shape calculation
-    # for `w` is slightly different for depthwise than for normal dense convolution.
+    # This filter back prop covers dense/depthwise/groupwise conv filter backprops, as groupcount alone
+    # is a deciding factor from cudnn's perspective. For backends im2col and direct needs to be handled.
     @eval begin
         function $(Symbol("∇conv_filter$(backend)"))(
                         x::AbstractArray{xT,N}, dy::AbstractArray{yT,N},
                         cdims::ConvDims; kwargs...) where {xT, yT, N}
-            dw = similar(dy, kernel_size(cdims)..., channels_in(cdims),
+            dw = similar(dy, kernel_size(cdims)..., div(channels_in(cdims),group_count(cdims)),
                          channels_out(cdims))
            return $(Symbol("∇conv_filter$(backend)!"))(dw, x, dy, cdims; kwargs...)
         end
     end
-
-    @eval begin
-        function $(Symbol("∇depthwiseconv_filter$(backend)"))(
-                        x::AbstractArray{xT,N}, dy::AbstractArray{yT,N},
-                        cdims::ConvDims; kwargs...) where {xT, yT, N}
-            dw = similar(dy, kernel_size(cdims)..., channel_multiplier(cdims),
-                         channels_in(cdims))
-            return $(Symbol("∇depthwiseconv_filter$(backend)!"))(dw, x, dy, cdims;
-                                                                 kwargs...)
-        end
-    end
 end
 
 
@@ -172,10 +154,10 @@ function conv(x, w::AbstractArray{T, N}; stride = 1, pad = 0, dilation = 1, flip
     return conv(x, w, cdims)
 end
 
-function depthwiseconv(x, w::AbstractArray{T, N}; stride = 1, pad = 0, dilation = 1, flipped = false) where {T, N}
+function depthwiseconv(x, w::AbstractArray{T, N}; stride = 1, pad = 0, dilation = 1, flipped = false, groupcount) where {T, N}
     stride = expand(Val(N-2), stride)
     pad = expand(Val(N-2), pad)
     dilation = expand(Val(N-2), dilation)
-    cdims = DepthwiseConvDims(x, w; stride = stride, padding = pad, dilation = dilation, flipkernel = flipped)
+    cdims = DenseConvDims(x, w; stride = stride, padding = pad, dilation = dilation, flipkernel = flipped, groupcount=groupcount)
     return depthwiseconv(x, w, cdims)
 end
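To make the shape change in the auto-allocating `∇conv_filter` concrete: the gradient buffer's input-channel dimension is now `channels_in ÷ group_count`, which reduces to the old dense behaviour when the group count is 1. A small arithmetic sketch with invented sizes, mirroring the `similar` call above:

# Hypothetical sizes, purely for illustration of the allocation above.
kernel = (3, 3)
C_in, groups, C_out = 8, 4, 16
dw_size = (kernel..., div(C_in, groups), C_out)    # (3, 3, 2, 16)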

src/dim_helpers/ConvDims.jl (+1 -3)
@@ -16,9 +16,7 @@ abstract type ConvDims{N, S, P, D, F} end
 
 # Hack to get rid of type parameters
 function basetype(::Type{C}) where {C <: ConvDims}
-    if C <: DepthwiseConvDims
-        return DepthwiseConvDims
-    elseif C <: DenseConvDims
+    if C <: DenseConvDims
         return DenseConvDims
     elseif C <: PoolDims
         return PoolDims
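With `DepthwiseConvDims` gone, `basetype` only has to distinguish `DenseConvDims` from `PoolDims`. Its job is simply to strip type parameters; a minimal sketch (`basetype` is an internal helper, hence the qualified call; sizes are invented):

using NNlib

cdims = DenseConvDims((16, 16, 4, 2), (3, 3, 2, 8); groupcount = 2)
NNlib.basetype(typeof(cdims)) === DenseConvDims    # true: type parameters stripped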

src/dim_helpers/DenseConvDims.jl (+16 -14)
@@ -5,30 +5,31 @@ export DenseConvDims
 
 Concrete subclass of `ConvDims` for a normal, dense, conv2d/conv3d.
 """
-struct DenseConvDims{N,K,C_in,C_out,S,P,D,F} <: ConvDims{N,S,P,D,F}
+struct DenseConvDims{N,K,C_in,C_out,S,P,D,F,G} <: ConvDims{N,S,P,D,F}
     I::NTuple{N,Int}
 end
 
 # Getters for the fields
 input_size(c::DenseConvDims) = c.I
-kernel_size(c::DenseConvDims{N,K,C_in,C_out,S,P,D,F}) where {N,K,C_in,C_out,S,P,D,F} = K
-channels_in(c::DenseConvDims{N,K,C_in,C_out,S,P,D,F}) where {N,K,C_in,C_out,S,P,D,F} = C_in
-channels_out(c::DenseConvDims{N,K,C_in,C_out,S,P,D,F}) where {N,K,C_in,C_out,S,P,D,F} = C_out
+kernel_size(c::DenseConvDims{N,K,C_in,C_out,S,P,D,F,G}) where {N,K,C_in,C_out,S,P,D,F,G} = K
+channels_in(c::DenseConvDims{N,K,C_in,C_out,S,P,D,F,G}) where {N,K,C_in,C_out,S,P,D,F,G} = C_in
+channels_out(c::DenseConvDims{N,K,C_in,C_out,S,P,D,F,G}) where {N,K,C_in,C_out,S,P,D,F,G} = C_out
+group_count(c::DenseConvDims{N,K,C_in,C_out,S,P,D,F,G}) where {N,K,C_in,C_out,S,P,D,F,G} = G
 
 # Convenience wrapper to create DenseConvDims objects
 function DenseConvDims(x_size::NTuple{M}, w_size::NTuple{M};
-                       stride=1, padding=0, dilation=1, flipkernel::Bool=false) where M
+                       stride=1, padding=0, dilation=1, flipkernel::Bool=false, groupcount=1) where M
     # Do common parameter validation
     stride, padding, dilation = check_spdf(x_size, w_size, stride, padding, dilation)
 
     # Ensure channels are equal
-    if x_size[end-1] != w_size[end-1]
+    if x_size[end-1] != w_size[end-1]*groupcount
         xs = x_size[end-1]
-        ws = w_size[end-1]
+        ws = w_size[end-1]*groupcount
         throw(DimensionMismatch("Input channels must match! ($xs vs. $ws)"))
     end
-
-    # The type parameters are what
+
+    # The type parameters are what
     return DenseConvDims{
         M - 2,
         w_size[1:end-2],
@@ -37,7 +38,8 @@ function DenseConvDims(x_size::NTuple{M}, w_size::NTuple{M};
         stride,
         padding,
         dilation,
-        flipkernel
+        flipkernel,
+        groupcount
     }(
         # Input spatial size
         x_size[1:end-2],
@@ -56,17 +58,17 @@ end
 # from the original progenitor object that it inherits shapes from.
 function DenseConvDims(c::ConvDims; N=spatial_dims(c), I=input_size(c), K=kernel_size(c),
                        C_in=channels_in(c), C_out=channels_out(c), S=stride(c),
-                       P=padding(c), D=dilation(c), F=flipkernel(c))
-    return DenseConvDims{N, K, C_in, C_out, S, P, D, F}(I)
+                       P=padding(c), D=dilation(c), F=flipkernel(c), G=group_count(c))
+    return DenseConvDims{N, K, C_in, C_out, S, P, D, F, G}(I)
 end
 
 function check_dims(x::NTuple{M}, w::NTuple{M}, y::NTuple{M}, cdims::DenseConvDims) where {M}
     # First, check that channel counts are all correct:
     @assert x[end-1] == channels_in(cdims) DimensionMismatch("Data input channel count ($(x[end-1]) vs. $(channels_in(cdims)))")
     @assert y[end-1] == channels_out(cdims) DimensionMismatch("Data output channel count ($(y[end-1]) vs. $(channels_out(cdims)))")
-    @assert w[end-1] == channels_in(cdims) DimensionMismatch("Kernel input channel count ($(w[end-1]) vs. $(channels_in(cdims)))")
+    @assert w[end-1] == channels_in(cdims)/group_count(cdims) DimensionMismatch("Kernel input channel count ($(w[end-1]) vs. $(channels_in(cdims)/group_count(cdims)))")
     @assert w[end] == channels_out(cdims) DimensionMismatch("Kernel output channel count ($(w[end]) vs. $(channels_out(cdims)))")
-
+
     # Next, check that the spatial dimensions match up
     @assert x[1:end-2] == input_size(cdims) DimensionMismatch("Data input spatial size ($(x[1:end-2]) vs. $(input_size(cdims)))")
     @assert y[1:end-2] == output_size(cdims) DimensionMismatch("Data output spatial size ($(y[1:end-2]) vs. $(output_size(cdims)))")

src/dim_helpers/DepthwiseConvDims.jl (+90 -90)
@@ -1,90 +1,90 @@
-export DepthwiseConvDims
-
-"""
-    DepthwiseConvDims
-
-Concrete subclass of `ConvDims` for a depthwise convolution. Differs primarily due to
-characterization by C_in, C_mult, rather than C_in, C_out. Useful to be separate from
-DenseConvDims primarily for channel calculation differences.
-"""
-struct DepthwiseConvDims{N,S,P,D,F} <: ConvDims{N,S,P,D,F}
-    I::NTuple{N, Int}
-    K::NTuple{N, Int}
-    C_in::Int
-    C_mult::Int
-end
-
-# Getters for the fields
-input_size(c::DepthwiseConvDims) = c.I
-kernel_size(c::DepthwiseConvDims) = c.K
-channels_in(c::DepthwiseConvDims) = c.C_in
-channels_out(c::DepthwiseConvDims) = c.C_in * channel_multiplier(c)
-channel_multiplier(c::DepthwiseConvDims) = c.C_mult
-
-
-# Convenience wrapper to create DepthwiseConvDims objects
-function DepthwiseConvDims(x_size::NTuple{M}, w_size::NTuple{M};
-                           stride=1, padding=0, dilation=1, flipkernel::Bool=false) where M
-    # Do common parameter validation
-    stride, padding, dilation = check_spdf(x_size, w_size, stride, padding, dilation)
-
-    # Ensure channels are equal
-    if x_size[end-1] != w_size[end]
-        xs = x_size[end-1]
-        ws = w_size[end]
-        throw(DimensionMismatch("Input channels must match! ($xs vs. $ws)"))
-    end
-
-    return DepthwiseConvDims{
-        M - 2,
-        stride,
-        padding,
-        dilation,
-        flipkernel
-    }(
-        # Image spatial size
-        x_size[1:end-2],
-
-        # Kernel spatial size
-        w_size[1:end-2],
-
-        # Input channels
-        x_size[end-1],
-
-        # Channel multiplier
-        w_size[end-1],
-    )
-end
-
-# Auto-extract sizes and just pass those directly in
-function DepthwiseConvDims(x::AbstractArray, w::AbstractArray; kwargs...)
-    if ndims(x) != ndims(w)
-        throw(DimensionMismatch("Rank of x and w must match! ($(ndims(x)) vs. $(ndims(w)))"))
-    end
-    return DepthwiseConvDims(size(x), size(w); kwargs...)
-end
-
-# Useful for constructing a new DepthwiseConvDims that has only a few elements different
-# from the original progenitor object.
-function DepthwiseConvDims(c::DepthwiseConvDims; N=spatial_dims(c), I=input_size(c), K=kernel_size(c),
-                           C_in=channels_in(c), C_m=channel_multiplier(c), S=stride(c),
-                           P=padding(c), D=dilation(c), F=flipkernel(c))
-    return DepthwiseConvDims{N, S, P, D, F}(I, K, C_in, C_m)
-end
-
-# This one is basically the same as for DenseConvDims, we only change a few lines for kernel channel count
-function check_dims(x::NTuple{M}, w::NTuple{M}, y::NTuple{M}, cdims::DepthwiseConvDims) where {M}
-    # First, check that channel counts are all correct:
-    @assert x[end-1] == channels_in(cdims) DimensionMismatch("Data input channel count ($(x[end-1]) vs. $(channels_in(cdims)))")
-    @assert y[end-1] == channels_out(cdims) DimensionMismatch("Data output channel count ($(y[end-1]) vs. $(channels_out(cdims)))")
-    @assert w[end-1] == channel_multiplier(cdims) DimensionMismatch("Kernel multiplier channel count ($(w[end-1]) vs. $(channel_multiplier(cdims))")
-    @assert w[end] == channels_in(cdims) DimensionMismatch("Kernel input channel count ($(w[end]) vs. $(channels_in(cdims)))")
-
-    # Next, check that the spatial dimensions match up
-    @assert x[1:end-2] == input_size(cdims) DimensionMismatch("Data input spatial size ($(x[1:end-2]) vs. $(input_size(cdims)))")
-    @assert y[1:end-2] == output_size(cdims) DimensionMismatch("Data output spatial size ($(y[1:end-2]) vs. $(output_size(cdims)))")
-    @assert w[1:end-2] == kernel_size(cdims) DimensionMismatch("Kernel spatial size ($(w[1:end-2]) vs. $(kernel_size(cdims)))")
-
-    # Finally, check that the batch size matches
-    @assert x[end] == y[end] DimensionMismatch("Batch size ($(x[end]) vs. $(y[end]))")
-end
+# export DepthwiseConvDims
+#
+# """
+#     DepthwiseConvDims
+#
+# Concrete subclass of `ConvDims` for a depthwise convolution. Differs primarily due to
+# characterization by C_in, C_mult, rather than C_in, C_out. Useful to be separate from
+# DenseConvDims primarily for channel calculation differences.
+# """
+# struct DepthwiseConvDims{N,S,P,D,F} <: ConvDims{N,S,P,D,F}
+#     I::NTuple{N, Int}
+#     K::NTuple{N, Int}
+#     C_in::Int
+#     C_mult::Int
+# end
+#
+# # Getters for the fields
+# input_size(c::DepthwiseConvDims) = c.I
+# kernel_size(c::DepthwiseConvDims) = c.K
+# channels_in(c::DepthwiseConvDims) = c.C_in
+# channels_out(c::DepthwiseConvDims) = c.C_in * channel_multiplier(c)
+# channel_multiplier(c::DepthwiseConvDims) = c.C_mult
+#
+#
+# # Convenience wrapper to create DepthwiseConvDims objects
+# function DepthwiseConvDims(x_size::NTuple{M}, w_size::NTuple{M};
+#                            stride=1, padding=0, dilation=1, flipkernel::Bool=false) where M
+#     # Do common parameter validation
+#     stride, padding, dilation = check_spdf(x_size, w_size, stride, padding, dilation)
+#
+#     # Ensure channels are equal
+#     if x_size[end-1] != w_size[end]
+#         xs = x_size[end-1]
+#         ws = w_size[end]
+#         throw(DimensionMismatch("Input channels must match! ($xs vs. $ws)"))
+#     end
+#
+#     return DepthwiseConvDims{
+#         M - 2,
+#         stride,
+#         padding,
+#         dilation,
+#         flipkernel
+#     }(
+#         # Image spatial size
+#         x_size[1:end-2],
+#
+#         # Kernel spatial size
+#         w_size[1:end-2],
+#
+#         # Input channels
+#         x_size[end-1],
+#
+#         # Channel multiplier
+#         w_size[end-1],
+#     )
+# end
+#
+# # Auto-extract sizes and just pass those directly in
+# function DepthwiseConvDims(x::AbstractArray, w::AbstractArray; kwargs...)
+#     if ndims(x) != ndims(w)
+#         throw(DimensionMismatch("Rank of x and w must match! ($(ndims(x)) vs. $(ndims(w)))"))
+#     end
+#     return DepthwiseConvDims(size(x), size(w); kwargs...)
+# end
+#
+# # Useful for constructing a new DepthwiseConvDims that has only a few elements different
+# # from the original progenitor object.
+# function DepthwiseConvDims(c::DepthwiseConvDims; N=spatial_dims(c), I=input_size(c), K=kernel_size(c),
+#                            C_in=channels_in(c), C_m=channel_multiplier(c), S=stride(c),
+#                            P=padding(c), D=dilation(c), F=flipkernel(c))
+#     return DepthwiseConvDims{N, S, P, D, F}(I, K, C_in, C_m)
+# end
+#
+# # This one is basically the same as for DenseConvDims, we only change a few lines for kernel channel count
+# function check_dims(x::NTuple{M}, w::NTuple{M}, y::NTuple{M}, cdims::DepthwiseConvDims) where {M}
+#     # First, check that channel counts are all correct:
+#     @assert x[end-1] == channels_in(cdims) DimensionMismatch("Data input channel count ($(x[end-1]) vs. $(channels_in(cdims)))")
+#     @assert y[end-1] == channels_out(cdims) DimensionMismatch("Data output channel count ($(y[end-1]) vs. $(channels_out(cdims)))")
+#     @assert w[end-1] == channel_multiplier(cdims) DimensionMismatch("Kernel multiplier channel count ($(w[end-1]) vs. $(channel_multiplier(cdims))")
+#     @assert w[end] == channels_in(cdims) DimensionMismatch("Kernel input channel count ($(w[end]) vs. $(channels_in(cdims)))")
+#
+#     # Next, check that the spatial dimensions match up
+#     @assert x[1:end-2] == input_size(cdims) DimensionMismatch("Data input spatial size ($(x[1:end-2]) vs. $(input_size(cdims)))")
+#     @assert y[1:end-2] == output_size(cdims) DimensionMismatch("Data output spatial size ($(y[1:end-2]) vs. $(output_size(cdims)))")
+#     @assert w[1:end-2] == kernel_size(cdims) DimensionMismatch("Kernel spatial size ($(w[1:end-2]) vs. $(kernel_size(cdims)))")
+#
+#     # Finally, check that the batch size matches
+#     @assert x[end] == y[end] DimensionMismatch("Batch size ($(x[end]) vs. $(y[end]))")
+# end
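The file is commented out rather than deleted. The removed type characterized a depthwise kernel as (spatial..., C_mult, C_in) with channels_out = C_in * C_mult; under the unified interface the plain depthwise case (multiplier 1) corresponds to a `DenseConvDims` whose `groupcount` equals the input channel count. A hedged sketch of that correspondence with invented sizes (a channel multiplier larger than 1 does not map across this directly, since the kernel layouts differ):

using NNlib

x = rand(Float32, 28, 28, 8, 1)          # 8 input channels

# Old characterization (removed above): kernel (k, k, C_mult, C_in) with C_mult = 1.
# New characterization: kernel (k, k, C_in ÷ groupcount, C_out) with groupcount = C_in.
w = rand(Float32, 3, 3, 1, 8)

cdims = DenseConvDims(x, w; groupcount = size(x, 3))
NNlib.channels_in(cdims), NNlib.channels_out(cdims), NNlib.group_count(cdims)   # (8, 8, 8)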
