8 changes: 6 additions & 2 deletions arm_compute/runtime/NEON/functions/NEConvolutionLayer.h
@@ -103,6 +103,7 @@ class NEConvolutionLayer : public IFunction
* |QASYMM8 |QASYMM8_SIGNED |S32 |QASYMM8 |
* |QASYMM8 |QSYMM8_PER_CHANNEL |S32 |QASYMM8 |
* |QASYMM8_SIGNED |QASYMM8_SIGNED |S32 |QASYMM8_SIGNED |
* |QASYMM8_SIGNED |QASYMM8_SIGNED |F32 |F32 |
* |QASYMM8_SIGNED |QSYMM8_PER_CHANNEL |S32 |QASYMM8_SIGNED |
*
* @param[in] input Source tensor. 3 lower dimensions represent a single input [width, height, IFM],
@@ -111,7 +112,8 @@ class NEConvolutionLayer : public IFunction
* @param[in] weights Weights tensor. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM].
* Data type supported: Same as @p input, also could be QSYMM8_PER_CHANNEL or QASYMM8_SIGNED if input is QASYMM8/QASYMM8_SIGNED.
* @param[in] biases Biases tensor. Shared biases supported. Biases are 1D tensor with dimensions [OFM].
* Data type supported: Same as @p input, except for input of QASYMM8/QASYMM8_SIGNED type where biases should be of S32 type.
* Data type supported: Same as @p input, except for input of QASYMM8/QASYMM8_SIGNED type where biases should be of S32 type and
* for F32 dequantization the bias must be F32.
* @param[out] output Destination tensor. 3 lower dimensions represent a single output [width, height, OFM], while the rest represent batch of outputs.
* Data types supported: Same as @p input.
* @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo.
@@ -140,8 +142,10 @@ class NEConvolutionLayer : public IFunction
* Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
* @param[in] weights Weights tensor. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM].
* Data type supported:Same as @p input, also could be QSYMM8_PER_CHANNEL or QASYMM8_SIGNED if input is QASYMM8/QASYMM8_SIGNED.
* @param[in] biases Biases tensor. Shared biases supported. Biases are 1D tensor with dimensions [OFM].
* Data type supported: Same as @p input, except for input of QASYMM8/QASYMM8_SIGNED type where biases should be of S32 type.
* @param[in] biases Biases tensor. Shared biases supported. Biases are 1D tensor with dimensions [OFM].
* Data type supported: Same as @p input, except for input of QASYMM8/QASYMM8_SIGNED type where biases should be of S32 type and
* for F32 dequantization the bias must be F32.
* @param[in] output Destination tensor. 3 lower dimensions represent a single output [width, height, OFM], while the rest represent batch of outputs.
* Data types supported: Same as @p input.
* @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo.
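The new table row above documents a QASYMM8_SIGNED source/weights combination that dequantizes straight to an F32 destination with an F32 bias. A minimal configuration sketch for that combination follows; the shapes, quantization parameters and padding are illustrative assumptions, not values taken from this patch.

#include "arm_compute/runtime/NEON/functions/NEConvolutionLayer.h"
#include "arm_compute/runtime/Tensor.h"

using namespace arm_compute;

int main()
{
    // Illustrative sketch of the int8 -> F32 dequantization path.
    Tensor src, weights, biases, dst;

    // [width, height, IFM, batch]; scales/offsets are made up for the example.
    src.allocator()->init(
        TensorInfo(TensorShape(32U, 32U, 16U, 1U), 1, DataType::QASYMM8_SIGNED, QuantizationInfo(0.5f, 10)));
    // [kernel_x, kernel_y, IFM, OFM]
    weights.allocator()->init(
        TensorInfo(TensorShape(3U, 3U, 16U, 8U), 1, DataType::QASYMM8_SIGNED, QuantizationInfo(0.25f, 0)));
    // F32 bias, as required for the F32 dequantization output
    biases.allocator()->init(TensorInfo(TensorShape(8U), 1, DataType::F32));
    // F32 destination
    dst.allocator()->init(TensorInfo(TensorShape(32U, 32U, 8U, 1U), 1, DataType::F32));

    NEConvolutionLayer conv;
    conv.configure(&src, &weights, &biases, &dst, PadStrideInfo(1, 1, 1, 1)); // 3x3 kernel, stride 1, pad 1

    src.allocator()->allocate();
    weights.allocator()->allocate();
    biases.allocator()->allocate();
    dst.allocator()->allocate();

    // ... fill src/weights/biases ...
    conv.run();
    return 0;
}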
3 changes: 3 additions & 0 deletions arm_compute/runtime/NEON/functions/NEGEMMConvolutionLayer.h
@@ -78,6 +78,7 @@ class NEGEMMConvolutionLayer : public IFunction
* |QASYMM8 |QASYMM8 |S32 |QASYMM8 |
* |QASYMM8 |QSYMM8_PER_CHANNEL |S32 |QASYMM8 |
* |QASYMM8_SIGNED |QASYMM8_SIGNED |S32 |QASYMM8_SIGNED |
* |QASYMM8_SIGNED |QASYMM8_SIGNED |F32 |F32 |
* |QASYMM8_SIGNED |QSYMM8_PER_CHANNEL |S32 |QASYMM8_SIGNED |
*
* @param[in] input Source tensor. 3 lower dimensions represent a single input [width, height, IFM],
@@ -87,6 +88,7 @@ class NEGEMMConvolutionLayer : public IFunction
* Data type supported: QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL/BFLOAT16/F16/F32.
* @param[in] biases Biases tensor. Shared biases supported. Biases are 1D tensor with dimensions [OFM].
* Data type supported: Should match @p input data type, except for input of QASYMM8/QASYMM8_SIGNED type where biases should be of S32 type.
* For F32 dequantization the bias must be F32.
* @param[out] output Destination tensor. 3 lower dimensions represent a single output [width, height, OFM], while the rest represent batch of outputs.
* Data types supported: Same as @p input.
* @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo.
@@ -117,6 +119,7 @@ class NEGEMMConvolutionLayer : public IFunction
* Data type supported: QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL/BFLOAT16/F16/F32.
* @param[in] biases Biases tensor info. Shared biases supported. Biases are 1D tensor with dimensions [OFM].
* Data type supported: Should match @p input data type, except for input of QASYMM8/QASYMM8_SIGNED type where biases should be of S32 type.
* For F32 dequantization the bias must be F32.
* @param[in] output Destination tensor info. 3 lower dimensions represent a single output [width, height, OFM], while the rest represent batch of outputs.
* Data types supported: Same as @p input.
* @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo.
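The same combination can be checked ahead of time through the static validate() overload. A small sketch under the same illustrative shapes and quantization parameters as above; per the updated bias documentation, the bias info here is F32, and an S32 bias paired with an F32 destination would be expected to fail validation.

#include "arm_compute/runtime/NEON/functions/NEGEMMConvolutionLayer.h"

using namespace arm_compute;

// Returns an OK Status if the int8 -> F32 convolution configuration is supported.
Status check_int8_to_f32_conv()
{
    const TensorInfo src(TensorShape(32U, 32U, 16U, 1U), 1, DataType::QASYMM8_SIGNED, QuantizationInfo(0.5f, 10));
    const TensorInfo weights(TensorShape(3U, 3U, 16U, 8U), 1, DataType::QASYMM8_SIGNED, QuantizationInfo(0.25f, 0));
    const TensorInfo biases(TensorShape(8U), 1, DataType::F32); // F32 bias for the dequantization path
    const TensorInfo dst(TensorShape(32U, 32U, 8U, 1U), 1, DataType::F32);

    return NEGEMMConvolutionLayer::validate(&src, &weights, &biases, &dst, PadStrideInfo(1, 1, 1, 1));
}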
2 changes: 2 additions & 0 deletions arm_compute/runtime/experimental/operators/CpuGemmConv2d.h
@@ -77,6 +77,7 @@ class CpuGemmConv2d : public IOperator
* |QASYMM8 |QASYMM8_SIGNED |S32 |QASYMM8 |
* |QASYMM8 |QSYMM8_PER_CHANNEL |S32 |QASYMM8 |
* |QASYMM8_SIGNED |QASYMM8_SIGNED |S32 |QASYMM8_SIGNED |
* |QASYMM8_SIGNED |QASYMM8_SIGNED |F32 |F32 |
* |QASYMM8_SIGNED |QSYMM8_PER_CHANNEL |S32 |QASYMM8_SIGNED |
*
* @param[in] src Source tensor info. 3 lower dimensions represent a single input [width, height, IFM],
@@ -86,6 +87,7 @@ class CpuGemmConv2d : public IOperator
* Data type supported: QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL/BFLOAT16/F16/F32.
* @param[in] biases Biases tensor info. Shared biases supported. Biases are 1D tensor with dimensions [OFM].
* Data type supported: Should match @p input data type, except for input of QASYMM8/QASYMM8_SIGNED type where biases should be of S32 type.
* For F32 dequantization the bias must be F32.
* @param[out] dst Destination tensor info. 3 lower dimensions represent a single output [width, height, OFM], while the rest represent batch of outputs.
* Data types supported: Same as @p input.
* @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo.
2 changes: 2 additions & 0 deletions docs/user_guide/operator_list.dox
@@ -610,6 +610,7 @@ where N = batches, C = channels, H = height, W = width, D = depth
<tr><td>QASYMM8<td>QASYMM8_SIGNED<td>S32<td>QASYMM8
<tr><td>QASYMM8<td>QSYMM8_PER_CHANNEL<td>S32<td>QASYMM8
<tr><td>QASYMM8_SIGNED<td>QASYMM8_SIGNED<td>S32<td>QASYMM8_SIGNED
<tr><td>QASYMM8_SIGNED<td>QASYMM8_SIGNED<td>F32<td>F32
<tr><td>QASYMM8_SIGNED<td>QSYMM8_PER_CHANNEL<td>S32<td>QASYMM8_SIGNED
</table>
<tr>
@@ -1712,6 +1713,7 @@ where N = batches, C = channels, H = height, W = width, D = depth
<tr><td>QASYMM8<td>QASYMM8<td>S32<td>QASYMM8
<tr><td>QASYMM8<td>QSYMM8_PER_CHANNEL<td>S32<td>QASYMM8
<tr><td>QASYMM8_SIGNED<td>QASYMM8_SIGNED<td>S32<td>QASYMM8_SIGNED
<tr><td>QASYMM8_SIGNED<td>QASYMM8_SIGNED<td>F32<td>F32
<tr><td>QASYMM8_SIGNED<td>QSYMM8_PER_CHANNEL<td>S32<td>QASYMM8_SIGNED
</table>
<tr>
9 changes: 5 additions & 4 deletions src/cpu/kernels/gemmlowp/generic/neon/impl.h
@@ -66,8 +66,9 @@ void neon_run_offset_contribution_float(const Window &window,
const int window_step_x = 16;

// if vector_sum_col is nullptr then stride_y is 0, else get stride_y
const size_t sum_col_stride_y = (vector_sum_col != nullptr) ? (vector_sum_col->info()->strides_in_bytes().y()) : 0;
Iterator mm_result_it(mm_result, collapsed_window);
const size_t sum_col_stride_w = (vector_sum_col != nullptr) ? vector_sum_col->info()->strides_in_bytes()[3] : 0;

Iterator mm_result_it(mm_result, collapsed_window);

if ((a_offset != 0) && (b_offset != 0) && (vector_sum_col != nullptr) && (vector_sum_row != nullptr)) // true, true
{
@@ -96,7 +97,7 @@
[&](const Coordinates &id)
{
const int batch_id = id.z() / depth_input;
const size_t batch_offset_col = batch_id * sum_col_stride_y;
const size_t batch_offset_col = batch_id * sum_col_stride_w;
auto vector_sum_col_ptr = reinterpret_cast<const int32_t *>(vector_sum_col_it.ptr() + batch_offset_col +
batch_id * vector_sum_col_batch_offset);
auto mm_result_ptr = reinterpret_cast<T *>(mm_result_it.ptr());
@@ -216,7 +217,7 @@
const int batch_id = id.z() / depth_input;
const size_t batch_offset_col =
batch_id *
sum_col_stride_y; // Value to offset vector_sum_col_ptr to allow for iteration of y values in tensor
sum_col_stride_w; // Value to offset vector_sum_col_ptr to allow for iteration of w values in tensor
auto vector_sum_col_ptr = reinterpret_cast<const int32_t *>(vector_sum_col_it.ptr() + batch_offset_col +
batch_id * vector_sum_col_batch_offset);
auto mm_result_ptr = reinterpret_cast<T *>(mm_result_it.ptr());
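The hunks above also change how the per-batch slice of vector_sum_col is selected: the offset is now computed from the stride of dimension 3 (the batch dimension of the column-sums tensor) rather than the Y stride. As a reminder of what this offset-contribution-plus-float-scaling stage computes, the standard zero-point expansion is (notation mine, not from the patch: z are zero points, s are scales, K is the accumulation depth; the kernel may fold the signs of these terms differently):

\[
\sum_{k=0}^{K-1}(a_k - z_a)(b_k - z_b)
  \;=\; \sum_k a_k b_k \;-\; z_b \sum_k a_k \;-\; z_a \sum_k b_k \;+\; K\, z_a z_b
\]

where the per-output-channel sums \(\sum_k b_k\) are what vector_sum_col holds and the per-row sums \(\sum_k a_k\) are vector_sum_row. The F32 destination value of the dequantization path is then the scaled, corrected accumulator plus the F32 bias:

\[
y_{F32} \;=\; s_a\, s_b \cdot \mathrm{acc}_{\mathrm{corrected}} \;+\; \mathrm{bias}_{F32}.
\]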
4 changes: 3 additions & 1 deletion src/cpu/operators/CpuConv2d.h
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2017-2021, 2023-2024 Arm Limited.
* Copyright (c) 2017-2021, 2023-2025 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -88,6 +88,7 @@ class CpuConv2d : public ICpuOperator
* |QASYMM8 |QASYMM8_SIGNED |S32 |QASYMM8 |
* |QASYMM8 |QSYMM8_PER_CHANNEL |S32 |QASYMM8 |
* |QASYMM8_SIGNED |QASYMM8_SIGNED |S32 |QASYMM8_SIGNED |
* |QASYMM8_SIGNED |QASYMM8_SIGNED |F32 |F32 |
* |QASYMM8_SIGNED |QSYMM8_PER_CHANNEL |S32 |QASYMM8_SIGNED |
*
* @param[in] src Source tensor info. 3 lower dimensions represent a single input [width, height, IFM],
@@ -97,6 +98,7 @@ class CpuConv2d : public ICpuOperator
* Data type supported: Same as @p src, also could be QSYMM8_PER_CHANNEL or QASYMM8_SIGNED if input is QASYMM8/QASYMM8_SIGNED.
* @param[in] biases Biases tensor info. Shared biases supported. Biases are 1D tensor with dimensions [OFM].
* Data type supported: Same as @p src, except for input of QASYMM8/QASYMM8_SIGNED type where biases should be of S32 type.
* For F32 dequantization the bias must be F32.
* @param[out] dst Destination tensor info. 3 lower dimensions represent a single output [width, height, OFM], while the rest represent batch of outputs.
* Data types supported: Same as @p src.
* @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo.
77 changes: 61 additions & 16 deletions src/cpu/operators/CpuGemmConv2d.cpp
@@ -153,6 +153,10 @@ void initialize_reshaped_weight_info(const ITensorInfo &weights, ITensorInfo &re
reshaped_weights.set_tensor_shape(collapsed_weights);
}
}
inline bool int8_dequantize_f32_path(DataType src, DataType dst)
{
return src == DataType::QASYMM8_SIGNED && dst == DataType::F32;
}
} // namespace

CpuGemmConv2d::WeightTransformMethod CpuGemmConv2d::get_wt_method(const ITensorInfo &weights)
@@ -287,12 +291,27 @@ void CpuGemmConv2d::configure_mm(const ITensorInfo *src,
}

GEMMLowpOutputStageInfo output_info;
output_info.type = GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT;
output_info.gemmlowp_offset = uoqinfo.offset;
output_info.gemmlowp_min_bound = min_activation;
output_info.gemmlowp_max_bound = max_activation;
output_info.is_quantized_per_channel = (tmp_weights.data_type() == DataType::QSYMM8_PER_CHANNEL);
quantization::calculate_quantized_multipliers(iqinfo, wqinfo, oqinfo, output_info);

// F32 dequant path? (input quantized, output float)
if (int8_dequantize_f32_path(data_type, dst->data_type()))
{
// No requant stage; offsets are handled via offset-contribution on int32
output_info.type = GEMMLowpOutputStageType::NONE;
output_info.gemmlowp_offset = 0;
output_info.gemmlowp_min_bound = 0;
output_info.gemmlowp_max_bound = 0;
output_info.is_quantized_per_channel = false; // irrelevant when NONE
}
else
{
// Existing Q->Q path
output_info.type = GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT;
output_info.gemmlowp_offset = uoqinfo.offset;
output_info.gemmlowp_min_bound = min_activation;
output_info.gemmlowp_max_bound = max_activation;
output_info.is_quantized_per_channel = (tmp_weights.data_type() == DataType::QSYMM8_PER_CHANNEL);
quantization::calculate_quantized_multipliers(iqinfo, wqinfo, oqinfo, output_info);
}

const GEMMInfo gemm_info =
GEMMInfo(false /* is_a_reshaped */, false /* is_b_reshaped */, true /* reshape_b_only_on_first_run */,
@@ -367,14 +386,28 @@ Status CpuGemmConv2d::validate_mm(const ITensorInfo *src,
{
std::tie(min_activation, max_activation) = get_quantized_activation_min_max(act_info, data_type, uoqinfo);
}

// F32 dequant path? (input quantized, output float)
GEMMLowpOutputStageInfo output_info;
output_info.type = GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT;
output_info.gemmlowp_offset = uoqinfo.offset;
output_info.gemmlowp_min_bound = min_activation;
output_info.gemmlowp_max_bound = max_activation;
output_info.is_quantized_per_channel = (weights->data_type() == DataType::QSYMM8_PER_CHANNEL);
ARM_COMPUTE_RETURN_ON_ERROR(quantization::calculate_quantized_multipliers(iqinfo, wqinfo, oqinfo, output_info));
if (int8_dequantize_f32_path(data_type, dst->data_type()))
{
// No requant stage; offsets are handled via offset-contribution on int32
output_info.type = GEMMLowpOutputStageType::NONE;
output_info.gemmlowp_offset = 0;
output_info.gemmlowp_min_bound = 0;
output_info.gemmlowp_max_bound = 0;
output_info.is_quantized_per_channel = false; // irrelevant when NONE
}
else
{
// Existing Q->Q path
output_info.type = GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT;
output_info.gemmlowp_offset = uoqinfo.offset;
output_info.gemmlowp_min_bound = min_activation;
output_info.gemmlowp_max_bound = max_activation;
output_info.is_quantized_per_channel = (weights->data_type() == DataType::QSYMM8_PER_CHANNEL);
ARM_COMPUTE_RETURN_ON_ERROR(
quantization::calculate_quantized_multipliers(iqinfo, wqinfo, oqinfo, output_info));
}

// Perform validation step on GEMMLowp
std::unique_ptr<ITensorInfo> input_qa = src->clone();
@@ -506,7 +539,10 @@ void CpuGemmConv2d::configure(const ITensorInfo *src,
const unsigned int mat_weights_cols = weights->dimension(idx_kernels);

// Create temporary GEMM output tensor in case we cannot skip col2im
const DataType output_data_type = data_type == DataType::BFLOAT16 ? DataType::F32 : data_type;
const DataType output_data_type =
data_type == DataType::BFLOAT16 || int8_dequantize_f32_path(data_type, dst->data_type()) ? DataType::F32
: data_type;

if (!_skip_col2im)
{
TensorShape shape_gemm;
@@ -725,7 +761,14 @@ Status CpuGemmConv2d::validate(const ITensorInfo *src,
{
if (is_quantized)
{
ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(biases, 1, DataType::S32);
if (data_type == DataType::QASYMM8_SIGNED && dst->data_type() == DataType::F32)
{
ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(biases, 1, DataType::F32);
}
else
{
ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(biases, 1, DataType::S32);
}
}
else if (is_bf16)
{
@@ -777,7 +820,9 @@ Status CpuGemmConv2d::validate(const ITensorInfo *src,
}

// Create temporary GEMM output tensor in case we cannot skip col2im
const DataType output_data_type = data_type == DataType::BFLOAT16 ? DataType::F32 : data_type;
const DataType output_data_type =
data_type == DataType::BFLOAT16 || int8_dequantize_f32_path(data_type, dst->data_type()) ? DataType::F32
: data_type;
if (!skip_col2im)
{
TensorShape shape_gemm = gemm_input_to_use->tensor_shape();
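As a sanity check on the path added above, here is a tiny scalar reference of what one output element of the int8 -> F32 route evaluates to. This is plain C++ with made-up values (the same illustrative scales and zero points as in the earlier sketches), not Compute Library code.

#include <cstdint>
#include <cstdio>
#include <vector>

int main()
{
    const float   scale_a = 0.5f, scale_b = 0.25f; // illustrative quantization scales
    const int32_t z_a = 10, z_b = 0;               // illustrative zero points

    const std::vector<int8_t> a = {12, -3, 7, 5};  // one row of the im2col'd input
    const std::vector<int8_t> b = {2, 4, -1, 9};   // one column of the reshaped weights
    const float bias = 0.75f;                      // F32 bias, as required by the new path

    int32_t acc = 0;                               // S32 accumulator with zero points removed up front
    for (std::size_t k = 0; k < a.size(); ++k)
    {
        acc += (static_cast<int32_t>(a[k]) - z_a) * (static_cast<int32_t>(b[k]) - z_b);
    }
    const float out = scale_a * scale_b * static_cast<float>(acc) + bias; // expected F32 output element

    std::printf("expected F32 output element: %f\n", out);
    return 0;
}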