
Commit b92dff2

test: Removed pointless multithreading tests.
These tests have been removed as CpuGEMMLowp is not thread-safe. Thread-safe usage is left to the consuming application to manage.

Signed-off-by: Anna Mayne <[email protected]>
Change-Id: I3a33655166e9bfd4e9db5cbf4cb72de91ac8a17f
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/15062
Reviewed-by: Syed Wajahat Abbas Naqvi <[email protected]>
Comments-Addressed: Arm Jenkins <[email protected]>
Benchmark: Arm Jenkins <[email protected]>
Tested-by: Arm Jenkins <[email protected]>
1 parent 3c32d70 commit b92dff2
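Since thread-safe usage is now the consuming application's responsibility, a minimal sketch of one such arrangement is shown below: each thread constructs, configures and runs its own operator with its own tensor pack, memory group and workspace, so no configured state is shared between threads. This is illustrative only and not part of this change; the wrapper name run_gemmlowp_in_threads, its signature, and the assumption that the per-thread tensors are already created, allocated and filled are all hypothetical, and it reuses the helpers visible in the fixture below (ITensorPack, MemoryGroup, manage_workspace).

#ifndef BARE_METAL
#include <thread>
#include <vector>

// Hypothetical sketch: one operator instance per thread instead of sharing a
// single configured function across threads. TensorType/FunctionType mirror the
// fixture's template parameters; tensors in a, b and out are assumed to be
// already created, allocated and filled by the application.
template <typename TensorType, typename FunctionType>
void run_gemmlowp_in_threads(std::vector<TensorType> &a, std::vector<TensorType> &b, std::vector<TensorType> &out, const GEMMInfo &gemm_info)
{
    std::vector<std::thread> workers;
    workers.reserve(a.size());
    for(size_t i = 0; i < a.size(); ++i)
    {
        workers.emplace_back([&, i]
        {
            // Each thread owns its operator, so no configured state is shared.
            FunctionType gemmlowp;
            gemmlowp.configure(a[i].info(), b[i].info(), nullptr, out[i].info(), gemm_info);

            ITensorPack pack =
            {
                { arm_compute::TensorType::ACL_SRC_0, &a[i] },
                { arm_compute::TensorType::ACL_SRC_1, &b[i] },
                { arm_compute::TensorType::ACL_DST, &out[i] }
            };

            // Per-thread memory group and workspace, as in the fixture below.
            auto mg = MemoryGroup{};
            auto ws = manage_workspace<Tensor>(gemmlowp.workspace(), mg, pack, pack);
            gemmlowp.run(pack);
        });
    }
    for(auto &t : workers)
    {
        t.join();
    }
}
#endif // ifndef BARE_METAL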

File tree

2 files changed: +86 -168 lines changed


tests/validation/fixtures/CpuGEMMLowpFixture.h

Lines changed: 81 additions & 149 deletions
@@ -29,10 +29,6 @@

 #include <cstdint>

-#ifndef BARE_METAL
-#include <thread>
-#endif // ifndef BARE_METAL
-
 namespace arm_compute
 {
 namespace test
@@ -41,169 +37,114 @@ namespace validation
 {

 namespace {
-constexpr int NUM_THREADS = 3;

 template <typename TensorType, typename AccessorType, typename FunctionType, bool reinterpret_input_as_3d, bool reinterpret_output_as_3d, typename OutputType, bool is_fused = false, bool run_twice = false>
-void compute_cpugemmlowp_target(const TensorShape &shape_a, const TensorShape &shape_b, const TensorShape &shape_output, const QuantizationInfo& a_qinfo, const QuantizationInfo& b_qinfo,
+TensorType compute_cpugemmlowp_target(const TensorShape &shape_a, const TensorShape &shape_b, const TensorShape &shape_output, const QuantizationInfo& a_qinfo, const QuantizationInfo& b_qinfo,
         const QuantizationInfo& output_qinfo, DataType data_type_a = DataType::QASYMM8, DataType data_type_b = DataType::QASYMM8,
         GEMMLowpOutputStageInfo output_stage = GEMMLowpOutputStageInfo(), bool reshape_b_only_on_first_run = false, const TensorFillInfo& finfo = TensorFillInfo(),
-        bool accumulate = false, bool dynamic_qinfo = false, DataType data_type_output = DataType::UNKNOWN, int num_parallel_runs = 1, TensorType targets[NUM_THREADS] = {})
+        bool accumulate = false, bool dynamic_qinfo = false, DataType data_type_output = DataType::UNKNOWN)
 {
     ARM_COMPUTE_ASSERT(is_data_type_quantized_asymmetric(data_type_a));
-    ARM_COMPUTE_ASSERT(num_parallel_runs > 1 ? run_twice == false : true);

     // If unknown, set to sensible defaults
     if (data_type_output == DataType::UNKNOWN) {
         data_type_output = output_stage.type == GEMMLowpOutputStageType::NONE ? DataType::S32 : data_type_a;
     }

     // Create tensors
-    TensorType a[NUM_THREADS];
-    TensorType b[NUM_THREADS];
-    TensorType output[NUM_THREADS];
-    TensorType *out_ptrs[NUM_THREADS];
-    TensorType bias[NUM_THREADS];
-
-    for(int i = 0; i < num_parallel_runs; ++i){
-        a[i] = create_tensor<TensorType>(shape_a, data_type_a, 1, dynamic_qinfo ? QuantizationInfo(1.0,0,true) : a_qinfo);
-        b[i] = create_tensor<TensorType>(shape_b, data_type_b, 1, dynamic_qinfo ? QuantizationInfo(1.0,0,true) : b_qinfo); // gemm output before output stage mismatch if i pass data_layout_output here. to be investigated
-        output[i] = create_tensor<TensorType>(shape_output, data_type_output, 1, output_qinfo /* output_qinfo will be ignored when output stage type is None */);
-        out_ptrs[i] = &output[i];
+    TensorType a = create_tensor<TensorType>(shape_a, data_type_a, 1, dynamic_qinfo ? QuantizationInfo(1.0,0,true) : a_qinfo);
+    TensorType b = create_tensor<TensorType>(shape_b, data_type_b, 1, dynamic_qinfo ? QuantizationInfo(1.0,0,true) : b_qinfo); // gemm output before output stage mismatch if i pass data_layout_output here. to be investigated
+    TensorType output = create_tensor<TensorType>(shape_output, data_type_output, 1, output_qinfo /* output_qinfo will be ignored when output stage type is None */);
+    TensorType bias;

-        if(is_fused)
-        {
-            TensorShape bias_shape(shape_b[0]);
-            bias[i] = create_tensor<TensorType>(bias_shape,data_type_output == DataType::F32 ? DataType::F32 : DataType::S32, 1);
-        }
+    if(is_fused)
+    {
+        TensorShape bias_shape(shape_b[0]);
+        bias = create_tensor<TensorType>(bias_shape,data_type_output == DataType::F32 ? DataType::F32 : DataType::S32, 1);
     }

     // Create and configure function
     // The GEMMinfo includes the values of the depth in case of reinterpreted 3d input/output
     FunctionType gemmlowp;
-    gemmlowp.configure(a[0].info(), b[0].info(), is_fused ? bias[0].info() : nullptr, out_ptrs[0]->info(), GEMMInfo(false, false, reshape_b_only_on_first_run, (reinterpret_output_as_3d ? shape_output[2] : 0), reinterpret_input_as_3d, false,
+    gemmlowp.configure(a.info(), b.info(), is_fused ? bias.info() : nullptr, output.info(), GEMMInfo(false, false, reshape_b_only_on_first_run, (reinterpret_output_as_3d ? shape_output[2] : 0), reinterpret_input_as_3d, false,
         output_stage, false /*fp_mixed_precision*/, false /*fast_math*/, false /*broadcast_bias*/,
         arm_compute::ActivationLayerInfo(), false /* fixed_format */, arm_compute::WeightFormat::UNSPECIFIED,
         false /* pretranspose_B */, accumulate));

-    for(int i = 0; i < num_parallel_runs; ++i)
+    // If the QuantizationInfo is dynamic, it needs to be settable after configure (note that we also force it to be dynamic)
+    if (dynamic_qinfo)
     {
-        // If the QuantizationInfo is dynamic, it needs to be settable after configure (note that we also force it to be dynamic)
-        if (dynamic_qinfo)
-        {
-            a[i].info()->set_quantization_info(QuantizationInfo(a_qinfo.scale(), a_qinfo.offset(), true));
-            b[i].info()->set_quantization_info(QuantizationInfo(b_qinfo.scale(), b_qinfo.offset(), true));
-            output[i].info()->set_quantization_info(QuantizationInfo(output_qinfo.scale(), output_qinfo.offset(), true));
-            gemmlowp.update_quantization_parameters(a[i].info()->quantization_info(),
-                                                    b[i].info()->quantization_info(),
-                                                    output[i].info()->quantization_info(),
-                                                    data_type_output,
-                                                    true, true);
-        }
+        a.info()->set_quantization_info(QuantizationInfo(a_qinfo.scale(), a_qinfo.offset(), true));
+        b.info()->set_quantization_info(QuantizationInfo(b_qinfo.scale(), b_qinfo.offset(), true));
+        output.info()->set_quantization_info(QuantizationInfo(output_qinfo.scale(), output_qinfo.offset(), true));
+        gemmlowp.update_quantization_parameters(a.info()->quantization_info(),
+                                                b.info()->quantization_info(),
+                                                output.info()->quantization_info(),
+                                                data_type_output,
+                                                true, true);
+    }

-        ARM_COMPUTE_ASSERT(a[i].info()->is_resizable());
-        ARM_COMPUTE_ASSERT(b[i].info()->is_resizable());
-        ARM_COMPUTE_ASSERT(output[i].info()->is_resizable());
+    ARM_COMPUTE_ASSERT(a.info()->is_resizable());
+    ARM_COMPUTE_ASSERT(b.info()->is_resizable());
+    ARM_COMPUTE_ASSERT(output.info()->is_resizable());

-        add_padding_x({ &a[i], &b[i], &output[i] });
+    add_padding_x({ &a, &b, &output });

-        // Allocate tensors
-        a[i].allocator()->allocate();
-        b[i].allocator()->allocate();
-        output[i].allocator()->allocate();
+    // Allocate tensors
+    a.allocator()->allocate();
+    b.allocator()->allocate();
+    output.allocator()->allocate();

-        ARM_COMPUTE_ASSERT(!a[i].info()->is_resizable());
-        ARM_COMPUTE_ASSERT(!b[i].info()->is_resizable());
-        ARM_COMPUTE_ASSERT(!output[i].info()->is_resizable());
-    }
+    ARM_COMPUTE_ASSERT(!a.info()->is_resizable());
+    ARM_COMPUTE_ASSERT(!b.info()->is_resizable());
+    ARM_COMPUTE_ASSERT(!output.info()->is_resizable());

-    ITensorPack pack [NUM_THREADS];
+    ITensorPack pack =
+    {
+        { arm_compute::TensorType::ACL_SRC_0, &a },
+        { arm_compute::TensorType::ACL_SRC_1, &b },
+        { arm_compute::TensorType::ACL_DST, &output }
+    };

-#ifndef BARE_METAL
-    std::vector<std::thread> threads;
+    // Fill tensors
+    fill_quantized(AccessorType(a), 0 + finfo.hash);
+    fill_quantized(AccessorType(b), 1 + finfo.hash);

-    if(num_parallel_runs > 1)
+    if (accumulate)
     {
-        threads.reserve(num_parallel_runs);
+        ARM_COMPUTE_ASSERT(accumulate != run_twice);
+        fill(AccessorType(output), 6 + finfo.hash, finfo.min_output, finfo.max_output);
     }
-#endif // ifndef BARE_METAL

-    for(int i = 0; i < num_parallel_runs; ++i)
+    if(is_fused)
     {
-        // these are newly created every call of this lambda function
-        pack[i] =
-        {
-            { arm_compute::TensorType::ACL_SRC_0, &a[i] },
-            { arm_compute::TensorType::ACL_SRC_1, &b[i] },
-            { arm_compute::TensorType::ACL_DST, out_ptrs[i] }
-        };
+        ARM_COMPUTE_ASSERT(bias.info()->is_resizable());
+        bias.allocator()->allocate();
+        ARM_COMPUTE_ASSERT(!bias.info()->is_resizable());
+        fill(AccessorType(bias), 2 + finfo.hash, finfo.min_bias, finfo.max_bias);
+        pack.add_tensor(arm_compute::TensorType::ACL_SRC_2, &bias);
+    }

-        // Fill tensors
-        fill_quantized(AccessorType(a[i]), 0 + finfo.hash);
-        fill_quantized(AccessorType(b[i]), 1 + finfo.hash);
+    auto mg = MemoryGroup{};
+    auto ws = manage_workspace<Tensor>(gemmlowp.workspace(), mg, pack, pack);

-        if (accumulate)
-        {
-            ARM_COMPUTE_ASSERT(accumulate != run_twice);
-            fill(AccessorType(output[i]), 6 + finfo.hash, finfo.min_output, finfo.max_output);
-        }
+    // Run with variable inputs.
+    if(run_twice)
+    {
+        gemmlowp.run(pack);
+        fill_quantized(AccessorType(a), 3 + finfo.hash); // Fill tensors with new seed after run
+        fill_quantized(AccessorType(b), 4 + finfo.hash);

         if(is_fused)
         {
-            ARM_COMPUTE_ASSERT(bias[i].info()->is_resizable());
-            bias[i].allocator()->allocate();
-            ARM_COMPUTE_ASSERT(!bias[i].info()->is_resizable());
-            fill(AccessorType(bias[i]), 2 + finfo.hash, finfo.min_bias, finfo.max_bias);
-            pack[i].add_tensor(arm_compute::TensorType::ACL_SRC_2, &bias[i]);
-        }
-
-        // Run with variable inputs.
-        if(run_twice)
-        {
-            auto mg = MemoryGroup{};
-            auto ws = manage_workspace<Tensor>(gemmlowp.workspace(), mg, pack[i], pack[i]);
-
-            gemmlowp.run(pack[i]);
-            fill_quantized(AccessorType(a[i]), 3 + finfo.hash); // Fill tensors with new seed after run
-            fill_quantized(AccessorType(b[i]), 4 + finfo.hash);
-            if(is_fused)
-            {
-                fill(AccessorType(bias[i]), 5 + finfo.hash, finfo.min_bias, finfo.max_bias);
-            }
-        }
-
-        // Compute GEMM function
-#ifndef BARE_METAL
-        if(num_parallel_runs > 1)
-        {
-            threads.emplace_back([&,i]
-            {
-                auto mg = MemoryGroup{};
-                auto ws = manage_workspace<Tensor>(gemmlowp.workspace(), mg, pack[i], pack[i]);
-
-                gemmlowp.run(pack[i]);
-                targets[i] =std::move(*(out_ptrs[i]));
-            });
-        }
-        else
-#endif // ifndef BARE_METAL
-        {
-            auto mg = MemoryGroup{};
-            auto ws = manage_workspace<Tensor>(gemmlowp.workspace(), mg, pack[i], pack[i]);
-
-            gemmlowp.run(pack[i]);
-            targets[i] = std::move(*(out_ptrs[i]));
+            fill(AccessorType(bias), 5 + finfo.hash, finfo.min_bias, finfo.max_bias);
         }
     }

-#ifndef BARE_METAL
-    if(num_parallel_runs > 1)
-    {
-        for(int i = 0; i < num_parallel_runs; ++i)
-        {
-            threads[i].join();
-        }
-    }
-#endif // ifndef BARE_METAL
+    // Compute GEMM function
+    gemmlowp.run(pack);
+
+    return output;
 }
 } // namespace

@@ -219,28 +160,23 @@ class CpuGEMMLowpMatrixMultiplyCoreValidationFixture : protected GEMMLowpGeneric

         bool accumulate = false;
         bool dynamic_qinfo = false;
-        this->_num_parallel_runs = 1;
-        compute_target(shape_a, shape_b, shape_output, a_qinfo, b_qinfo, finfo, accumulate, dynamic_qinfo);
-        this->_references[0] = this->compute_reference(shape_a, shape_b, shape_output, a_qinfo, b_qinfo, finfo, accumulate);
+        this->_target = compute_target(shape_a, shape_b, shape_output, a_qinfo, b_qinfo, finfo, accumulate, dynamic_qinfo);
+        this->_reference = this->compute_reference(shape_a, shape_b, shape_output, a_qinfo, b_qinfo, finfo, accumulate);
     }

 protected:
-    void compute_target(const TensorShape &shape_a, const TensorShape &shape_b, const TensorShape &shape_output, const QuantizationInfo& a_qinfo, const QuantizationInfo& b_qinfo, const TensorFillInfo& finfo, const bool accumulate, const bool dynamic_qinfo)
+    TensorType compute_target(const TensorShape &shape_a, const TensorShape &shape_b, const TensorShape &shape_output, const QuantizationInfo& a_qinfo, const QuantizationInfo& b_qinfo, const TensorFillInfo& finfo, const bool accumulate, const bool dynamic_qinfo)
     {
         const auto output_qinfo = QuantizationInfo(); // No output stage
-        compute_cpugemmlowp_target<TensorType, AccessorType, FunctionType, reinterpret_input_as_3d, reinterpret_output_as_3d, int32_t, false, run_twice>(shape_a, shape_b, shape_output, a_qinfo, b_qinfo, output_qinfo, DataType::QASYMM8, DataType::QASYMM8, GEMMLowpOutputStageInfo(), false, finfo, accumulate, dynamic_qinfo, DataType::UNKNOWN, this->_num_parallel_runs, this->_targets);
+        return compute_cpugemmlowp_target<TensorType, AccessorType, FunctionType, reinterpret_input_as_3d, reinterpret_output_as_3d, int32_t, false, run_twice>(shape_a, shape_b, shape_output, a_qinfo, b_qinfo, output_qinfo, DataType::QASYMM8, DataType::QASYMM8, GEMMLowpOutputStageInfo(), false, finfo, accumulate, dynamic_qinfo, DataType::UNKNOWN);
     }
-
-    int _num_parallel_runs{};
-    TensorType _targets[NUM_THREADS];
-    SimpleTensor<int32_t> _references[NUM_THREADS];
 };

 template <typename TensorType, typename AccessorType, typename FunctionType, bool reinterpret_input_as_3d = false, bool reinterpret_output_as_3d = false, bool run_twice = false>
 class CpuGEMMLowpStaticQuantMatrixMultiplyCoreValidationFixture : protected CpuGEMMLowpMatrixMultiplyCoreValidationFixture<TensorType, AccessorType, FunctionType, reinterpret_input_as_3d, reinterpret_output_as_3d, run_twice>
 {
 public:
-    void setup(TensorShape shape_a, TensorShape shape_b, TensorShape shape_output, int32_t a_offset, int32_t b_offset, DataType data_type, bool is_multithreaded)
+    void setup(TensorShape shape_a, TensorShape shape_b, TensorShape shape_output, int32_t a_offset, int32_t b_offset, DataType data_type)
     {
         ARM_COMPUTE_ASSERT(data_type == DataType::QASYMM8_SIGNED || data_type == DataType::QASYMM8);
         const auto a_qinfo = QuantizationInfo(1.0f / 255, a_offset);
@@ -249,30 +185,26 @@ class CpuGEMMLowpStaticQuantMatrixMultiplyCoreValidationFixture : protected CpuG

         bool accumulate = false;
         bool dynamic_qinfo = true;
-        this->_num_parallel_runs = is_multithreaded ? NUM_THREADS : 1;
-        compute_target(shape_a, shape_b, shape_output, a_qinfo, b_qinfo, finfo, accumulate, dynamic_qinfo, data_type);
-        compute_reference(shape_a, shape_b, shape_output, a_qinfo, b_qinfo, finfo, data_type);
+        this->_target = compute_target(shape_a, shape_b, shape_output, a_qinfo, b_qinfo, finfo, accumulate, dynamic_qinfo, data_type);
+        this->_reference = compute_reference(shape_a, shape_b, shape_output, a_qinfo, b_qinfo, finfo, data_type);
     }

 protected:
-    void compute_target(const TensorShape &shape_a, const TensorShape &shape_b, const TensorShape &shape_output, const QuantizationInfo& a_qinfo, const QuantizationInfo& b_qinfo, const TensorFillInfo& finfo, const bool accumulate, const bool dynamic_qinfo, const DataType data_type)
+    TensorType compute_target(const TensorShape &shape_a, const TensorShape &shape_b, const TensorShape &shape_output, const QuantizationInfo& a_qinfo, const QuantizationInfo& b_qinfo, const TensorFillInfo& finfo, const bool accumulate, const bool dynamic_qinfo, const DataType data_type)
     {
         const auto output_qinfo = QuantizationInfo(a_qinfo.scale(), a_qinfo.offset()); // No output stage
-        compute_cpugemmlowp_target<TensorType, AccessorType, FunctionType, reinterpret_input_as_3d, reinterpret_output_as_3d, int32_t, false, run_twice>(shape_a, shape_b, shape_output, a_qinfo, b_qinfo, output_qinfo, data_type, data_type, GEMMLowpOutputStageInfo(), false, finfo, accumulate, dynamic_qinfo, DataType::UNKNOWN, this->_num_parallel_runs, this->_targets);
+        return compute_cpugemmlowp_target<TensorType, AccessorType, FunctionType, reinterpret_input_as_3d, reinterpret_output_as_3d, int32_t, false, run_twice>(shape_a, shape_b, shape_output, a_qinfo, b_qinfo, output_qinfo, data_type, data_type, GEMMLowpOutputStageInfo(), false, finfo, accumulate, dynamic_qinfo, DataType::UNKNOWN);
     }

-    void compute_reference(const TensorShape &shape_a, const TensorShape &shape_b, const TensorShape &shape_output, const QuantizationInfo& a_qinfo, const QuantizationInfo& b_qinfo, const TensorFillInfo& finfo, const DataType data_type)
+    SimpleTensor<int32_t> compute_reference(const TensorShape &shape_a, const TensorShape &shape_b, const TensorShape &shape_output, const QuantizationInfo& a_qinfo, const QuantizationInfo& b_qinfo, const TensorFillInfo& finfo, const DataType data_type)
     {
-        for(int i = 0; i < this->_num_parallel_runs; ++i)
+        if(data_type == DataType::QASYMM8)
+        {
+            return compute_gemmlowp_reference<reinterpret_input_as_3d, uint8_t, uint8_t, false, false, run_twice>(shape_a, shape_b, shape_output, a_qinfo, b_qinfo, data_type, data_type, finfo);
+        }
+        else
         {
-            if(data_type == DataType::QASYMM8)
-            {
-                this->_references[i] = compute_gemmlowp_reference<reinterpret_input_as_3d, uint8_t, uint8_t, false, false, run_twice>(shape_a, shape_b, shape_output, a_qinfo, b_qinfo, data_type, data_type, finfo);
-            }
-            else
-            {
-                this->_references[i] = compute_gemmlowp_reference<reinterpret_input_as_3d, int8_t, int8_t, false, false, run_twice>(shape_a, shape_b, shape_output, a_qinfo, b_qinfo, data_type, data_type, finfo);
-            }
+            return compute_gemmlowp_reference<reinterpret_input_as_3d, int8_t, int8_t, false, false, run_twice>(shape_a, shape_b, shape_output, a_qinfo, b_qinfo, data_type, data_type, finfo);
         }
     }
 };
