
Commit 7f31b0b

test: Add thread safety tests for CpuTranspose
Resolves: COMPMID-8391
Change-Id: Ie1f3319a6e6f56a5d324be5a88d1112fef4c39f9
Signed-off-by: Syed Wajahat Abbas Naqvi <[email protected]>
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/15066
Tested-by: Arm Jenkins <[email protected]>
Benchmark: Arm Jenkins <[email protected]>
Reviewed-by: Dongsung Kim <[email protected]>
Comments-Addressed: Arm Jenkins <[email protected]>
Parent commit: 63f2a0d

File tree (3 files changed: 264 additions, 41 deletions)

tests/validation/fixtures/CpuTransposeFixture.h
tests/validation/reference/Permute.cpp
tests/validation/runtime/experimental/operators/CpuTranspose.cpp

tests/validation/fixtures/CpuTransposeFixture.h (156 additions, 36 deletions)

@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2024 Arm Limited.
+ * Copyright (c) 2024-2025 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -32,78 +32,198 @@
 #include "tests/IAccessor.h"
 #include "tests/framework/Asserts.h"
 #include "tests/framework/Fixture.h"
+#include "tests/validation/Helpers.h"
 #include "tests/validation/reference/Permute.h"
 
+#if !defined(BARE_METAL)
+#include <thread>
+#include <vector>
+#endif // !defined(BARE_METAL)
+
 namespace arm_compute
 {
 namespace test
 {
 namespace validation
 {
+namespace
+{
+constexpr int NUM_THREADS = 3;
+}// namespace
 template <typename TensorType, typename AccessorType, typename FunctionType, typename T>
-class CpuTransposeValidationFixture : public framework::Fixture
+class CpuTransposeGenericFixture : public framework::Fixture
 {
 public:
-    void setup(TensorShape shape, DataType data_type)
+    void setup(TensorShape shape, DataType data_type, QuantizationInfo qinfo, TestType test_type = TestType::ConfigureOnceRunOnce)
     {
-        _target    = compute_target(shape, data_type);
-        _reference = compute_reference(shape, data_type);
+        if (std::is_same<TensorType, Tensor>::value && // Cpu
+            data_type == DataType::F16 && !CPUInfo::get().has_fp16())
+        {
+            return;
+        }
+        _test_type         = test_type;
+        _num_parallel_runs = (_test_type == TestType::ConfigureOnceRunMultiThreaded ? NUM_THREADS : 1);
+
+        compute_target(shape, data_type, qinfo);
+        compute_reference(shape, data_type, qinfo);
     }
 
protected:
     template <typename U>
     void fill(U &&tensor)
     {
-        library->fill_tensor_uniform(tensor, 0);
+        if(tensor.data_type() == DataType::F32)
+        {
+            std::uniform_real_distribution<float> distribution(-10.0f, 10.0f);
+            library->fill(tensor, distribution, 0);
+        }
+        else if(tensor.data_type() == DataType::F16)
+        {
+            arm_compute::utils::uniform_real_distribution_16bit<half> distribution{ -10.0f, 10.0f };
+            library->fill(tensor, distribution, 0);
+        }
+        else if(!is_data_type_quantized(tensor.data_type()))
+        {
+            std::uniform_int_distribution<> distribution(0, 100);
+            library->fill(tensor, distribution, 0);
+        }
+        else
+        {
+            library->fill_tensor_uniform(tensor, 0);
+        }
     }
 
-    TensorType compute_target(const TensorShape &shape, DataType data_type)
-    {
-        // Make rows the columns of the original shape
-        TensorShape output_shape{ shape[1], shape[0] };
+    void allocate_and_fill_tensors(TensorType *src, TensorType *dst){
+        for(int i = 0; i < _num_parallel_runs; ++i) {
 
-        // Create tensors
-        TensorType src = create_tensor<TensorType>(shape, data_type);
-        TensorType dst = create_tensor<TensorType>(output_shape, data_type);
+            ARM_COMPUTE_ASSERT(src[i].info()->is_resizable());
+            ARM_COMPUTE_ASSERT(dst[i].info()->is_resizable());
 
-        // Create and configure function
-        FunctionType trans_func;
-        trans_func.configure(src.info(), dst.info());
+            // Allocate tensors
+            src[i].allocator()->allocate();
+            dst[i].allocator()->allocate();
 
-        ARM_COMPUTE_ASSERT(src.info()->is_resizable());
-        ARM_COMPUTE_ASSERT(dst.info()->is_resizable());
+            ARM_COMPUTE_ASSERT(!src[i].info()->is_resizable());
+            ARM_COMPUTE_ASSERT(!dst[i].info()->is_resizable());
 
-        // Allocate tensors
-        src.allocator()->allocate();
-        dst.allocator()->allocate();
+            // Fill tensors
+            fill(AccessorType(src[i]));
+        }
+    }
 
-        ARM_COMPUTE_ASSERT(!src.info()->is_resizable());
-        ARM_COMPUTE_ASSERT(!dst.info()->is_resizable());
+    void compute_target(const TensorShape &shape, DataType data_type, QuantizationInfo qinfo)
+    {
+        // Create tensors
+        TensorType src[NUM_THREADS];
+        TensorType dst[NUM_THREADS];
+        TensorType *dst_ptrs[NUM_THREADS];
 
-        // Fill tensors
-        fill(AccessorType(src));
+        // Retain the shape but make rows the columns of the original shape
+        TensorShape output_shape = shape;
+        std::swap(output_shape[0], output_shape[1]);
 
-        // Compute function
-        ITensorPack run_pack{ { arm_compute::TensorType::ACL_SRC, &src }, { arm_compute::TensorType::ACL_DST, &dst } };
-        trans_func.run(run_pack);
+        for(int i = 0; i < _num_parallel_runs; ++i){
+            src[i]      = create_tensor<TensorType>(shape, data_type, 1, qinfo);
+            dst[i]      = create_tensor<TensorType>(output_shape, data_type, 1, qinfo);
+            dst_ptrs[i] = &dst[i];
+        }
 
-        return dst;
+        // Create and configure function
+        FunctionType trans_func;
+        trans_func.configure(src[0].info(), dst_ptrs[0]->info());
+
+        allocate_and_fill_tensors(src, dst);
+
+        if(_test_type == TestType::ConfigureOnceRunMultiThreaded)
+        {
+#ifndef BARE_METAL
+
+            ITensorPack run_pack[NUM_THREADS];
+            std::vector<std::thread> threads;
+
+            threads.reserve(_num_parallel_runs);
+            for(int i = 0; i < _num_parallel_runs; ++i)
+            {
+                // Compute function
+                run_pack[i] = { {arm_compute::TensorType::ACL_SRC, &src[i]},
+                                {arm_compute::TensorType::ACL_DST, dst_ptrs[i]}};
+
+                threads.emplace_back([&,i]
+                {
+                    trans_func.run(run_pack[i]);
+                    _target[i] = std::move(*(dst_ptrs[i]));
+                });
+            }
+            for(int i = 0; i < _num_parallel_runs; ++i)
+            {
+                threads[i].join();
+            }
+#endif // ifndef BARE_METAL
+        }
+        else
+        {
+            // Compute function
+            ITensorPack run_pack{{ arm_compute::TensorType::ACL_SRC, &src[0]},
+                                 {arm_compute::TensorType::ACL_DST, dst_ptrs[0]}};
+            trans_func.run(run_pack);
+            _target[0] = std::move(*(dst_ptrs[0]));
+        }
     }
 
-    SimpleTensor<T> compute_reference(const TensorShape &shape, DataType data_type)
+    void compute_reference(const TensorShape &shape, DataType data_type, QuantizationInfo qinfo)
     {
         // Create reference
-        SimpleTensor<T> src{ shape, data_type };
+        SimpleTensor<T> src{shape, data_type, 1, qinfo};
+
+        for(int i = 0; i < _num_parallel_runs; ++i)
+        {
+            // Fill reference
+            fill(src);
+            _reference[i] = reference::permute<T>(src, PermutationVector(1U, 0U));
+        }
+    }
 
-        // Fill reference
-        fill(src);
+    TensorType      _target[NUM_THREADS];
+    SimpleTensor<T> _reference[NUM_THREADS];
+    TestType        _test_type{};
+    int             _num_parallel_runs{};
+};
 
-        return reference::permute<T>(src, PermutationVector(1U, 0U));
+template <typename TensorType, typename AccessorType, typename FunctionType, typename T>
+class CpuTransposeValidationFixture
+    : public CpuTransposeGenericFixture<TensorType, AccessorType, FunctionType, T>
+{
+public:
+    void setup(const TensorShape &shape, DataType data_type)
+    {
+        CpuTransposeGenericFixture<TensorType, AccessorType, FunctionType, T>::setup(shape, data_type, QuantizationInfo());
     }
+};
 
-    TensorType      _target{};
-    SimpleTensor<T> _reference{};
+template <typename TensorType, typename AccessorType, typename FunctionType, typename T>
+class CpuTransposeThreadSafeValidationFixture
+    : public CpuTransposeGenericFixture<TensorType, AccessorType, FunctionType, T>
+{
+public:
+    void setup(const TensorShape &shape, DataType data_type)
+    {
+        CpuTransposeGenericFixture<TensorType, AccessorType, FunctionType, T>::setup(shape, data_type, QuantizationInfo(),
+                                                                                     TestType::ConfigureOnceRunMultiThreaded);
+    }
 };
+
+template <typename TensorType, typename AccessorType, typename FunctionType, typename T>
+class CpuTransposeQuantizedThreadSafeValidationFixture
+    : public CpuTransposeGenericFixture<TensorType, AccessorType, FunctionType, T>
+{
+public:
+    void setup(const TensorShape &shape, DataType data_type, QuantizationInfo qinfo)
+    {
+        CpuTransposeGenericFixture<TensorType, AccessorType, FunctionType, T>::setup(shape, data_type, qinfo,
+                                                                                     TestType::ConfigureOnceRunMultiThreaded);
+    }
+};
+
 } // namespace validation
 } // namespace test
 } // namespace arm_compute
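
The core scenario exercised by CpuTransposeGenericFixture is "configure once, run from multiple threads": a single experimental::op::CpuTranspose is configured against one tensor-info pair, and each thread then calls run() with its own ITensorPack. The minimal standalone sketch below shows that pattern outside the test framework; the include paths, the main() harness, and the 8x4 F32 shape are assumptions for illustration and are not part of the commit.

```cpp
// Minimal sketch of the "configure once, run multi-threaded" pattern tested above.
// Assumptions: header locations and the standalone main() harness; the operator,
// ITensorPack, and ACL_SRC/ACL_DST usage mirror the fixture code in the diff.
#include "arm_compute/core/ITensorPack.h"
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/runtime/Tensor.h"
#include "arm_compute/runtime/experimental/operators/CpuTranspose.h" // assumed path

#include <thread>
#include <vector>

int main()
{
    using namespace arm_compute;
    constexpr int num_threads = 3;

    // One tensor pair per thread; the operator itself is shared and configured once.
    Tensor src[num_threads];
    Tensor dst[num_threads];
    for (int i = 0; i < num_threads; ++i)
    {
        src[i].allocator()->init(TensorInfo(TensorShape(8U, 4U), 1, DataType::F32));
        dst[i].allocator()->init(TensorInfo(TensorShape(4U, 8U), 1, DataType::F32));
    }

    experimental::op::CpuTranspose transpose;
    transpose.configure(src[0].info(), dst[0].info());

    for (int i = 0; i < num_threads; ++i)
    {
        src[i].allocator()->allocate();
        dst[i].allocator()->allocate();
    }

    // Each thread builds its own ITensorPack and calls run() on the shared operator.
    std::vector<std::thread> workers;
    for (int i = 0; i < num_threads; ++i)
    {
        workers.emplace_back([&, i] {
            ITensorPack pack{{TensorType::ACL_SRC, &src[i]}, {TensorType::ACL_DST, &dst[i]}};
            transpose.run(pack);
        });
    }
    for (auto &t : workers)
    {
        t.join();
    }
    return 0;
}
```

The fixture follows the same flow but keeps one output tensor and one reference result per run, so the test body can validate each thread's output independently.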

tests/validation/reference/Permute.cpp (2 additions, 1 deletion)

@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2019,2024 Arm Limited.
+ * Copyright (c) 2017-2019,2024-2025 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -63,6 +63,7 @@ template SimpleTensor<int8_t> permute(const SimpleTensor<int8_t> &src, Permuta
 template SimpleTensor<uint8_t> permute(const SimpleTensor<uint8_t> &src, PermutationVector perm);
 template SimpleTensor<int16_t> permute(const SimpleTensor<int16_t> &src, PermutationVector perm);
 template SimpleTensor<uint16_t> permute(const SimpleTensor<uint16_t> &src, PermutationVector perm);
+template SimpleTensor<int32_t> permute(const SimpleTensor<int32_t> &src, PermutationVector perm);
 template SimpleTensor<uint32_t> permute(const SimpleTensor<uint32_t> &src, PermutationVector perm);
 template SimpleTensor<float> permute(const SimpleTensor<float> &src, PermutationVector perm);
 template SimpleTensor<half> permute(const SimpleTensor<half> &src, PermutationVector perm);
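
The single added line is an explicit instantiation: reference::permute is defined in this .cpp file, so the new S32 thread-safety test would otherwise fail to link against permute<int32_t>. The generic sketch below (a hypothetical scale_by_two function, not ACL code) illustrates the same pattern under that assumption.

```cpp
// Hypothetical stand-in, not ACL code: a template defined in a .cpp file must be
// explicitly instantiated for every type used from other translation units.
#include <cstdint>
#include <vector>

// What the header would declare:
template <typename T>
std::vector<T> scale_by_two(const std::vector<T> &src);

// What the .cpp defines, plus one explicit instantiation per supported type.
template <typename T>
std::vector<T> scale_by_two(const std::vector<T> &src)
{
    std::vector<T> out(src.size());
    for (std::size_t i = 0; i < src.size(); ++i)
    {
        out[i] = static_cast<T>(src[i] * 2);
    }
    return out;
}

template std::vector<int32_t> scale_by_two(const std::vector<int32_t> &src);
// Without the line above, a caller using int32_t in another .cpp gets a linker
// error, which is why permute<int32_t> gains its instantiation in this commit.
```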

tests/validation/runtime/experimental/operators/CpuTranspose.cpp (106 additions, 4 deletions)

@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2024 Arm Limited.
+ * Copyright (c) 2024-2025 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -40,6 +40,12 @@ namespace test
 {
 namespace validation
 {
+namespace
+{
+using framework::dataset::make;
+
+} // namespace
+
 TEST_SUITE(NEON)
 TEST_SUITE(OPERATORS)
 
@@ -48,15 +54,111 @@ TEST_SUITE(CpuTranspose)
 template <typename T>
 using CpuTransposeFixture = CpuTransposeValidationFixture<Tensor, Accessor, experimental::op::CpuTranspose, T>;
 
+template <typename T>
+using CpuTransposeThreadSafeFixture =
+    CpuTransposeThreadSafeValidationFixture<Tensor, Accessor, experimental::op::CpuTranspose, T>;
+
+template <typename T>
+using CpuTransposeQuantizedThreadSafeFixture =
+    CpuTransposeQuantizedThreadSafeValidationFixture<Tensor, Accessor, experimental::op::CpuTranspose, T>;
+
 TEST_SUITE(U8)
 FIXTURE_DATA_TEST_CASE(SmokeTest, CpuTransposeFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, combine(concat(datasets::Small1DShapes(), datasets::Small2DShapes()),
-                       framework::dataset::make("DataType", DataType::U8)))
+                       make("DataType", DataType::U8)))
 {
     // Validate output
-    validate(Accessor(_target), _reference);
+    for(int i = 0; i < _num_parallel_runs; ++i)
+    {
+        validate(Accessor(_target[i]), _reference[i]);
+    }
 }
-TEST_SUITE_END() // U8
+TEST_SUITE_END() //U8
 
+#ifndef BARE_METAL
+TEST_SUITE(ThreadSafety)
+TEST_SUITE(Float)
+TEST_SUITE(F32)
+FIXTURE_DATA_TEST_CASE(ConfigureOnceUseFromDifferentThreads,
+                       CpuTransposeThreadSafeFixture<float>,
+                       framework::DatasetMode::ALL,
+                       combine(datasets::Small2DShapes(), make("DataType", DataType::F32)))
+{
+    // Validate output
+    for(int i = 0; i < _num_parallel_runs; ++i)
+    {
+        validate(Accessor(_target[i]), _reference[i]);
+    }
+}
+TEST_SUITE_END() // F32
+#ifdef ARM_COMPUTE_ENABLE_FP16
+TEST_SUITE(F16)
+FIXTURE_DATA_TEST_CASE(ConfigureOnceUseFromDifferentThreads,
+                       CpuTransposeThreadSafeFixture<half>,
+                       framework::DatasetMode::ALL,
+                       combine(datasets::Tiny4DShapes(), make("DataType", DataType::F16)))
+{
+    if (CPUInfo::get().has_fp16())
+    {
+        // Validate output
+        for(int i = 0; i < _num_parallel_runs; ++i)
+        {
+            validate(Accessor(_target[i]), _reference[i]);
+        }
+    }
+    else
+    {
+        ARM_COMPUTE_TEST_INFO("Device does not support fp16 vector operations. Test SKIPPED.");
+        framework::ARM_COMPUTE_PRINT_INFO();
+    }
+}
+TEST_SUITE_END() // F16
+#endif // ARM_COMPUTE_ENABLE_FP16
+TEST_SUITE_END() // Float
+TEST_SUITE(Integer)
+TEST_SUITE(S32)
+FIXTURE_DATA_TEST_CASE(ConfigureOnceUseFromDifferentThreads,
+                       CpuTransposeThreadSafeFixture<int32_t>,
+                       framework::DatasetMode::ALL,
+                       combine(datasets::Tiny4DShapes(), make("DataType", DataType::S32)))
+{
+    // Validate output
+    for(int i = 0; i < _num_parallel_runs; ++i)
+    {
+        validate(Accessor(_target[i]), _reference[i]);
+    }
+}
+TEST_SUITE_END() // S32
+TEST_SUITE_END() // Integer
+TEST_SUITE(Quantized)
+TEST_SUITE(QASYMM8_SIGNED)
+FIXTURE_DATA_TEST_CASE(ConfigureOnceUseFromDifferentThreads,
+                       CpuTransposeQuantizedThreadSafeFixture<int8_t>,
+                       framework::DatasetMode::ALL,
+                       combine(datasets::Tiny4DShapes(), make("DataType", DataType::QASYMM8_SIGNED), make("QuantizationInfoIn", {QuantizationInfo(0.5f, 0)})))
+{
+    // Validate output
+    for(int i = 0; i < _num_parallel_runs; ++i)
+    {
+        validate(Accessor(_target[i]), _reference[i]);
+    }
+}
+TEST_SUITE_END() // QASYMM8_SIGNED
+TEST_SUITE(QASYMM8)
+FIXTURE_DATA_TEST_CASE(ConfigureOnceUseFromDifferentThreads,
+                       CpuTransposeQuantizedThreadSafeFixture<uint8_t>,
+                       framework::DatasetMode::ALL,
+                       combine(datasets::Tiny4DShapes(), make("DataType", DataType::QASYMM8), make("QuantizationInfoIn", {QuantizationInfo(0.5f, 0)})))
+{
+    // Validate output
+    for(int i = 0; i < _num_parallel_runs; ++i)
+    {
+        validate(Accessor(_target[i]), _reference[i]);
+    }
+}
+TEST_SUITE_END() // QASYMM8
+TEST_SUITE_END() // Quantized
+TEST_SUITE_END() // ThreadSafety
+#endif // #ifndef BARE_METAL
 TEST_SUITE_END() // CpuTranspose
 
 TEST_SUITE_END() // OPERATORS
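
Each FIXTURE_DATA_TEST_CASE above expands to roughly: for every entry of the combined dataset, construct the fixture, call its setup(), then run the test body with the fixture's members (_target, _reference, _num_parallel_runs) in scope. The sketch below is a simplified, framework-free model of that flow using stand-in types; it is not the real ACL test framework.

```cpp
// Simplified model (not the ACL test framework): shows how the per-dataset-entry
// setup() plus the "validate every parallel run" body in the cases above fit together.
#include <array>
#include <cassert>
#include <cstdio>

struct MockThreadSafeFixture
{
    static constexpr int NUM_THREADS = 3;

    void setup(int shape_elems)
    {
        _num_parallel_runs = NUM_THREADS;
        for (int i = 0; i < _num_parallel_runs; ++i)
        {
            _target[i]    = shape_elems; // stand-in for one thread's operator output
            _reference[i] = shape_elems; // stand-in for the matching reference result
        }
    }

    std::array<int, NUM_THREADS> _target{};
    std::array<int, NUM_THREADS> _reference{};
    int                          _num_parallel_runs{};
};

int main()
{
    const int dataset[] = {16, 64, 256}; // stand-in for combine(Small2DShapes(), DataType)
    for (int shape_elems : dataset)
    {
        MockThreadSafeFixture fixture;
        fixture.setup(shape_elems);

        // Test body: validate the output of every parallel run, as in the cases above.
        for (int i = 0; i < fixture._num_parallel_runs; ++i)
        {
            assert(fixture._target[i] == fixture._reference[i]);
        }
    }
    std::puts("all mock cases passed");
    return 0;
}
```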
