@@ -29,7 +29,7 @@ using namespace std;
29
29
// -----------------------------------------------------------------------------
30
30
31
31
#define _NUM_ITER 10
32
- #define _IN_OUT_SIZE 2000000
32
+ #define _IN_OUT_SIZE 20000000
33
33
#define _FILTER_SIZE 127
34
34
#define _PRINT false
35
35
@@ -150,71 +150,92 @@ void Verification(const univector<T, _IN_OUT_SIZE> &outputExpected,
150
150
// -----------------------------------------------------------------------------
151
151
152
152
// Benchmarks with f32/float type.
153
- BENCHMARK_CAPTURE (DAP_OPS_FIR, buddy_scalar_f32, dap::FIR<float , 1 >, false )
154
- ->Unit(benchmark::kMillisecond )
155
- ->Iterations(_NUM_ITER);
156
- BENCHMARK_CAPTURE (DAP_OPS_FIR, mlir_vector_f32, _mlir_ciface_fir_vector_f32)
157
- ->Unit(benchmark::kMillisecond )
158
- ->Iterations(_NUM_ITER);
153
+ // BENCHMARK_CAPTURE(DAP_OPS_FIR, buddy_scalar_f32, dap::FIR<float, 1>, false)
154
+ // ->Unit(benchmark::kMillisecond)
155
+ // ->Iterations(_NUM_ITER);
156
+ // BENCHMARK_CAPTURE(DAP_OPS_FIR, mlir_vector_f32, _mlir_ciface_fir_vector_f32)
157
+ // ->Unit(benchmark::kMillisecond)
158
+ // ->Iterations(_NUM_ITER);
159
159
BENCHMARK_CAPTURE (DAP_OPS_FIR, buddy_tiled_vector_f32, dap::FIR<float , 1 >, true )
160
- ->Unit(benchmark::kMillisecond )
161
- ->Iterations(_NUM_ITER);
162
- BENCHMARK (KFR_FIR_f32)->Unit(benchmark::kMillisecond )->Iterations(_NUM_ITER);
163
- // Benchmarks with f64/double type.
164
- BENCHMARK_CAPTURE (DAP_OPS_FIR, buddy_scalar_f64, dap::FIR<double , 1 >, false )
165
- ->Unit(benchmark::kMillisecond )
166
- ->Iterations(_NUM_ITER);
167
- BENCHMARK_CAPTURE (DAP_OPS_FIR, mlir_vector_f64, _mlir_ciface_fir_vector_f64)
168
- ->Unit(benchmark::kMillisecond )
169
- ->Iterations(_NUM_ITER);
170
- BENCHMARK_CAPTURE (DAP_OPS_FIR, buddy_tiled_vector_f64, dap::FIR<double , 1 >,
171
- true )
172
- ->Unit(benchmark::kMillisecond )
173
- ->Iterations(_NUM_ITER);
174
- BENCHMARK (KFR_FIR_f64)->Unit(benchmark::kMillisecond )->Iterations(_NUM_ITER);
175
- // Benchmarks for vector size and tile size.
176
- BENCHMARK_CAPTURE (DAP_OPS_FIR, vs16_ts64_f32, _mlir_ciface_buddy_fir_vs_16_ts_64_f32)
177
- ->Unit(benchmark::kMillisecond )
178
- ->Iterations(20 );
179
- BENCHMARK_CAPTURE (DAP_OPS_FIR, vs16_ts128_f32, _mlir_ciface_buddy_fir_vs_16_ts_128_f32)
180
- ->Unit(benchmark::kMillisecond )
181
- ->Iterations(_NUM_ITER);
182
- BENCHMARK_CAPTURE (DAP_OPS_FIR, vs8_ts256_f32, _mlir_ciface_buddy_fir_vs_8_ts_256_f32)
183
160
->Unit(benchmark::kMillisecond )
184
161
->Iterations(20 );
185
- BENCHMARK_CAPTURE (DAP_OPS_FIR, vs16_ts240_f32, _mlir_ciface_buddy_fir_vs_16_ts_240_f32)
162
+ // BENCHMARK(KFR_FIR_f32)->Unit(benchmark::kMillisecond)->Iterations(_NUM_ITER);
163
+ // // Benchmarks with f64/double type.
164
+ // BENCHMARK_CAPTURE(DAP_OPS_FIR, buddy_scalar_f64, dap::FIR<double, 1>, false)
165
+ // ->Unit(benchmark::kMillisecond)
166
+ // ->Iterations(_NUM_ITER);
167
+ // BENCHMARK_CAPTURE(DAP_OPS_FIR, mlir_vector_f64, _mlir_ciface_fir_vector_f64)
168
+ // ->Unit(benchmark::kMillisecond)
169
+ // ->Iterations(_NUM_ITER);
170
+ // BENCHMARK_CAPTURE(DAP_OPS_FIR, buddy_tiled_vector_f64, dap::FIR<double, 1>,
171
+ // true)
172
+ // ->Unit(benchmark::kMillisecond)
173
+ // ->Iterations(_NUM_ITER);
174
+ // BENCHMARK(KFR_FIR_f64)->Unit(benchmark::kMillisecond)->Iterations(_NUM_ITER);
175
+ // // Benchmarks for vector size and tile size.
176
+ // BENCHMARK_CAPTURE(DAP_OPS_FIR, vs16_ts64_f32, _mlir_ciface_buddy_fir_vs_16_ts_64_uf_1_f32)
177
+ // ->Unit(benchmark::kMillisecond)
178
+ // ->Iterations(20);
179
+ // BENCHMARK_CAPTURE(DAP_OPS_FIR, vs16_ts128_f32, _mlir_ciface_buddy_fir_vs_16_ts_128_uf_1_f32)
180
+ // ->Unit(benchmark::kMillisecond)
181
+ // ->Iterations(_NUM_ITER);
182
+ // BENCHMARK_CAPTURE(DAP_OPS_FIR, vs8_ts256_f32, _mlir_ciface_buddy_fir_vs_8_ts_256_uf_1_f32)
183
+ // ->Unit(benchmark::kMillisecond)
184
+ // ->Iterations(20);
185
+ // BENCHMARK_CAPTURE(DAP_OPS_FIR, vs16_ts216_f32, _mlir_ciface_buddy_fir_vs_16_ts_240_uf_1_f32)
186
+ // ->Unit(benchmark::kMillisecond)
187
+ // ->Iterations(20);
188
+ // BENCHMARK_CAPTURE(DAP_OPS_FIR, vs16_ts240_f32, _mlir_ciface_buddy_fir_vs_16_ts_240_uf_1_f32)
189
+ // ->Unit(benchmark::kMillisecond)
190
+ // ->Iterations(20);
191
+ // BENCHMARK_CAPTURE(DAP_OPS_FIR, vs16_ts256_f32, _mlir_ciface_buddy_fir_vs_16_ts_256_uf_1_f32)
192
+ // ->Unit(benchmark::kMillisecond)
193
+ // ->Iterations(20);
194
+ BENCHMARK_CAPTURE (DAP_OPS_FIR, vec16_tile512_uf1_f32, _mlir_ciface_buddy_fir_vs_16_ts_512_uf_1_f32)
186
195
->Unit(benchmark::kMillisecond )
187
196
->Iterations(20 );
188
- BENCHMARK_CAPTURE (DAP_OPS_FIR, vs16_ts256_f32, _mlir_ciface_buddy_fir_vs_16_ts_256_f32 )
197
+ BENCHMARK_CAPTURE (DAP_OPS_FIR, vec16_tile512_uf2_f32, _mlir_ciface_buddy_fir_vs_16_ts_512_uf_2_f32 )
189
198
->Unit(benchmark::kMillisecond )
190
199
->Iterations(20 );
191
- BENCHMARK_CAPTURE (DAP_OPS_FIR, vs16_ts512_f32, _mlir_ciface_buddy_fir_vs_16_ts_512_f32 )
200
+ BENCHMARK_CAPTURE (DAP_OPS_FIR, vec16_tile512_uf4_f32, _mlir_ciface_buddy_fir_vs_16_ts_512_uf_4_f32 )
192
201
->Unit(benchmark::kMillisecond )
193
202
->Iterations(20 );
194
- BENCHMARK_CAPTURE (DAP_OPS_FIR, vs16_ts1024_f32, _mlir_ciface_buddy_fir_vs_16_ts_1024_f32 )
203
+ BENCHMARK_CAPTURE (DAP_OPS_FIR, vec16_tile512_uf8_f32, _mlir_ciface_buddy_fir_vs_16_ts_512_uf_8_f32 )
195
204
->Unit(benchmark::kMillisecond )
196
205
->Iterations(20 );
197
- BENCHMARK_CAPTURE (DAP_OPS_FIR, vs16_ts2048_f32, _mlir_ciface_buddy_fir_vs_16_ts_2048_f32 )
206
+ BENCHMARK_CAPTURE (DAP_OPS_FIR, vec16_tile512_uf16_f32, _mlir_ciface_buddy_fir_vs_16_ts_512_uf_16_f32 )
198
207
->Unit(benchmark::kMillisecond )
199
208
->Iterations(20 );
200
- BENCHMARK_CAPTURE (DAP_OPS_FIR, vs16_ts4096_f32, _mlir_ciface_buddy_fir_vs_16_ts_4096_f32)
201
- ->Unit(benchmark::kMillisecond )
202
- ->Iterations(20 );
203
- BENCHMARK_CAPTURE (DAP_OPS_FIR, vs16_ts8192_f32, _mlir_ciface_buddy_fir_vs_16_ts_8192_f32)
204
- ->Unit(benchmark::kMillisecond )
205
- ->Iterations(20 );
206
- BENCHMARK_CAPTURE (DAP_OPS_FIR, vs8_ts256_f64, _mlir_ciface_buddy_fir_vs_8_ts_256_f64)
207
- ->Unit(benchmark::kMillisecond )
208
- ->Iterations(20 );
209
- BENCHMARK_CAPTURE (DAP_OPS_FIR, vs8_ts128_f64, _mlir_ciface_buddy_fir_vs_8_ts_128_f64)
210
- ->Unit(benchmark::kMillisecond )
211
- ->Iterations(20 );
212
- BENCHMARK_CAPTURE (DAP_OPS_FIR, vs8_ts512_f64, _mlir_ciface_buddy_fir_vs_8_ts_512_f64)
209
+ BENCHMARK_CAPTURE (DAP_OPS_FIR, buddy_tiled_vector_f32, dap::FIR<float , 1 >, true )
213
210
->Unit(benchmark::kMillisecond )
214
211
->Iterations(20 );
215
- BENCHMARK_CAPTURE (DAP_OPS_FIR, vs8_ts1024_f64, _mlir_ciface_buddy_fir_vs_8_ts_1024_f64 )
212
+ BENCHMARK_CAPTURE (DAP_OPS_FIR, vec16_tile1024_uf1_f32, _mlir_ciface_buddy_fir_vs_16_ts_1024_uf_1_f32 )
216
213
->Unit(benchmark::kMillisecond )
217
214
->Iterations(20 );
215
+ // BENCHMARK_CAPTURE(DAP_OPS_FIR, vs16_ts2048_f32, _mlir_ciface_buddy_fir_vs_16_ts_2048_uf_1_f32)
216
+ // ->Unit(benchmark::kMillisecond)
217
+ // ->Iterations(20);
218
+ // BENCHMARK_CAPTURE(DAP_OPS_FIR, vs16_ts4096_f32, _mlir_ciface_buddy_fir_vs_16_ts_4096_uf_1_f32)
219
+ // ->Unit(benchmark::kMillisecond)
220
+ // ->Iterations(20);
221
+ // BENCHMARK_CAPTURE(DAP_OPS_FIR, vs16_ts8192_f32, _mlir_ciface_buddy_fir_vs_16_ts_8192_uf_1_f32)
222
+ // ->Unit(benchmark::kMillisecond)
223
+ // ->Iterations(20);
224
+ // BENCHMARK_CAPTURE(DAP_OPS_FIR, vs8_ts256_f64, _mlir_ciface_buddy_fir_vs_8_ts_256_uf_1_f64)
225
+ // ->Unit(benchmark::kMillisecond)
226
+ // ->Iterations(20);
227
+ // BENCHMARK_CAPTURE(DAP_OPS_FIR, vs8_ts128_f64, _mlir_ciface_buddy_fir_vs_8_ts_128_uf_1_f64)
228
+ // ->Unit(benchmark::kMillisecond)
229
+ // ->Iterations(20);
230
+ // BENCHMARK_CAPTURE(DAP_OPS_FIR, vs8_ts512_f64, _mlir_ciface_buddy_fir_vs_8_ts_512_uf_1_f64)
231
+ // ->Unit(benchmark::kMillisecond)
232
+ // ->Iterations(20);
233
+ // BENCHMARK_CAPTURE(DAP_OPS_FIR, vs8_ts1024_f64, _mlir_ciface_buddy_fir_vs_8_ts_1024_uf_1_f64)
234
+ // ->Unit(benchmark::kMillisecond)
235
+ // ->Iterations(20);
236
+ // BENCHMARK_CAPTURE(DAP_OPS_FIR, vs16_ts512_f64, _mlir_ciface_buddy_fir_vs_16_ts_512_uf_1_f64)
237
+ // ->Unit(benchmark::kMillisecond)
238
+ // ->Iterations(20);
218
239
219
240
// -----------------------------------------------------------------------------
220
241
// Main Function.
0 commit comments