diff --git a/.gitignore b/.gitignore index 6e713a3e..8fa31ea6 100644 --- a/.gitignore +++ b/.gitignore @@ -14,6 +14,7 @@ # ignore output text files testlog.txt res_*.txt +*.log # Release files release_* diff --git a/CMakeLists.txt b/CMakeLists.txt index 40e1b723..1470e41f 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -194,3 +194,24 @@ endif() if(IOS_PLATFORM AND IOS_DEMO) add_subdirectory(ios) endif() + +# This may run the static code analyzer CppCheck on all the project sources, +# if it is available on the system. +# NOTE This only works with CMake 3.10 or later +# If you want to run CppCheck manually, +# you may use this command: +# cppcheck --enable=all --std=c99 . 2>&1 \ +# | grep -v " is never used." \ +# | grep -v "Checking " \ +# | grep -v " files checked " \ +# > cppcheck.log +find_program(CMAKE_CXX_CPPCHECK NAMES cppcheck) +if(CMAKE_CXX_CPPCHECK) + list( + APPEND CMAKE_CXX_CPPCHECK + "--std=c99" + "--enable=all" + "--inconclusive" + "--inline-suppr" + ) +endif() diff --git a/modules/NE10_init.c b/modules/NE10_init.c index 8ed94f7f..73c29e1b 100644 --- a/modules/NE10_init.c +++ b/modules/NE10_init.c @@ -43,15 +43,14 @@ ne10_result_t ne10_HasNEON() ne10_result_t ne10_init() { - ne10_result_t status = NE10_ERR; + ne10_result_t status; #ifndef __MACH__ - FILE* infofile = NULL; // To open the file /proc/cpuinfo ne10_int8_t cpuinfo[CPUINFO_BUFFER_SIZE]; // The buffer to read in the string ne10_uint32_t bytes = 0; // Numbers of bytes read from the file ne10_int32_t i = 0; // Temporary loop counter memset (cpuinfo, 0, CPUINFO_BUFFER_SIZE); - infofile = fopen ("/proc/cpuinfo", "r"); + FILE* infofile = fopen ("/proc/cpuinfo", "r"); if (!infofile) { diff --git a/modules/dsp/NE10_fft.c b/modules/dsp/NE10_fft.c index 64efe5ff..2f6fcf2e 100644 --- a/modules/dsp/NE10_fft.c +++ b/modules/dsp/NE10_fft.c @@ -356,14 +356,13 @@ ne10_fft_cfg_float32_t ne10_fft_alloc_c2c_float32_neon (ne10_int32_t nfft) return ne10_fft_alloc_c2c_float32_c (nfft); } - 
ne10_fft_cfg_float32_t st = NULL; ne10_uint32_t memneeded = sizeof (ne10_fft_state_float32_t) + sizeof (ne10_int32_t) * (NE10_MAXFACTORS * 2) /* factors */ + sizeof (ne10_fft_cpx_float32_t) * nfft /* twiddles */ + sizeof (ne10_fft_cpx_float32_t) * nfft /* buffer */ + NE10_FFT_BYTE_ALIGNMENT; /* 64-bit alignment */ - st = (ne10_fft_cfg_float32_t) NE10_MALLOC (memneeded); + ne10_fft_cfg_float32_t st = (ne10_fft_cfg_float32_t) NE10_MALLOC (memneeded); // Bad allocation. if (st == NULL) @@ -459,14 +458,13 @@ ne10_fft_cfg_int32_t ne10_fft_alloc_c2c_int32_neon (ne10_int32_t nfft) return ne10_fft_alloc_c2c_int32_c (nfft); } - ne10_fft_cfg_int32_t st = NULL; ne10_uint32_t memneeded = sizeof (ne10_fft_state_int32_t) + sizeof (ne10_int32_t) * (NE10_MAXFACTORS * 2) /* factors */ + sizeof (ne10_fft_cpx_int32_t) * nfft /* twiddles */ + sizeof (ne10_fft_cpx_int32_t) * nfft /* buffer */ + NE10_FFT_BYTE_ALIGNMENT; /* 64-bit alignment */ - st = (ne10_fft_cfg_int32_t) NE10_MALLOC (memneeded); + ne10_fft_cfg_int32_t st = (ne10_fft_cfg_int32_t) NE10_MALLOC (memneeded); // Bad allocation. 
if (st == NULL) diff --git a/modules/dsp/NE10_fft_float32.c b/modules/dsp/NE10_fft_float32.c index 8e7737d0..941be3fb 100644 --- a/modules/dsp/NE10_fft_float32.c +++ b/modules/dsp/NE10_fft_float32.c @@ -828,14 +828,13 @@ static void ne10_fft_split_c2r_1d_float32 (ne10_fft_cpx_float32_t *dst, */ ne10_fft_cfg_float32_t ne10_fft_alloc_c2c_float32_c (ne10_int32_t nfft) { - ne10_fft_cfg_float32_t st = NULL; ne10_uint32_t memneeded = sizeof (ne10_fft_state_float32_t) + sizeof (ne10_int32_t) * (NE10_MAXFACTORS * 2) /* factors */ + sizeof (ne10_fft_cpx_float32_t) * nfft /* twiddles */ + sizeof (ne10_fft_cpx_float32_t) * nfft /* buffer */ + NE10_FFT_BYTE_ALIGNMENT; /* 64-bit alignment */ - st = (ne10_fft_cfg_float32_t) NE10_MALLOC (memneeded); + ne10_fft_cfg_float32_t st = (ne10_fft_cfg_float32_t) NE10_MALLOC (memneeded); if (st == NULL) { @@ -943,7 +942,6 @@ void ne10_fft_c2c_1d_float32_c (ne10_fft_cpx_float32_t *fout, */ ne10_fft_r2c_cfg_float32_t ne10_fft_alloc_r2c_float32 (ne10_int32_t nfft) { - ne10_fft_r2c_cfg_float32_t st = NULL; ne10_int32_t ncfft = nfft >> 1; ne10_uint32_t memneeded = sizeof (ne10_fft_r2c_state_float32_t) @@ -953,7 +951,7 @@ ne10_fft_r2c_cfg_float32_t ne10_fft_alloc_r2c_float32 (ne10_int32_t nfft) + sizeof (ne10_fft_cpx_float32_t) * nfft /* buffer */ + NE10_FFT_BYTE_ALIGNMENT; /* 64-bit alignment */ - st = (ne10_fft_r2c_cfg_float32_t) NE10_MALLOC (memneeded); + ne10_fft_r2c_cfg_float32_t st = (ne10_fft_r2c_cfg_float32_t) NE10_MALLOC (memneeded); if (st) { diff --git a/modules/dsp/NE10_fft_float32.neon.c b/modules/dsp/NE10_fft_float32.neon.c index 82bae2fd..f0f8da8a 100644 --- a/modules/dsp/NE10_fft_float32.neon.c +++ b/modules/dsp/NE10_fft_float32.neon.c @@ -477,7 +477,6 @@ static void ne10_fft_split_r2c_1d_float32_neon (ne10_fft_cpx_float32_t *dst, float32x4_t q_tw_r, q_tw_i; float32x4_t q_tmp0, q_tmp1, q_tmp2, q_tmp3, q_val; float32x4_t q_dst_r, q_dst_i, q_dst2_r, q_dst2_i; - float32_t *p_src, *p_src2, *p_dst, *p_dst2, *p_twiddles; tdc.r = 
src[0].r; tdc.i = src[0].i; @@ -490,11 +489,11 @@ static void ne10_fft_split_r2c_1d_float32_neon (ne10_fft_cpx_float32_t *dst, { for (k = 1; k <= count ; k += 4) { - p_src = (float32_t*) (& (src[k])); - p_src2 = (float32_t*) (& (src[ncfft - k - 3])); - p_twiddles = (float32_t*) (& (twiddles[k - 1])); - p_dst = (float32_t*) (& (dst[k])); - p_dst2 = (float32_t*) (& (dst[ncfft - k - 3])); + float32_t* p_src = (float32_t*) (& (src[k])); + float32_t* p_src2 = (float32_t*) (& (src[ncfft - k - 3])); + float32_t* p_twiddles = (float32_t*) (& (twiddles[k - 1])); + float32_t* p_dst = (float32_t*) (& (dst[k])); + float32_t* p_dst2 = (float32_t*) (& (dst[ncfft - k - 3])); q2_fpk = vld2q_f32 (p_src); q2_fpnk = vld2q_f32 (p_src2); @@ -575,7 +574,6 @@ static void ne10_fft_split_c2r_1d_float32_neon (ne10_fft_cpx_float32_t *dst, float32x4_t q_fek_r, q_fek_i, q_fok_r, q_fok_i; float32x4_t q_tmp0, q_tmp1, q_tmp2, q_tmp3, q_val; float32x4_t q_dst2_r, q_dst2_i; - float32_t *p_src, *p_src2, *p_dst, *p_dst2, *p_twiddles; dst[0].r = (src[0].r + src[ncfft].r) * 0.5f; dst[0].i = (src[0].r - src[ncfft].r) * 0.5f; @@ -584,11 +582,11 @@ static void ne10_fft_split_c2r_1d_float32_neon (ne10_fft_cpx_float32_t *dst, { for (k = 1; k <= count ; k += 4) { - p_src = (float32_t*) (& (src[k])); - p_src2 = (float32_t*) (& (src[ncfft - k - 3])); - p_twiddles = (float32_t*) (& (twiddles[k - 1])); - p_dst = (float32_t*) (& (dst[k])); - p_dst2 = (float32_t*) (& (dst[ncfft - k - 3])); + float32_t* p_src = (float32_t*) (& (src[k])); + float32_t* p_src2 = (float32_t*) (& (src[ncfft - k - 3])); + float32_t* p_twiddles = (float32_t*) (& (twiddles[k - 1])); + float32_t* p_dst = (float32_t*) (& (dst[k])); + float32_t* p_dst2 = (float32_t*) (& (dst[ncfft - k - 3])); q2_fk = vld2q_f32 (p_src); q2_fnkc = vld2q_f32 (p_src2); diff --git a/modules/dsp/NE10_fft_int16.c b/modules/dsp/NE10_fft_int16.c index 1066bd49..f8edf253 100644 --- a/modules/dsp/NE10_fft_int16.c +++ b/modules/dsp/NE10_fft_int16.c @@ -1071,14 +1071,13 
@@ static void ne10_fft_split_c2r_1d_int16 (ne10_fft_cpx_int16_t *dst, */ ne10_fft_cfg_int16_t ne10_fft_alloc_c2c_int16 (ne10_int32_t nfft) { - ne10_fft_cfg_int16_t st = NULL; ne10_uint32_t memneeded = sizeof (ne10_fft_state_int16_t) + sizeof (ne10_int32_t) * (NE10_MAXFACTORS * 2) /* factors */ + sizeof (ne10_fft_cpx_int16_t) * (nfft) /* twiddles */ + sizeof (ne10_fft_cpx_int16_t) * nfft /* buffer */ + NE10_FFT_BYTE_ALIGNMENT; /* 64-bit alignment */ - st = (ne10_fft_cfg_int16_t) NE10_MALLOC (memneeded); + ne10_fft_cfg_int16_t st = (ne10_fft_cfg_int16_t) NE10_MALLOC (memneeded); if (st) { @@ -1163,7 +1162,6 @@ void ne10_fft_c2c_1d_int16_c (ne10_fft_cpx_int16_t *fout, */ ne10_fft_r2c_cfg_int16_t ne10_fft_alloc_r2c_int16 (ne10_int32_t nfft) { - ne10_fft_r2c_cfg_int16_t st = NULL; ne10_int32_t ncfft = nfft >> 1; ne10_uint32_t memneeded = sizeof (ne10_fft_r2c_state_int16_t) @@ -1173,7 +1171,7 @@ ne10_fft_r2c_cfg_int16_t ne10_fft_alloc_r2c_int16 (ne10_int32_t nfft) + sizeof (ne10_fft_cpx_int32_t) * nfft /* buffer */ + NE10_FFT_BYTE_ALIGNMENT; /* 64-bit alignment */ - st = (ne10_fft_r2c_cfg_int16_t) NE10_MALLOC (memneeded); + ne10_fft_r2c_cfg_int16_t st = (ne10_fft_r2c_cfg_int16_t) NE10_MALLOC (memneeded); if (st) { diff --git a/modules/dsp/NE10_fft_int16.neon.c b/modules/dsp/NE10_fft_int16.neon.c index 40c0f569..7144bb8b 100644 --- a/modules/dsp/NE10_fft_int16.neon.c +++ b/modules/dsp/NE10_fft_int16.neon.c @@ -454,7 +454,6 @@ static void ne10_fft_split_r2c_1d_int16_neon (ne10_fft_cpx_int16_t *dst, ne10_int32_t ncfft, ne10_int32_t scaled_flag) { - ne10_int32_t k; ne10_int32_t count = ncfft / 2; ne10_fft_cpx_int16_t fpnk, fpk, f1k, f2k, tw, tdc; int16x8x2_t q2_fpk, q2_fpnk, q2_tw, q2_dst, q2_dst2; @@ -463,7 +462,6 @@ static void ne10_fft_split_r2c_1d_int16_neon (ne10_fft_cpx_int16_t *dst, int16x8_t q_tw_r, q_tw_i; int16x8_t q_tmp0, q_tmp1, q_tmp2, q_tmp3; int16x8_t q_dst2_r, q_dst2_i; - int16_t *p_src, *p_src2, *p_dst, *p_dst2, *p_twiddles; tdc.r = src[0].r; tdc.i = 
src[0].i; @@ -479,13 +477,13 @@ static void ne10_fft_split_r2c_1d_int16_neon (ne10_fft_cpx_int16_t *dst, if (scaled_flag) { - for (k = 1; k <= count ; k += 8) + for (ne10_int32_t k = 1; k <= count ; k += 8) { - p_src = (int16_t*) (& (src[k])); - p_src2 = (int16_t*) (& (src[ncfft - k - 7])); - p_twiddles = (int16_t*) (& (twiddles[k - 1])); - p_dst = (int16_t*) (& (dst[k])); - p_dst2 = (int16_t*) (& (dst[ncfft - k - 7])); + int16_t* p_src = (int16_t*) (& (src[k])); + int16_t* p_src2 = (int16_t*) (& (src[ncfft - k - 7])); + int16_t* p_twiddles = (int16_t*) (& (twiddles[k - 1])); + int16_t* p_dst = (int16_t*) (& (dst[k])); + int16_t* p_dst2 = (int16_t*) (& (dst[ncfft - k - 7])); q2_fpk = vld2q_s16 (p_src); q2_fpnk = vld2q_s16 (p_src2); @@ -529,13 +527,13 @@ static void ne10_fft_split_r2c_1d_int16_neon (ne10_fft_cpx_int16_t *dst, } else { - for (k = 1; k <= count ; k += 8) + for (ne10_int32_t k = 1; k <= count ; k += 8) { - p_src = (int16_t*) (& (src[k])); - p_src2 = (int16_t*) (& (src[ncfft - k - 7])); - p_twiddles = (int16_t*) (& (twiddles[k - 1])); - p_dst = (int16_t*) (& (dst[k])); - p_dst2 = (int16_t*) (& (dst[ncfft - k - 7])); + int16_t* p_src = (int16_t*) (& (src[k])); + int16_t* p_src2 = (int16_t*) (& (src[ncfft - k - 7])); + int16_t* p_twiddles = (int16_t*) (& (twiddles[k - 1])); + int16_t* p_dst = (int16_t*) (& (dst[k])); + int16_t* p_dst2 = (int16_t*) (& (dst[ncfft - k - 7])); q2_fpk = vld2q_s16 (p_src); q2_fpnk = vld2q_s16 (p_src2); @@ -574,14 +572,12 @@ static void ne10_fft_split_r2c_1d_int16_neon (ne10_fft_cpx_int16_t *dst, q2_dst2.val[1] = vcombine_s16 (vget_high_s16 (q_dst2_i), vget_low_s16 (q_dst2_i)); vst2q_s16 (p_dst, q2_dst); vst2q_s16 (p_dst2, q2_dst2); - } } } else { - - for (k = 1; k <= ncfft / 2 ; ++k) + for (ne10_int32_t k = 1; k <= ncfft / 2 ; ++k) { fpk = src[k]; fpnk.r = src[ncfft - k].r; @@ -618,7 +614,6 @@ static void ne10_fft_split_c2r_1d_int16_neon (ne10_fft_cpx_int16_t *dst, ne10_int32_t scaled_flag) { - ne10_int32_t k; ne10_int32_t 
count = ncfft / 2; ne10_fft_cpx_int16_t fk, fnkc, fek, fok, tmp; int16x8x2_t q2_fk, q2_fnkc, q2_tw, q2_dst, q2_dst2; @@ -626,7 +621,6 @@ static void ne10_fft_split_c2r_1d_int16_neon (ne10_fft_cpx_int16_t *dst, int16x8_t q_fek_r, q_fek_i, q_fok_r, q_fok_i; int16x8_t q_tmp0, q_tmp1, q_tmp2, q_tmp3; int16x8_t q_dst2_r, q_dst2_i; - int16_t *p_src, *p_src2, *p_dst, *p_dst2, *p_twiddles; dst[0].r = src[0].r + src[ncfft].r; @@ -638,13 +632,13 @@ static void ne10_fft_split_c2r_1d_int16_neon (ne10_fft_cpx_int16_t *dst, { if (scaled_flag) { - for (k = 1; k <= count ; k += 8) + for (ne10_int32_t k = 1; k <= count ; k += 8) { - p_src = (int16_t*) (& (src[k])); - p_src2 = (int16_t*) (& (src[ncfft - k - 7])); - p_twiddles = (int16_t*) (& (twiddles[k - 1])); - p_dst = (int16_t*) (& (dst[k])); - p_dst2 = (int16_t*) (& (dst[ncfft - k - 7])); + int16_t* p_src = (int16_t*) (& (src[k])); + int16_t* p_src2 = (int16_t*) (& (src[ncfft - k - 7])); + int16_t* p_twiddles = (int16_t*) (& (twiddles[k - 1])); + int16_t* p_dst = (int16_t*) (& (dst[k])); + int16_t* p_dst2 = (int16_t*) (& (dst[ncfft - k - 7])); q2_fk = vld2q_s16 (p_src); q2_fnkc = vld2q_s16 (p_src2); @@ -687,13 +681,13 @@ static void ne10_fft_split_c2r_1d_int16_neon (ne10_fft_cpx_int16_t *dst, } else { - for (k = 1; k <= count ; k += 8) + for (ne10_int32_t k = 1; k <= count ; k += 8) { - p_src = (int16_t*) (& (src[k])); - p_src2 = (int16_t*) (& (src[ncfft - k - 7])); - p_twiddles = (int16_t*) (& (twiddles[k - 1])); - p_dst = (int16_t*) (& (dst[k])); - p_dst2 = (int16_t*) (& (dst[ncfft - k - 7])); + int16_t* p_src = (int16_t*) (& (src[k])); + int16_t* p_src2 = (int16_t*) (& (src[ncfft - k - 7])); + int16_t* p_twiddles = (int16_t*) (& (twiddles[k - 1])); + int16_t* p_dst = (int16_t*) (& (dst[k])); + int16_t* p_dst2 = (int16_t*) (& (dst[ncfft - k - 7])); q2_fk = vld2q_s16 (p_src); q2_fnkc = vld2q_s16 (p_src2); @@ -737,7 +731,7 @@ static void ne10_fft_split_c2r_1d_int16_neon (ne10_fft_cpx_int16_t *dst, else { - for (k = 1; k <= ncfft 
/ 2; k++) + for (ne10_int32_t k = 1; k <= ncfft / 2; k++) { fk = src[k]; fnkc.r = src[ncfft - k].r; diff --git a/modules/dsp/NE10_fft_int16.neonintrinsic.c b/modules/dsp/NE10_fft_int16.neonintrinsic.c index 6f06bab4..042566bf 100644 --- a/modules/dsp/NE10_fft_int16.neonintrinsic.c +++ b/modules/dsp/NE10_fft_int16.neonintrinsic.c @@ -1008,7 +1008,6 @@ void ne10_mixed_radix_fft_forward_int16_##scaled##_neon (ne10_fft_cpx_int16_t * ne10_fft_cpx_int16_t * buffer) \ { \ ne10_int32_t fstride, mstride, N; \ - ne10_int32_t fstride1; \ ne10_int32_t f_count; \ ne10_int32_t stage_count; \ \ @@ -1092,7 +1091,6 @@ void ne10_mixed_radix_fft_backward_int16_##scaled##_neon (ne10_fft_cpx_int16_t * ne10_fft_cpx_int16_t * buffer) \ { \ ne10_int32_t fstride, mstride, N; \ - ne10_int32_t fstride1; \ ne10_int32_t f_count; \ ne10_int32_t stage_count; \ \ @@ -1190,7 +1188,6 @@ static void ne10_fft_split_r2c_1d_int16_neon (ne10_fft_cpx_int16_t *dst, int16x8_t q_tw_r, q_tw_i; int16x8_t q_tmp0, q_tmp1, q_tmp2, q_tmp3; int16x8_t q_dst2_r, q_dst2_i; - int16_t *p_src, *p_src2, *p_dst, *p_dst2, *p_twiddles; tdc.r = src[0].r; tdc.i = src[0].i; @@ -1208,11 +1205,11 @@ static void ne10_fft_split_r2c_1d_int16_neon (ne10_fft_cpx_int16_t *dst, { for (k = 1; k <= count ; k += 8) { - p_src = (int16_t*) (& (src[k])); - p_src2 = (int16_t*) (& (src[ncfft - k - 7])); - p_twiddles = (int16_t*) (& (twiddles[k - 1])); - p_dst = (int16_t*) (& (dst[k])); - p_dst2 = (int16_t*) (& (dst[ncfft - k - 7])); + int16_t* p_src = (int16_t*) (& (src[k])); + int16_t* p_src2 = (int16_t*) (& (src[ncfft - k - 7])); + int16_t* p_twiddles = (int16_t*) (& (twiddles[k - 1])); + int16_t* p_dst = (int16_t*) (& (dst[k])); + int16_t* p_dst2 = (int16_t*) (& (dst[ncfft - k - 7])); q2_fpk = vld2q_s16 (p_src); q2_fpnk = vld2q_s16 (p_src2); @@ -1353,7 +1350,6 @@ static void ne10_fft_split_c2r_1d_int16_neon (ne10_fft_cpx_int16_t *dst, int16x8_t q_fek_r, q_fek_i, q_fok_r, q_fok_i; int16x8_t q_tmp0, q_tmp1, q_tmp2, q_tmp3; int16x8_t q_dst2_r, 
q_dst2_i; - int16_t *p_src, *p_src2, *p_dst, *p_dst2, *p_twiddles; dst[0].r = src[0].r + src[ncfft].r; @@ -1367,11 +1363,11 @@ static void ne10_fft_split_c2r_1d_int16_neon (ne10_fft_cpx_int16_t *dst, { for (k = 1; k <= count ; k += 8) { - p_src = (int16_t*) (& (src[k])); - p_src2 = (int16_t*) (& (src[ncfft - k - 7])); - p_twiddles = (int16_t*) (& (twiddles[k - 1])); - p_dst = (int16_t*) (& (dst[k])); - p_dst2 = (int16_t*) (& (dst[ncfft - k - 7])); + int16_t* p_src = (int16_t*) (& (src[k])); + int16_t* p_src2 = (int16_t*) (& (src[ncfft - k - 7])); + int16_t* p_twiddles = (int16_t*) (& (twiddles[k - 1])); + int16_t* p_dst = (int16_t*) (& (dst[k])); + int16_t* p_dst2 = (int16_t*) (& (dst[ncfft - k - 7])); q2_fk = vld2q_s16 (p_src); q2_fnkc = vld2q_s16 (p_src2); diff --git a/modules/dsp/NE10_fft_int32.c b/modules/dsp/NE10_fft_int32.c index 81568998..7c57790c 100644 --- a/modules/dsp/NE10_fft_int32.c +++ b/modules/dsp/NE10_fft_int32.c @@ -1054,14 +1054,13 @@ static void ne10_fft_split_c2r_1d_int32 (ne10_fft_cpx_int32_t *dst, */ ne10_fft_cfg_int32_t ne10_fft_alloc_c2c_int32_c (ne10_int32_t nfft) { - ne10_fft_cfg_int32_t st = NULL; ne10_uint32_t memneeded = sizeof (ne10_fft_state_int32_t) + sizeof (ne10_int32_t) * (NE10_MAXFACTORS * 2) /* factors */ + sizeof (ne10_fft_cpx_int32_t) * nfft /* twiddles */ + sizeof (ne10_fft_cpx_int32_t) * nfft /* buffer */ + NE10_FFT_BYTE_ALIGNMENT; /* 64-bit alignment */ - st = (ne10_fft_cfg_int32_t) NE10_MALLOC (memneeded); + ne10_fft_cfg_int32_t st = (ne10_fft_cfg_int32_t) NE10_MALLOC (memneeded); if (st) { uintptr_t address = (uintptr_t) st + sizeof (ne10_fft_state_int32_t); @@ -1157,7 +1156,6 @@ void ne10_fft_c2c_1d_int32_c (ne10_fft_cpx_int32_t *fout, */ ne10_fft_r2c_cfg_int32_t ne10_fft_alloc_r2c_int32 (ne10_int32_t nfft) { - ne10_fft_r2c_cfg_int32_t st = NULL; ne10_int32_t ncfft = nfft >> 1; ne10_uint32_t memneeded = sizeof (ne10_fft_r2c_state_int32_t) @@ -1167,7 +1165,7 @@ ne10_fft_r2c_cfg_int32_t ne10_fft_alloc_r2c_int32 
(ne10_int32_t nfft) + sizeof (ne10_fft_cpx_int32_t) * nfft /* buffer */ + NE10_FFT_BYTE_ALIGNMENT; /* 64-bit alignment */ - st = (ne10_fft_r2c_cfg_int32_t) NE10_MALLOC (memneeded); + ne10_fft_r2c_cfg_int32_t st = (ne10_fft_r2c_cfg_int32_t) NE10_MALLOC (memneeded); if (st) { diff --git a/modules/dsp/NE10_fft_int32.neon.c b/modules/dsp/NE10_fft_int32.neon.c index 2440ec01..023c5f34 100644 --- a/modules/dsp/NE10_fft_int32.neon.c +++ b/modules/dsp/NE10_fft_int32.neon.c @@ -904,7 +904,6 @@ static void ne10_fft_split_r2c_1d_int32_neon (ne10_fft_cpx_int32_t *dst, ne10_int32_t ncfft, ne10_int32_t scaled_flag) { - ne10_int32_t k; ne10_int32_t count = ncfft / 2; ne10_fft_cpx_int32_t fpnk, fpk, f1k, f2k, tw, tdc; int32x4x2_t q2_fpk, q2_fpnk, q2_tw, q2_dst, q2_dst2; @@ -913,7 +912,6 @@ static void ne10_fft_split_r2c_1d_int32_neon (ne10_fft_cpx_int32_t *dst, int32x4_t q_tw_r, q_tw_i; int32x4_t q_tmp0, q_tmp1, q_tmp2, q_tmp3; int32x4_t q_dst2_r, q_dst2_i; - int32_t *p_src, *p_src2, *p_dst, *p_dst2, *p_twiddles; tdc.r = src[0].r; tdc.i = src[0].i; @@ -929,13 +927,13 @@ static void ne10_fft_split_r2c_1d_int32_neon (ne10_fft_cpx_int32_t *dst, if (scaled_flag) { - for (k = 1; k <= count ; k += 4) + for (ne10_int32_t k = 1; k <= count ; k += 4) { - p_src = (int32_t*) (& (src[k])); - p_src2 = (int32_t*) (& (src[ncfft - k - 3])); - p_twiddles = (int32_t*) (& (twiddles[k - 1])); - p_dst = (int32_t*) (& (dst[k])); - p_dst2 = (int32_t*) (& (dst[ncfft - k - 3])); + int32_t* p_src = (int32_t*) (& (src[k])); + int32_t* p_src2 = (int32_t*) (& (src[ncfft - k - 3])); + int32_t* p_twiddles = (int32_t*) (& (twiddles[k - 1])); + int32_t* p_dst = (int32_t*) (& (dst[k])); + int32_t* p_dst2 = (int32_t*) (& (dst[ncfft - k - 3])); q2_fpk = vld2q_s32 (p_src); q2_fpnk = vld2q_s32 (p_src2); @@ -975,13 +973,13 @@ static void ne10_fft_split_r2c_1d_int32_neon (ne10_fft_cpx_int32_t *dst, } else { - for (k = 1; k <= count ; k += 4) + for (ne10_int32_t k = 1; k <= count ; k += 4) { - p_src = (int32_t*) (& 
(src[k])); - p_src2 = (int32_t*) (& (src[ncfft - k - 3])); - p_twiddles = (int32_t*) (& (twiddles[k - 1])); - p_dst = (int32_t*) (& (dst[k])); - p_dst2 = (int32_t*) (& (dst[ncfft - k - 3])); + int32_t* p_src = (int32_t*) (& (src[k])); + int32_t* p_src2 = (int32_t*) (& (src[ncfft - k - 3])); + int32_t* p_twiddles = (int32_t*) (& (twiddles[k - 1])); + int32_t* p_dst = (int32_t*) (& (dst[k])); + int32_t* p_dst2 = (int32_t*) (& (dst[ncfft - k - 3])); q2_fpk = vld2q_s32 (p_src); q2_fpnk = vld2q_s32 (p_src2); @@ -1023,7 +1021,7 @@ static void ne10_fft_split_r2c_1d_int32_neon (ne10_fft_cpx_int32_t *dst, else { - for (k = 1; k <= ncfft / 2 ; ++k) + for (ne10_int32_t k = 1; k <= ncfft / 2 ; ++k) { fpk = src[k]; fpnk.r = src[ncfft - k].r; @@ -1058,7 +1056,6 @@ static void ne10_fft_split_c2r_1d_int32_neon (ne10_fft_cpx_int32_t *dst, ne10_int32_t scaled_flag) { - ne10_int32_t k; ne10_int32_t count = ncfft / 2; ne10_fft_cpx_int32_t fk, fnkc, fek, fok, tmp; int32x4x2_t q2_fk, q2_fnkc, q2_tw, q2_dst, q2_dst2; @@ -1066,7 +1063,6 @@ static void ne10_fft_split_c2r_1d_int32_neon (ne10_fft_cpx_int32_t *dst, int32x4_t q_fek_r, q_fek_i, q_fok_r, q_fok_i; int32x4_t q_tmp0, q_tmp1, q_tmp2, q_tmp3; int32x4_t q_dst2_r, q_dst2_i; - int32_t *p_src, *p_src2, *p_dst, *p_dst2, *p_twiddles; dst[0].r = src[0].r + src[ncfft].r; @@ -1077,13 +1073,13 @@ static void ne10_fft_split_c2r_1d_int32_neon (ne10_fft_cpx_int32_t *dst, { if (scaled_flag) { - for (k = 1; k <= count ; k += 4) + for (ne10_int32_t k = 1; k <= count ; k += 4) { - p_src = (int32_t*) (& (src[k])); - p_src2 = (int32_t*) (& (src[ncfft - k - 3])); - p_twiddles = (int32_t*) (& (twiddles[k - 1])); - p_dst = (int32_t*) (& (dst[k])); - p_dst2 = (int32_t*) (& (dst[ncfft - k - 3])); + int32_t* p_src = (int32_t*) (& (src[k])); + int32_t* p_src2 = (int32_t*) (& (src[ncfft - k - 3])); + int32_t* p_twiddles = (int32_t*) (& (twiddles[k - 1])); + int32_t* p_dst = (int32_t*) (& (dst[k])); + int32_t* p_dst2 = (int32_t*) (& (dst[ncfft - k - 3])); q2_fk 
= vld2q_s32 (p_src); q2_fnkc = vld2q_s32 (p_src2); @@ -1122,13 +1118,13 @@ static void ne10_fft_split_c2r_1d_int32_neon (ne10_fft_cpx_int32_t *dst, } else { - for (k = 1; k <= count ; k += 4) + for (ne10_int32_t k = 1; k <= count ; k += 4) { - p_src = (int32_t*) (& (src[k])); - p_src2 = (int32_t*) (& (src[ncfft - k - 3])); - p_twiddles = (int32_t*) (& (twiddles[k - 1])); - p_dst = (int32_t*) (& (dst[k])); - p_dst2 = (int32_t*) (& (dst[ncfft - k - 3])); + int32_t* p_src = (int32_t*) (& (src[k])); + int32_t* p_src2 = (int32_t*) (& (src[ncfft - k - 3])); + int32_t* p_twiddles = (int32_t*) (& (twiddles[k - 1])); + int32_t* p_dst = (int32_t*) (& (dst[k])); + int32_t* p_dst2 = (int32_t*) (& (dst[ncfft - k - 3])); q2_fk = vld2q_s32 (p_src); q2_fnkc = vld2q_s32 (p_src2); @@ -1167,8 +1163,7 @@ static void ne10_fft_split_c2r_1d_int32_neon (ne10_fft_cpx_int32_t *dst, } else { - - for (k = 1; k <= ncfft / 2; k++) + for (ne10_int32_t k = 1; k <= ncfft / 2; k++) { fk = src[k]; fnkc.r = src[ncfft - k].r; diff --git a/modules/dsp/NE10_fft_int32.neonintrinsic.c b/modules/dsp/NE10_fft_int32.neonintrinsic.c index f37c750a..df801711 100644 --- a/modules/dsp/NE10_fft_int32.neonintrinsic.c +++ b/modules/dsp/NE10_fft_int32.neonintrinsic.c @@ -1245,7 +1245,6 @@ void ne10_mixed_radix_fft_forward_int32_##scaled##_neon (ne10_fft_cpx_int32_t * ne10_fft_cpx_int32_t * buffer) \ { \ ne10_int32_t fstride, mstride, N; \ - ne10_int32_t fstride1; \ ne10_int32_t f_count; \ ne10_int32_t stage_count; \ \ @@ -1329,7 +1328,6 @@ void ne10_mixed_radix_fft_backward_int32_##scaled##_neon (ne10_fft_cpx_int32_t * ne10_fft_cpx_int32_t * buffer) \ { \ ne10_int32_t fstride, mstride, N; \ - ne10_int32_t fstride1; \ ne10_int32_t f_count; \ ne10_int32_t stage_count; \ \ @@ -1426,7 +1424,6 @@ static void ne10_fft_split_r2c_1d_int32_neon (ne10_fft_cpx_int32_t *dst, int32x4_t q_tw_r, q_tw_i; int32x4_t q_tmp0, q_tmp1, q_tmp2, q_tmp3; int32x4_t q_dst2_r, q_dst2_i; - int32_t *p_src, *p_src2, *p_dst, *p_dst2, *p_twiddles; 
tdc.r = src[0].r; tdc.i = src[0].i; @@ -1444,11 +1441,11 @@ static void ne10_fft_split_r2c_1d_int32_neon (ne10_fft_cpx_int32_t *dst, { for (k = 1; k <= count ; k += 4) { - p_src = (int32_t*) (& (src[k])); - p_src2 = (int32_t*) (& (src[ncfft - k - 3])); - p_twiddles = (int32_t*) (& (twiddles[k - 1])); - p_dst = (int32_t*) (& (dst[k])); - p_dst2 = (int32_t*) (& (dst[ncfft - k - 3])); + int32_t* p_src = (int32_t*) (& (src[k])); + int32_t* p_src2 = (int32_t*) (& (src[ncfft - k - 3])); + int32_t* p_twiddles = (int32_t*) (& (twiddles[k - 1])); + int32_t* p_dst = (int32_t*) (& (dst[k])); + int32_t* p_dst2 = (int32_t*) (& (dst[ncfft - k - 3])); q2_fpk = vld2q_s32 (p_src); q2_fpnk = vld2q_s32 (p_src2); @@ -1579,7 +1576,6 @@ static void ne10_fft_split_c2r_1d_int32_neon (ne10_fft_cpx_int32_t *dst, int32x4_t q_fek_r, q_fek_i, q_fok_r, q_fok_i; int32x4_t q_tmp0, q_tmp1, q_tmp2, q_tmp3; int32x4_t q_dst2_r, q_dst2_i; - int32_t *p_src, *p_src2, *p_dst, *p_dst2, *p_twiddles; dst[0].r = src[0].r + src[ncfft].r; @@ -1592,11 +1588,11 @@ static void ne10_fft_split_c2r_1d_int32_neon (ne10_fft_cpx_int32_t *dst, { for (k = 1; k <= count ; k += 4) { - p_src = (int32_t*) (& (src[k])); - p_src2 = (int32_t*) (& (src[ncfft - k - 3])); - p_twiddles = (int32_t*) (& (twiddles[k - 1])); - p_dst = (int32_t*) (& (dst[k])); - p_dst2 = (int32_t*) (& (dst[ncfft - k - 3])); + int32_t* p_src = (int32_t*) (& (src[k])); + int32_t* p_src2 = (int32_t*) (& (src[ncfft - k - 3])); + int32_t* p_twiddles = (int32_t*) (& (twiddles[k - 1])); + int32_t* p_dst = (int32_t*) (& (dst[k])); + int32_t* p_dst2 = (int32_t*) (& (dst[ncfft - k - 3])); q2_fk = vld2q_s32 (p_src); q2_fnkc = vld2q_s32 (p_src2); diff --git a/modules/dsp/NE10_fir.c b/modules/dsp/NE10_fir.c index ac32f467..a2560b2d 100644 --- a/modules/dsp/NE10_fir.c +++ b/modules/dsp/NE10_fir.c @@ -434,7 +434,6 @@ void ne10_fir_decimate_float_c (const ne10_fir_decimate_instance_f32_t * S, ne10_float32_t *pState = S->pState; /* State pointer */ ne10_float32_t 
*pCoeffs = S->pCoeffs; /* Coefficient pointer */ ne10_float32_t *pStateCurnt; /* Points to the current sample of the state */ - ne10_float32_t *px, *pb; /* Temporary pointers for state and coefficient buffers */ ne10_float32_t sum0; /* Accumulator */ ne10_float32_t x0, c0; /* Temporary variables to hold state and coefficient values */ ne10_uint32_t numTaps = S->numTaps; /* Number of filter coefficients in the filter */ @@ -465,11 +464,11 @@ void ne10_fir_decimate_float_c (const ne10_fir_decimate_instance_f32_t * S, /* Set accumulator to zero */ sum0 = 0.0f; - /* Initialize state pointer */ - px = pState; + /* Initialize temporary state pointer */ + ne10_float32_t *px = pState; - /* Initialize coeff pointer */ - pb = pCoeffs; + /* Initialize temporary coeff pointer */ + ne10_float32_t *pb = pCoeffs; /* Loop unrolling. Process 4 taps at a time. */ tapCnt = numTaps >> 2; @@ -1235,7 +1234,6 @@ static void ne10_circular_write_float (ne10_int32_t * circBuffer, ne10_int32_t srcInc, ne10_uint32_t blockSize) { - ne10_uint32_t i = 0u; ne10_int32_t wOffset; /* Copy the value of Index pointer that points @@ -1243,7 +1241,7 @@ static void ne10_circular_write_float (ne10_int32_t * circBuffer, wOffset = *writeOffset; /* Loop over the blockSize */ - i = blockSize; + ne10_uint32_t i = blockSize; while (i > 0u) { @@ -1281,7 +1279,6 @@ static void ne10_circular_read_float (ne10_int32_t * circBuffer, ne10_int32_t dstInc, ne10_uint32_t blockSize) { - ne10_uint32_t i = 0u; ne10_int32_t rOffset, *dst_end; /* Copy the value of Index pointer that points @@ -1290,7 +1287,7 @@ static void ne10_circular_read_float (ne10_int32_t * circBuffer, dst_end = dst_base + dst_length; /* Loop over the blockSize */ - i = blockSize; + ne10_uint32_t i = blockSize; while (i > 0u) { diff --git a/modules/dsp/NE10_iir.c b/modules/dsp/NE10_iir.c index fb56cc94..51808c9b 100644 --- a/modules/dsp/NE10_iir.c +++ b/modules/dsp/NE10_iir.c @@ -117,7 +117,6 @@ void ne10_iir_lattice_float_c (const 
ne10_iir_lattice_instance_f32_t * S, ne10_float32_t fcurr, fnext = 0, gcurr, gnext; /* Temporary variables for lattice stages */ ne10_float32_t acc; /* Accumlator */ ne10_uint32_t blkCnt, tapCnt; /* temporary variables for counts */ - ne10_float32_t *px1, *px2, *pk, *pv; /* temporary pointers for state and coef */ ne10_uint32_t numStages = S->numStages; /* number of stages */ ne10_float32_t *pState; /* State pointer */ ne10_float32_t *pStateCurnt; /* State current pointer */ @@ -138,15 +137,15 @@ void ne10_iir_lattice_float_c (const ne10_iir_lattice_instance_f32_t * S, fcurr = *pSrc++; /* Initialize state read pointer */ - px1 = pState; + ne10_float32_t* px1 = pState; /* Initialize state write pointer */ - px2 = pState; + ne10_float32_t* px2 = pState; /* Set accumulator to zero */ acc = 0.0f; /* Initialize Ladder coeff pointer */ - pv = &S->pvCoeffs[S->numStages]; + ne10_float32_t* pv = &S->pvCoeffs[S->numStages]; /* Initialize Reflection coeff pointer */ - pk = &S->pkCoeffs[0]; + ne10_float32_t* pk = &S->pkCoeffs[0]; /* Process sample for first tap */ diff --git a/modules/dsp/NE10_rfft_float32.neonintrinsic.c b/modules/dsp/NE10_rfft_float32.neonintrinsic.c index 53aa787c..09dc6e29 100644 --- a/modules/dsp/NE10_rfft_float32.neonintrinsic.c +++ b/modules/dsp/NE10_rfft_float32.neonintrinsic.c @@ -487,11 +487,10 @@ NE10_INLINE void ne10_radix4x4_r2c_with_twiddles_neon (ne10_fft_cpx_float32_t *F const float32x4_t *Fin_neon = (float32x4_t*) Fin; float32x4_t *Fout_neon = (float32x4_t*) Fout; - const ne10_fft_cpx_float32_t *tw; for (f_count = fstride; f_count; f_count --) { - tw = twiddles + 3; + const ne10_fft_cpx_float32_t* tw = twiddles + 3; // first butterfly ne10_radix4x4_r2c_with_twiddles_first_butterfly_neon ( Fout_neon, Fin_neon, out_step, in_step, NULL); @@ -529,11 +528,10 @@ NE10_INLINE void ne10_radix4x4_c2r_with_twiddles_neon (ne10_fft_cpx_float32_t *F const float32x4_t *Fin_neon = (float32x4_t*) Fin; float32x4_t *Fout_neon = (float32x4_t*) Fout; - const 
ne10_fft_cpx_float32_t *tw; for (f_count = fstride; f_count; f_count --) { - tw = twiddles + 3; + const ne10_fft_cpx_float32_t* tw = twiddles + 3; // first butterfly ne10_radix4x4_c2r_with_twiddles_first_butterfly_neon ( Fout_neon, Fin_neon, out_step, in_step, NULL); diff --git a/modules/dsp/test/test_suite_fft_int16.c b/modules/dsp/test/test_suite_fft_int16.c index 1abcccbe..b799adc2 100644 --- a/modules/dsp/test/test_suite_fft_int16.c +++ b/modules/dsp/test/test_suite_fft_int16.c @@ -82,8 +82,6 @@ void test_fft_c2c_1d_int16_conformance() ne10_int32_t i = 0; ne10_int32_t fftSize = 0; ne10_fft_cfg_int16_t cfg; - ne10_float32_t * out_c_tmp = NULL; - ne10_float32_t * out_neon_tmp = NULL; fprintf (stdout, "----------%30s start\n", __FUNCTION__); @@ -99,8 +97,8 @@ void test_fft_c2c_1d_int16_conformance() out_c = guarded_out_c + ARRAY_GUARD_LEN; out_neon = guarded_out_neon + ARRAY_GUARD_LEN; - out_c_tmp = (ne10_float32_t*) NE10_MALLOC ( (TEST_LENGTH_SAMPLES * 2) * sizeof (ne10_float32_t)); - out_neon_tmp = (ne10_float32_t*) NE10_MALLOC ( (TEST_LENGTH_SAMPLES * 2) * sizeof (ne10_float32_t)); + ne10_float32_t* out_c_tmp = (ne10_float32_t*) NE10_MALLOC ( (TEST_LENGTH_SAMPLES * 2) * sizeof (ne10_float32_t)); + ne10_float32_t* out_neon_tmp = (ne10_float32_t*) NE10_MALLOC ( (TEST_LENGTH_SAMPLES * 2) * sizeof (ne10_float32_t)); for (i = 0; i < TEST_LENGTH_SAMPLES * 2; i++) { @@ -369,8 +367,6 @@ void test_fft_r2c_1d_int16_conformance() ne10_int32_t i = 0; ne10_int32_t fftSize = 0; ne10_fft_r2c_cfg_int16_t cfg; - ne10_float32_t * out_c_tmp = NULL; - ne10_float32_t * out_neon_tmp = NULL; fprintf (stdout, "----------%30s start\n", __FUNCTION__); @@ -386,8 +382,8 @@ void test_fft_r2c_1d_int16_conformance() out_c = guarded_out_c + ARRAY_GUARD_LEN; out_neon = guarded_out_neon + ARRAY_GUARD_LEN; - out_c_tmp = (ne10_float32_t*) NE10_MALLOC ( (TEST_LENGTH_SAMPLES * 2) * sizeof (ne10_float32_t)); - out_neon_tmp = (ne10_float32_t*) NE10_MALLOC ( (TEST_LENGTH_SAMPLES * 2) * sizeof 
(ne10_float32_t)); + ne10_float32_t* out_c_tmp = (ne10_float32_t*) NE10_MALLOC ( (TEST_LENGTH_SAMPLES * 2) * sizeof (ne10_float32_t)); + ne10_float32_t* out_neon_tmp = (ne10_float32_t*) NE10_MALLOC ( (TEST_LENGTH_SAMPLES * 2) * sizeof (ne10_float32_t)); for (i = 0; i < TEST_LENGTH_SAMPLES * 2; i++) { diff --git a/modules/dsp/test/test_suite_fft_int32.c b/modules/dsp/test/test_suite_fft_int32.c index 73a694ae..38b1a910 100644 --- a/modules/dsp/test/test_suite_fft_int32.c +++ b/modules/dsp/test/test_suite_fft_int32.c @@ -86,8 +86,6 @@ void test_fft_c2c_1d_int32_conformance() ne10_int32_t factor; ne10_fft_cfg_int32_t cfg_c; ne10_fft_cfg_int32_t cfg_neon; - ne10_float32_t * out_c_tmp = NULL; - ne10_float32_t * out_neon_tmp = NULL; fprintf (stdout, "----------%30s start\n", __FUNCTION__); @@ -103,8 +101,8 @@ void test_fft_c2c_1d_int32_conformance() out_c = guarded_out_c + ARRAY_GUARD_LEN; out_neon = guarded_out_neon + ARRAY_GUARD_LEN; - out_c_tmp = (ne10_float32_t*) NE10_MALLOC ( (TEST_LENGTH_SAMPLES * 2) * sizeof (ne10_float32_t)); - out_neon_tmp = (ne10_float32_t*) NE10_MALLOC ( (TEST_LENGTH_SAMPLES * 2) * sizeof (ne10_float32_t)); + ne10_float32_t* out_c_tmp = (ne10_float32_t*) NE10_MALLOC ( (TEST_LENGTH_SAMPLES * 2) * sizeof (ne10_float32_t)); + ne10_float32_t* out_neon_tmp = (ne10_float32_t*) NE10_MALLOC ( (TEST_LENGTH_SAMPLES * 2) * sizeof (ne10_float32_t)); for (i = 0; i < TEST_LENGTH_SAMPLES * 2; i++) { @@ -413,8 +411,6 @@ void test_fft_r2c_1d_int32_conformance() ne10_int32_t i = 0; ne10_int32_t fftSize = 0; ne10_fft_r2c_cfg_int32_t cfg; - ne10_float32_t * out_c_tmp = NULL; - ne10_float32_t * out_neon_tmp = NULL; fprintf (stdout, "----------%30s start\n", __FUNCTION__); @@ -430,8 +426,8 @@ void test_fft_r2c_1d_int32_conformance() out_c = guarded_out_c + ARRAY_GUARD_LEN; out_neon = guarded_out_neon + ARRAY_GUARD_LEN; - out_c_tmp = (ne10_float32_t*) NE10_MALLOC ( (TEST_LENGTH_SAMPLES * 2) * sizeof (ne10_float32_t)); - out_neon_tmp = (ne10_float32_t*) NE10_MALLOC ( 
(TEST_LENGTH_SAMPLES * 2) * sizeof (ne10_float32_t)); + ne10_float32_t* out_c_tmp = (ne10_float32_t*) NE10_MALLOC ( (TEST_LENGTH_SAMPLES * 2) * sizeof (ne10_float32_t)); + ne10_float32_t* out_neon_tmp = (ne10_float32_t*) NE10_MALLOC ( (TEST_LENGTH_SAMPLES * 2) * sizeof (ne10_float32_t)); for (i = 0; i < TEST_LENGTH_SAMPLES * 2; i++) { diff --git a/modules/dsp/test/test_suite_fir.c b/modules/dsp/test/test_suite_fir.c index 2df3f8e9..70471f42 100644 --- a/modules/dsp/test/test_suite_fir.c +++ b/modules/dsp/test/test_suite_fir.c @@ -209,12 +209,6 @@ void test_fir_case0() { ne10_fir_instance_f32_t SC, SN; - ne10_uint16_t loop = 0; - ne10_uint16_t block = 0; - ne10_uint16_t i = 0; - - test_config *config; - fprintf (stdout, "----------%30s start\n", __FUNCTION__); /* init input memory */ @@ -238,16 +232,16 @@ void test_fir_case0() #ifdef ENABLE_NE10_FIR_FLOAT_NEON #if defined (SMOKE_TEST)||(REGRESSION_TEST) ne10_uint16_t pos = 0; - for (loop = 0; loop < NUM_TESTS; loop++) + for (ne10_uint16_t loop = 0; loop < NUM_TESTS; loop++) { - config = &CONFIG[loop]; + test_config* config = &CONFIG[loop]; /* Initialize the CFFT/CIFFT module */ ne10_fir_init_float (&SC, config->numTaps, config->coeffsF32, fir_state_c, config->blockSize); ne10_fir_init_float (&SN, config->numTaps, config->coeffsF32, fir_state_neon, config->blockSize); /* copy input to input buffer */ - for (i = 0; i < TEST_LENGTH_SAMPLES; i++) + for (ne10_uint16_t i = 0; i < TEST_LENGTH_SAMPLES; i++) { in_c[i] = testInput_f32[i]; in_neon[i] = testInput_f32[i]; @@ -256,12 +250,12 @@ void test_fir_case0() GUARD_ARRAY (out_c, TEST_LENGTH_SAMPLES); GUARD_ARRAY (out_neon, TEST_LENGTH_SAMPLES); - for (block = 0; block < config->numFrames; block++) + for (ne10_uint16_t block = 0; block < config->numFrames; block++) { ne10_fir_float_c (&SC, in_c + (block * config->blockSize), out_c + (block * config->blockSize), config->blockSize); } - for (block = 0; block < config->numFrames; block++) + for (ne10_uint16_t block = 0; block 
< config->numFrames; block++) { ne10_fir_float_neon (&SN, in_neon + (block * config->blockSize), out_neon + (block * config->blockSize), config->blockSize); } @@ -293,16 +287,16 @@ void test_fir_case0() #ifdef PERFORMANCE_TEST ne10_uint16_t k; fprintf (stdout, "%25s%20s%20s%20s%20s\n", "FIR Length&Taps", "C Time (micro-s)", "NEON Time (micro-s)", "Time Savings", "Performance Ratio"); - for (loop = 0; loop < NUM_PERF_TESTS; loop++) + for (ne10_uint16_t loop = 0; loop < NUM_PERF_TESTS; loop++) { - config = &CONFIG_PERF[loop]; + test_config* config = &CONFIG_PERF[loop]; /* Initialize the CFFT/CIFFT module */ ne10_fir_init_float (&SC, config->numTaps, config->coeffsF32, fir_state_c, config->blockSize); ne10_fir_init_float (&SN, config->numTaps, config->coeffsF32, fir_state_neon, config->blockSize); /* copy input to input buffer */ - for (i = 0; i < TEST_LENGTH_SAMPLES; i++) + for (ne10_uint16_t i = 0; i < TEST_LENGTH_SAMPLES; i++) { in_c[i] = testInput_f32[i]; in_neon[i] = testInput_f32[i]; @@ -314,7 +308,7 @@ void test_fir_case0() { for (k = 0; k < TEST_COUNT; k++) { - for (block = 0; block < config->numFrames; block++) + for (ne10_uint16_t block = 0; block < config->numFrames; block++) { ne10_fir_float_c (&SC, in_c + (block * config->blockSize), out_c + (block * config->blockSize), config->blockSize); } @@ -329,7 +323,7 @@ void test_fir_case0() { for (k = 0; k < TEST_COUNT; k++) { - for (block = 0; block < config->numFrames; block++) + for (ne10_uint16_t block = 0; block < config->numFrames; block++) { ne10_fir_float_neon (&SN, in_neon + (block * config->blockSize), out_neon + (block * config->blockSize), config->blockSize); } diff --git a/modules/dsp/test/test_suite_fir_decimate.c b/modules/dsp/test/test_suite_fir_decimate.c index 0a274299..9bdee754 100644 --- a/modules/dsp/test/test_suite_fir_decimate.c +++ b/modules/dsp/test/test_suite_fir_decimate.c @@ -199,15 +199,6 @@ void test_fir_decimate_case0() { ne10_fir_decimate_instance_f32_t SC, SN; - ne10_uint16_t loop 
= 0; - ne10_uint16_t block = 0; - ne10_uint16_t i = 0; - ne10_uint16_t length = 0; - - test_config *config; - ne10_result_t status_c = NE10_OK; - ne10_result_t status_neon = NE10_OK; - fprintf (stdout, "----------%30s start\n", __FUNCTION__); /* init input memory */ @@ -224,15 +215,14 @@ void test_fir_decimate_case0() #ifdef ENABLE_NE10_FIR_DECIMATE_FLOAT_NEON #if defined (SMOKE_TEST)||(REGRESSION_TEST) - ne10_uint16_t pos = 0; - for (loop = 0; loop < NUM_TESTS; loop++) + for (ne10_uint16_t loop = 0; loop < NUM_TESTS; loop++) { - config = &CONFIG[loop]; - length = config->numFrames * config->blockSize / config->D; + test_config *config = &CONFIG[loop]; + ne10_uint16_t length = config->numFrames * config->blockSize / config->D; /* Initialize the CFFT/CIFFT module */ - status_c = ne10_fir_decimate_init_float (&SC, config->numTaps, config->D, config->coeffsF32, fir_state_c, config->blockSize); - status_neon = ne10_fir_decimate_init_float (&SN, config->numTaps, config->D, config->coeffsF32, fir_state_neon, config->blockSize); + ne10_result_t status_c = ne10_fir_decimate_init_float (&SC, config->numTaps, config->D, config->coeffsF32, fir_state_c, config->blockSize); + ne10_result_t status_neon = ne10_fir_decimate_init_float (&SN, config->numTaps, config->D, config->coeffsF32, fir_state_neon, config->blockSize); if ( ( (status_c == NE10_ERR) || (status_neon == NE10_ERR))) { @@ -247,7 +237,7 @@ void test_fir_decimate_case0() } } /* copy input to input buffer */ - for (i = 0; i < TEST_LENGTH_SAMPLES; i++) + for (ne10_uint16_t i = 0; i < TEST_LENGTH_SAMPLES; i++) { in_c[i] = testInput_f32[i]; in_neon[i] = testInput_f32[i]; @@ -256,12 +246,12 @@ void test_fir_decimate_case0() GUARD_ARRAY (out_c, length); GUARD_ARRAY (out_neon, length); - for (block = 0; block < config->numFrames; block++) + for (ne10_uint16_t block = 0; block < config->numFrames; block++) { ne10_fir_decimate_float_c (&SC, in_c + (block * config->blockSize), out_c + (block * config->blockSize / config->D), 
config->blockSize); } - for (block = 0; block < config->numFrames; block++) + for (ne10_uint16_t block = 0; block < config->numFrames; block++) { ne10_fir_decimate_float_neon (&SN, in_neon + (block * config->blockSize), out_neon + (block * config->blockSize / config->D), config->blockSize); } @@ -278,7 +268,7 @@ void test_fir_decimate_case0() printf ("--------------------config %d\n", loop); printf ("snr %f\n", snr); #endif - for (pos = 0; pos < length; pos++) + for (ne10_uint16_t pos = 0; pos < length; pos++) { #if defined (DEBUG_TRACE) printf ("pos %d \n", pos); @@ -291,16 +281,15 @@ void test_fir_decimate_case0() #endif // ENABLE_NE10_FIR_DECIMATE_FLOAT_NEON #ifdef PERFORMANCE_TEST - ne10_uint16_t k; fprintf (stdout, "%25s%20s%20s%20s%20s\n", "FIR Length&Taps", "C Time (micro-s)", "NEON Time (micro-s)", "Time Savings", "Performance Ratio"); - for (loop = 0; loop < NUM_PERF_TESTS; loop++) + for (ne10_uint16_t loop = 0; loop < NUM_PERF_TESTS; loop++) { - config = &CONFIG_PERF[loop]; - length = config->numFrames * config->blockSize / config->D; + test_config *config = &CONFIG_PERF[loop]; + ne10_uint16_t length = config->numFrames * config->blockSize / config->D; /* Initialize the CFFT/CIFFT module */ - status_c = ne10_fir_decimate_init_float (&SC, config->numTaps, config->D, config->coeffsF32, fir_state_c, config->blockSize); - status_neon = ne10_fir_decimate_init_float (&SN, config->numTaps, config->D, config->coeffsF32, fir_state_neon, config->blockSize); + ne10_result_t status_c = ne10_fir_decimate_init_float (&SC, config->numTaps, config->D, config->coeffsF32, fir_state_c, config->blockSize); + ne10_result_t status_neon = ne10_fir_decimate_init_float (&SN, config->numTaps, config->D, config->coeffsF32, fir_state_neon, config->blockSize); if ( ( (status_c == NE10_ERR) || (status_neon == NE10_ERR))) { @@ -316,7 +305,7 @@ void test_fir_decimate_case0() } /* copy input to input buffer */ - for (i = 0; i < TEST_LENGTH_SAMPLES; i++) + for (ne10_uint16_t i = 0; i < 
TEST_LENGTH_SAMPLES; i++) { in_c[i] = testInput_f32[i]; in_neon[i] = testInput_f32[i]; @@ -326,9 +315,9 @@ void test_fir_decimate_case0() ( time_c, { - for (k = 0; k < TEST_COUNT; k++) + for (ne10_uint16_t k = 0; k < TEST_COUNT; k++) { - for (block = 0; block < config->numFrames; block++) + for (ne10_uint16_t block = 0; block < config->numFrames; block++) { ne10_fir_decimate_float_c (&SC, in_c + (block * config->blockSize), out_c + (block * config->blockSize / config->D), config->blockSize); } @@ -340,9 +329,9 @@ void test_fir_decimate_case0() ( time_neon, { - for (k = 0; k < TEST_COUNT; k++) + for (ne10_uint16_t k = 0; k < TEST_COUNT; k++) { - for (block = 0; block < config->numFrames; block++) + for (ne10_uint16_t block = 0; block < config->numFrames; block++) { ne10_fir_decimate_float_neon (&SN, in_neon + (block * config->blockSize), out_neon + (block * config->blockSize / config->D), config->blockSize); } diff --git a/modules/dsp/test/test_suite_fir_interpolate.c b/modules/dsp/test/test_suite_fir_interpolate.c index 86bda77d..704a217e 100644 --- a/modules/dsp/test/test_suite_fir_interpolate.c +++ b/modules/dsp/test/test_suite_fir_interpolate.c @@ -179,15 +179,6 @@ void test_fir_interpolate_case0() { ne10_fir_interpolate_instance_f32_t SC, SN; - ne10_uint16_t loop = 0; - ne10_uint16_t block = 0; - ne10_uint16_t i = 0; - ne10_uint16_t length = 0; - - test_config *config; - ne10_result_t status_c = NE10_OK; - ne10_result_t status_neon = NE10_OK; - fprintf (stdout, "----------%30s start\n", __FUNCTION__); /* init input memory */ @@ -204,15 +195,14 @@ void test_fir_interpolate_case0() #ifdef ENABLE_NE10_FIR_INTERPOLATE_FLOAT_NEON #if defined (SMOKE_TEST)||(REGRESSION_TEST) - ne10_uint16_t pos = 0; - for (loop = 0; loop < NUM_TESTS; loop++) + for (ne10_uint16_t loop = 0; loop < NUM_TESTS; loop++) { - config = &CONFIG[loop]; - length = config->numFrames * config->blockSize * config->D; + test_config * config = &CONFIG[loop]; + ne10_uint16_t length = config->numFrames 
* config->blockSize * config->D; /* Initialize the CFFT/CIFFT module */ - status_c = ne10_fir_interpolate_init_float (&SC, config->D, config->numTaps, config->coeffsF32, fir_state_c, config->blockSize); - status_neon = ne10_fir_interpolate_init_float (&SN, config->D, config->numTaps, config->coeffsF32, fir_state_neon, config->blockSize); + ne10_result_t status_c = ne10_fir_interpolate_init_float (&SC, config->D, config->numTaps, config->coeffsF32, fir_state_c, config->blockSize); + ne10_result_t status_neon = ne10_fir_interpolate_init_float (&SN, config->D, config->numTaps, config->coeffsF32, fir_state_neon, config->blockSize); if ( ( (status_c == NE10_ERR) || (status_neon == NE10_ERR))) { @@ -227,7 +217,7 @@ void test_fir_interpolate_case0() } } /* copy input to input buffer */ - for (i = 0; i < 80; i++) + for (ne10_uint16_t i = 0; i < 80; i++) { in_c[i] = testInput_f32[i]; in_neon[i] = testInput_f32[i]; @@ -236,11 +226,11 @@ void test_fir_interpolate_case0() GUARD_ARRAY (out_c, TEST_LENGTH_SAMPLES); GUARD_ARRAY (out_neon, TEST_LENGTH_SAMPLES); - for (block = 0; block < config->numFrames; block++) + for (ne10_uint16_t block = 0; block < config->numFrames; block++) { ne10_fir_interpolate_float_c (&SC, in_c + (block * config->blockSize), out_c + (block * config->blockSize * config->D), config->blockSize); } - for (block = 0; block < config->numFrames; block++) + for (ne10_uint16_t block = 0; block < config->numFrames; block++) { ne10_fir_interpolate_float_neon (&SN, in_neon + (block * config->blockSize), out_neon + (block * config->blockSize * config->D), config->blockSize); } @@ -257,7 +247,7 @@ void test_fir_interpolate_case0() printf ("--------------------config %d\n", loop); printf ("snr %f\n", snr); #endif - for (pos = 0; pos < length; pos++) + for (ne10_uint16_t pos = 0; pos < length; pos++) { #if defined (DEBUG_TRACE) printf ("pos %d \n", pos); @@ -271,16 +261,14 @@ void test_fir_interpolate_case0() #endif // ENABLE_NE10_FIR_INTERPOLATE_FLOAT_NEON #ifdef 
PERFORMANCE_TEST - ne10_uint16_t k; fprintf (stdout, "%25s%20s%20s%20s%20s\n", "FIR Length&Taps", "C Time (micro-s)", "NEON Time (micro-s)", "Time Savings", "Performance Ratio"); - for (loop = 0; loop < NUM_PERF_TESTS; loop++) + for (ne10_uint16_t loop = 0; loop < NUM_PERF_TESTS; loop++) { - config = &CONFIG_PERF[loop]; - length = config->numFrames * config->blockSize * config->D; + test_config *config = &CONFIG_PERF[loop]; /* Initialize the CFFT/CIFFT module */ - status_c = ne10_fir_interpolate_init_float (&SC, config->D, config->numTaps, config->coeffsF32, fir_state_c, config->blockSize); - status_neon = ne10_fir_interpolate_init_float (&SN, config->D, config->numTaps, config->coeffsF32, fir_state_neon, config->blockSize); + ne10_result_t status_c = ne10_fir_interpolate_init_float (&SC, config->D, config->numTaps, config->coeffsF32, fir_state_c, config->blockSize); + ne10_result_t status_neon = ne10_fir_interpolate_init_float (&SN, config->D, config->numTaps, config->coeffsF32, fir_state_neon, config->blockSize); if ( ( (status_c == NE10_ERR) || (status_neon == NE10_ERR))) { @@ -296,7 +284,7 @@ void test_fir_interpolate_case0() } /* copy input to input buffer */ - for (i = 0; i < 80; i++) + for (ne10_uint16_t i = 0; i < 80; i++) { in_c[i] = testInput_f32[i]; in_neon[i] = testInput_f32[i]; @@ -306,9 +294,9 @@ void test_fir_interpolate_case0() ( time_c, { - for (k = 0; k < TEST_COUNT; k++) + for (ne10_uint16_t k = 0; k < TEST_COUNT; k++) { - for (block = 0; block < config->numFrames; block++) + for (ne10_uint16_t block = 0; block < config->numFrames; block++) { ne10_fir_interpolate_float_c (&SC, in_c + (block * config->blockSize), out_c + (block * config->blockSize * config->D), config->blockSize); } @@ -320,9 +308,9 @@ void test_fir_interpolate_case0() ( time_neon, { - for (k = 0; k < TEST_COUNT; k++) + for (ne10_uint16_t k = 0; k < TEST_COUNT; k++) { - for (block = 0; block < config->numFrames; block++) + for (ne10_uint16_t block = 0; block < config->numFrames; 
block++) { ne10_fir_interpolate_float_neon (&SN, in_neon + (block * config->blockSize), out_neon + (block * config->blockSize * config->D), config->blockSize); } diff --git a/modules/dsp/test/test_suite_fir_lattice.c b/modules/dsp/test/test_suite_fir_lattice.c index 1fbec0d3..bb93b149 100644 --- a/modules/dsp/test/test_suite_fir_lattice.c +++ b/modules/dsp/test/test_suite_fir_lattice.c @@ -210,14 +210,6 @@ void test_fir_lattice_case0() { ne10_fir_lattice_instance_f32_t SC, SN; - ne10_uint16_t loop = 0; - ne10_uint16_t block = 0; - ne10_uint16_t i = 0; - - test_config *config; - ne10_result_t status_c = NE10_OK; - ne10_result_t status_neon = NE10_OK; - fprintf (stdout, "----------%30s start\n", __FUNCTION__); /* init input memory */ @@ -234,21 +226,20 @@ void test_fir_lattice_case0() #ifdef ENABLE_NE10_FIR_LATTICE_FLOAT_NEON #if defined (SMOKE_TEST)||(REGRESSION_TEST) - ne10_uint16_t pos = 0; - for (loop = 0; loop < NUM_TESTS; loop++) + for (ne10_uint16_t loop = 0; loop < NUM_TESTS; loop++) { - config = &CONFIG[loop]; + test_config * config = &CONFIG[loop]; /* Initialize the CFFT/CIFFT module */ - status_c = ne10_fir_lattice_init_float (&SC, config->numTaps, config->coeffsF32, fir_state_c); - status_neon = ne10_fir_lattice_init_float (&SN, config->numTaps, config->coeffsF32, fir_state_neon); + ne10_result_t status_c = ne10_fir_lattice_init_float (&SC, config->numTaps, config->coeffsF32, fir_state_c); + ne10_result_t status_neon = ne10_fir_lattice_init_float (&SN, config->numTaps, config->coeffsF32, fir_state_neon); if ( ( (status_c == NE10_ERR) || (status_neon == NE10_ERR))) { fprintf (stdout, "initialization error\n"); } /* copy input to input buffer */ - for (i = 0; i < TEST_LENGTH_SAMPLES; i++) + for (ne10_uint16_t i = 0; i < TEST_LENGTH_SAMPLES; i++) { in_c[i] = testInput_f32[i]; in_neon[i] = testInput_f32[i]; @@ -257,11 +248,11 @@ void test_fir_lattice_case0() GUARD_ARRAY (out_c, TEST_LENGTH_SAMPLES); GUARD_ARRAY (out_neon, TEST_LENGTH_SAMPLES); - for (block = 
0; block < config->numFrames; block++) + for (ne10_uint16_t block = 0; block < config->numFrames; block++) { ne10_fir_lattice_float_c (&SC, in_c + (block * config->blockSize), out_c + (block * config->blockSize), config->blockSize); } - for (block = 0; block < config->numFrames; block++) + for (ne10_uint16_t block = 0; block < config->numFrames; block++) { ne10_fir_lattice_float_neon (&SN, in_neon + (block * config->blockSize), out_neon + (block * config->blockSize), config->blockSize); } @@ -278,7 +269,7 @@ void test_fir_lattice_case0() printf ("--------------------config %d\n", loop); printf ("snr %f\n", snr); #endif - for (pos = 0; pos < TEST_LENGTH_SAMPLES; pos++) + for (ne10_uint16_t pos = 0; pos < TEST_LENGTH_SAMPLES; pos++) { #if defined (DEBUG_TRACE) printf ("pos %d \n", pos); @@ -292,11 +283,10 @@ void test_fir_lattice_case0() #endif // ENABLE_NE10_FIR_LATTICE_FLOAT_NEON #ifdef PERFORMANCE_TEST - ne10_uint16_t k; fprintf (stdout, "%25s%20s%20s%20s%20s\n", "FIR Length&Taps", "C Time (micro-s)", "NEON Time (micro-s)", "Time Savings", "Performance Ratio"); - for (loop = 0; loop < NUM_PERF_TESTS; loop++) + for (ne10_uint16_t loop = 0; loop < NUM_PERF_TESTS; loop++) { - config = &CONFIG_PERF[loop]; + test_config *config = &CONFIG_PERF[loop]; /* Initialize the CFFT/CIFFT module */ - status_c = ne10_fir_lattice_init_float (&SC, config->numTaps, config->coeffsF32, fir_state_c); + ne10_result_t status_c = ne10_fir_lattice_init_float (&SC, config->numTaps, config->coeffsF32, fir_state_c), status_neon = NE10_OK; /* both declared here: this patch drops their function-scope declarations */ @@ -308,7 +298,7 @@ void test_fir_lattice_case0() } /* copy input to input buffer */ - for (i = 0; i < TEST_LENGTH_SAMPLES; i++) + for (ne10_uint16_t i = 0; i < TEST_LENGTH_SAMPLES; i++) { in_c[i] = testInput_f32[i]; in_neon[i] = testInput_f32[i]; @@ -318,9 +308,9 @@ void test_fir_lattice_case0() ( time_c, { - for (k = 0; k < TEST_COUNT; k++) + for (ne10_uint16_t k = 0; k < TEST_COUNT; k++) { - for (block = 0; block < config->numFrames; block++) + for (ne10_uint16_t block = 0; block < config->numFrames; block++) { ne10_fir_lattice_float_c (&SC, in_c + (block * config->blockSize), out_c + 
(block * config->blockSize), config->blockSize); } @@ -333,9 +323,9 @@ void test_fir_lattice_case0() ( time_neon, { - for (k = 0; k < TEST_COUNT; k++) + for (ne10_uint16_t k = 0; k < TEST_COUNT; k++) { - for (block = 0; block < config->numFrames; block++) + for (ne10_uint16_t block = 0; block < config->numFrames; block++) { ne10_fir_lattice_float_neon (&SN, in_neon + (block * config->blockSize), out_neon + (block * config->blockSize), config->blockSize); } diff --git a/modules/dsp/test/test_suite_fir_sparse.c b/modules/dsp/test/test_suite_fir_sparse.c index 583b02d3..f62ff52c 100644 --- a/modules/dsp/test/test_suite_fir_sparse.c +++ b/modules/dsp/test/test_suite_fir_sparse.c @@ -207,14 +207,6 @@ void test_fir_sparse_case0() { ne10_fir_sparse_instance_f32_t SC, SN; - ne10_uint16_t loop = 0; - ne10_uint16_t block = 0; - ne10_uint16_t i = 0; - - test_config *config; - ne10_result_t status_c = NE10_OK; - ne10_result_t status_neon = NE10_OK; - fprintf (stdout, "----------%30s start\n", __FUNCTION__); /* init input memory */ @@ -231,14 +223,13 @@ void test_fir_sparse_case0() #ifdef ENABLE_NE10_FIR_SPARSE_FLOAT_NEON #if defined (SMOKE_TEST)||(REGRESSION_TEST) - ne10_uint16_t pos = 0; - for (loop = 0; loop < NUM_TESTS; loop++) + for (ne10_uint16_t loop = 0; loop < NUM_TESTS; loop++) { - config = &CONFIG[loop]; + test_config *config = &CONFIG[loop]; /* Initialize the CFFT/CIFFT module */ - status_c = ne10_fir_sparse_init_float (&SC, config->numTaps, config->coeffsF32, fir_state_c, config->tapDelay, config->maxDelay, config->blockSize); - status_neon = ne10_fir_sparse_init_float (&SN, config->numTaps, config->coeffsF32, fir_state_neon, config->tapDelay, config->maxDelay, config->blockSize); + ne10_result_t status_c = ne10_fir_sparse_init_float (&SC, config->numTaps, config->coeffsF32, fir_state_c, config->tapDelay, config->maxDelay, config->blockSize); + ne10_result_t status_neon = ne10_fir_sparse_init_float (&SN, config->numTaps, config->coeffsF32, fir_state_neon, 
config->tapDelay, config->maxDelay, config->blockSize); if ( ( (status_c == NE10_ERR) || (status_neon == NE10_ERR))) { @@ -246,7 +237,7 @@ void test_fir_sparse_case0() } /* copy input to input buffer */ - for (i = 0; i < TEST_LENGTH_SAMPLES; i++) + for (ne10_uint16_t i = 0; i < TEST_LENGTH_SAMPLES; i++) { in_c[i] = testInput_f32[i]; in_neon[i] = testInput_f32[i]; @@ -257,11 +248,11 @@ void test_fir_sparse_case0() GUARD_ARRAY (out_c, TEST_LENGTH_SAMPLES); GUARD_ARRAY (out_neon, TEST_LENGTH_SAMPLES); - for (block = 0; block < config->numFrames; block++) + for (ne10_uint16_t block = 0; block < config->numFrames; block++) { ne10_fir_sparse_float_c (&SC, in_c + (block * config->blockSize), out_c + (block * config->blockSize), scratch_c, config->blockSize); } - for (block = 0; block < config->numFrames; block++) + for (ne10_uint16_t block = 0; block < config->numFrames; block++) { ne10_fir_sparse_float_neon (&SN, in_neon + (block * config->blockSize), out_neon + (block * config->blockSize), scratch_neon, config->blockSize); } @@ -278,7 +269,7 @@ void test_fir_sparse_case0() printf ("--------------------config %d\n", loop); printf ("snr %f\n", snr); #endif - for (pos = 0; pos < TEST_LENGTH_SAMPLES; pos++) + for (ne10_uint16_t pos = 0; pos < TEST_LENGTH_SAMPLES; pos++) { #if defined (DEBUG_TRACE) printf ("pos %d \n", pos); @@ -292,15 +283,14 @@ void test_fir_sparse_case0() #endif // ENABLE_NE10_FIR_SPARSE_FLOAT_NEON #ifdef PERFORMANCE_TEST - ne10_uint16_t k; fprintf (stdout, "%25s%20s%20s%20s%20s\n", "FIR Length&Taps", "C Time (micro-s)", "NEON Time (micro-s)", "Time Savings", "Performance Ratio"); - for (loop = 0; loop < NUM_PERF_TESTS; loop++) + for (ne10_uint16_t loop = 0; loop < NUM_PERF_TESTS; loop++) { - config = &CONFIG_PERF[loop]; + test_config *config = &CONFIG_PERF[loop]; /* Initialize the CFFT/CIFFT module */ - status_c = ne10_fir_sparse_init_float (&SC, config->numTaps, config->coeffsF32, fir_state_c, config->tapDelay, config->maxDelay, config->blockSize); - 
status_neon = ne10_fir_sparse_init_float (&SN, config->numTaps, config->coeffsF32, fir_state_neon, config->tapDelay, config->maxDelay, config->blockSize); + ne10_result_t status_c = ne10_fir_sparse_init_float (&SC, config->numTaps, config->coeffsF32, fir_state_c, config->tapDelay, config->maxDelay, config->blockSize); + ne10_result_t status_neon = ne10_fir_sparse_init_float (&SN, config->numTaps, config->coeffsF32, fir_state_neon, config->tapDelay, config->maxDelay, config->blockSize); if ( ( (status_c == NE10_ERR) || (status_neon == NE10_ERR))) { @@ -308,7 +298,7 @@ void test_fir_sparse_case0() } /* copy input to input buffer */ - for (i = 0; i < TEST_LENGTH_SAMPLES; i++) + for (ne10_uint16_t i = 0; i < TEST_LENGTH_SAMPLES; i++) { in_c[i] = testInput_f32[i]; in_neon[i] = testInput_f32[i]; @@ -318,9 +308,9 @@ void test_fir_sparse_case0() ( time_c, { - for (k = 0; k < TEST_COUNT; k++) + for (ne10_uint16_t k = 0; k < TEST_COUNT; k++) { - for (block = 0; block < config->numFrames; block++) + for (ne10_uint16_t block = 0; block < config->numFrames; block++) { ne10_fir_sparse_float_c (&SC, in_c + (block * config->blockSize), out_c + (block * config->blockSize), scratch_c, config->blockSize); } @@ -333,9 +323,9 @@ void test_fir_sparse_case0() ( time_neon, { - for (k = 0; k < TEST_COUNT; k++) + for (ne10_uint16_t k = 0; k < TEST_COUNT; k++) { - for (block = 0; block < config->numFrames; block++) + for (ne10_uint16_t block = 0; block < config->numFrames; block++) { ne10_fir_sparse_float_neon (&SN, in_neon + (block * config->blockSize), out_neon + (block * config->blockSize), scratch_neon, config->blockSize); } diff --git a/modules/dsp/test/test_suite_iir.c b/modules/dsp/test/test_suite_iir.c index 87092972..ca3dfc15 100644 --- a/modules/dsp/test/test_suite_iir.c +++ b/modules/dsp/test/test_suite_iir.c @@ -254,12 +254,6 @@ void test_iir_lattice_case0() { ne10_iir_lattice_instance_f32_t SC, SN; - ne10_uint16_t loop = 0; - ne10_uint16_t block = 0; - ne10_uint16_t i = 0; - - 
test_config *config; - fprintf (stdout, "----------%30s start\n", __FUNCTION__); /* init input memory */ @@ -276,17 +270,16 @@ void test_iir_lattice_case0() #ifdef ENABLE_NE10_IIR_LATTICE_FLOAT_NEON #if defined (SMOKE_TEST)||(REGRESSION_TEST) - ne10_uint16_t pos = 0; - for (loop = 0; loop < NUM_TESTS; loop++) + for (ne10_uint16_t loop = 0; loop < NUM_TESTS; loop++) { - config = &CONFIG[loop]; + test_config *config = &CONFIG[loop]; /* Initialize the CFFT/CIFFT module */ ne10_iir_lattice_init_float (&SC, config->numTaps, config->kCoeffsF32, config->vCoeffsF32, iir_state_c, config->blockSize); ne10_iir_lattice_init_float (&SN, config->numTaps, config->kCoeffsF32, config->vCoeffsF32, iir_state_neon, config->blockSize); /* copy input to input buffer */ - for (i = 0; i < TEST_LENGTH_SAMPLES; i++) + for (ne10_uint16_t i = 0; i < TEST_LENGTH_SAMPLES; i++) { in_c[i] = testInput_f32[i]; in_neon[i] = testInput_f32[i]; @@ -297,11 +290,11 @@ void test_iir_lattice_case0() GUARD_ARRAY (out_c, TEST_LENGTH_SAMPLES); GUARD_ARRAY (out_neon, TEST_LENGTH_SAMPLES); - for (block = 0; block < config->numFrames; block++) + for (ne10_uint16_t block = 0; block < config->numFrames; block++) { ne10_iir_lattice_float_c (&SC, in_c + (block * config->blockSize), out_c + (block * config->blockSize), config->blockSize); } - for (block = 0; block < config->numFrames; block++) + for (ne10_uint16_t block = 0; block < config->numFrames; block++) { ne10_iir_lattice_float_neon (&SN, in_neon + (block * config->blockSize), out_neon + (block * config->blockSize), config->blockSize); } @@ -318,7 +311,7 @@ void test_iir_lattice_case0() assert_false ( (snr < SNR_THRESHOLD)); //conformance test 2: compare output of C and neon - for (pos = 0; pos < TEST_LENGTH_SAMPLES; pos++) + for (ne10_uint16_t pos = 0; pos < TEST_LENGTH_SAMPLES; pos++) { #if defined (DEBUG_TRACE) printf ("pos %d \n", pos); @@ -332,18 +325,17 @@ void test_iir_lattice_case0() #endif // ENABLE_NE10_IIR_LATTICE_FLOAT_NEON #ifdef PERFORMANCE_TEST 
- ne10_uint16_t k; fprintf (stdout, "%25s%20s%20s%20s%20s\n", "IIR Length&Taps", "C Time (micro-s)", "NEON Time (micro-s)", "Time Savings", "Performance Ratio"); - for (loop = 0; loop < NUM_PERF_TESTS; loop++) + for (ne10_uint16_t loop = 0; loop < NUM_PERF_TESTS; loop++) { - config = &CONFIG_PERF[loop]; + test_config *config = &CONFIG_PERF[loop]; /* Initialize the CFFT/CIFFT module */ ne10_iir_lattice_init_float (&SC, config->numTaps, config->kCoeffsF32, config->vCoeffsF32, iir_state_c, config->blockSize); ne10_iir_lattice_init_float (&SN, config->numTaps, config->kCoeffsF32, config->vCoeffsF32, iir_state_neon, config->blockSize); /* copy input to input buffer */ - for (i = 0; i < TEST_LENGTH_SAMPLES; i++) + for (ne10_uint16_t i = 0; i < TEST_LENGTH_SAMPLES; i++) { in_c[i] = testInput_f32[i]; in_neon[i] = testInput_f32[i]; @@ -353,9 +345,9 @@ void test_iir_lattice_case0() ( time_c, { - for (k = 0; k < TEST_COUNT; k++) + for (ne10_uint16_t k = 0; k < TEST_COUNT; k++) { - for (block = 0; block < config->numFrames; block++) + for (ne10_uint16_t block = 0; block < config->numFrames; block++) { ne10_iir_lattice_float_c (&SC, in_c + (block * config->blockSize), out_c + (block * config->blockSize), config->blockSize); } @@ -368,9 +360,9 @@ void test_iir_lattice_case0() ( time_neon, { - for (k = 0; k < TEST_COUNT; k++) + for (ne10_uint16_t k = 0; k < TEST_COUNT; k++) { - for (block = 0; block < config->numFrames; block++) + for (ne10_uint16_t block = 0; block < config->numFrames; block++) { ne10_iir_lattice_float_neon (&SN, in_neon + (block * config->blockSize), out_neon + (block * config->blockSize), config->blockSize); } diff --git a/modules/imgproc/NE10_boxfilter.c b/modules/imgproc/NE10_boxfilter.c index b78ffa5a..736ce978 100644 --- a/modules/imgproc/NE10_boxfilter.c +++ b/modules/imgproc/NE10_boxfilter.c @@ -98,31 +98,26 @@ void ne10_img_boxfilter_row_border (const ne10_uint8_t* src, *border_l_p = anchor.x; *border_r_p = kernel.x - (anchor.x + 1); - ne10_int32_t x, 
y, k; - - const ne10_uint8_t *src_row; - ne10_uint8_t *dst_row; - - for (y = 0; y < src_sz.y; y++) + for (ne10_int32_t y = 0; y < src_sz.y; y++) { - src_row = src + y * src_stride; - dst_row = dst + y * dst_stride; + const ne10_uint8_t *src_row = src + y * src_stride; + ne10_uint8_t *dst_row = dst + y * dst_stride; ne10_float32_t sum[RGBA_CH]; /* compute left border */ ne10_int32_t offset = kernel.x - *border_l_p - 1; - for (k = 0; k < RGBA_CH; k++) + for (ne10_int32_t k = 0; k < RGBA_CH; k++) { sum[k] = 0; - for (x = 0; x < offset; x++) + for (ne10_int32_t x = 0; x < offset; x++) { sum[k] += * (src_row + x * RGBA_CH + k); } } - for (k = 0; k < RGBA_CH; k++) + for (ne10_int32_t k = 0; k < RGBA_CH; k++) { - for (x = 0; x < *border_l_p; x++) + for (ne10_int32_t x = 0; x < *border_l_p; x++) { sum[k] += * (src_row + (offset + x) * RGBA_CH + k); @@ -132,19 +127,19 @@ void ne10_img_boxfilter_row_border (const ne10_uint8_t* src, } /* compute right border */ - for (k = 0; k < RGBA_CH; k++) + for (ne10_int32_t k = 0; k < RGBA_CH; k++) { sum[k] = 0; - for (x = 0; x < kernel.x; x++) + for (ne10_int32_t x = 0; x < kernel.x; x++) { sum[k] += * (src_row + (src_sz.x - kernel.x + x) * RGBA_CH + k); } } - for (k = 0; k < RGBA_CH; k++) + for (ne10_int32_t k = 0; k < RGBA_CH; k++) { - for (x = 0; x < *border_r_p; x++) + for (ne10_int32_t x = 0; x < *border_r_p; x++) { sum[k] -= * (src_row + (src_sz.x - kernel.x + x) * RGBA_CH + k); @@ -223,31 +218,27 @@ void ne10_img_boxfilter_col_border (const ne10_uint8_t *src, *border_t_p = anchor.y; *border_b_p = kernel.y - (anchor.y + 1); - ne10_int32_t x, y, k; - const ne10_uint8_t *src_col; - ne10_uint8_t *dst_col; - - for (x = 0; x < src_sz.x; x++) + for (ne10_int32_t x = 0; x < src_sz.x; x++) { - src_col = src + x * RGBA_CH; - dst_col = dst + x * RGBA_CH; + const ne10_uint8_t *src_col = src + x * RGBA_CH; + ne10_uint8_t *dst_col = dst + x * RGBA_CH; ne10_float32_t sum[RGBA_CH]; /* compute top border */ ne10_int32_t offset = kernel.y - 
*border_t_p - 1; - for (k = 0; k < RGBA_CH; k++) + for (ne10_int32_t k = 0; k < RGBA_CH; k++) { sum[k] = 0; - for (y = 0; y < offset; y++) + for (ne10_int32_t y = 0; y < offset; y++) { sum[k] += * (src_col + y * src_stride + k); } } - for (k = 0; k < RGBA_CH; k++) + for (ne10_int32_t k = 0; k < RGBA_CH; k++) { - for (y = 0; y < *border_t_p; y++) + for (ne10_int32_t y = 0; y < *border_t_p; y++) { sum[k] += * (src_col + (offset + y) * src_stride + k); @@ -257,19 +248,19 @@ void ne10_img_boxfilter_col_border (const ne10_uint8_t *src, } /* compute the bottom border */ - for (k = 0; k < RGBA_CH; k++) + for (ne10_int32_t k = 0; k < RGBA_CH; k++) { sum[k] = 0; - for (y = 0; y < kernel.y; y++) + for (ne10_int32_t y = 0; y < kernel.y; y++) { sum[k] += * (src_col + (src_sz.y - kernel.y + y) * src_stride + k); } } - for (k = 0; k < RGBA_CH; k++) + for (ne10_int32_t k = 0; k < RGBA_CH; k++) { - for (y = 0; y < *border_b_p; y++) + for (ne10_int32_t y = 0; y < *border_b_p; y++) { sum[k] -= * (src_col + (src_sz.y - kernel.y + y) * src_stride + k); diff --git a/modules/imgproc/NE10_boxfilter.neon.c b/modules/imgproc/NE10_boxfilter.neon.c index 8759c1d6..546d2d66 100644 --- a/modules/imgproc/NE10_boxfilter.neon.c +++ b/modules/imgproc/NE10_boxfilter.neon.c @@ -318,7 +318,7 @@ void ne10_img_boxfilter_col_neon (const ne10_uint8_t *src, ne10_uint32_t prev = (anchor.y + 1) * src_stride; ne10_uint32_t next = (kernel.y - anchor.y - 1) * src_stride; - uint16x8_t sum_vec, sum_vec_pre; + uint16x8_t sum_vec_pre; int16x8_t sum_vec_s; uint8x8_t src_pixel_prev_vec, src_pixel_next_vec; uint8x8_t src_pixel_prev_vec_pre, src_pixel_next_vec_pre; @@ -345,13 +345,12 @@ void ne10_img_boxfilter_col_neon (const ne10_uint8_t *src, ne10_uint8_t *dst_pixel = dst_row; src_pixel_prev_vec = vld1_u8 (src_pixel - prev); src_pixel_next_vec = vld1_u8 (src_pixel + next); - ne10_uint16_t *sum, *sum_pre; - sum_vec = vld1q_u16 (sum_row); + uint16x8_t sum_vec = vld1q_u16 (sum_row); for (x = 0; x < src_sz_x_adjust; x 
+= 2) { - sum = sum_row + x * RGBA_CH; - sum_pre = sum + 2 * RGBA_CH; + ne10_uint16_t *sum = sum_row + x * RGBA_CH; + ne10_uint16_t *sum_pre = sum + 2 * RGBA_CH; sum_vec_pre = vld1q_u16 (sum_pre); /* preload */ src_pixel = src_row + (x + 2) * RGBA_CH; @@ -385,7 +384,7 @@ void ne10_img_boxfilter_col_neon (const ne10_uint8_t *src, dst_row = dst + (1 + border_t) * dst_stride; /* step back one column */ x = src_sz.x - 2; - sum_vec = vld1q_u16 (sum_row + x * RGBA_CH); + uint16x8_t sum_vec = vld1q_u16 (sum_row + x * RGBA_CH); while (src_row < src_row_end) { diff --git a/modules/imgproc/test/test_suite_rotate.c b/modules/imgproc/test/test_suite_rotate.c index 7b18fa36..3c69e8eb 100644 --- a/modules/imgproc/test/test_suite_rotate.c +++ b/modules/imgproc/test/test_suite_rotate.c @@ -118,7 +118,9 @@ void test_rotate_performance_case() ne10_int32_t srcw = SRC_WIDTH; ne10_int32_t srch = SRC_HEIGHT; ne10_uint32_t dstw_c, dsth_c; +#ifdef ENABLE_NE10_IMG_ROTATE_RGBA_NEON ne10_uint32_t dstw_neon, dsth_neon; +#endif // ENABLE_NE10_IMG_ROTATE_RGBA_NEON ne10_int32_t angle; ne10_int64_t time_c = 0; ne10_int64_t time_neon = 0; diff --git a/modules/math/test/test_suite_math.c b/modules/math/test/test_suite_math.c index c514e1ff..d32d1d3f 100644 --- a/modules/math/test/test_suite_math.c +++ b/modules/math/test/test_suite_math.c @@ -88,9 +88,6 @@ static ne10_float32_t time_savings = 0.0f; void test_abs_case0() { #define MAX_VEC_COMPONENTS 4 - ne10_int32_t loop; - ne10_int32_t func_loop; - /* init function table */ memset (ftbl_3args, 0, sizeof (ftbl_3args)); ftbl_3args[ 0] = (ne10_func_3args_t) ne10_abs_float_c; @@ -116,9 +113,9 @@ void test_abs_case0() NE10_DST_ALLOC (thedst_c, guarded_dst_c, fixed_length); NE10_DST_ALLOC (thedst_neon, guarded_dst_neon, fixed_length); - for (func_loop = 0; func_loop < MAX_VEC_COMPONENTS; func_loop++) + for (ne10_int32_t func_loop = 0; func_loop < MAX_VEC_COMPONENTS; func_loop++) { - for (loop = 0; loop < TEST_ITERATION; loop++) + for (ne10_int32_t loop = 
0; loop < TEST_ITERATION; loop++) { vec_size = func_loop + 1; @@ -160,13 +157,13 @@ void test_abs_case0() NE10_DST_ALLOC (perftest_thedst_c, perftest_guarded_dst_c, perftest_length); NE10_DST_ALLOC (perftest_thedst_neon, perftest_guarded_dst_neon, perftest_length); - for (func_loop = 0; func_loop < MAX_VEC_COMPONENTS; func_loop++) + for (ne10_int32_t func_loop = 0; func_loop < MAX_VEC_COMPONENTS; func_loop++) { GET_TIME (time_c, - for (loop = 0; loop < PERF_TEST_ITERATION; loop++) ftbl_3args[2 * func_loop] (perftest_thedst_c, perftest_thesrc1, loop); + for (ne10_int32_t loop = 0; loop < PERF_TEST_ITERATION; loop++) ftbl_3args[2 * func_loop] (perftest_thedst_c, perftest_thesrc1, loop); ); GET_TIME (time_neon, - for (loop = 0; loop < PERF_TEST_ITERATION; loop++) ftbl_3args[2 * func_loop + 1] (perftest_thedst_neon, perftest_thesrc1, loop); + for (ne10_int32_t loop = 0; loop < PERF_TEST_ITERATION; loop++) ftbl_3args[2 * func_loop + 1] (perftest_thedst_neon, perftest_thesrc1, loop); ); time_speedup = (ne10_float32_t) time_c / time_neon; time_savings = ( ( (ne10_float32_t) (time_c - time_neon)) / time_c) * 100; @@ -185,9 +182,6 @@ void test_abs_case0() void test_addc_case0() { #define MAX_VEC_COMPONENTS 4 - ne10_int32_t loop; - ne10_int32_t func_loop; - fprintf (stdout, "----------%30s start\n", __FUNCTION__); /* init function table */ @@ -215,9 +209,9 @@ void test_addc_case0() NE10_DST_ALLOC (thedst_c, guarded_dst_c, fixed_length); NE10_DST_ALLOC (thedst_neon, guarded_dst_neon, fixed_length); - for (func_loop = 0; func_loop < MAX_VEC_COMPONENTS; func_loop++) + for (ne10_int32_t func_loop = 0; func_loop < MAX_VEC_COMPONENTS; func_loop++) { - for (loop = 0; loop < TEST_ITERATION; loop++) + for (ne10_int32_t loop = 0; loop < TEST_ITERATION; loop++) { vec_size = func_loop + 1; @@ -271,13 +265,13 @@ void test_addc_case0() NE10_DST_ALLOC (perftest_thedst_c, perftest_guarded_dst_c, perftest_length); NE10_DST_ALLOC (perftest_thedst_neon, perftest_guarded_dst_neon, 
perftest_length); - for (func_loop = 0; func_loop < 1; func_loop++) + for (ne10_int32_t func_loop = 0; func_loop < 1; func_loop++) { GET_TIME (time_c, - for (loop = 0; loop < PERF_TEST_ITERATION; loop++) ftbl_4args_cst[2 * func_loop] (perftest_thedst_c, perftest_thesrc1, perftest_thecst[0], loop); + for (ne10_int32_t loop = 0; loop < PERF_TEST_ITERATION; loop++) ftbl_4args_cst[2 * func_loop] (perftest_thedst_c, perftest_thesrc1, perftest_thecst[0], loop); ); GET_TIME (time_neon, - for (loop = 0; loop < PERF_TEST_ITERATION; loop++) ftbl_4args_cst[2 * func_loop + 1] (perftest_thedst_neon, perftest_thesrc1, perftest_thecst[0], loop); + for (ne10_int32_t loop = 0; loop < PERF_TEST_ITERATION; loop++) ftbl_4args_cst[2 * func_loop + 1] (perftest_thedst_neon, perftest_thesrc1, perftest_thecst[0], loop); ); time_speedup = (ne10_float32_t) time_c / time_neon; time_savings = ( ( (ne10_float32_t) (time_c - time_neon)) / time_c) * 100; @@ -286,10 +280,10 @@ void test_addc_case0() - for (; func_loop < MAX_VEC_COMPONENTS; func_loop++) + for (ne10_int32_t func_loop = 1; func_loop < MAX_VEC_COMPONENTS; func_loop++) /* index 0 is timed by the loop above */ { GET_TIME (time_c, - for (loop = 0; loop < PERF_TEST_ITERATION; loop++) ftbl_4args[2 * func_loop] (perftest_thedst_c, perftest_thesrc1, perftest_thecst, loop); + for (ne10_int32_t loop = 0; loop < PERF_TEST_ITERATION; loop++) ftbl_4args[2 * func_loop] (perftest_thedst_c, perftest_thesrc1, perftest_thecst, loop); ); GET_TIME (time_neon, - for (loop = 0; loop < PERF_TEST_ITERATION; loop++) ftbl_4args[2 * func_loop + 1] (perftest_thedst_neon, perftest_thesrc1, perftest_thecst, loop); + for (ne10_int32_t loop = 0; loop < PERF_TEST_ITERATION; loop++) ftbl_4args[2 * func_loop + 1] (perftest_thedst_neon, perftest_thesrc1, perftest_thecst, loop); ); time_speedup = (ne10_float32_t) time_c / time_neon; time_savings = ( ( (ne10_float32_t) (time_c - time_neon)) / time_c) * 100; @@ -309,9 +303,6 @@ void test_addc_case0() void test_add_case0() { #define MAX_VEC_COMPONENTS 4 - ne10_int32_t loop; - ne10_int32_t func_loop; - fprintf (stdout, "----------%30s start\n", 
__FUNCTION__); /* init function table */ @@ -338,9 +329,9 @@ void test_add_case0() NE10_DST_ALLOC (thedst_c, guarded_dst_c, fixed_length); NE10_DST_ALLOC (thedst_neon, guarded_dst_neon, fixed_length); - for (func_loop = 0; func_loop < MAX_VEC_COMPONENTS; func_loop++) + for (ne10_int32_t func_loop = 0; func_loop < MAX_VEC_COMPONENTS; func_loop++) { - for (loop = 0; loop < TEST_ITERATION; loop++) + for (ne10_int32_t loop = 0; loop < TEST_ITERATION; loop++) { vec_size = func_loop + 1; @@ -385,13 +376,13 @@ void test_add_case0() NE10_DST_ALLOC (perftest_thedst_c, perftest_guarded_dst_c, perftest_length); NE10_DST_ALLOC (perftest_thedst_neon, perftest_guarded_dst_neon, perftest_length); - for (func_loop = 0; func_loop < MAX_VEC_COMPONENTS; func_loop++) + for (ne10_int32_t func_loop = 0; func_loop < MAX_VEC_COMPONENTS; func_loop++) { GET_TIME (time_c, - for (loop = 0; loop < PERF_TEST_ITERATION; loop++) ftbl_4args[2 * func_loop] (perftest_thedst_c, perftest_thesrc1, perftest_thesrc2, loop); + for (ne10_int32_t loop = 0; loop < PERF_TEST_ITERATION; loop++) ftbl_4args[2 * func_loop] (perftest_thedst_c, perftest_thesrc1, perftest_thesrc2, loop); ); GET_TIME (time_neon, - for (loop = 0; loop < PERF_TEST_ITERATION; loop++) ftbl_4args[2 * func_loop + 1] (perftest_thedst_neon, perftest_thesrc1, perftest_thesrc2, loop); + for (ne10_int32_t loop = 0; loop < PERF_TEST_ITERATION; loop++) ftbl_4args[2 * func_loop + 1] (perftest_thedst_neon, perftest_thesrc1, perftest_thesrc2, loop); ); time_speedup = (ne10_float32_t) time_c / time_neon; time_savings = ( ( (ne10_float32_t) (time_c - time_neon)) / time_c) * 100; @@ -411,9 +402,6 @@ void test_add_case0() void test_cross_case0() { #define MAX_VEC_COMPONENTS 3 - ne10_int32_t loop; - ne10_int32_t func_loop; - fprintf (stdout, "----------%30s start\n", __FUNCTION__); /* init function table */ @@ -434,9 +422,9 @@ void test_cross_case0() NE10_DST_ALLOC (thedst_c, guarded_dst_c, fixed_length); NE10_DST_ALLOC (thedst_neon, guarded_dst_neon, 
fixed_length); - for (func_loop = 2; func_loop < MAX_VEC_COMPONENTS; func_loop++) + for (ne10_int32_t func_loop = 2; func_loop < MAX_VEC_COMPONENTS; func_loop++) { - for (loop = 0; loop < TEST_ITERATION; loop++) + for (ne10_int32_t loop = 0; loop < TEST_ITERATION; loop++) { vec_size = func_loop + 1; @@ -481,13 +469,13 @@ void test_cross_case0() NE10_DST_ALLOC (perftest_thedst_c, perftest_guarded_dst_c, perftest_length); NE10_DST_ALLOC (perftest_thedst_neon, perftest_guarded_dst_neon, perftest_length); - for (func_loop = 2; func_loop < MAX_VEC_COMPONENTS; func_loop++) + for (ne10_int32_t func_loop = 2; func_loop < MAX_VEC_COMPONENTS; func_loop++) { GET_TIME (time_c, - for (loop = 0; loop < PERF_TEST_ITERATION; loop++) ftbl_4args[2 * func_loop] (perftest_thedst_c, perftest_thesrc1, perftest_thesrc2, loop); + for (ne10_int32_t loop = 0; loop < PERF_TEST_ITERATION; loop++) ftbl_4args[2 * func_loop] (perftest_thedst_c, perftest_thesrc1, perftest_thesrc2, loop); ); GET_TIME (time_neon, - for (loop = 0; loop < PERF_TEST_ITERATION; loop++) ftbl_4args[2 * func_loop + 1] (perftest_thedst_neon, perftest_thesrc1, perftest_thesrc2, loop); + for (ne10_int32_t loop = 0; loop < PERF_TEST_ITERATION; loop++) ftbl_4args[2 * func_loop + 1] (perftest_thedst_neon, perftest_thesrc1, perftest_thesrc2, loop); ); time_speedup = (ne10_float32_t) time_c / time_neon; time_savings = ( ( (ne10_float32_t) (time_c - time_neon)) / time_c) * 100; @@ -507,9 +495,6 @@ void test_cross_case0() void test_divc_case0() { #define MAX_VEC_COMPONENTS 4 - ne10_int32_t loop; - ne10_int32_t func_loop; - fprintf (stdout, "----------%30s start\n", __FUNCTION__); /* init function table */ @@ -537,9 +522,9 @@ void test_divc_case0() NE10_DST_ALLOC (thedst_c, guarded_dst_c, fixed_length); NE10_DST_ALLOC (thedst_neon, guarded_dst_neon, fixed_length); - for (func_loop = 0; func_loop < MAX_VEC_COMPONENTS; func_loop++) + for (ne10_int32_t func_loop = 0; func_loop < MAX_VEC_COMPONENTS; func_loop++) { - for (loop = 0; loop 
< TEST_ITERATION; loop++) + for (ne10_int32_t loop = 0; loop < TEST_ITERATION; loop++) { vec_size = func_loop + 1; @@ -592,13 +577,13 @@ void test_divc_case0() NE10_DST_ALLOC (perftest_thedst_c, perftest_guarded_dst_c, perftest_length); NE10_DST_ALLOC (perftest_thedst_neon, perftest_guarded_dst_neon, perftest_length); - for (func_loop = 0; func_loop < 1; func_loop++) + for (ne10_int32_t func_loop = 0; func_loop < 1; func_loop++) { GET_TIME (time_c, - for (loop = 0; loop < PERF_TEST_ITERATION; loop++) ftbl_4args_cst[2 * func_loop] (perftest_thedst_c, perftest_thesrc1, perftest_thecst[0], loop); + for (ne10_int32_t loop = 0; loop < PERF_TEST_ITERATION; loop++) ftbl_4args_cst[2 * func_loop] (perftest_thedst_c, perftest_thesrc1, perftest_thecst[0], loop); ); GET_TIME (time_neon, - for (loop = 0; loop < PERF_TEST_ITERATION; loop++) ftbl_4args_cst[2 * func_loop + 1] (perftest_thedst_neon, perftest_thesrc1, perftest_thecst[0], loop); + for (ne10_int32_t loop = 0; loop < PERF_TEST_ITERATION; loop++) ftbl_4args_cst[2 * func_loop + 1] (perftest_thedst_neon, perftest_thesrc1, perftest_thecst[0], loop); ); time_speedup = (ne10_float32_t) time_c / time_neon; time_savings = ( ( (ne10_float32_t) (time_c - time_neon)) / time_c) * 100; @@ -607,10 +592,10 @@ void test_divc_case0() - for (; func_loop < MAX_VEC_COMPONENTS; func_loop++) + for (ne10_int32_t func_loop = 1; func_loop < MAX_VEC_COMPONENTS; func_loop++) /* index 0 is timed by the loop above */ { GET_TIME (time_c, - for (loop = 0; loop < PERF_TEST_ITERATION; loop++) ftbl_4args[2 * func_loop] (perftest_thedst_c, perftest_thesrc1, perftest_thecst, loop); + for (ne10_int32_t loop = 0; loop < PERF_TEST_ITERATION; loop++) ftbl_4args[2 * func_loop] (perftest_thedst_c, perftest_thesrc1, perftest_thecst, loop); ); GET_TIME (time_neon, - for (loop = 0; loop < PERF_TEST_ITERATION; loop++) ftbl_4args[2 * func_loop + 1] (perftest_thedst_neon, perftest_thesrc1, perftest_thecst, loop); + for (ne10_int32_t loop = 0; loop < PERF_TEST_ITERATION; loop++) ftbl_4args[2 * func_loop + 1] (perftest_thedst_neon, perftest_thesrc1, perftest_thecst, loop); ); time_speedup 
= (ne10_float32_t) time_c / time_neon; time_savings = ( ( (ne10_float32_t) (time_c - time_neon)) / time_c) * 100; @@ -630,9 +615,6 @@ void test_divc_case0() void test_div_case0() { #define MAX_VEC_COMPONENTS 4 - ne10_int32_t loop; - ne10_int32_t func_loop; - fprintf (stdout, "----------%30s start\n", __FUNCTION__); /* init function table */ @@ -659,9 +641,9 @@ void test_div_case0() NE10_DST_ALLOC (thedst_c, guarded_dst_c, fixed_length); NE10_DST_ALLOC (thedst_neon, guarded_dst_neon, fixed_length); - for (func_loop = 0; func_loop < MAX_VEC_COMPONENTS; func_loop++) + for (ne10_int32_t func_loop = 0; func_loop < MAX_VEC_COMPONENTS; func_loop++) { - for (loop = 0; loop < TEST_ITERATION; loop++) + for (ne10_int32_t loop = 0; loop < TEST_ITERATION; loop++) { vec_size = func_loop + 1; @@ -706,13 +688,13 @@ void test_div_case0() NE10_DST_ALLOC (perftest_thedst_c, perftest_guarded_dst_c, perftest_length); NE10_DST_ALLOC (perftest_thedst_neon, perftest_guarded_dst_neon, perftest_length); - for (func_loop = 0; func_loop < MAX_VEC_COMPONENTS; func_loop++) + for (ne10_int32_t func_loop = 0; func_loop < MAX_VEC_COMPONENTS; func_loop++) { GET_TIME (time_c, - for (loop = 0; loop < PERF_TEST_ITERATION; loop++) ftbl_4args[2 * func_loop] (perftest_thedst_c, perftest_thesrc1, perftest_thesrc2, loop); + for (ne10_int32_t loop = 0; loop < PERF_TEST_ITERATION; loop++) ftbl_4args[2 * func_loop] (perftest_thedst_c, perftest_thesrc1, perftest_thesrc2, loop); ); GET_TIME (time_neon, - for (loop = 0; loop < PERF_TEST_ITERATION; loop++) ftbl_4args[2 * func_loop + 1] (perftest_thedst_neon, perftest_thesrc1, perftest_thesrc2, loop); + for (ne10_int32_t loop = 0; loop < PERF_TEST_ITERATION; loop++) ftbl_4args[2 * func_loop + 1] (perftest_thedst_neon, perftest_thesrc1, perftest_thesrc2, loop); ); time_speedup = (ne10_float32_t) time_c / time_neon; time_savings = ( ( (ne10_float32_t) (time_c - time_neon)) / time_c) * 100; @@ -732,9 +714,6 @@ void test_div_case0() void test_dot_case0() { #define 
MAX_VEC_COMPONENTS 4 - ne10_int32_t loop; - ne10_int32_t func_loop; - fprintf (stdout, "----------%30s start\n", __FUNCTION__); /* init function table */ @@ -758,9 +737,9 @@ void test_dot_case0() NE10_DST_ALLOC (thedst_c, guarded_dst_c, fixed_length); NE10_DST_ALLOC (thedst_neon, guarded_dst_neon, fixed_length); - for (func_loop = 1; func_loop < MAX_VEC_COMPONENTS; func_loop++) + for (ne10_int32_t func_loop = 1; func_loop < MAX_VEC_COMPONENTS; func_loop++) { - for (loop = 0; loop < TEST_ITERATION; loop++) + for (ne10_int32_t loop = 0; loop < TEST_ITERATION; loop++) { #ifdef DEBUG_TRACE ne10_int32_t vec_size = func_loop + 1; @@ -807,13 +786,13 @@ void test_dot_case0() NE10_DST_ALLOC (perftest_thedst_c, perftest_guarded_dst_c, perftest_length); NE10_DST_ALLOC (perftest_thedst_neon, perftest_guarded_dst_neon, perftest_length); - for (func_loop = 1; func_loop < MAX_VEC_COMPONENTS; func_loop++) + for (ne10_int32_t func_loop = 1; func_loop < MAX_VEC_COMPONENTS; func_loop++) { GET_TIME (time_c, - for (loop = 0; loop < PERF_TEST_ITERATION; loop++) ftbl_4args[2 * func_loop] (perftest_thedst_c, perftest_thesrc1, perftest_thesrc2, loop); + for (ne10_int32_t loop = 0; loop < PERF_TEST_ITERATION; loop++) ftbl_4args[2 * func_loop] (perftest_thedst_c, perftest_thesrc1, perftest_thesrc2, loop); ); GET_TIME (time_neon, - for (loop = 0; loop < PERF_TEST_ITERATION; loop++) ftbl_4args[2 * func_loop + 1] (perftest_thedst_neon, perftest_thesrc1, perftest_thesrc2, loop); + for (ne10_int32_t loop = 0; loop < PERF_TEST_ITERATION; loop++) ftbl_4args[2 * func_loop + 1] (perftest_thedst_neon, perftest_thesrc1, perftest_thesrc2, loop); ); time_speedup = (ne10_float32_t) time_c / time_neon; time_savings = ( ( (ne10_float32_t) (time_c - time_neon)) / time_c) * 100; @@ -833,9 +812,6 @@ void test_dot_case0() void test_len_case0() { #define MAX_VEC_COMPONENTS 4 - ne10_int32_t loop; - ne10_int32_t func_loop; - /* init function table */ memset (ftbl_3args, 0, sizeof (ftbl_3args)); ftbl_3args[ 2] = 
(ne10_func_3args_t) ne10_len_vec2f_c; @@ -859,9 +835,9 @@ void test_len_case0() NE10_DST_ALLOC (thedst_c, guarded_dst_c, fixed_length); NE10_DST_ALLOC (thedst_neon, guarded_dst_neon, fixed_length); - for (func_loop = 1; func_loop < MAX_VEC_COMPONENTS; func_loop++) + for (ne10_int32_t func_loop = 1; func_loop < MAX_VEC_COMPONENTS; func_loop++) { - for (loop = 0; loop < TEST_ITERATION; loop++) + for (ne10_int32_t loop = 0; loop < TEST_ITERATION; loop++) { vec_size = func_loop + 1; @@ -903,13 +879,13 @@ void test_len_case0() NE10_DST_ALLOC (perftest_thedst_c, perftest_guarded_dst_c, perftest_length); NE10_DST_ALLOC (perftest_thedst_neon, perftest_guarded_dst_neon, perftest_length); - for (func_loop = 1; func_loop < MAX_VEC_COMPONENTS; func_loop++) + for (ne10_int32_t func_loop = 1; func_loop < MAX_VEC_COMPONENTS; func_loop++) { GET_TIME (time_c, - for (loop = 0; loop < PERF_TEST_ITERATION; loop++) ftbl_3args[2 * func_loop] (perftest_thedst_c, perftest_thesrc1, loop); + for (ne10_int32_t loop = 0; loop < PERF_TEST_ITERATION; loop++) ftbl_3args[2 * func_loop] (perftest_thedst_c, perftest_thesrc1, loop); ); GET_TIME (time_neon, - for (loop = 0; loop < PERF_TEST_ITERATION; loop++) ftbl_3args[2 * func_loop + 1] (perftest_thedst_neon, perftest_thesrc1, loop); + for (ne10_int32_t loop = 0; loop < PERF_TEST_ITERATION; loop++) ftbl_3args[2 * func_loop + 1] (perftest_thedst_neon, perftest_thesrc1, loop); ); time_speedup = (ne10_float32_t) time_c / time_neon; time_savings = ( ( (ne10_float32_t) (time_c - time_neon)) / time_c) * 100; @@ -928,9 +904,6 @@ void test_len_case0() void test_mlac_case0() { #define MAX_VEC_COMPONENTS 4 - ne10_int32_t loop; - ne10_int32_t func_loop; - fprintf (stdout, "----------%30s start\n", __FUNCTION__); /* init function table */ @@ -959,9 +932,9 @@ void test_mlac_case0() NE10_DST_ALLOC (thedst_c, guarded_dst_c, fixed_length); NE10_DST_ALLOC (thedst_neon, guarded_dst_neon, fixed_length); - for (func_loop = 0; func_loop < MAX_VEC_COMPONENTS; 
func_loop++) + for (ne10_int32_t func_loop = 0; func_loop < MAX_VEC_COMPONENTS; func_loop++) { - for (loop = 0; loop < TEST_ITERATION; loop++) + for (ne10_int32_t loop = 0; loop < TEST_ITERATION; loop++) { vec_size = func_loop + 1; @@ -1017,13 +990,13 @@ void test_mlac_case0() NE10_DST_ALLOC (perftest_thedst_c, perftest_guarded_dst_c, perftest_length); NE10_DST_ALLOC (perftest_thedst_neon, perftest_guarded_dst_neon, perftest_length); - for (func_loop = 0; func_loop < 1; func_loop++) + for (ne10_int32_t func_loop = 0; func_loop < 1; func_loop++) { GET_TIME (time_c, - for (loop = 0; loop < PERF_TEST_ITERATION; loop++) ftbl_5args_cst[2 * func_loop] (perftest_thedst_c, perftest_theacc, perftest_thesrc1, perftest_thecst[0], loop); + for (ne10_int32_t loop = 0; loop < PERF_TEST_ITERATION; loop++) ftbl_5args_cst[2 * func_loop] (perftest_thedst_c, perftest_theacc, perftest_thesrc1, perftest_thecst[0], loop); ); GET_TIME (time_neon, - for (loop = 0; loop < PERF_TEST_ITERATION; loop++) ftbl_5args_cst[2 * func_loop + 1] (perftest_thedst_neon, perftest_theacc, perftest_thesrc1, perftest_thecst[0], loop); + for (ne10_int32_t loop = 0; loop < PERF_TEST_ITERATION; loop++) ftbl_5args_cst[2 * func_loop + 1] (perftest_thedst_neon, perftest_theacc, perftest_thesrc1, perftest_thecst[0], loop); ); time_speedup = (ne10_float32_t) time_c / time_neon; time_savings = ( ( (ne10_float32_t) (time_c - time_neon)) / time_c) * 100; @@ -1032,10 +1005,10 @@ void test_mlac_case0() - for (; func_loop < MAX_VEC_COMPONENTS; func_loop++) + for (ne10_int32_t func_loop = 1; func_loop < MAX_VEC_COMPONENTS; func_loop++) /* index 0 is timed by the loop above */ { GET_TIME (time_c, - for (loop = 0; loop < PERF_TEST_ITERATION; loop++) ftbl_5args[2 * func_loop] (perftest_thedst_c, perftest_theacc, perftest_thesrc1, perftest_thecst, loop); + for (ne10_int32_t loop = 0; loop < PERF_TEST_ITERATION; loop++) ftbl_5args[2 * func_loop] (perftest_thedst_c, perftest_theacc, perftest_thesrc1, perftest_thecst, loop); ); GET_TIME (time_neon, - for (loop = 0; loop < PERF_TEST_ITERATION; loop++) ftbl_5args[2 * func_loop + 1] 
(perftest_thedst_neon, perftest_theacc, perftest_thesrc1, perftest_thecst, loop); + for (ne10_int32_t loop = 0; loop < PERF_TEST_ITERATION; loop++) ftbl_5args[2 * func_loop + 1] (perftest_thedst_neon, perftest_theacc, perftest_thesrc1, perftest_thecst, loop); ); time_speedup = (ne10_float32_t) time_c / time_neon; time_savings = ( ( (ne10_float32_t) (time_c - time_neon)) / time_c) * 100; @@ -1056,9 +1029,6 @@ void test_mlac_case0() void test_mla_case0() { #define MAX_VEC_COMPONENTS 4 - ne10_int32_t loop; - ne10_int32_t func_loop; - fprintf (stdout, "----------%30s start\n", __FUNCTION__); /* init function table */ @@ -1086,9 +1056,9 @@ void test_mla_case0() NE10_DST_ALLOC (thedst_c, guarded_dst_c, fixed_length); NE10_DST_ALLOC (thedst_neon, guarded_dst_neon, fixed_length); - for (func_loop = 0; func_loop < MAX_VEC_COMPONENTS; func_loop++) + for (ne10_int32_t func_loop = 0; func_loop < MAX_VEC_COMPONENTS; func_loop++) { - for (loop = 0; loop < TEST_ITERATION; loop++) + for (ne10_int32_t loop = 0; loop < TEST_ITERATION; loop++) { vec_size = func_loop + 1; @@ -1136,13 +1106,13 @@ void test_mla_case0() NE10_DST_ALLOC (perftest_thedst_c, perftest_guarded_dst_c, perftest_length); NE10_DST_ALLOC (perftest_thedst_neon, perftest_guarded_dst_neon, perftest_length); - for (func_loop = 0; func_loop < MAX_VEC_COMPONENTS; func_loop++) + for (ne10_int32_t func_loop = 0; func_loop < MAX_VEC_COMPONENTS; func_loop++) { GET_TIME (time_c, - for (loop = 0; loop < PERF_TEST_ITERATION; loop++) ftbl_5args[2 * func_loop] (perftest_thedst_c, perftest_theacc, perftest_thesrc1, perftest_thesrc2, loop); + for (ne10_int32_t loop = 0; loop < PERF_TEST_ITERATION; loop++) ftbl_5args[2 * func_loop] (perftest_thedst_c, perftest_theacc, perftest_thesrc1, perftest_thesrc2, loop); ); GET_TIME (time_neon, - for (loop = 0; loop < PERF_TEST_ITERATION; loop++) ftbl_5args[2 * func_loop + 1] (perftest_thedst_neon, perftest_theacc, perftest_thesrc1, perftest_thesrc2, loop); + for (ne10_int32_t loop = 0; loop < 
PERF_TEST_ITERATION; loop++) ftbl_5args[2 * func_loop + 1] (perftest_thedst_neon, perftest_theacc, perftest_thesrc1, perftest_thesrc2, loop); ); time_speedup = (ne10_float32_t) time_c / time_neon; time_savings = ( ( (ne10_float32_t) (time_c - time_neon)) / time_c) * 100; @@ -1163,9 +1133,6 @@ void test_mla_case0() void test_mulc_case0() { #define MAX_VEC_COMPONENTS 4 - ne10_int32_t loop; - ne10_int32_t func_loop; - fprintf (stdout, "----------%30s start\n", __FUNCTION__); /* init function table */ @@ -1193,9 +1160,9 @@ void test_mulc_case0() NE10_DST_ALLOC (thedst_c, guarded_dst_c, fixed_length); NE10_DST_ALLOC (thedst_neon, guarded_dst_neon, fixed_length); - for (func_loop = 0; func_loop < MAX_VEC_COMPONENTS; func_loop++) + for (ne10_int32_t func_loop = 0; func_loop < MAX_VEC_COMPONENTS; func_loop++) { - for (loop = 0; loop < TEST_ITERATION; loop++) + for (ne10_int32_t loop = 0; loop < TEST_ITERATION; loop++) { vec_size = func_loop + 1; @@ -1248,13 +1215,13 @@ void test_mulc_case0() NE10_DST_ALLOC (perftest_thedst_c, perftest_guarded_dst_c, perftest_length); NE10_DST_ALLOC (perftest_thedst_neon, perftest_guarded_dst_neon, perftest_length); - for (func_loop = 0; func_loop < 1; func_loop++) + for (ne10_int32_t func_loop = 0; func_loop < 1; func_loop++) { GET_TIME (time_c, - for (loop = 0; loop < PERF_TEST_ITERATION; loop++) ftbl_4args_cst[2 * func_loop] (perftest_thedst_c, perftest_thesrc1, perftest_thecst[0], loop); + for (ne10_int32_t loop = 0; loop < PERF_TEST_ITERATION; loop++) ftbl_4args_cst[2 * func_loop] (perftest_thedst_c, perftest_thesrc1, perftest_thecst[0], loop); ); GET_TIME (time_neon, - for (loop = 0; loop < PERF_TEST_ITERATION; loop++) ftbl_4args_cst[2 * func_loop + 1] (perftest_thedst_neon, perftest_thesrc1, perftest_thecst[0], loop); + for (ne10_int32_t loop = 0; loop < PERF_TEST_ITERATION; loop++) ftbl_4args_cst[2 * func_loop + 1] (perftest_thedst_neon, perftest_thesrc1, perftest_thecst[0], loop); ); time_speedup = (ne10_float32_t) time_c / 
time_neon; time_savings = ( ( (ne10_float32_t) (time_c - time_neon)) / time_c) * 100; @@ -1263,10 +1230,10 @@ void test_mulc_case0() - for (; func_loop < MAX_VEC_COMPONENTS; func_loop++) + for (ne10_int32_t func_loop = 1; func_loop < MAX_VEC_COMPONENTS; func_loop++) /* index 0 is timed by the preceding loop */ { GET_TIME (time_c, - for (loop = 0; loop < PERF_TEST_ITERATION; loop++) ftbl_4args[2 * func_loop] (perftest_thedst_c, perftest_thesrc1, perftest_thecst, loop); + for (ne10_int32_t loop = 0; loop < PERF_TEST_ITERATION; loop++) ftbl_4args[2 * func_loop] (perftest_thedst_c, perftest_thesrc1, perftest_thecst, loop); ); GET_TIME (time_neon, - for (loop = 0; loop < PERF_TEST_ITERATION; loop++) ftbl_4args[2 * func_loop + 1] (perftest_thedst_neon, perftest_thesrc1, perftest_thecst, loop); + for (ne10_int32_t loop = 0; loop < PERF_TEST_ITERATION; loop++) ftbl_4args[2 * func_loop + 1] (perftest_thedst_neon, perftest_thesrc1, perftest_thecst, loop); ); time_speedup = (ne10_float32_t) time_c / time_neon; time_savings = ( ( (ne10_float32_t) (time_c - time_neon)) / time_c) * 100; @@ -1286,9 +1253,6 @@ void test_mulc_case0() void test_mul_case0() { #define MAX_VEC_COMPONENTS 4 - ne10_int32_t loop; - ne10_int32_t func_loop; - fprintf (stdout, "----------%30s start\n", __FUNCTION__); /* init function table */ @@ -1315,9 +1279,9 @@ void test_mul_case0() NE10_DST_ALLOC (thedst_c, guarded_dst_c, fixed_length); NE10_DST_ALLOC (thedst_neon, guarded_dst_neon, fixed_length); - for (func_loop = 0; func_loop < MAX_VEC_COMPONENTS; func_loop++) + for (ne10_int32_t func_loop = 0; func_loop < MAX_VEC_COMPONENTS; func_loop++) { - for (loop = 0; loop < TEST_ITERATION; loop++) + for (ne10_int32_t loop = 0; loop < TEST_ITERATION; loop++) { vec_size = func_loop + 1; @@ -1362,13 +1326,13 @@ void test_mul_case0() NE10_DST_ALLOC (perftest_thedst_c, perftest_guarded_dst_c, perftest_length); NE10_DST_ALLOC (perftest_thedst_neon, perftest_guarded_dst_neon, perftest_length); - for (func_loop = 0; func_loop < MAX_VEC_COMPONENTS; func_loop++) + for (ne10_int32_t func_loop = 0; func_loop < MAX_VEC_COMPONENTS; func_loop++) { 
GET_TIME (time_c, - for (loop = 0; loop < PERF_TEST_ITERATION; loop++) ftbl_4args[2 * func_loop] (perftest_thedst_c, perftest_thesrc1, perftest_thesrc2, loop); + for (ne10_int32_t loop = 0; loop < PERF_TEST_ITERATION; loop++) ftbl_4args[2 * func_loop] (perftest_thedst_c, perftest_thesrc1, perftest_thesrc2, loop); ); GET_TIME (time_neon, - for (loop = 0; loop < PERF_TEST_ITERATION; loop++) ftbl_4args[2 * func_loop + 1] (perftest_thedst_neon, perftest_thesrc1, perftest_thesrc2, loop); + for (ne10_int32_t loop = 0; loop < PERF_TEST_ITERATION; loop++) ftbl_4args[2 * func_loop + 1] (perftest_thedst_neon, perftest_thesrc1, perftest_thesrc2, loop); ); time_speedup = (ne10_float32_t) time_c / time_neon; time_savings = ( ( (ne10_float32_t) (time_c - time_neon)) / time_c) * 100; @@ -1388,9 +1352,6 @@ void test_mul_case0() void test_normalize_case0() { #define MAX_VEC_COMPONENTS 4 - ne10_int32_t loop; - ne10_int32_t func_loop; - /* init function table */ memset (ftbl_3args, 0, sizeof (ftbl_3args)); ftbl_3args[ 2] = (ne10_func_3args_t) ne10_normalize_vec2f_c; @@ -1414,9 +1375,9 @@ void test_normalize_case0() NE10_DST_ALLOC (thedst_c, guarded_dst_c, fixed_length); NE10_DST_ALLOC (thedst_neon, guarded_dst_neon, fixed_length); - for (func_loop = 1; func_loop < MAX_VEC_COMPONENTS; func_loop++) + for (ne10_int32_t func_loop = 1; func_loop < MAX_VEC_COMPONENTS; func_loop++) { - for (loop = 0; loop < TEST_ITERATION; loop++) + for (ne10_int32_t loop = 0; loop < TEST_ITERATION; loop++) { vec_size = func_loop + 1; @@ -1458,13 +1419,13 @@ void test_normalize_case0() NE10_DST_ALLOC (perftest_thedst_c, perftest_guarded_dst_c, perftest_length); NE10_DST_ALLOC (perftest_thedst_neon, perftest_guarded_dst_neon, perftest_length); - for (func_loop = 1; func_loop < MAX_VEC_COMPONENTS; func_loop++) + for (ne10_int32_t func_loop = 1; func_loop < MAX_VEC_COMPONENTS; func_loop++) { GET_TIME (time_c, - for (loop = 0; loop < PERF_TEST_ITERATION; loop++) ftbl_3args[2 * func_loop] (perftest_thedst_c, 
perftest_thesrc1, loop); + for (ne10_int32_t loop = 0; loop < PERF_TEST_ITERATION; loop++) ftbl_3args[2 * func_loop] (perftest_thedst_c, perftest_thesrc1, loop); ); GET_TIME (time_neon, - for (loop = 0; loop < PERF_TEST_ITERATION; loop++) ftbl_3args[2 * func_loop + 1] (perftest_thedst_neon, perftest_thesrc1, loop); + for (ne10_int32_t loop = 0; loop < PERF_TEST_ITERATION; loop++) ftbl_3args[2 * func_loop + 1] (perftest_thedst_neon, perftest_thesrc1, loop); ); time_speedup = (ne10_float32_t) time_c / time_neon; time_savings = ( ( (ne10_float32_t) (time_c - time_neon)) / time_c) * 100; @@ -1483,9 +1444,6 @@ void test_normalize_case0() void test_rsbc_case0() { #define MAX_VEC_COMPONENTS 4 - ne10_int32_t loop; - ne10_int32_t func_loop; - fprintf (stdout, "----------%30s start\n", __FUNCTION__); /* init function table */ @@ -1513,9 +1471,9 @@ void test_rsbc_case0() NE10_DST_ALLOC (thedst_c, guarded_dst_c, fixed_length); NE10_DST_ALLOC (thedst_neon, guarded_dst_neon, fixed_length); - for (func_loop = 0; func_loop < MAX_VEC_COMPONENTS; func_loop++) + for (ne10_int32_t func_loop = 0; func_loop < MAX_VEC_COMPONENTS; func_loop++) { - for (loop = 0; loop < TEST_ITERATION; loop++) + for (ne10_int32_t loop = 0; loop < TEST_ITERATION; loop++) { vec_size = func_loop + 1; @@ -1568,13 +1526,13 @@ void test_rsbc_case0() NE10_DST_ALLOC (perftest_thedst_c, perftest_guarded_dst_c, perftest_length); NE10_DST_ALLOC (perftest_thedst_neon, perftest_guarded_dst_neon, perftest_length); - for (func_loop = 0; func_loop < 1; func_loop++) + for (ne10_int32_t func_loop = 0; func_loop < 1; func_loop++) { GET_TIME (time_c, - for (loop = 0; loop < PERF_TEST_ITERATION; loop++) ftbl_4args_cst[2 * func_loop] (perftest_thedst_c, perftest_thesrc1, perftest_thecst[0], loop); + for (ne10_int32_t loop = 0; loop < PERF_TEST_ITERATION; loop++) ftbl_4args_cst[2 * func_loop] (perftest_thedst_c, perftest_thesrc1, perftest_thecst[0], loop); ); GET_TIME (time_neon, - for (loop = 0; loop < PERF_TEST_ITERATION; 
loop++) ftbl_4args_cst[2 * func_loop + 1] (perftest_thedst_neon, perftest_thesrc1, perftest_thecst[0], loop); + for (ne10_int32_t loop = 0; loop < PERF_TEST_ITERATION; loop++) ftbl_4args_cst[2 * func_loop + 1] (perftest_thedst_neon, perftest_thesrc1, perftest_thecst[0], loop); ); time_speedup = (ne10_float32_t) time_c / time_neon; time_savings = ( ( (ne10_float32_t) (time_c - time_neon)) / time_c) * 100; @@ -1583,10 +1541,10 @@ void test_rsbc_case0() - for (; func_loop < MAX_VEC_COMPONENTS; func_loop++) + for (ne10_int32_t func_loop = 1; func_loop < MAX_VEC_COMPONENTS; func_loop++) /* index 0 is timed by the preceding loop */ { GET_TIME (time_c, - for (loop = 0; loop < PERF_TEST_ITERATION; loop++) ftbl_4args[2 * func_loop] (perftest_thedst_c, perftest_thesrc1, perftest_thecst, loop); + for (ne10_int32_t loop = 0; loop < PERF_TEST_ITERATION; loop++) ftbl_4args[2 * func_loop] (perftest_thedst_c, perftest_thesrc1, perftest_thecst, loop); ); GET_TIME (time_neon, - for (loop = 0; loop < PERF_TEST_ITERATION; loop++) ftbl_4args[2 * func_loop + 1] (perftest_thedst_neon, perftest_thesrc1, perftest_thecst, loop); + for (ne10_int32_t loop = 0; loop < PERF_TEST_ITERATION; loop++) ftbl_4args[2 * func_loop + 1] (perftest_thedst_neon, perftest_thesrc1, perftest_thecst, loop); ); time_speedup = (ne10_float32_t) time_c / time_neon; time_savings = ( ( (ne10_float32_t) (time_c - time_neon)) / time_c) * 100; @@ -1606,9 +1564,6 @@ void test_rsbc_case0() void test_setc_case0() { #define MAX_VEC_COMPONENTS 4 - ne10_int32_t loop; - ne10_int32_t func_loop; - fprintf (stdout, "----------%30s start\n", __FUNCTION__); /* init function table */ @@ -1635,9 +1590,9 @@ void test_setc_case0() NE10_DST_ALLOC (thedst_c, guarded_dst_c, fixed_length); NE10_DST_ALLOC (thedst_neon, guarded_dst_neon, fixed_length); - for (func_loop = 0; func_loop < MAX_VEC_COMPONENTS; func_loop++) + for (ne10_int32_t func_loop = 0; func_loop < MAX_VEC_COMPONENTS; func_loop++) { - for (loop = 0; loop < TEST_ITERATION; loop++) + for (ne10_int32_t loop = 0; loop < TEST_ITERATION; loop++) { vec_size = func_loop + 1; @@ -1688,13 +1643,13 @@ void 
test_setc_case0() NE10_DST_ALLOC (perftest_thedst_c, perftest_guarded_dst_c, perftest_length); NE10_DST_ALLOC (perftest_thedst_neon, perftest_guarded_dst_neon, perftest_length); - for (func_loop = 0; func_loop < 1; func_loop++) + for (ne10_int32_t func_loop = 0; func_loop < 1; func_loop++) { GET_TIME (time_c, - for (loop = 0; loop < PERF_TEST_ITERATION; loop++) ftbl_3args_cst[2 * func_loop] (perftest_thedst_c, perftest_thecst[0], loop); + for (ne10_int32_t loop = 0; loop < PERF_TEST_ITERATION; loop++) ftbl_3args_cst[2 * func_loop] (perftest_thedst_c, perftest_thecst[0], loop); ); GET_TIME (time_neon, - for (loop = 0; loop < PERF_TEST_ITERATION; loop++) ftbl_3args_cst[2 * func_loop + 1] (perftest_thedst_neon, perftest_thecst[0], loop); + for (ne10_int32_t loop = 0; loop < PERF_TEST_ITERATION; loop++) ftbl_3args_cst[2 * func_loop + 1] (perftest_thedst_neon, perftest_thecst[0], loop); ); time_speedup = (ne10_float32_t) time_c / time_neon; time_savings = ( ( (ne10_float32_t) (time_c - time_neon)) / time_c) * 100; @@ -1703,10 +1658,10 @@ void test_setc_case0() - for (; func_loop < MAX_VEC_COMPONENTS; func_loop++) + for (ne10_int32_t func_loop = 1; func_loop < MAX_VEC_COMPONENTS; func_loop++) /* index 0 is timed by the loop above */ { GET_TIME (time_c, - for (loop = 0; loop < PERF_TEST_ITERATION; loop++) ftbl_3args[2 * func_loop] (perftest_thedst_c, perftest_thecst, loop); + for (ne10_int32_t loop = 0; loop < PERF_TEST_ITERATION; loop++) ftbl_3args[2 * func_loop] (perftest_thedst_c, perftest_thecst, loop); ); GET_TIME (time_neon, - for (loop = 0; loop < PERF_TEST_ITERATION; loop++) ftbl_3args[2 * func_loop + 1] (perftest_thedst_neon, perftest_thecst, loop); + for (ne10_int32_t loop = 0; loop < PERF_TEST_ITERATION; loop++) ftbl_3args[2 * func_loop + 1] (perftest_thedst_neon, perftest_thecst, loop); ); time_speedup = (ne10_float32_t) time_c / time_neon; time_savings = ( ( (ne10_float32_t) (time_c - time_neon)) / time_c) * 100; @@ -1725,9 +1680,6 @@ void test_setc_case0() void test_subc_case0() { #define MAX_VEC_COMPONENTS 4 - ne10_int32_t loop; - ne10_int32_t func_loop; - fprintf (stdout, 
"----------%30s start\n", __FUNCTION__); /* init function table */ @@ -1755,9 +1707,9 @@ void test_subc_case0() NE10_DST_ALLOC (thedst_c, guarded_dst_c, fixed_length); NE10_DST_ALLOC (thedst_neon, guarded_dst_neon, fixed_length); - for (func_loop = 0; func_loop < MAX_VEC_COMPONENTS; func_loop++) + for (ne10_int32_t func_loop = 0; func_loop < MAX_VEC_COMPONENTS; func_loop++) { - for (loop = 0; loop < TEST_ITERATION; loop++) + for (ne10_int32_t loop = 0; loop < TEST_ITERATION; loop++) { vec_size = func_loop + 1; @@ -1810,13 +1762,13 @@ void test_subc_case0() NE10_DST_ALLOC (perftest_thedst_c, perftest_guarded_dst_c, perftest_length); NE10_DST_ALLOC (perftest_thedst_neon, perftest_guarded_dst_neon, perftest_length); - for (func_loop = 0; func_loop < 1; func_loop++) + for (ne10_int32_t func_loop = 0; func_loop < 1; func_loop++) { GET_TIME (time_c, - for (loop = 0; loop < PERF_TEST_ITERATION; loop++) ftbl_4args_cst[2 * func_loop] (perftest_thedst_c, perftest_thesrc1, perftest_thecst[0], loop); + for (ne10_int32_t loop = 0; loop < PERF_TEST_ITERATION; loop++) ftbl_4args_cst[2 * func_loop] (perftest_thedst_c, perftest_thesrc1, perftest_thecst[0], loop); ); GET_TIME (time_neon, - for (loop = 0; loop < PERF_TEST_ITERATION; loop++) ftbl_4args_cst[2 * func_loop + 1] (perftest_thedst_neon, perftest_thesrc1, perftest_thecst[0], loop); + for (ne10_int32_t loop = 0; loop < PERF_TEST_ITERATION; loop++) ftbl_4args_cst[2 * func_loop + 1] (perftest_thedst_neon, perftest_thesrc1, perftest_thecst[0], loop); ); time_speedup = (ne10_float32_t) time_c / time_neon; time_savings = ( ( (ne10_float32_t) (time_c - time_neon)) / time_c) * 100; @@ -1825,10 +1777,10 @@ void test_subc_case0() - for (; func_loop < MAX_VEC_COMPONENTS; func_loop++) + for (ne10_int32_t func_loop = 1; func_loop < MAX_VEC_COMPONENTS; func_loop++) /* index 0 is timed by the loop above */ { GET_TIME (time_c, - for (loop = 0; loop < PERF_TEST_ITERATION; loop++) ftbl_4args[2 * func_loop] (perftest_thedst_c, perftest_thesrc1, perftest_thecst, loop); + for (ne10_int32_t loop = 0; loop < PERF_TEST_ITERATION; loop++) ftbl_4args[2 * func_loop] 
(perftest_thedst_c, perftest_thesrc1, perftest_thecst, loop); ); GET_TIME (time_neon, - for (loop = 0; loop < PERF_TEST_ITERATION; loop++) ftbl_4args[2 * func_loop + 1] (perftest_thedst_neon, perftest_thesrc1, perftest_thecst, loop); + for (ne10_int32_t loop = 0; loop < PERF_TEST_ITERATION; loop++) ftbl_4args[2 * func_loop + 1] (perftest_thedst_neon, perftest_thesrc1, perftest_thecst, loop); ); time_speedup = (ne10_float32_t) time_c / time_neon; time_savings = ( ( (ne10_float32_t) (time_c - time_neon)) / time_c) * 100; @@ -1848,9 +1800,6 @@ void test_subc_case0() void test_sub_case0() { #define MAX_VEC_COMPONENTS 4 - ne10_int32_t loop; - ne10_int32_t func_loop; - fprintf (stdout, "----------%30s start\n", __FUNCTION__); /* init function table */ @@ -1877,9 +1826,9 @@ void test_sub_case0() NE10_DST_ALLOC (thedst_c, guarded_dst_c, fixed_length); NE10_DST_ALLOC (thedst_neon, guarded_dst_neon, fixed_length); - for (func_loop = 0; func_loop < MAX_VEC_COMPONENTS; func_loop++) + for (ne10_int32_t func_loop = 0; func_loop < MAX_VEC_COMPONENTS; func_loop++) { - for (loop = 0; loop < TEST_ITERATION; loop++) + for (ne10_int32_t loop = 0; loop < TEST_ITERATION; loop++) { vec_size = func_loop + 1; @@ -1924,13 +1873,13 @@ void test_sub_case0() NE10_DST_ALLOC (perftest_thedst_c, perftest_guarded_dst_c, perftest_length); NE10_DST_ALLOC (perftest_thedst_neon, perftest_guarded_dst_neon, perftest_length); - for (func_loop = 0; func_loop < MAX_VEC_COMPONENTS; func_loop++) + for (ne10_int32_t func_loop = 0; func_loop < MAX_VEC_COMPONENTS; func_loop++) { GET_TIME (time_c, - for (loop = 0; loop < PERF_TEST_ITERATION; loop++) ftbl_4args[2 * func_loop] (perftest_thedst_c, perftest_thesrc1, perftest_thesrc2, loop); + for (ne10_int32_t loop = 0; loop < PERF_TEST_ITERATION; loop++) ftbl_4args[2 * func_loop] (perftest_thedst_c, perftest_thesrc1, perftest_thesrc2, loop); ); GET_TIME (time_neon, - for (loop = 0; loop < PERF_TEST_ITERATION; loop++) ftbl_4args[2 * func_loop + 1] 
(perftest_thedst_neon, perftest_thesrc1, perftest_thesrc2, loop); + for (ne10_int32_t loop = 0; loop < PERF_TEST_ITERATION; loop++) ftbl_4args[2 * func_loop + 1] (perftest_thedst_neon, perftest_thesrc1, perftest_thesrc2, loop); ); time_speedup = (ne10_float32_t) time_c / time_neon; time_savings = ( ( (ne10_float32_t) (time_c - time_neon)) / time_c) * 100; @@ -1950,9 +1899,6 @@ void test_sub_case0() void test_addmat_case0() { #define MAX_VEC_COMPONENTS 4 - ne10_int32_t loop; - ne10_int32_t func_loop; - fprintf (stdout, "----------%30s start\n", __FUNCTION__); /* init function table */ @@ -1977,9 +1923,9 @@ void test_addmat_case0() NE10_DST_ALLOC (thedst_c, guarded_dst_c, fixed_length); NE10_DST_ALLOC (thedst_neon, guarded_dst_neon, fixed_length); - for (func_loop = 1; func_loop < MAX_VEC_COMPONENTS; func_loop++) + for (ne10_int32_t func_loop = 1; func_loop < MAX_VEC_COMPONENTS; func_loop++) { - for (loop = 0; loop < TEST_ITERATION; loop++) + for (ne10_int32_t loop = 0; loop < TEST_ITERATION; loop++) { vec_size = (func_loop + 1) * (func_loop + 1); @@ -2024,13 +1970,13 @@ void test_addmat_case0() NE10_DST_ALLOC (perftest_thedst_c, perftest_guarded_dst_c, perftest_length); NE10_DST_ALLOC (perftest_thedst_neon, perftest_guarded_dst_neon, perftest_length); - for (func_loop = 1; func_loop < MAX_VEC_COMPONENTS; func_loop++) + for (ne10_int32_t func_loop = 1; func_loop < MAX_VEC_COMPONENTS; func_loop++) { GET_TIME (time_c, - for (loop = 0; loop < PERF_TEST_ITERATION; loop++) ftbl_4args[2 * func_loop] (perftest_thedst_c, perftest_thesrc1, perftest_thesrc2, loop); + for (ne10_int32_t loop = 0; loop < PERF_TEST_ITERATION; loop++) ftbl_4args[2 * func_loop] (perftest_thedst_c, perftest_thesrc1, perftest_thesrc2, loop); ); GET_TIME (time_neon, - for (loop = 0; loop < PERF_TEST_ITERATION; loop++) ftbl_4args[2 * func_loop + 1] (perftest_thedst_neon, perftest_thesrc1, perftest_thesrc2, loop); + for (ne10_int32_t loop = 0; loop < PERF_TEST_ITERATION; loop++) ftbl_4args[2 * func_loop 
+ 1] (perftest_thedst_neon, perftest_thesrc1, perftest_thesrc2, loop); ); time_speedup = (ne10_float32_t) time_c / time_neon; time_savings = ( ( (ne10_float32_t) (time_c - time_neon)) / time_c) * 100; @@ -2050,9 +1996,6 @@ void test_addmat_case0() void test_detmat_case0() { #define MAX_VEC_COMPONENTS 4 - ne10_int32_t loop; - ne10_int32_t func_loop; - fprintf (stdout, "----------%30s start\n", __FUNCTION__); /* init function table */ @@ -2076,9 +2019,9 @@ void test_detmat_case0() NE10_DST_ALLOC (thedst_c, guarded_dst_c, fixed_length); NE10_DST_ALLOC (thedst_neon, guarded_dst_neon, fixed_length); - for (func_loop = 1; func_loop < MAX_VEC_COMPONENTS; func_loop++) + for (ne10_int32_t func_loop = 1; func_loop < MAX_VEC_COMPONENTS; func_loop++) { - for (loop = 0; loop < TEST_ITERATION; loop++) + for (ne10_int32_t loop = 0; loop < TEST_ITERATION; loop++) { vec_size = (func_loop + 1) * (func_loop + 1); @@ -2120,13 +2063,13 @@ void test_detmat_case0() NE10_DST_ALLOC (perftest_thedst_c, perftest_guarded_dst_c, perftest_length); NE10_DST_ALLOC (perftest_thedst_neon, perftest_guarded_dst_neon, perftest_length); - for (func_loop = 1; func_loop < MAX_VEC_COMPONENTS; func_loop++) + for (ne10_int32_t func_loop = 1; func_loop < MAX_VEC_COMPONENTS; func_loop++) { GET_TIME (time_c, - for (loop = 0; loop < PERF_TEST_ITERATION; loop++) ftbl_3args[2 * func_loop] (perftest_thedst_c, perftest_thesrc1, loop); + for (ne10_int32_t loop = 0; loop < PERF_TEST_ITERATION; loop++) ftbl_3args[2 * func_loop] (perftest_thedst_c, perftest_thesrc1, loop); ); GET_TIME (time_neon, - for (loop = 0; loop < PERF_TEST_ITERATION; loop++) ftbl_3args[2 * func_loop + 1] (perftest_thedst_neon, perftest_thesrc1, loop); + for (ne10_int32_t loop = 0; loop < PERF_TEST_ITERATION; loop++) ftbl_3args[2 * func_loop + 1] (perftest_thedst_neon, perftest_thesrc1, loop); ); time_speedup = (ne10_float32_t) time_c / time_neon; time_savings = ( ( (ne10_float32_t) (time_c - time_neon)) / time_c) * 100; @@ -2145,9 +2088,6 @@ 
void test_detmat_case0() void test_identitymat_case0() { #define MAX_VEC_COMPONENTS 4 - ne10_int32_t loop; - ne10_int32_t func_loop; - fprintf (stdout, "----------%30s start\n", __FUNCTION__); /* init function table */ @@ -2168,9 +2108,9 @@ void test_identitymat_case0() NE10_DST_ALLOC (thedst_c, guarded_dst_c, fixed_length); NE10_DST_ALLOC (thedst_neon, guarded_dst_neon, fixed_length); - for (func_loop = 1; func_loop < MAX_VEC_COMPONENTS; func_loop++) + for (ne10_int32_t func_loop = 1; func_loop < MAX_VEC_COMPONENTS; func_loop++) { - for (loop = 0; loop < TEST_ITERATION; loop++) + for (ne10_int32_t loop = 0; loop < TEST_ITERATION; loop++) { vec_size = (func_loop + 1) * (func_loop + 1); @@ -2203,13 +2143,13 @@ void test_identitymat_case0() NE10_DST_ALLOC (perftest_thedst_c, perftest_guarded_dst_c, perftest_length); NE10_DST_ALLOC (perftest_thedst_neon, perftest_guarded_dst_neon, perftest_length); - for (func_loop = 1; func_loop < MAX_VEC_COMPONENTS; func_loop++) + for (ne10_int32_t func_loop = 1; func_loop < MAX_VEC_COMPONENTS; func_loop++) { GET_TIME (time_c, - for (loop = 0; loop < PERF_TEST_ITERATION; loop++) ftbl_2args[2 * func_loop] (perftest_thedst_c, loop); + for (ne10_int32_t loop = 0; loop < PERF_TEST_ITERATION; loop++) ftbl_2args[2 * func_loop] (perftest_thedst_c, loop); ); GET_TIME (time_neon, - for (loop = 0; loop < PERF_TEST_ITERATION; loop++) ftbl_2args[2 * func_loop + 1] (perftest_thedst_neon, loop); + for (ne10_int32_t loop = 0; loop < PERF_TEST_ITERATION; loop++) ftbl_2args[2 * func_loop + 1] (perftest_thedst_neon, loop); ); time_speedup = (ne10_float32_t) time_c / time_neon; time_savings = ( ( (ne10_float32_t) (time_c - time_neon)) / time_c) * 100; @@ -2227,9 +2167,6 @@ void test_identitymat_case0() void test_invmat_case0() { #define MAX_VEC_COMPONENTS 4 - ne10_int32_t loop; - ne10_int32_t func_loop; - fprintf (stdout, "----------%30s start\n", __FUNCTION__); /* init function table */ @@ -2253,9 +2190,9 @@ void test_invmat_case0() NE10_DST_ALLOC 
(thedst_c, guarded_dst_c, fixed_length); NE10_DST_ALLOC (thedst_neon, guarded_dst_neon, fixed_length); - for (func_loop = 1; func_loop < MAX_VEC_COMPONENTS; func_loop++) + for (ne10_int32_t func_loop = 1; func_loop < MAX_VEC_COMPONENTS; func_loop++) { - for (loop = 0; loop < TEST_ITERATION; loop++) + for (ne10_int32_t loop = 0; loop < TEST_ITERATION; loop++) { vec_size = (func_loop + 1) * (func_loop + 1); @@ -2297,13 +2234,13 @@ void test_invmat_case0() NE10_DST_ALLOC (perftest_thedst_c, perftest_guarded_dst_c, perftest_length); NE10_DST_ALLOC (perftest_thedst_neon, perftest_guarded_dst_neon, perftest_length); - for (func_loop = 1; func_loop < MAX_VEC_COMPONENTS; func_loop++) + for (ne10_int32_t func_loop = 1; func_loop < MAX_VEC_COMPONENTS; func_loop++) { GET_TIME (time_c, - for (loop = 0; loop < PERF_TEST_ITERATION; loop++) ftbl_3args[2 * func_loop] (perftest_thedst_c, perftest_thesrc1, loop); + for (ne10_int32_t loop = 0; loop < PERF_TEST_ITERATION; loop++) ftbl_3args[2 * func_loop] (perftest_thedst_c, perftest_thesrc1, loop); ); GET_TIME (time_neon, - for (loop = 0; loop < PERF_TEST_ITERATION; loop++) ftbl_3args[2 * func_loop + 1] (perftest_thedst_neon, perftest_thesrc1, loop); + for (ne10_int32_t loop = 0; loop < PERF_TEST_ITERATION; loop++) ftbl_3args[2 * func_loop + 1] (perftest_thedst_neon, perftest_thesrc1, loop); ); time_speedup = (ne10_float32_t) time_c / time_neon; time_savings = ( ( (ne10_float32_t) (time_c - time_neon)) / time_c) * 100; @@ -2322,9 +2259,6 @@ void test_invmat_case0() void test_mulmat_case0() { #define MAX_VEC_COMPONENTS 4 - ne10_int32_t loop; - ne10_int32_t func_loop; - fprintf (stdout, "----------%30s start\n", __FUNCTION__); /* init function table */ @@ -2349,9 +2283,9 @@ void test_mulmat_case0() NE10_DST_ALLOC (thedst_c, guarded_dst_c, fixed_length); NE10_DST_ALLOC (thedst_neon, guarded_dst_neon, fixed_length); - for (func_loop = 1; func_loop < MAX_VEC_COMPONENTS; func_loop++) + for (ne10_int32_t func_loop = 1; func_loop < 
MAX_VEC_COMPONENTS; func_loop++) { - for (loop = 0; loop < TEST_ITERATION; loop++) + for (ne10_int32_t loop = 0; loop < TEST_ITERATION; loop++) { vec_size = (func_loop + 1) * (func_loop + 1); @@ -2396,13 +2330,13 @@ void test_mulmat_case0() NE10_DST_ALLOC (perftest_thedst_c, perftest_guarded_dst_c, perftest_length); NE10_DST_ALLOC (perftest_thedst_neon, perftest_guarded_dst_neon, perftest_length); - for (func_loop = 1; func_loop < MAX_VEC_COMPONENTS; func_loop++) + for (ne10_int32_t func_loop = 1; func_loop < MAX_VEC_COMPONENTS; func_loop++) { GET_TIME (time_c, - for (loop = 0; loop < PERF_TEST_ITERATION; loop++) ftbl_4args[2 * func_loop] (perftest_thedst_c, perftest_thesrc1, perftest_thesrc2, loop); + for (ne10_int32_t loop = 0; loop < PERF_TEST_ITERATION; loop++) ftbl_4args[2 * func_loop] (perftest_thedst_c, perftest_thesrc1, perftest_thesrc2, loop); ); GET_TIME (time_neon, - for (loop = 0; loop < PERF_TEST_ITERATION; loop++) ftbl_4args[2 * func_loop + 1] (perftest_thedst_neon, perftest_thesrc1, perftest_thesrc2, loop); + for (ne10_int32_t loop = 0; loop < PERF_TEST_ITERATION; loop++) ftbl_4args[2 * func_loop + 1] (perftest_thedst_neon, perftest_thesrc1, perftest_thesrc2, loop); ); time_speedup = (ne10_float32_t) time_c / time_neon; time_savings = ( ( (ne10_float32_t) (time_c - time_neon)) / time_c) * 100; @@ -2422,9 +2356,6 @@ void test_mulmat_case0() void test_submat_case0() { #define MAX_VEC_COMPONENTS 4 - ne10_int32_t loop; - ne10_int32_t func_loop; - fprintf (stdout, "----------%30s start\n", __FUNCTION__); /* init function table */ @@ -2449,9 +2380,9 @@ void test_submat_case0() NE10_DST_ALLOC (thedst_c, guarded_dst_c, fixed_length); NE10_DST_ALLOC (thedst_neon, guarded_dst_neon, fixed_length); - for (func_loop = 1; func_loop < MAX_VEC_COMPONENTS; func_loop++) + for (ne10_int32_t func_loop = 1; func_loop < MAX_VEC_COMPONENTS; func_loop++) { - for (loop = 0; loop < TEST_ITERATION; loop++) + for (ne10_int32_t loop = 0; loop < TEST_ITERATION; loop++) { vec_size 
= (func_loop + 1) * (func_loop + 1); @@ -2496,13 +2427,13 @@ void test_submat_case0() NE10_DST_ALLOC (perftest_thedst_c, perftest_guarded_dst_c, perftest_length); NE10_DST_ALLOC (perftest_thedst_neon, perftest_guarded_dst_neon, perftest_length); - for (func_loop = 1; func_loop < MAX_VEC_COMPONENTS; func_loop++) + for (ne10_int32_t func_loop = 1; func_loop < MAX_VEC_COMPONENTS; func_loop++) { GET_TIME (time_c, - for (loop = 0; loop < PERF_TEST_ITERATION; loop++) ftbl_4args[2 * func_loop] (perftest_thedst_c, perftest_thesrc1, perftest_thesrc2, loop); + for (ne10_int32_t loop = 0; loop < PERF_TEST_ITERATION; loop++) ftbl_4args[2 * func_loop] (perftest_thedst_c, perftest_thesrc1, perftest_thesrc2, loop); ); GET_TIME (time_neon, - for (loop = 0; loop < PERF_TEST_ITERATION; loop++) ftbl_4args[2 * func_loop + 1] (perftest_thedst_neon, perftest_thesrc1, perftest_thesrc2, loop); + for (ne10_int32_t loop = 0; loop < PERF_TEST_ITERATION; loop++) ftbl_4args[2 * func_loop + 1] (perftest_thedst_neon, perftest_thesrc1, perftest_thesrc2, loop); ); time_speedup = (ne10_float32_t) time_c / time_neon; time_savings = ( ( (ne10_float32_t) (time_c - time_neon)) / time_c) * 100; @@ -2522,9 +2453,6 @@ void test_submat_case0() void test_transmat_case0() { #define MAX_VEC_COMPONENTS 4 - ne10_int32_t loop; - ne10_int32_t func_loop; - fprintf (stdout, "----------%30s start\n", __FUNCTION__); /* init function table */ @@ -2548,9 +2476,9 @@ void test_transmat_case0() NE10_DST_ALLOC (thedst_c, guarded_dst_c, fixed_length); NE10_DST_ALLOC (thedst_neon, guarded_dst_neon, fixed_length); - for (func_loop = 1; func_loop < MAX_VEC_COMPONENTS; func_loop++) + for (ne10_int32_t func_loop = 1; func_loop < MAX_VEC_COMPONENTS; func_loop++) { - for (loop = 0; loop < TEST_ITERATION; loop++) + for (ne10_int32_t loop = 0; loop < TEST_ITERATION; loop++) { vec_size = (func_loop + 1) * (func_loop + 1); @@ -2592,13 +2520,13 @@ void test_transmat_case0() NE10_DST_ALLOC (perftest_thedst_c, perftest_guarded_dst_c, 
perftest_length); NE10_DST_ALLOC (perftest_thedst_neon, perftest_guarded_dst_neon, perftest_length); - for (func_loop = 1; func_loop < MAX_VEC_COMPONENTS; func_loop++) + for (ne10_int32_t func_loop = 1; func_loop < MAX_VEC_COMPONENTS; func_loop++) { GET_TIME (time_c, - for (loop = 0; loop < PERF_TEST_ITERATION; loop++) ftbl_3args[2 * func_loop] (perftest_thedst_c, perftest_thesrc1, loop); + for (ne10_int32_t loop = 0; loop < PERF_TEST_ITERATION; loop++) ftbl_3args[2 * func_loop] (perftest_thedst_c, perftest_thesrc1, loop); ); GET_TIME (time_neon, - for (loop = 0; loop < PERF_TEST_ITERATION; loop++) ftbl_3args[2 * func_loop + 1] (perftest_thedst_neon, perftest_thesrc1, loop); + for (ne10_int32_t loop = 0; loop < PERF_TEST_ITERATION; loop++) ftbl_3args[2 * func_loop + 1] (perftest_thedst_neon, perftest_thesrc1, loop); ); time_speedup = (ne10_float32_t) time_c / time_neon; time_savings = ( ( (ne10_float32_t) (time_c - time_neon)) / time_c) * 100; @@ -2617,9 +2545,6 @@ void test_transmat_case0() void test_mulcmatvec_case0() { #define MAX_VEC_COMPONENTS 4 - ne10_int32_t loop; - ne10_int32_t func_loop; - fprintf (stdout, "----------%30s start\n", __FUNCTION__); /* init function table */ @@ -2644,9 +2569,9 @@ void test_mulcmatvec_case0() NE10_DST_ALLOC (thedst_c, guarded_dst_c, fixed_length); NE10_DST_ALLOC (thedst_neon, guarded_dst_neon, fixed_length); - for (func_loop = 1; func_loop < MAX_VEC_COMPONENTS; func_loop++) + for (ne10_int32_t func_loop = 1; func_loop < MAX_VEC_COMPONENTS; func_loop++) { - for (loop = 0; loop < TEST_ITERATION; loop++) + for (ne10_int32_t loop = 0; loop < TEST_ITERATION; loop++) { vec_size = func_loop + 1; @@ -2694,13 +2619,13 @@ void test_mulcmatvec_case0() NE10_DST_ALLOC (perftest_thedst_c, perftest_guarded_dst_c, perftest_length); NE10_DST_ALLOC (perftest_thedst_neon, perftest_guarded_dst_neon, perftest_length); - for (func_loop = 1; func_loop < MAX_VEC_COMPONENTS; func_loop++) + for (ne10_int32_t func_loop = 1; func_loop < 
MAX_VEC_COMPONENTS; func_loop++) { GET_TIME (time_c, - for (loop = 0; loop < PERF_TEST_ITERATION; loop++) ftbl_4args[2 * func_loop] (perftest_thedst_c, perftest_thecst, perftest_thesrc1, loop); + for (ne10_int32_t loop = 0; loop < PERF_TEST_ITERATION; loop++) ftbl_4args[2 * func_loop] (perftest_thedst_c, perftest_thecst, perftest_thesrc1, loop); ); GET_TIME (time_neon, - for (loop = 0; loop < PERF_TEST_ITERATION; loop++) ftbl_4args[2 * func_loop + 1] (perftest_thedst_neon, perftest_thecst, perftest_thesrc1, loop); + for (ne10_int32_t loop = 0; loop < PERF_TEST_ITERATION; loop++) ftbl_4args[2 * func_loop + 1] (perftest_thedst_neon, perftest_thecst, perftest_thesrc1, loop); ); time_speedup = (ne10_float32_t) time_c / time_neon; time_savings = ( ( (ne10_float32_t) (time_c - time_neon)) / time_c) * 100; diff --git a/test/src/NE10_random.c b/test/src/NE10_random.c index cb179d46..7f7ad17a 100644 --- a/test/src/NE10_random.c +++ b/test/src/NE10_random.c @@ -95,14 +95,14 @@ void NE10_float_rng_init_g (NE10_float_rng_t* float_rng, uint32_t seed) float NE10_float_rng_next_g (NE10_float_rng_t* float_rng) { - uint32_t frc, exp, sgn, ret; + uint32_t ret; do { // generate three random numbers - frc = NE10_rng_next_g (&float_rng->_private_m_rngs[0]); - exp = NE10_rng_next_g (&float_rng->_private_m_rngs[1]); - sgn = NE10_rng_next_g (&float_rng->_private_m_rngs[2]); + uint32_t frc = NE10_rng_next_g (&float_rng->_private_m_rngs[0]); + uint32_t exp = NE10_rng_next_g (&float_rng->_private_m_rngs[1]); + uint32_t sgn = NE10_rng_next_g (&float_rng->_private_m_rngs[2]); // take the top bits ( the sign uses the 17th bit) frc = (frc >> 9) & 0x7FFFFF ; // (1)b^23 diff --git a/test/src/seatest.c b/test/src/seatest.c index 18e2c74b..242eb343 100644 --- a/test/src/seatest.c +++ b/test/src/seatest.c @@ -136,7 +136,7 @@ void seatest_simple_test_result_log(int passed, char* reason, const char* functi } else { - printf("%-30s Line %-5d %s\r\n", function, line, reason ); + printf("%-30s Line %-5u 
%s\r\n", function, line, reason ); } sea_tests_failed++; } @@ -150,7 +150,7 @@ void seatest_simple_test_result_log(int passed, char* reason, const char* functi } else { - printf("%-30s Line %-5d Passed\r\n", function, line); + printf("%-30s Line %-5u Passed\r\n", function, line); } } sea_tests_passed++; @@ -185,10 +185,10 @@ void seatest_assert_ulong_equal(unsigned long expected, unsigned long actual, co void seatest_assert_float_vec_equal( float expected, float actual, unsigned int delta, unsigned int seatest_vec, const char* function, unsigned int line ) { - char s[SEATEST_PRINT_BUFFER_SIZE]; if (!EQUALS_FLOAT(expected, actual, delta)) { - sprintf(s, "Expected %e (0x%04X) but was %e (0x%04X) at vector->%d ", + char s[SEATEST_PRINT_BUFFER_SIZE]; + sprintf(s, "Expected %e (0x%04X) but was %e (0x%04X) at vector->%u ", expected, *(unsigned int*)&expected, actual, *(unsigned int*)&actual, seatest_vec); seatest_simple_test_result( 0, s, function, line); } @@ -196,10 +196,10 @@ void seatest_assert_float_vec_equal( float expected, float actual, unsigned int void seatest_assert_float_equal( float expected, float actual, unsigned int delta, unsigned int loop_round, const char* function, unsigned int line ) { - char s[SEATEST_PRINT_BUFFER_SIZE]; if (!EQUALS_FLOAT(expected, actual, delta)) { - sprintf(s, "Expected %e (0x%04X) but was %e (0x%04X) in loop round %d", + char s[SEATEST_PRINT_BUFFER_SIZE]; + sprintf(s, "Expected %e (0x%04X) but was %e (0x%04X) in loop round %u", expected, *(unsigned int*)&expected, actual, *(unsigned int*)&actual, loop_round); seatest_simple_test_result( 0, s, function, line); }