diff --git a/include/ffts.h b/include/ffts.h index b13316f6a60..7561b53d89c 100644 --- a/include/ffts.h +++ b/include/ffts.h @@ -66,6 +66,12 @@ extern "C" { struct _ffts_plan_t; typedef struct _ffts_plan_t ffts_plan_t; +FFTS_API void* +ffts_aligned_malloc(size_t size); + +FFTS_API void +ffts_aligned_free(void *p); + /* Complex data is stored in the interleaved format (i.e, the real and imaginary parts composing each element of complex data are stored adjacently in memory) diff --git a/src/ffts.c b/src/ffts.c index 35c5cad00db..a4b1d420cb7 100644 --- a/src/ffts.c +++ b/src/ffts.c @@ -128,6 +128,50 @@ ffts_flush_instruction_cache(void *start, size_t length) #endif } +FFTS_API void* +ffts_aligned_malloc(size_t size) +{ + void *p = NULL; + + /* various ways to allocate aligned memory in order of preference */ +#if defined(__ICC) || defined(__INTEL_COMPILER) || defined(HAVE__MM_MALLOC) + p = (void*) _mm_malloc(size, 32); +#elif defined(HAVE_POSIX_MEMALIGN) + if (posix_memalign(&p, 32, size)) + p = NULL; +#elif defined(HAVE_MEMALIGN) + p = memalign(32, size); +#elif defined(__ALTIVEC__) + p = vec_malloc(size); +#elif defined(_MSC_VER) || defined(WIN32) + p = _aligned_malloc(size, 32); +#elif defined(HAVE_VALLOC) + p = valloc(size); +#else + p = malloc(size); +#endif + + return p; +} + +FFTS_API void +ffts_aligned_free(void *p) +{ + /* order must match with ffts_aligned_malloc */ +#if defined(__ICC) || defined(__INTEL_COMPILER) || defined(HAVE__MM_MALLOC) + _mm_free(p); +#elif defined(HAVE_POSIX_MEMALIGN) || defined(HAVE_MEMALIGN) + free(p); +#elif defined(__ALTIVEC__) + vec_free(p); +#elif defined(_MSC_VER) || defined(WIN32) + _aligned_free(p); +#else + /* valloc or malloc */ + free(p); +#endif +} + static FFTS_INLINE void* ffts_vmem_alloc(size_t length) { diff --git a/src/ffts_internal.h b/src/ffts_internal.h index 04ebb9c80ea..f01add8a424 100644 --- a/src/ffts_internal.h +++ b/src/ffts_internal.h @@ -214,50 +214,6 @@ struct _ffts_plan_t { size_t i2; }; -static 
FFTS_INLINE void* -ffts_aligned_malloc(size_t size) -{ - void *p = NULL; - - /* various ways to allocate aligned memory in order of preferance */ -#if defined(__ICC) || defined(__INTEL_COMPILER) || defined(HAVE__MM_MALLOC) - p = (void*) _mm_malloc(size, 32); -#elif defined(HAVE_POSIX_MEMALIGN) - if (posix_memalign(&p, 32, size)) - p = NULL; -#elif defined(HAVE_MEMALIGN) - p = memalign(32, size); -#elif defined(__ALTIVEC__) - p = vec_malloc(size); -#elif defined(_MSC_VER) || defined(WIN32) - p = _aligned_malloc(size, 32); -#elif defined(HAVE_VALLOC) - p = valloc(size); -#else - p = malloc(size); -#endif - - return p; -} - -static FFTS_INLINE -void ffts_aligned_free(void *p) -{ - /* order must match with ffts_aligned_malloc */ -#if defined(__ICC) || defined(__INTEL_COMPILER) || defined(HAVE__MM_MALLOC) - _mm_free(p); -#elif defined(HAVE_POSIX_MEMALIGN) || defined(HAVE_MEMALIGN) - free(p); -#elif defined(__ALTIVEC__) - vec_free(p); -#elif defined(_MSC_VER) || defined(WIN32) - _aligned_free(p); -#else - /* valloc or malloc */ - free(p); -#endif -} - #if GCC_VERSION_AT_LEAST(3,3) #define ffts_ctzl __builtin_ctzl diff --git a/src/ffts_trig.c b/src/ffts_trig.c index 65efa866e1f..4a70f59acfb 100644 --- a/src/ffts_trig.c +++ b/src/ffts_trig.c @@ -30,6 +30,7 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ +#include "ffts.h" #include "ffts_trig.h" #include "ffts_dd.h" @@ -37,11 +38,11 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * For more information on algorithms: * * D. Potts, G. Steidl, M. Tasche, Numerical stability of fast -* trigonometric transforms — a worst case study, -* J. Concrete Appl. Math. 1 (2003) 1–36 +* trigonometric transforms — a worst case study, +* J. Concrete Appl. Math. 1 (2003) 1–36 * -* O. Buneman, Stable on–line creation of sines and cosines of -* successive angles, Proc. IEEE 75, 1434 – 1435 (1987). +* O. Buneman, Stable on–line creation of sines and cosines of +* successive angles, Proc. 
IEEE 75, 1434 – 1435 (1987). */ /* An union to initialize doubles using byte presentation, diff --git a/tests/test.c b/tests/test.c index d07f766680b..7ccc1d3374a 100644 --- a/tests/test.c +++ b/tests/test.c @@ -100,13 +100,8 @@ int test_transform(int n, int sign) { ffts_plan_t *p; -#ifdef HAVE_SSE - float FFTS_ALIGN(32) *input = _mm_malloc(2 * n * sizeof(float), 32); - float FFTS_ALIGN(32) *output = _mm_malloc(2 * n * sizeof(float), 32); -#else - float FFTS_ALIGN(32) *input = valloc(2 * n * sizeof(float)); - float FFTS_ALIGN(32) *output = valloc(2 * n * sizeof(float)); -#endif + float FFTS_ALIGN(32) *input = ffts_aligned_malloc(2 * n * sizeof(float)); + float FFTS_ALIGN(32) *output = ffts_aligned_malloc(2 * n * sizeof(float)); int i; for (i = 0; i < n; i++) { @@ -138,13 +133,8 @@ int main(int argc, char *argv[]) int n = atoi(argv[1]); int sign = atoi(argv[2]); -#ifdef HAVE_SSE - float FFTS_ALIGN(32) *input = _mm_malloc(2 * n * sizeof(float), 32); - float FFTS_ALIGN(32) *output = _mm_malloc(2 * n * sizeof(float), 32); -#else - float FFTS_ALIGN(32) *input = valloc(2 * n * sizeof(float)); - float FFTS_ALIGN(32) *output = valloc(2 * n * sizeof(float)); -#endif + float FFTS_ALIGN(32) *input = ffts_aligned_malloc(2 * n * sizeof(float)); + float FFTS_ALIGN(32) *output = ffts_aligned_malloc(2 * n * sizeof(float)); for (i = 0; i < n; i++) { input[2*i + 0] = (float) i; diff --git a/tests/trig_test.c b/tests/trig_test.c index 258e5a6d52e..04a53e672b3 100644 --- a/tests/trig_test.c +++ b/tests/trig_test.c @@ -30,6 +30,7 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ +#include "ffts.h" #include "../src/ffts_internal.h" #include "../src/ffts_trig.h"