Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions include/ffts.h
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,12 @@ extern "C" {
struct _ffts_plan_t;
typedef struct _ffts_plan_t ffts_plan_t;

/* Allocate size bytes through the aligned allocator selected at build
   time. A 32-byte boundary is requested wherever the selected allocator
   accepts an alignment argument. Release the returned block with
   ffts_aligned_free() rather than assuming plain free() is correct. */
FFTS_API void*
ffts_aligned_malloc(size_t size);

/* Release a block obtained from ffts_aligned_malloc() using the
   deallocator that pairs with the allocator selected at build time. */
FFTS_API void
ffts_aligned_free(void *p);

/* Complex data is stored in the interleaved format
(i.e., the real and imaginary parts composing each
element of complex data are stored adjacently in memory)
Expand Down
44 changes: 44 additions & 0 deletions src/ffts.c
Original file line number Diff line number Diff line change
Expand Up @@ -128,6 +128,50 @@ ffts_flush_instruction_cache(void *start, size_t length)
#endif
}

/*
 * Allocate size bytes with the first aligned allocator that the build
 * configuration makes available. Allocators that take an alignment
 * argument are asked for a 32-byte boundary; the others fall back on
 * their own alignment.
 *
 * Returns the new block. On the posix_memalign path a failure is
 * reported as NULL; on every other path the allocator's own return
 * value is passed through unchanged.
 *
 * The block must be released with ffts_aligned_free(), which picks the
 * matching deallocator.
 */
FFTS_API void*
ffts_aligned_malloc(size_t size)
{
    /* allocators are listed in order of preference; the first match wins */
#if defined(__ICC) || defined(__INTEL_COMPILER) || defined(HAVE__MM_MALLOC)
    return (void*) _mm_malloc(size, 32);
#elif defined(HAVE_POSIX_MEMALIGN)
    void *block = NULL;

    /* a non-zero status means failure; never hand back the out-parameter then */
    if (posix_memalign(&block, 32, size) != 0) {
        return NULL;
    }

    return block;
#elif defined(HAVE_MEMALIGN)
    return memalign(32, size);
#elif defined(__ALTIVEC__)
    return vec_malloc(size);
#elif defined(_MSC_VER) || defined(WIN32)
    return _aligned_malloc(size, 32);
#elif defined(HAVE_VALLOC)
    return valloc(size);
#else
    return malloc(size);
#endif
}

/*
 * Release a block that was obtained from ffts_aligned_malloc().
 *
 * The preprocessor selection mirrors the allocator choice made in
 * ffts_aligned_malloc: posix_memalign, memalign, valloc and malloc
 * blocks all go back through free(), so those cases share the final
 * branch. The conditions below must stay in step with the allocator
 * priority order.
 */
FFTS_API void
ffts_aligned_free(void *p)
{
#if defined(__ICC) || defined(__INTEL_COMPILER) || defined(HAVE__MM_MALLOC)
    _mm_free(p);
#elif !defined(HAVE_POSIX_MEMALIGN) && !defined(HAVE_MEMALIGN) && \
      defined(__ALTIVEC__)
    vec_free(p);
#elif !defined(HAVE_POSIX_MEMALIGN) && !defined(HAVE_MEMALIGN) && \
      (defined(_MSC_VER) || defined(WIN32))
    _aligned_free(p);
#else
    /* posix_memalign, memalign, valloc or malloc */
    free(p);
#endif
}

static FFTS_INLINE void*
ffts_vmem_alloc(size_t length)
{
Expand Down
44 changes: 0 additions & 44 deletions src/ffts_internal.h
Original file line number Diff line number Diff line change
Expand Up @@ -214,50 +214,6 @@ struct _ffts_plan_t {
size_t i2;
};

/*
 * Inline helper: allocate size bytes from the first aligned allocator
 * the build provides, requesting a 32-byte boundary where the allocator
 * accepts one. The result is NULL when posix_memalign fails; other
 * allocators' return values are forwarded untouched.
 */
static FFTS_INLINE void*
ffts_aligned_malloc(size_t size)
{
    void *mem;

    /* candidates appear in order of preference */
#if defined(__ICC) || defined(__INTEL_COMPILER) || defined(HAVE__MM_MALLOC)
    mem = (void*) _mm_malloc(size, 32);
#elif defined(HAVE_POSIX_MEMALIGN)
    mem = NULL;
    if (posix_memalign(&mem, 32, size) != 0) {
        /* discard whatever the failed call may have stored */
        mem = NULL;
    }
#elif defined(HAVE_MEMALIGN)
    mem = memalign(32, size);
#elif defined(__ALTIVEC__)
    mem = vec_malloc(size);
#elif defined(_MSC_VER) || defined(WIN32)
    mem = _aligned_malloc(size, 32);
#elif defined(HAVE_VALLOC)
    mem = valloc(size);
#else
    mem = malloc(size);
#endif

    return mem;
}

/*
 * Inline helper: release a block with the deallocator that pairs with
 * the allocator selected by the same preprocessor conditions. The
 * selection order here has to track the allocator's order.
 */
static FFTS_INLINE
void ffts_aligned_free(void *p)
{
#if defined(__ICC) || defined(__INTEL_COMPILER) || defined(HAVE__MM_MALLOC)
    _mm_free(p);
#elif defined(HAVE_POSIX_MEMALIGN) || defined(HAVE_MEMALIGN)
    /* both of these allocators hand out blocks that free() accepts */
    free(p);
#else
#  if defined(__ALTIVEC__)
    vec_free(p);
#  elif defined(_MSC_VER) || defined(WIN32)
    _aligned_free(p);
#  else
    /* block came from valloc or malloc */
    free(p);
#  endif
#endif
}

#if GCC_VERSION_AT_LEAST(3,3)
#define ffts_ctzl __builtin_ctzl

Expand Down
9 changes: 5 additions & 4 deletions src/ffts_trig.c
Original file line number Diff line number Diff line change
Expand Up @@ -30,18 +30,19 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

*/

#include "ffts.h"
#include "ffts_trig.h"
#include "ffts_dd.h"

/*
* For more information on algorithms:
*
* D. Potts, G. Steidl, M. Tasche, Numerical stability of fast
* trigonometric transforms a worst case study,
* J. Concrete Appl. Math. 1 (2003) 136
* trigonometric transforms — a worst case study,
* J. Concrete Appl. Math. 1 (2003) 1–36
*
* O. Buneman, Stable online creation of sines and cosines of
* successive angles, Proc. IEEE 75, 1434 1435 (1987).
* O. Buneman, Stable on–line creation of sines and cosines of
* successive angles, Proc. IEEE 75, 1434 – 1435 (1987).
*/

/* A union to initialize doubles using byte representation,
Expand Down
18 changes: 4 additions & 14 deletions tests/test.c
Original file line number Diff line number Diff line change
Expand Up @@ -100,13 +100,8 @@ int test_transform(int n, int sign)
{
ffts_plan_t *p;

#ifdef HAVE_SSE
float FFTS_ALIGN(32) *input = _mm_malloc(2 * n * sizeof(float), 32);
float FFTS_ALIGN(32) *output = _mm_malloc(2 * n * sizeof(float), 32);
#else
float FFTS_ALIGN(32) *input = valloc(2 * n * sizeof(float));
float FFTS_ALIGN(32) *output = valloc(2 * n * sizeof(float));
#endif
float FFTS_ALIGN(32) *input = ffts_aligned_malloc(2 * n * sizeof(float));
float FFTS_ALIGN(32) *output = ffts_aligned_malloc(2 * n * sizeof(float));
int i;

for (i = 0; i < n; i++) {
Expand Down Expand Up @@ -138,13 +133,8 @@ int main(int argc, char *argv[])
int n = atoi(argv[1]);
int sign = atoi(argv[2]);

#ifdef HAVE_SSE
float FFTS_ALIGN(32) *input = _mm_malloc(2 * n * sizeof(float), 32);
float FFTS_ALIGN(32) *output = _mm_malloc(2 * n * sizeof(float), 32);
#else
float FFTS_ALIGN(32) *input = valloc(2 * n * sizeof(float));
float FFTS_ALIGN(32) *output = valloc(2 * n * sizeof(float));
#endif
float FFTS_ALIGN(32) *input = ffts_aligned_malloc(2 * n * sizeof(float));
float FFTS_ALIGN(32) *output = ffts_aligned_malloc(2 * n * sizeof(float));

for (i = 0; i < n; i++) {
input[2*i + 0] = (float) i;
Expand Down
1 change: 1 addition & 0 deletions tests/trig_test.c
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

*/

#include "ffts.h"
#include "../src/ffts_internal.h"
#include "../src/ffts_trig.h"

Expand Down