Skip to content

Commit bf5795f

Browse files
author
Christopher Taylor
committed
Add CMakeLists and fix OS X compat
1 parent dee7d41 commit bf5795f

File tree

5 files changed

+58
-11
lines changed

5 files changed

+58
-11
lines changed

CMakeLists.txt

+37
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
# Build script for the Leopard library plus its benchmark and experiment
# executables.
#
# Targets defined here:
#   libleopard          static library built from LIB_SOURCE_FILES
#   bench_leopard       tests/benchmark.cpp, linked against libleopard
#   experiment_leopard  tests/experiments.cpp
cmake_minimum_required(VERSION 3.7)
project(leopard LANGUAGES CXX)

# Multi-config generators (Visual Studio, Xcode) populate
# CMAKE_CONFIGURATION_TYPES during project(); single-config generators leave
# it empty.  Trim the list to Debug/Release only where a list actually exists,
# so a single-config build tree is not made to look multi-config.
if(CMAKE_CONFIGURATION_TYPES)
  set(CMAKE_CONFIGURATION_TYPES "Debug;Release" CACHE STRING "" FORCE)
endif()

# Default to an optimized build when the user did not pick a build type.
# Multi-config generators ignore CMAKE_BUILD_TYPE, so this is harmless there.
if(NOT CMAKE_BUILD_TYPE)
  set(CMAKE_BUILD_TYPE Release)
endif()

# The sources need C++11; fail at configure time instead of silently falling
# back to an older standard when the compiler cannot provide it.
set(CMAKE_CXX_STANDARD 11)
set(CMAKE_CXX_STANDARD_REQUIRED ON)

set(LIB_SOURCE_FILES
  leopard.cpp
  leopard.h
  LeopardCommon.cpp
  LeopardCommon.h
  LeopardFF16.cpp
  LeopardFF16.h
  LeopardFF8.cpp
  LeopardFF8.h)

set(BENCH_SOURCE_FILES
  tests/benchmark.cpp)

set(EXPERIMENT_SOURCE_FILES
  tests/experiments.cpp)

# The flag spellings below are GCC/Clang syntax, so they are applied only to
# those compilers ("Clang" also matches AppleClang).  Other toolchains keep
# CMake's own defaults instead of receiving options they do not understand.
set(LEOPARD_WARNING_FLAGS "")
if("${CMAKE_CXX_COMPILER_ID}" MATCHES "GNU|Clang")
  set(LEOPARD_WARNING_FLAGS -Wall -Wextra)

  # Per-configuration flags are kept exactly as the project chose them; they
  # deliberately replace CMake's defaults for these two configurations.
  set(CMAKE_CXX_FLAGS_DEBUG "-g -O0")
  set(CMAKE_CXX_FLAGS_RELEASE "-O3")
endif()

# Note: on toolchains that prepend "lib" to static libraries the archive is
# named liblibleopard.a; the target name is kept so existing references to
# `libleopard` continue to work.
add_library(libleopard STATIC ${LIB_SOURCE_FILES})

# Export the directory holding the library headers so that anything linking
# against libleopard can include them without hard-coding a relative path.
target_include_directories(libleopard PUBLIC "${CMAKE_CURRENT_SOURCE_DIR}")

add_executable(bench_leopard ${BENCH_SOURCE_FILES})
target_link_libraries(bench_leopard PRIVATE libleopard)

# NOTE(review): experiment_leopard is built from tests/experiments.cpp alone
# and is not linked against libleopard (unchanged from the original script) --
# confirm it needs no symbols from the library.
add_executable(experiment_leopard ${EXPERIMENT_SOURCE_FILES})

# Attach warnings per target rather than overwriting CMAKE_CXX_FLAGS, so flags
# supplied by the user or a toolchain file are preserved.
if(LEOPARD_WARNING_FLAGS)
  foreach(leopard_target libleopard bench_leopard experiment_leopard)
    target_compile_options(${leopard_target} PRIVATE ${LEOPARD_WARNING_FLAGS})
  endforeach()
endif()

LeopardCommon.h

+2
Original file line numberDiff line numberDiff line change
@@ -153,7 +153,9 @@
153153
#include "leopard.h"
154154

155155
#include <stdint.h>
156+
#ifdef _WIN32
156157
#include <malloc.h>
158+
#endif //_WIN32
157159
#include <vector>
158160
#include <atomic>
159161
#include <memory>

LeopardFF16.cpp

+7-8
Original file line numberDiff line numberDiff line change
@@ -381,7 +381,7 @@ static void InitializeMultiplyTables()
381381

382382
// For each log_m multiplicand:
383383
#pragma omp parallel for
384-
for (int log_m = 0; log_m < kOrder; ++log_m)
384+
for (int log_m = 0; log_m < (int)kOrder; ++log_m)
385385
{
386386
const Product16Table& lut = Multiply16LUT[log_m];
387387

@@ -400,14 +400,16 @@ static void InitializeMultiplyTables()
400400
return;
401401
}
402402

403+
#if defined(LEO_TRY_AVX2)
403404
if (CpuHasAVX2)
404405
Multiply256LUT = reinterpret_cast<const Multiply256LUT_t*>(SIMDSafeAllocate(sizeof(Multiply256LUT_t) * kOrder));
405406
else
407+
#endif // LEO_TRY_AVX2
406408
Multiply128LUT = reinterpret_cast<const Multiply128LUT_t*>(SIMDSafeAllocate(sizeof(Multiply128LUT_t) * kOrder));
407409

408410
// For each value we could multiply by:
409411
#pragma omp parallel for
410-
for (int log_m = 0; log_m < kOrder; ++log_m)
412+
for (int log_m = 0; log_m < (int)kOrder; ++log_m)
411413
{
412414
// For each 4 bits of the finite field width in bits:
413415
for (unsigned i = 0, shift = 0; i < 4; ++i, shift += 4)
@@ -425,7 +427,9 @@ static void InitializeMultiplyTables()
425427
const LEO_M128 value_hi = _mm_loadu_si128((LEO_M128*)prod_hi);
426428

427429
// Store in 128-bit wide table
430+
#if defined(LEO_TRY_AVX2)
428431
if (!CpuHasAVX2)
432+
#endif // LEO_TRY_AVX2
429433
{
430434
_mm_storeu_si128((LEO_M128*)&Multiply128LUT[log_m].Lo[i], value_lo);
431435
_mm_storeu_si128((LEO_M128*)&Multiply128LUT[log_m].Hi[i], value_hi);
@@ -1341,9 +1345,6 @@ static void FFT_DIT(
13411345
unsigned dist4 = m, dist = m >> 2;
13421346
for (; dist != 0; dist4 = dist, dist >>= 2)
13431347
{
1344-
const unsigned thread_u = m_truncated / dist4;
1345-
const unsigned thread_v = dist;
1346-
13471348
// For each set of dist*4 elements:
13481349
#pragma omp parallel for
13491350
for (int r = 0; r < (int)m_truncated; r += dist4)
@@ -1439,8 +1440,6 @@ void ReedSolomonEncode(
14391440
// Handle final partial set of m pieces:
14401441
if (last_count != 0)
14411442
{
1442-
const unsigned i = original_count - last_count;
1443-
14441443
data += m;
14451444
skewLUT += m;
14461445

@@ -1692,7 +1691,7 @@ void ReedSolomonDecode(
16921691
FWHT(error_locations, kOrder, m + original_count);
16931692

16941693
#pragma omp parallel for
1695-
for (int i = 0; i < kOrder; ++i)
1694+
for (int i = 0; i < (int)kOrder; ++i)
16961695
error_locations[i] = ((unsigned)error_locations[i] * (unsigned)LogWalsh[i]) % kModulus;
16971696

16981697
FWHT(error_locations, kOrder, kOrder);

LeopardFF8.cpp

+6-2
Original file line numberDiff line numberDiff line change
@@ -368,9 +368,11 @@ static void InitializeMultiplyTables()
368368
return;
369369
}
370370

371+
#ifdef LEO_TRY_AVX2
371372
if (CpuHasAVX2)
372373
Multiply256LUT = reinterpret_cast<const Multiply256LUT_t*>(SIMDSafeAllocate(sizeof(Multiply256LUT_t) * kOrder));
373374
else
375+
#endif // LEO_TRY_AVX2
374376
Multiply128LUT = reinterpret_cast<const Multiply128LUT_t*>(SIMDSafeAllocate(sizeof(Multiply128LUT_t) * kOrder));
375377

376378
// For each value we could multiply by:
@@ -388,7 +390,9 @@ static void InitializeMultiplyTables()
388390
const LEO_M128 value = _mm_loadu_si128(v_ptr);
389391

390392
// Store in 128-bit wide table
393+
#if defined(LEO_TRY_AVX2)
391394
if (!CpuHasAVX2)
395+
#endif // LEO_TRY_AVX2
392396
_mm_storeu_si128((LEO_M128*)&Multiply128LUT[log_m].Value[i], value);
393397

394398
// Store in 256-bit wide table
@@ -1397,6 +1401,7 @@ static void FFT_DIT4(
13971401
{
13981402
#ifdef LEO_INTERLEAVE_BUTTERFLY4_OPT
13991403

1404+
#if defined(LEO_TRY_AVX2)
14001405
if (CpuHasAVX2)
14011406
{
14021407
const LEO_M256 t01_lo = _mm256_loadu_si256(&Multiply256LUT[log_m01].Value[0]);
@@ -1451,6 +1456,7 @@ static void FFT_DIT4(
14511456

14521457
return;
14531458
}
1459+
#endif // LEO_TRY_AVX2
14541460

14551461
if (CpuHasSSSE3)
14561462
{
@@ -1639,8 +1645,6 @@ void ReedSolomonEncode(
16391645
// Handle final partial set of m pieces:
16401646
if (last_count != 0)
16411647
{
1642-
const unsigned i = original_count - last_count;
1643-
16441648
data += m;
16451649
skewLUT += m;
16461650

tests/benchmark.cpp

+6-1
Original file line numberDiff line numberDiff line change
@@ -91,14 +91,19 @@ static bool SetCurrentThreadPriority()
9191
#ifdef _WIN32
9292
return 0 != ::SetThreadPriority(::GetCurrentThread(), THREAD_PRIORITY_ABOVE_NORMAL);
9393
#else
94-
return -1 != nice(2);
94+
// setpriority on mac os x
95+
return true;
9596
#endif
9697
}
9798

9899

99100
//------------------------------------------------------------------------------
100101
// Timing
101102

103+
#ifndef _WIN32
104+
#include <sys/time.h>
105+
#endif
106+
102107
static uint64_t GetTimeUsec()
103108
{
104109
#ifdef _WIN32

0 commit comments

Comments
 (0)