Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add more generic AVX feature detection as well as flags for if avx512… #1041

Merged
merged 10 commits into from
Jan 17, 2024
Merged
8 changes: 2 additions & 6 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,6 @@ include(AwsFeatureTests)
include(AwsSanitizers)
include(AwsThreadAffinity)
include(AwsThreadName)
include(AwsSIMD)
include(CTest)

set(GENERATED_ROOT_DIR "${CMAKE_CURRENT_BINARY_DIR}/generated")
Expand Down Expand Up @@ -224,12 +223,9 @@ target_include_directories(${PROJECT_NAME} PUBLIC

target_compile_definitions(${PROJECT_NAME} PRIVATE -DCJSON_HIDE_SYMBOLS)

# Enable SIMD encoder if the compiler supports the right features
simd_add_definitions(${PROJECT_NAME})

if (HAVE_AVX2_INTRINSICS)
if (AWS_HAVE_AVX2_INTRINSICS)
target_compile_definitions(${PROJECT_NAME} PRIVATE -DUSE_SIMD_ENCODING)
simd_add_source_avx2(${PROJECT_NAME} "source/arch/intel/encoding_avx2.c")
simd_add_source_avx(${PROJECT_NAME} "source/arch/intel/encoding_avx2.c")
message(STATUS "Building SIMD base64 decoder")
endif()

Expand Down
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -158,7 +158,7 @@ Example:
* Avoid C99 features in header files. For some types such as bool, uint32_t etc..., these are defined if not available for the language
standard being used in `aws/common/common.h`, so feel free to use them.
* For C++ compatibility, don't put const members in structs.
* Avoid C++ style comments e.g. `//`.
* Avoid C++ style comments e.g. `//` in header files and prefer block style (`/* */`) for long blocks of text. C++ style comments are fine in C files.
* All public API functions need C++ guards and Windows dll semantics.
* Use Unix line endings.
* Where implementation hiding is desired for either ABI or runtime polymorphism reasons, use the `void *impl` pattern. v-tables
Expand Down
4 changes: 4 additions & 0 deletions cmake/AwsFeatureTests.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -117,3 +117,7 @@ if(MSVC)
return 0;
}" AWS_HAVE_MSVC_INTRINSICS_X64)
endif()

# This does a lot to detect when intrinsics are available and has to set cflags to do so.
# leave it in its own file for ease of managing it.
include(AwsSIMD)
57 changes: 34 additions & 23 deletions cmake/AwsSIMD.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -8,18 +8,31 @@ if (USE_CPU_EXTENSIONS)
if (MSVC)
check_c_compiler_flag("/arch:AVX2" HAVE_M_AVX2_FLAG)
if (HAVE_M_AVX2_FLAG)
set(AVX2_CFLAGS "/arch:AVX2")
set(AVX_CFLAGS "/arch:AVX2")
endif()
else()
check_c_compiler_flag(-mavx2 HAVE_M_AVX2_FLAG)
if (HAVE_M_AVX2_FLAG)
set(AVX2_CFLAGS "-mavx -mavx2")
set(AVX_CFLAGS "-mavx -mavx2")
endif()
endif()

if (MSVC)
check_c_compiler_flag("/arch:AVX512" HAVE_M_AVX512_FLAG)
if (HAVE_M_AVX512_FLAG)
# docs imply AVX512 brings in AVX2. And it will compile, but it will break at runtime on
# instructions such as _mm256_load_si256(). Leave it on.
set(AVX_CFLAGS "/arch:AVX512 /arch:AVX2")
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

it is mystery

endif()
else()
check_c_compiler_flag("-mavx512f -mvpclmulqdq" HAVE_M_AVX512_FLAG)
if (HAVE_M_AVX512_FLAG)
set(AVX_CFLAGS "-mavx512f -mvpclmulqdq -mpclmul -mavx -mavx2 -msse4.2")
endif()
endif()

set(old_flags "${CMAKE_REQUIRED_FLAGS}")
set(CMAKE_REQUIRED_FLAGS "${CMAKE_REQUIRED_FLAGS} ${AVX2_CFLAGS}")
set(CMAKE_REQUIRED_FLAGS "${CMAKE_REQUIRED_FLAGS} ${AVX_CFLAGS}")

check_c_source_compiles("
#include <immintrin.h>
Expand All @@ -35,7 +48,15 @@ if (USE_CPU_EXTENSIONS)
_mm256_permutevar8x32_epi32(vec, vec);

return 0;
}" HAVE_AVX2_INTRINSICS)
}" AWS_HAVE_AVX2_INTRINSICS)

check_c_source_compiles("
#include <immintrin.h>

int main() {
__m512 a = _mm512_setzero_ps();
return 0;
}" AWS_HAVE_AVX512_INTRINSICS)
graebm marked this conversation as resolved.
Show resolved Hide resolved

check_c_source_compiles("
#include <immintrin.h>
Expand All @@ -45,30 +66,20 @@ if (USE_CPU_EXTENSIONS)
__m256i vec;
memset(&vec, 0, sizeof(vec));
return (int)_mm256_extract_epi64(vec, 2);
}" HAVE_MM256_EXTRACT_EPI64)
}" AWS_HAVE_MM256_EXTRACT_EPI64)

set(CMAKE_REQUIRED_FLAGS "${old_flags}")
endif() # USE_CPU_EXTENSIONS
graebm marked this conversation as resolved.
Show resolved Hide resolved

macro(simd_add_definition_if target definition)
if(${definition})
target_compile_definitions(${target} PRIVATE -D${definition})
endif(${definition})
endmacro(simd_add_definition_if)

# Configure private preprocessor definitions for SIMD-related features
# Does not set any processor feature codegen flags
function(simd_add_definitions target)
simd_add_definition_if(${target} HAVE_AVX2_INTRINSICS)
simd_add_definition_if(${target} HAVE_MM256_EXTRACT_EPI64)
endfunction(simd_add_definitions)
# The part where the definition is added to the compiler flags has been moved to config.h.in
# see git history for more details.

# Adds source files only if AVX2 is supported. These files will be built with
# avx2 intrinsics enabled.
# Usage: simd_add_source_avx2(target file1.c file2.c ...)
function(simd_add_source_avx2 target)
# Adds AVX flags, if any, that are supported. These files will be built with
# available avx intrinsics enabled.
# Usage: simd_add_source_avx(target file1.c file2.c ...)
function(simd_add_source_avx target)
foreach(file ${ARGN})
target_sources(${target} PRIVATE ${file})
set_source_files_properties(${file} PROPERTIES COMPILE_FLAGS "${AVX2_CFLAGS}")
set_source_files_properties(${file} PROPERTIES COMPILE_FLAGS "${AVX_CFLAGS}")
endforeach()
endfunction(simd_add_source_avx2)
endfunction(simd_add_source_avx)
3 changes: 3 additions & 0 deletions include/aws/common/config.h.in
Original file line number Diff line number Diff line change
Expand Up @@ -19,5 +19,8 @@
#cmakedefine AWS_HAVE_EXECINFO
#cmakedefine AWS_HAVE_WINAPI_DESKTOP
#cmakedefine AWS_HAVE_LINUX_IF_LINK_H
#cmakedefine AWS_HAVE_AVX2_INTRINSICS
#cmakedefine AWS_HAVE_AVX512_INTRINSICS
#cmakedefine AWS_HAVE_MM256_EXTRACT_EPI64

#endif
1 change: 1 addition & 0 deletions include/aws/common/cpuid.h
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ enum aws_cpu_feature_name {
AWS_CPU_FEATURE_AVX512,
AWS_CPU_FEATURE_ARM_CRC,
AWS_CPU_FEATURE_BMI2,
AWS_CPU_FEATURE_VPCLMULQDQ,
AWS_CPU_FEATURE_COUNT,
};

Expand Down
13 changes: 13 additions & 0 deletions source/arch/intel/cpuid.c
Original file line number Diff line number Diff line change
Expand Up @@ -113,13 +113,26 @@ static bool s_has_bmi2(void) {
return true;
}

static bool s_has_vpclmulqdq(void) {
uint32_t abcd[4];
/* Check VPCLMULQDQ:
* CPUID.(EAX=07H, ECX=0H):ECX.VPCLMULQDQ[bit 20]==1 */
uint32_t vpclmulqdq_mask = (1 << 20);
aws_run_cpuid(7, 0, abcd);
if ((abcd[2] & vpclmulqdq_mask) != vpclmulqdq_mask) {
return false;
}
return true;
}

has_feature_fn *s_check_cpu_feature[AWS_CPU_FEATURE_COUNT] = {
[AWS_CPU_FEATURE_CLMUL] = s_has_clmul,
[AWS_CPU_FEATURE_SSE_4_1] = s_has_sse41,
[AWS_CPU_FEATURE_SSE_4_2] = s_has_sse42,
[AWS_CPU_FEATURE_AVX2] = s_has_avx2,
[AWS_CPU_FEATURE_AVX512] = s_has_avx512,
[AWS_CPU_FEATURE_BMI2] = s_has_bmi2,
[AWS_CPU_FEATURE_VPCLMULQDQ] = s_has_vpclmulqdq,
};

bool aws_cpu_has_feature(enum aws_cpu_feature_name feature_name) {
Expand Down
2 changes: 1 addition & 1 deletion source/arch/intel/encoding_avx2.c
Original file line number Diff line number Diff line change
Expand Up @@ -179,7 +179,7 @@ static inline bool decode(const unsigned char *in, unsigned char *out) {
* so we'll just copy right out of the vector as a fallback
*/

#ifdef HAVE_MM256_EXTRACT_EPI64
#ifdef AWS_HAVE_MM256_EXTRACT_EPI64
uint64_t hi = _mm256_extract_epi64(vec, 2);
const uint64_t *p_hi = &hi;
#else
Expand Down
2 changes: 1 addition & 1 deletion tests/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -295,7 +295,7 @@ add_test_case(test_byte_cursor_utf8_parse_u64_hex)

add_test_case(byte_swap_test)

if(HAVE_AVX2_INTRINSICS)
if(AWS_HAVE_AVX2_INTRINSICS)
add_test_case(alignment32_test)
else()
add_test_case(alignment16_test)
Expand Down