diff --git a/runtime/libpgmath/lib/common/ceil.c b/runtime/libpgmath/lib/common/ceil.c index 55c827daf55..428af0e4401 100644 --- a/runtime/libpgmath/lib/common/ceil.c +++ b/runtime/libpgmath/lib/common/ceil.c @@ -6,8 +6,40 @@ */ #include "mthdecls.h" -#if defined(__SSE4_1__) || defined(__AVX__) -#include + +#if defined(TARGET_X8664) +/* + * For X8664, implement both SSE and AVX versions of __mth_i_ceil using ISA + * instruction extensions. + * + * Using inline assembly allows both the SSE and AVX versions of the routine + * to be compiled in a single unit. + * + * Each of the following asm statements is equivalent to: + * return _mm_cvtss_f32(_mm_ceil_ss(_mm_set1_ps(x), _mm_set1_ps(x))); + * But without the need for separate compilations for SSE4.1 and AVX ISA + * extensions. The $0x2 immediate selects rounding toward positive infinity. + */ + +float +__mth_i_ceil_sse(float x) +{ + __asm__( + "roundss $0x2,%0,%0" + :"+x"(x) + ); + return x; +} + +float +__mth_i_ceil_avx(float x) +{ + __asm__( + "vroundss $0x2,%0,%0,%0" + :"+x"(x) + ); + return x; +} #endif float diff --git a/runtime/libpgmath/lib/x86_64/math_tables/mth_ceildefs.h b/runtime/libpgmath/lib/x86_64/math_tables/mth_ceildefs.h index b2c567cfd56..b1e3fc7766f 100644 --- a/runtime/libpgmath/lib/x86_64/math_tables/mth_ceildefs.h +++ b/runtime/libpgmath/lib/x86_64/math_tables/mth_ceildefs.h @@ -5,7 +5,13 @@ * */ -MTHINTRIN(ceil , ss , any , __mth_i_ceil , __mth_i_ceil , __mth_i_ceil ,__math_dispatch_error) +MTHINTRIN(ceil , ss , em64t , __mth_i_ceil , __mth_i_ceil , __mth_i_ceil ,__math_dispatch_error) +MTHINTRIN(ceil , ss , sse4 , __mth_i_ceil_sse , __mth_i_ceil_sse , __mth_i_ceil_sse ,__math_dispatch_error) +MTHINTRIN(ceil , ss , avx , __mth_i_ceil_avx , __mth_i_ceil_avx , __mth_i_ceil_avx ,__math_dispatch_error) +MTHINTRIN(ceil , ss , avxfma4 , __mth_i_ceil_avx , __mth_i_ceil_avx , __mth_i_ceil_avx ,__math_dispatch_error) +MTHINTRIN(ceil , ss , avx2 , __mth_i_ceil_avx , __mth_i_ceil_avx , __mth_i_ceil_avx ,__math_dispatch_error) +MTHINTRIN(ceil , ss , avx512knl , 
__mth_i_ceil_avx , __mth_i_ceil_avx , __mth_i_ceil_avx ,__math_dispatch_error) +MTHINTRIN(ceil , ss , avx512 , __mth_i_ceil_avx , __mth_i_ceil_avx , __mth_i_ceil_avx ,__math_dispatch_error) MTHINTRIN(ceil , ds , em64t , __mth_i_dceil , __mth_i_dceil , __mth_i_dceil ,__math_dispatch_error) MTHINTRIN(ceil , ds , sse4 , __mth_i_dceil_sse , __mth_i_dceil_sse , __mth_i_dceil_sse ,__math_dispatch_error) MTHINTRIN(ceil , ds , avx , __mth_i_dceil_avx , __mth_i_dceil_avx , __mth_i_dceil_avx ,__math_dispatch_error)