Skip to content

Commit 776e347

Browse files
committed
add missing ceil avx, sse functions
1 parent e7dedbf commit 776e347

File tree

2 files changed

+41
-3
lines changed

2 files changed

+41
-3
lines changed

runtime/libpgmath/lib/common/ceil.c

Lines changed: 34 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,8 +6,40 @@
66
*/
77

88
#include "mthdecls.h"
9-
#if defined(__SSE4_1__) || defined(__AVX__)
10-
#include <immintrin.h>
9+
10+
#if defined(TARGET_X8664)
11+
/*
12+
* For X8664, implement both SSE and AVX versions of __mth_i_ceil using ISA
13+
* instruction extensions.
14+
*
15+
* Using inline assembly allows both the SSE and AVX versions of the routine
16+
* to be compiled in a single unit.
17+
*
18+
* The following asm statement is equivalent to:
19+
* return _mm_cvtss_f32(_mm_ceil_ss(_mm_set1_ps(x), _mm_set1_ps(x)));
20+
* But without the need for separate compilations for SSE4.1 and AVX ISA
21+
* extensions.
22+
*/
23+
24+
/*
 * Single-precision ceil for x86-64 using the SSE4.1 ROUNDSS instruction.
 *
 * x        value to round
 * returns  x rounded toward positive infinity
 *
 * Named __mth_i_ceil_sse (not __mth_i_dceil_sse): the routine operates on
 * float, and the ceil/ss/sse4 entry of the dispatch table in mth_ceildefs.h
 * refers to __mth_i_ceil_sse.  The __mth_i_dceil_sse name is the one used by
 * the ds (double) entry of that table.
 *
 * The caller is responsible for only dispatching here on CPUs that support
 * SSE4.1; the instruction is emitted unconditionally.
 */
float
__mth_i_ceil_sse(float x)
{
  /*
   * Immediate 0x2: rounding mode taken from the immediate (not MXCSR),
   * round toward +infinity.  "+x" keeps x in an SSE register as both the
   * source and the destination operand.
   */
  __asm__("roundss $0x2,%0,%0" : "+x"(x));
  return x;
}
33+
34+
/*
 * Single-precision ceil for x86-64 using the AVX (VEX-encoded) VROUNDSS
 * instruction.
 *
 * x        value to round
 * returns  x rounded toward positive infinity
 *
 * Named __mth_i_ceil_avx (not __mth_i_dceil_avx): the routine operates on
 * float, and the ceil/ss entries for the avx and later targets in the
 * dispatch table in mth_ceildefs.h refer to __mth_i_ceil_avx.  The
 * __mth_i_dceil_avx name is the one used by the ds (double) entry of that
 * table.
 *
 * The caller is responsible for only dispatching here on CPUs that support
 * AVX; the instruction is emitted unconditionally.
 */
float
__mth_i_ceil_avx(float x)
{
  /*
   * Immediate 0x2: rounding mode taken from the immediate (not MXCSR),
   * round toward +infinity.  The three-operand VEX form uses the same
   * register for both sources and the destination.
   */
  __asm__("vroundss $0x2,%0,%0,%0" : "+x"(x));
  return x;
}
1143
#endif
1244

1345
float

runtime/libpgmath/lib/x86_64/math_tables/mth_ceildefs.h

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,13 @@
55
*
66
*/
77

8-
MTHINTRIN(ceil , ss , any , __mth_i_ceil , __mth_i_ceil , __mth_i_ceil ,__math_dispatch_error)
8+
/*
 * Dispatch rows for scalar single-precision (ss) ceil, one row per CPU
 * target.  The em64t row uses the generic __mth_i_ceil, the sse4 row uses
 * __mth_i_ceil_sse, and the avx, avxfma4, avx2, avx512knl and avx512 rows
 * all use __mth_i_ceil_avx.  Every row lists the same routine in all three
 * function columns and __math_dispatch_error in the last column.
 * NOTE(review): the three function columns are presumably per-accuracy-mode
 * variants - confirm against the MTHINTRIN macro definition.
 */
MTHINTRIN(ceil , ss , em64t , __mth_i_ceil , __mth_i_ceil , __mth_i_ceil ,__math_dispatch_error)
9+
MTHINTRIN(ceil , ss , sse4 , __mth_i_ceil_sse , __mth_i_ceil_sse , __mth_i_ceil_sse ,__math_dispatch_error)
10+
MTHINTRIN(ceil , ss , avx , __mth_i_ceil_avx , __mth_i_ceil_avx , __mth_i_ceil_avx ,__math_dispatch_error)
11+
MTHINTRIN(ceil , ss , avxfma4 , __mth_i_ceil_avx , __mth_i_ceil_avx , __mth_i_ceil_avx ,__math_dispatch_error)
12+
MTHINTRIN(ceil , ss , avx2 , __mth_i_ceil_avx , __mth_i_ceil_avx , __mth_i_ceil_avx ,__math_dispatch_error)
13+
MTHINTRIN(ceil , ss , avx512knl , __mth_i_ceil_avx , __mth_i_ceil_avx , __mth_i_ceil_avx ,__math_dispatch_error)
14+
MTHINTRIN(ceil , ss , avx512 , __mth_i_ceil_avx , __mth_i_ceil_avx , __mth_i_ceil_avx ,__math_dispatch_error)
915
MTHINTRIN(ceil , ds , em64t , __mth_i_dceil , __mth_i_dceil , __mth_i_dceil ,__math_dispatch_error)
1016
MTHINTRIN(ceil , ds , sse4 , __mth_i_dceil_sse , __mth_i_dceil_sse , __mth_i_dceil_sse ,__math_dispatch_error)
1117
MTHINTRIN(ceil , ds , avx , __mth_i_dceil_avx , __mth_i_dceil_avx , __mth_i_dceil_avx ,__math_dispatch_error)

0 commit comments

Comments
 (0)