diff --git a/sycl/include/sycl/builtins.hpp b/sycl/include/sycl/builtins.hpp index fe6eeaaec2e4e..9ff111c14583a 100644 --- a/sycl/include/sycl/builtins.hpp +++ b/sycl/include/sycl/builtins.hpp @@ -122,10 +122,27 @@ __SYCL_MATH_FUNCTION_OVERLOAD_FM(log2) __SYCL_MATH_FUNCTION_OVERLOAD_FM(log10) __SYCL_MATH_FUNCTION_OVERLOAD_FM(sqrt) __SYCL_MATH_FUNCTION_OVERLOAD_FM(rsqrt) +__SYCL_MATH_FUNCTION_OVERLOAD_FM(fabs) #undef __SYCL_MATH_FUNCTION_OVERLOAD_FM #undef __SYCL_MATH_FUNCTION_OVERLOAD_IMPL +template +inline __SYCL_ALWAYS_INLINE + std::enable_if_t::value, marray> + ilogb(marray x) __NOEXC { + marray res; + for (size_t i = 0; i < N / 2; i++) { + vec partial_res = + __sycl_std::__invoke_ilogb>(detail::to_vec2(x, i * 2)); + std::memcpy(&res[i * 2], &partial_res, sizeof(vec)); + } + if (N % 2) { + res[N - 1] = __sycl_std::__invoke_ilogb(x[N - 1]); + } + return res; +} + #define __SYCL_MATH_FUNCTION_2_OVERLOAD_IMPL(NAME) \ marray res; \ for (size_t i = 0; i < N / 2; i++) { \ @@ -170,6 +187,98 @@ inline __SYCL_ALWAYS_INLINE #undef __SYCL_MATH_FUNCTION_2_OVERLOAD_IMPL +#define __SYCL_MATH_FUNCTION_2_SGENFLOAT_Y_OVERLOAD(NAME) \ + template \ + inline __SYCL_ALWAYS_INLINE \ + std::enable_if_t::value, marray> \ + NAME(marray x, T y) __NOEXC { \ + marray res; \ + sycl::vec y_vec{y, y}; \ + for (size_t i = 0; i < N / 2; i++) { \ + auto partial_res = __sycl_std::__invoke_##NAME>( \ + detail::to_vec2(x, i * 2), y_vec); \ + std::memcpy(&res[i * 2], &partial_res, sizeof(vec)); \ + } \ + if (N % 2) { \ + res[N - 1] = __sycl_std::__invoke_##NAME(x[N - 1], y_vec[0]); \ + } \ + return res; \ + } + +__SYCL_MATH_FUNCTION_2_SGENFLOAT_Y_OVERLOAD(fmax) + // clang-format off +__SYCL_MATH_FUNCTION_2_SGENFLOAT_Y_OVERLOAD(fmin) + +#undef __SYCL_MATH_FUNCTION_2_SGENFLOAT_Y_OVERLOAD + +template +inline __SYCL_ALWAYS_INLINE + std::enable_if_t::value, marray> + ldexp(marray x, marray k) __NOEXC { + // clang-format on + marray res; + for (size_t i = 0; i < N; i++) { + res[i] = __sycl_std::__invoke_ldexp(x[i], k[i]); + } + return res; +} + +template +inline __SYCL_ALWAYS_INLINE + std::enable_if_t::value, marray> + ldexp(marray x, int k) __NOEXC { + marray res; + for (size_t i = 0; i < N; i++) { + res[i] = __sycl_std::__invoke_ldexp(x[i], k); + } + return res; +} + +#define __SYCL_MATH_FUNCTION_2_GENINT_Y_OVERLOAD_IMPL(NAME) \ + marray res; \ + for (size_t i = 0; i < N; i++) { \ + res[i] = __sycl_std::__invoke_##NAME(x[i], y[i]); \ + } \ + return res; + +template +inline __SYCL_ALWAYS_INLINE + std::enable_if_t::value, marray> + pown(marray x, marray y) __NOEXC { + __SYCL_MATH_FUNCTION_2_GENINT_Y_OVERLOAD_IMPL(pown) +} + +template +inline __SYCL_ALWAYS_INLINE + std::enable_if_t::value, marray> + rootn(marray x, marray y) __NOEXC { + __SYCL_MATH_FUNCTION_2_GENINT_Y_OVERLOAD_IMPL(rootn) +} + +#undef __SYCL_MATH_FUNCTION_2_GENINT_Y_OVERLOAD_IMPL + +#define __SYCL_MATH_FUNCTION_2_INT_Y_OVERLOAD_IMPL(NAME) \ + marray res; \ + for (size_t i = 0; i < N; i++) { \ + res[i] = __sycl_std::__invoke_##NAME(x[i], y); \ + } \ + return res; + +template +inline __SYCL_ALWAYS_INLINE + std::enable_if_t::value, marray> + pown(marray x, int y) __NOEXC { + __SYCL_MATH_FUNCTION_2_INT_Y_OVERLOAD_IMPL(pown) +} + +template +inline __SYCL_ALWAYS_INLINE + std::enable_if_t::value, marray> + rootn(marray x, + int y) __NOEXC{__SYCL_MATH_FUNCTION_2_INT_Y_OVERLOAD_IMPL(rootn)} + +#undef __SYCL_MATH_FUNCTION_2_INT_Y_OVERLOAD_IMPL + #define __SYCL_MATH_FUNCTION_3_OVERLOAD(NAME) \ template \ inline __SYCL_ALWAYS_INLINE \ @@ -789,6 +898,78 @@ detail::enable_if_t::value, T> sign(T x) __NOEXC { return __sycl_std::__invoke_sign(x); } +// marray common functions + +// TODO: can be optimized in the way math functions are optimized (usage of +// vec) +#define __SYCL_MARRAY_COMMON_FUNCTION_OVERLOAD_IMPL(NAME, ...) \ + T res; \ + for (int i = 0; i < T::size(); i++) { \ + res[i] = NAME(__VA_ARGS__); \ + } \ + return res; + +#define __SYCL_MARRAY_COMMON_FUNCTION_UNOP_OVERLOAD(NAME, ARG, ...) \ + template ::value>> \ + T NAME(ARG) __NOEXC { \ + __SYCL_MARRAY_COMMON_FUNCTION_OVERLOAD_IMPL(NAME, __VA_ARGS__) \ + } + +__SYCL_MARRAY_COMMON_FUNCTION_UNOP_OVERLOAD(degrees, T radians, radians[i]) +__SYCL_MARRAY_COMMON_FUNCTION_UNOP_OVERLOAD(radians, T degrees, degrees[i]) +__SYCL_MARRAY_COMMON_FUNCTION_UNOP_OVERLOAD(sign, T x, x[i]) + +#undef __SYCL_MARRAY_COMMON_FUNCTION_UNOP_OVERLOAD + +#define __SYCL_MARRAY_COMMON_FUNCTION_BINOP_OVERLOAD(NAME, ARG1, ARG2, ...) \ + template ::value>> \ + T NAME(ARG1, ARG2) __NOEXC { \ + __SYCL_MARRAY_COMMON_FUNCTION_OVERLOAD_IMPL(NAME, __VA_ARGS__) \ + } + +__SYCL_MARRAY_COMMON_FUNCTION_BINOP_OVERLOAD(min, T x, T y, x[i], y[i]) +__SYCL_MARRAY_COMMON_FUNCTION_BINOP_OVERLOAD(min, T x, + detail::marray_element_type y, + x[i], y) +__SYCL_MARRAY_COMMON_FUNCTION_BINOP_OVERLOAD(max, T x, T y, x[i], y[i]) +__SYCL_MARRAY_COMMON_FUNCTION_BINOP_OVERLOAD(max, T x, + detail::marray_element_type y, + x[i], y) +__SYCL_MARRAY_COMMON_FUNCTION_BINOP_OVERLOAD(step, T edge, T x, edge[i], x[i]) +__SYCL_MARRAY_COMMON_FUNCTION_BINOP_OVERLOAD( + step, detail::marray_element_type edge, T x, edge, x[i]) + +#undef __SYCL_MARRAY_COMMON_FUNCTION_BINOP_OVERLOAD + +#define __SYCL_MARRAY_COMMON_FUNCTION_TEROP_OVERLOAD(NAME, ARG1, ARG2, ARG3, \ + ...) \ + template ::value>> \ + T NAME(ARG1, ARG2, ARG3) __NOEXC { \ + __SYCL_MARRAY_COMMON_FUNCTION_OVERLOAD_IMPL(NAME, __VA_ARGS__) \ + } + +__SYCL_MARRAY_COMMON_FUNCTION_TEROP_OVERLOAD(clamp, T x, T minval, T maxval, + x[i], minval[i], maxval[i]) +__SYCL_MARRAY_COMMON_FUNCTION_TEROP_OVERLOAD( + clamp, T x, detail::marray_element_type minval, + detail::marray_element_type maxval, x[i], minval, maxval) +__SYCL_MARRAY_COMMON_FUNCTION_TEROP_OVERLOAD(mix, T x, T y, T a, x[i], y[i], + a[i]) +__SYCL_MARRAY_COMMON_FUNCTION_TEROP_OVERLOAD(mix, T x, T y, + detail::marray_element_type a, + x[i], y[i], a) +__SYCL_MARRAY_COMMON_FUNCTION_TEROP_OVERLOAD(smoothstep, T edge0, T edge1, T x, + edge0[i], edge1[i], x[i]) +__SYCL_MARRAY_COMMON_FUNCTION_TEROP_OVERLOAD( + smoothstep, detail::marray_element_type edge0, + detail::marray_element_type edge1, T x, edge0, edge1, x[i]) + +#undef __SYCL_MARRAY_COMMON_FUNCTION_TEROP_OVERLOAD +#undef __SYCL_MARRAY_COMMON_FUNCTION_OVERLOAD_IMPL + /* --------------- 4.13.4 Integer functions. --------------------------------*/ // ugeninteger abs (geninteger x) template @@ -1724,6 +1905,7 @@ __SYCL_HALF_PRECISION_MATH_FUNCTION_OVERLOAD(cos) __SYCL_HALF_PRECISION_MATH_FUNCTION_OVERLOAD(tan) __SYCL_HALF_PRECISION_MATH_FUNCTION_OVERLOAD(exp) __SYCL_HALF_PRECISION_MATH_FUNCTION_OVERLOAD(exp2) +__SYCL_HALF_PRECISION_MATH_FUNCTION_OVERLOAD(exp10) __SYCL_HALF_PRECISION_MATH_FUNCTION_OVERLOAD(log) __SYCL_HALF_PRECISION_MATH_FUNCTION_OVERLOAD(log2) __SYCL_HALF_PRECISION_MATH_FUNCTION_OVERLOAD(log10)