diff --git a/libclc/clc/include/clc/math/unary_builtin_scalarize.inc b/libclc/clc/include/clc/math/unary_builtin_scalarize.inc index 597dc3d8508a6..204b93f0b382b 100644 --- a/libclc/clc/include/clc/math/unary_builtin_scalarize.inc +++ b/libclc/clc/include/clc/math/unary_builtin_scalarize.inc @@ -17,10 +17,11 @@ #define __CLC_BUILTIN_H __CLC_BUILTIN_F #endif +#if (!defined(__HALF_ONLY) && !defined(__DOUBLE_ONLY)) _CLC_DEFINE_UNARY_BUILTIN_SCALARIZE(float, __CLC_FUNCTION, __CLC_BUILTIN_F, float) +#endif -#ifndef __FLOAT_ONLY - +#if (!defined(__HALF_ONLY) && !defined(__FLOAT_ONLY)) #ifdef cl_khr_fp64 #pragma OPENCL EXTENSION cl_khr_fp64 : enable @@ -28,7 +29,9 @@ _CLC_DEFINE_UNARY_BUILTIN_SCALARIZE(float, __CLC_FUNCTION, __CLC_BUILTIN_F, floa _CLC_DEFINE_UNARY_BUILTIN_SCALARIZE(double, __CLC_FUNCTION, __CLC_BUILTIN_D, double) #endif +#endif // (!defined(__HALF_ONLY) && !defined(__FLOAT_ONLY)) +#if (!defined(__FLOAT_ONLY) && !defined(__DOUBLE_ONLY)) #ifdef cl_khr_fp16 #pragma OPENCL EXTENSION cl_khr_fp16 : enable @@ -36,5 +39,4 @@ _CLC_DEFINE_UNARY_BUILTIN_SCALARIZE(double, __CLC_FUNCTION, __CLC_BUILTIN_D, dou _CLC_DEFINE_UNARY_BUILTIN_SCALARIZE(half, __CLC_FUNCTION, __CLC_BUILTIN_H, half) #endif - -#endif // !__FLOAT_ONLY +#endif // (!defined(__FLOAT_ONLY) && !defined(__DOUBLE_ONLY)) diff --git a/libclc/clc/lib/amdgcn/SOURCES b/libclc/clc/lib/amdgcn/SOURCES index 3a48049271aff..de58ba849bb51 100644 --- a/libclc/clc/lib/amdgcn/SOURCES +++ b/libclc/clc/lib/amdgcn/SOURCES @@ -1,3 +1,9 @@ +math/clc_exp.cl math/clc_fmax.cl math/clc_fmin.cl math/clc_ldexp_override.cl +math/clc_lgamma.cl +math/clc_log.cl +math/clc_sinpi.cl +math/clc_sqrt.cl +math/clc_sqrt_fp64.cl diff --git a/libclc/libspirv/lib/amdgcn-amdhsa/math/exp.cl b/libclc/clc/lib/amdgcn/math/clc_exp.cl similarity index 90% rename from libclc/libspirv/lib/amdgcn-amdhsa/math/exp.cl rename to libclc/clc/lib/amdgcn/math/clc_exp.cl index ac4cbb771a4d4..7f29777ae5b77 100644 --- a/libclc/libspirv/lib/amdgcn-amdhsa/math/exp.cl +++ b/libclc/clc/lib/amdgcn/math/clc_exp.cl @@ -6,10 +6,10 @@ // //===----------------------------------------------------------------------===// -#include -#include +#include +#include -#define __CLC_FUNCTION __spirv_ocl_exp +#define __CLC_FUNCTION __clc_exp #define __CLC_BUILTIN __ocml_exp float __ocml_exp_f32(float); diff --git a/libclc/clc/lib/amdgcn/math/clc_fmax.cl b/libclc/clc/lib/amdgcn/math/clc_fmax.cl index 20bdcadb9eabf..652381bd6e1d3 100644 --- a/libclc/clc/lib/amdgcn/math/clc_fmax.cl +++ b/libclc/clc/lib/amdgcn/math/clc_fmax.cl @@ -6,44 +6,25 @@ // //===----------------------------------------------------------------------===// -#include #include -#include +#include -_CLC_DEF _CLC_OVERLOAD float __clc_fmax(float x, float y) { - // fcanonicalize removes sNaNs and flushes denormals if not enabled. Otherwise - // fmax instruction flushes the values for comparison, but outputs original - // denormal - x = __builtin_canonicalizef(x); - y = __builtin_canonicalizef(y); - return __builtin_fmaxf(x, y); -} -_CLC_BINARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, float, __clc_fmax, float, float) +#define __CLC_FUNCTION __clc_fmax +#define __CLC_BUILTIN __ocml_fmax -#ifdef cl_khr_fp64 +float __ocml_fmax_f32(float, float); +#define __CLC_BUILTIN_F __CLC_XCONCAT(__CLC_BUILTIN, _f32) +#ifdef cl_khr_fp64 #pragma OPENCL EXTENSION cl_khr_fp64 : enable +double __ocml_fmax_f64(double, double); +#define __CLC_BUILTIN_D __CLC_XCONCAT(__CLC_BUILTIN, _f64) +#endif // cl_khr_fp64 -_CLC_DEF _CLC_OVERLOAD double __clc_fmax(double x, double y) { - x = __builtin_canonicalize(x); - y = __builtin_canonicalize(y); - return __builtin_fmax(x, y); -} -_CLC_BINARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, __clc_fmax, double, - double) - -#endif #ifdef cl_khr_fp16 - #pragma OPENCL EXTENSION cl_khr_fp16 : enable +half __ocml_fmax_f16(half, half); +#define __CLC_BUILTIN_H __CLC_XCONCAT(__CLC_BUILTIN, _f16) +#endif // cl_khr_fp16 -_CLC_DEF _CLC_OVERLOAD half __clc_fmax(half x, half y) { - if (__clc_isnan(x)) - return y; - if (__clc_isnan(y)) - return x; - return (y < x) ? x : y; -} -_CLC_BINARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, half, __clc_fmax, half, half) - -#endif +#include diff --git a/libclc/clc/lib/amdgcn/math/clc_fmin.cl b/libclc/clc/lib/amdgcn/math/clc_fmin.cl index a5f66dfefa900..c07d6aaaefa9c 100644 --- a/libclc/clc/lib/amdgcn/math/clc_fmin.cl +++ b/libclc/clc/lib/amdgcn/math/clc_fmin.cl @@ -6,45 +6,25 @@ // //===----------------------------------------------------------------------===// -#include #include -#include +#include -_CLC_DEF _CLC_OVERLOAD float __clc_fmin(float x, float y) { - // fcanonicalize removes sNaNs and flushes denormals if not enabled. Otherwise - // fmin instruction flushes the values for comparison, but outputs original - // denormal - x = __builtin_canonicalizef(x); - y = __builtin_canonicalizef(y); - return __builtin_fminf(x, y); -} -_CLC_BINARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, float, __clc_fmin, float, float) +#define __CLC_FUNCTION __clc_fmin +#define __CLC_BUILTIN __ocml_fmin -#ifdef cl_khr_fp64 +float __ocml_fmin_f32(float, float); +#define __CLC_BUILTIN_F __CLC_XCONCAT(__CLC_BUILTIN, _f32) +#ifdef cl_khr_fp64 #pragma OPENCL EXTENSION cl_khr_fp64 : enable - -_CLC_DEF _CLC_OVERLOAD double __clc_fmin(double x, double y) { - x = __builtin_canonicalize(x); - y = __builtin_canonicalize(y); - return __builtin_fmin(x, y); -} -_CLC_BINARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, __clc_fmin, double, - double) - -#endif +double __ocml_fmin_f64(double, double); +#define __CLC_BUILTIN_D __CLC_XCONCAT(__CLC_BUILTIN, _f64) +#endif // cl_khr_fp64 #ifdef cl_khr_fp16 - #pragma OPENCL EXTENSION cl_khr_fp16 : enable +half __ocml_fmin_f16(half, half); +#define __CLC_BUILTIN_H __CLC_XCONCAT(__CLC_BUILTIN, _f16) +#endif // cl_khr_fp16 -_CLC_DEF _CLC_OVERLOAD half __clc_fmin(half x, half y) { - if (__clc_isnan(x)) - return y; - if (__clc_isnan(y)) - return x; - return (y < x) ? y : x; -} -_CLC_BINARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, half, __clc_fmin, half, half) - -#endif +#include diff --git a/libclc/libspirv/lib/amdgcn-amdhsa/math/lgamma.cl b/libclc/clc/lib/amdgcn/math/clc_lgamma.cl similarity index 90% rename from libclc/libspirv/lib/amdgcn-amdhsa/math/lgamma.cl rename to libclc/clc/lib/amdgcn/math/clc_lgamma.cl index c6296ce7a108e..2cd3d3cb7131f 100644 --- a/libclc/libspirv/lib/amdgcn-amdhsa/math/lgamma.cl +++ b/libclc/clc/lib/amdgcn/math/clc_lgamma.cl @@ -6,10 +6,10 @@ // //===----------------------------------------------------------------------===// -#include -#include +#include +#include -#define __CLC_FUNCTION __spirv_ocl_lgamma +#define __CLC_FUNCTION __clc_lgamma #define __CLC_BUILTIN __ocml_lgamma float __ocml_lgamma_f32(float); diff --git a/libclc/libspirv/lib/amdgcn-amdhsa/math/log.cl b/libclc/clc/lib/amdgcn/math/clc_log.cl similarity index 90% rename from libclc/libspirv/lib/amdgcn-amdhsa/math/log.cl rename to libclc/clc/lib/amdgcn/math/clc_log.cl index 2981a84060a0e..52a3f93b200ac 100644 --- a/libclc/libspirv/lib/amdgcn-amdhsa/math/log.cl +++ b/libclc/clc/lib/amdgcn/math/clc_log.cl @@ -6,10 +6,10 @@ // //===----------------------------------------------------------------------===// -#include -#include +#include +#include -#define __CLC_FUNCTION __spirv_ocl_log +#define __CLC_FUNCTION __clc_log #define __CLC_BUILTIN __ocml_log float __ocml_log_f32(float); diff --git a/libclc/libspirv/lib/amdgcn-amdhsa/math/sinpi.cl b/libclc/clc/lib/amdgcn/math/clc_sinpi.cl similarity index 90% rename from libclc/libspirv/lib/amdgcn-amdhsa/math/sinpi.cl rename to libclc/clc/lib/amdgcn/math/clc_sinpi.cl index ae4f8c9b6060e..c39605fe5b80f 100644 --- a/libclc/libspirv/lib/amdgcn-amdhsa/math/sinpi.cl +++ b/libclc/clc/lib/amdgcn/math/clc_sinpi.cl @@ -6,10 +6,10 @@ // //===----------------------------------------------------------------------===// -#include -#include +#include +#include -#define __CLC_FUNCTION __spirv_ocl_sinpi +#define __CLC_FUNCTION __clc_sinpi #define __CLC_BUILTIN __ocml_sinpi float __ocml_sinpi_f32(float); diff --git a/libclc/libspirv/lib/amdgcn-amdhsa/math/sqrt.cl b/libclc/clc/lib/amdgcn/math/clc_sqrt.cl similarity index 72% rename from libclc/libspirv/lib/amdgcn-amdhsa/math/sqrt.cl rename to libclc/clc/lib/amdgcn/math/clc_sqrt.cl index 29ea05be4ceb5..d7dbcca76af8c 100644 --- a/libclc/libspirv/lib/amdgcn-amdhsa/math/sqrt.cl +++ b/libclc/clc/lib/amdgcn/math/clc_sqrt.cl @@ -6,20 +6,20 @@ // //===----------------------------------------------------------------------===// -#include -#include +#include +#include -#define __CLC_FUNCTION __spirv_ocl_sqrt +#define __CLC_FUNCTION __clc_sqrt #define __CLC_BUILTIN __ocml_sqrt float __ocml_sqrt_f32(float); #define __CLC_BUILTIN_F __CLC_XCONCAT(__CLC_BUILTIN, _f32) -#ifdef cl_khr_fp64 -#pragma OPENCL EXTENSION cl_khr_fp64 : enable -double __ocml_sqrt_f64(double); -#define __CLC_BUILTIN_D __CLC_XCONCAT(__CLC_BUILTIN, _f64) -#endif // cl_khr_fp64 +#define __FLOAT_ONLY +#include + +#undef __FLOAT_ONLY +#undef __CLC_BUILTIN_H #ifdef cl_khr_fp16 #pragma OPENCL EXTENSION cl_khr_fp16 : enable @@ -27,4 +27,5 @@ half __ocml_sqrt_f16(half); #define __CLC_BUILTIN_H __CLC_XCONCAT(__CLC_BUILTIN, _f16) #endif // cl_khr_fp16 +#define __HALF_ONLY #include diff --git a/libclc/clc/lib/amdgcn/math/clc_sqrt_fp64.cl b/libclc/clc/lib/amdgcn/math/clc_sqrt_fp64.cl new file mode 100644 index 0000000000000..f2d469b0a1fe3 --- /dev/null +++ b/libclc/clc/lib/amdgcn/math/clc_sqrt_fp64.cl @@ -0,0 +1,22 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include +#include + +#define __CLC_FUNCTION __clc_sqrt +#define __CLC_BUILTIN __ocml_sqrt + +#ifdef cl_khr_fp64 +#pragma OPENCL EXTENSION cl_khr_fp64 : enable +double __ocml_sqrt_f64(double); +#define __CLC_BUILTIN_D __CLC_XCONCAT(__CLC_BUILTIN, _f64) +#endif // cl_khr_fp64 + +#define __DOUBLE_ONLY +#include diff --git a/libclc/clc/lib/ptx-nvidiacl/SOURCES b/libclc/clc/lib/ptx-nvidiacl/SOURCES new file mode 100644 index 0000000000000..1e1594b7665da --- /dev/null +++ b/libclc/clc/lib/ptx-nvidiacl/SOURCES @@ -0,0 +1,3 @@ +math/clc_log.cl +math/clc_sinpi.cl +math/clc_sqrt.cl diff --git a/libclc/libspirv/lib/ptx-nvidiacl/math/log.cl b/libclc/clc/lib/ptx-nvidiacl/math/clc_log.cl similarity index 78% rename from libclc/libspirv/lib/ptx-nvidiacl/math/log.cl rename to libclc/clc/lib/ptx-nvidiacl/math/clc_log.cl index 62d020f177d6f..ef8520d9538d4 100644 --- a/libclc/libspirv/lib/ptx-nvidiacl/math/log.cl +++ b/libclc/clc/lib/ptx-nvidiacl/math/clc_log.cl @@ -6,12 +6,13 @@ // //===----------------------------------------------------------------------===// -#include +#include +#include -#include -#include +float __nv_logf(float); +double __nv_log(double); -#define __CLC_FUNCTION __spirv_ocl_log +#define __CLC_FUNCTION __clc_log #define __CLC_BUILTIN __nv_log #define __CLC_BUILTIN_F __CLC_XCONCAT(__CLC_BUILTIN, f) #include diff --git a/libclc/libspirv/lib/ptx-nvidiacl/math/sinpi.cl b/libclc/clc/lib/ptx-nvidiacl/math/clc_sinpi.cl similarity index 77% rename from libclc/libspirv/lib/ptx-nvidiacl/math/sinpi.cl rename to libclc/clc/lib/ptx-nvidiacl/math/clc_sinpi.cl index eb3cacbcce919..0bc30bf771196 100644 --- a/libclc/libspirv/lib/ptx-nvidiacl/math/sinpi.cl +++ b/libclc/clc/lib/ptx-nvidiacl/math/clc_sinpi.cl @@ -6,12 +6,13 @@ // //===----------------------------------------------------------------------===// -#include +#include +#include -#include -#include +float __nv_sinpif(float); +double __nv_sinpi(double); -#define __CLC_FUNCTION __spirv_ocl_sinpi +#define __CLC_FUNCTION __clc_sinpi #define __CLC_BUILTIN __nv_sinpi #define __CLC_BUILTIN_F __CLC_XCONCAT(__CLC_BUILTIN, f) #include diff --git a/libclc/libspirv/lib/ptx-nvidiacl/math/sqrt.cl b/libclc/clc/lib/ptx-nvidiacl/math/clc_sqrt.cl similarity index 77% rename from libclc/libspirv/lib/ptx-nvidiacl/math/sqrt.cl rename to libclc/clc/lib/ptx-nvidiacl/math/clc_sqrt.cl index fbfa98fcff66b..ecaad58eec014 100644 --- a/libclc/libspirv/lib/ptx-nvidiacl/math/sqrt.cl +++ b/libclc/clc/lib/ptx-nvidiacl/math/clc_sqrt.cl @@ -6,11 +6,13 @@ // //===----------------------------------------------------------------------===// -#include +#include +#include -#include +float __nv_sqrtf(float); +double __nv_sqrt(double); -#define __CLC_FUNCTION __spirv_ocl_sqrt +#define __CLC_FUNCTION __clc_sqrt #define __CLC_BUILTIN __nv_sqrt #define __CLC_BUILTIN_F __CLC_XCONCAT(__CLC_BUILTIN, f) #include diff --git a/libclc/libspirv/lib/amdgcn-amdhsa/SOURCES b/libclc/libspirv/lib/amdgcn-amdhsa/SOURCES index 25ad07b6b8c18..9444083daeca2 100644 --- a/libclc/libspirv/lib/amdgcn-amdhsa/SOURCES +++ b/libclc/libspirv/lib/amdgcn-amdhsa/SOURCES @@ -33,20 +33,15 @@ math/cosh.cl math/cospi.cl math/erf.cl math/erfc.cl -math/exp.cl math/exp10.cl math/exp2.cl math/expm1.cl math/fdim.cl -math/fmax.cl -math/fmin.cl math/fmod.cl math/frexp.cl math/hypot.cl math/ilogb.cl math/ldexp.cl -math/lgamma.cl -math/log.cl math/log2.cl math/log10.cl math/log1p.cl @@ -60,8 +55,6 @@ math/rsqrt.cl math/sin.cl math/sincos.cl math/sinh.cl -math/sinpi.cl -math/sqrt.cl math/tan.cl math/tanh.cl workitem/get_global_size.cl diff --git a/libclc/libspirv/lib/amdgcn-amdhsa/math/fmax.cl b/libclc/libspirv/lib/amdgcn-amdhsa/math/fmax.cl deleted file mode 100644 index c06800d00273c..0000000000000 --- a/libclc/libspirv/lib/amdgcn-amdhsa/math/fmax.cl +++ /dev/null @@ -1,30 +0,0 @@ -//===----------------------------------------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include -#include - -#define __CLC_FUNCTION __spirv_ocl_fmax -#define __CLC_BUILTIN __ocml_fmax - -float __ocml_fmax_f32(float, float); -#define __CLC_BUILTIN_F __CLC_XCONCAT(__CLC_BUILTIN, _f32) - -#ifdef cl_khr_fp64 -#pragma OPENCL EXTENSION cl_khr_fp64 : enable -double __ocml_fmax_f64(double, double); -#define __CLC_BUILTIN_D __CLC_XCONCAT(__CLC_BUILTIN, _f64) -#endif // cl_khr_fp64 - -#ifdef cl_khr_fp16 -#pragma OPENCL EXTENSION cl_khr_fp16 : enable -half __ocml_fmax_f16(half, half); -#define __CLC_BUILTIN_H __CLC_XCONCAT(__CLC_BUILTIN, _f16) -#endif // cl_khr_fp16 - -#include diff --git a/libclc/libspirv/lib/amdgcn-amdhsa/math/fmin.cl b/libclc/libspirv/lib/amdgcn-amdhsa/math/fmin.cl deleted file mode 100644 index 58152d666c973..0000000000000 --- a/libclc/libspirv/lib/amdgcn-amdhsa/math/fmin.cl +++ /dev/null @@ -1,30 +0,0 @@ -//===----------------------------------------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include -#include - -#define __CLC_FUNCTION __spirv_ocl_fmin -#define __CLC_BUILTIN __ocml_fmin - -float __ocml_fmin_f32(float, float); -#define __CLC_BUILTIN_F __CLC_XCONCAT(__CLC_BUILTIN, _f32) - -#ifdef cl_khr_fp64 -#pragma OPENCL EXTENSION cl_khr_fp64 : enable -double __ocml_fmin_f64(double, double); -#define __CLC_BUILTIN_D __CLC_XCONCAT(__CLC_BUILTIN, _f64) -#endif // cl_khr_fp64 - -#ifdef cl_khr_fp16 -#pragma OPENCL EXTENSION cl_khr_fp16 : enable -half __ocml_fmin_f16(half, half); -#define __CLC_BUILTIN_H __CLC_XCONCAT(__CLC_BUILTIN, _f16) -#endif // cl_khr_fp16 - -#include diff --git a/libclc/libspirv/lib/generic/SOURCES b/libclc/libspirv/lib/generic/SOURCES index 76de074ec034a..0a1a7b2ed471b 100644 --- a/libclc/libspirv/lib/generic/SOURCES +++ b/libclc/libspirv/lib/generic/SOURCES @@ -145,7 +145,6 @@ math/sinh.cl math/sinpi.cl math/sqrt.cl math/rsqrt.cl -math/tables.cl math/tan.cl math/tanh.cl math/tanpi.cl diff --git a/libclc/libspirv/lib/generic/common/sign.cl b/libclc/libspirv/lib/generic/common/sign.cl index 6e6dad03fd1d7..26192fa4c4b20 100644 --- a/libclc/libspirv/lib/generic/common/sign.cl +++ b/libclc/libspirv/lib/generic/common/sign.cl @@ -6,40 +6,11 @@ // //===----------------------------------------------------------------------===// -#include +#include #include -#define SIGN(TYPE, F) \ - _CLC_DEF _CLC_OVERLOAD TYPE __spirv_ocl_sign(TYPE x) { \ - if (__spirv_IsNan(x)) { \ - return 0.0F; \ - } \ - if (x > 0.0F) { \ - return 1.0F; \ - } \ - if (x < 0.0F) { \ - return -1.0F; \ - } \ - return x; /* -0.0 or +0.0 */ \ - } +#define FUNCTION __spirv_ocl_sign +#define __CLC_FUNCTION(x) __clc_sign +#define __CLC_BODY -SIGN(float, f) -_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, float, __spirv_ocl_sign, float) - -#ifdef cl_khr_fp64 - -#pragma OPENCL EXTENSION cl_khr_fp64 : enable - -SIGN(double, ) -_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, __spirv_ocl_sign, double) - -#endif - -#ifdef cl_khr_fp16 - -#pragma OPENCL EXTENSION cl_khr_fp16 : enable - -SIGN(half, h) -_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, half, __spirv_ocl_sign, half) - -#endif +#include diff --git a/libclc/libspirv/lib/generic/common/step.cl b/libclc/libspirv/lib/generic/common/step.cl index 545e645b2e4c4..c6da6035776d8 100644 --- a/libclc/libspirv/lib/generic/common/step.cl +++ b/libclc/libspirv/lib/generic/common/step.cl @@ -6,29 +6,10 @@ // //===----------------------------------------------------------------------===// +#include #include -#include - -#define STEP_DEF(TYPE, TYPOSTFIX) \ - _CLC_OVERLOAD _CLC_DEF TYPE __spirv_ocl_step(TYPE edge, TYPE x) { \ - return x < edge ? 0.0##TYPOSTFIX : 1.0##TYPOSTFIX; \ - } \ - _CLC_BINARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, TYPE, __spirv_ocl_step, TYPE, \ - TYPE) - -STEP_DEF(float, f) - -#ifdef cl_khr_fp64 -#pragma OPENCL EXTENSION cl_khr_fp64 : enable - -STEP_DEF(double, ) - -#endif - -#ifdef cl_khr_fp16 -#pragma OPENCL EXTENSION cl_khr_fp16 : enable - -STEP_DEF(half, h) - -#endif +#define FUNCTION __spirv_ocl_step +#define __CLC_FUNCTION(x) __clc_step +#define __CLC_BODY +#include diff --git a/libclc/libspirv/lib/generic/math/acos.cl b/libclc/libspirv/lib/generic/math/acos.cl index 44d723be5c3ea..feccbd4ca08f6 100644 --- a/libclc/libspirv/lib/generic/math/acos.cl +++ b/libclc/libspirv/lib/generic/math/acos.cl @@ -6,7 +6,10 @@ // //===----------------------------------------------------------------------===// +#include #include -#define __CLC_BODY +#define FUNCTION __spirv_ocl_acos +#define __CLC_FUNCTION(x) __clc_acos +#define __CLC_BODY #include diff --git a/libclc/libspirv/lib/generic/math/acos.inc b/libclc/libspirv/lib/generic/math/acos.inc deleted file mode 100644 index 947730bead1f8..0000000000000 --- a/libclc/libspirv/lib/generic/math/acos.inc +++ /dev/null @@ -1,37 +0,0 @@ -//===----------------------------------------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -/* - * There are multiple formulas for calculating arccosine of x: - * 1) acos(x) = (1/2*pi) + i * ln(i*x + sqrt(1-x^2)) (notice the 'i'...) - * 2) acos(x) = pi/2 + asin(-x) (asin isn't implemented yet) - * 3) acos(x) = pi/2 - asin(x) (ditto) - * 4) acos(x) = 2*atan2(sqrt(1-x), sqrt(1+x)) - * 5) acos(x) = pi/2 - atan2(x, ( sqrt(1-x^2) ) ) - * - * Options 1-3 are not currently usable, #5 generates more concise radeonsi - * bitcode and assembly than #4 (134 vs 132 instructions on radeonsi), but - * precision of #4 may be better. - */ - -#if __CLC_FPSIZE == 64 -#define __CLC_CONST(x) x -#elif __CLC_FPSIZE == 32 -#define __CLC_CONST(x) x##f -#elif __CLC_FPSIZE == 16 -#define __CLC_CONST(x) x##h -#endif - -_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __spirv_ocl_acos(__CLC_GENTYPE x) { - return ( - (__CLC_GENTYPE)__CLC_CONST(2.0) * - __spirv_ocl_atan2(__spirv_ocl_sqrt((__CLC_GENTYPE)__CLC_CONST(1.0) - x), - __spirv_ocl_sqrt((__CLC_GENTYPE)__CLC_CONST(1.0) + x))); -} - -#undef __CLC_CONST diff --git a/libclc/libspirv/lib/generic/math/acosh.cl b/libclc/libspirv/lib/generic/math/acosh.cl index a96004db77d62..4f00bb2ef4db4 100644 --- a/libclc/libspirv/lib/generic/math/acosh.cl +++ b/libclc/libspirv/lib/generic/math/acosh.cl @@ -6,118 +6,10 @@ // //===----------------------------------------------------------------------===// +#include #include -#include "ep_log.h" -#include -#include - -_CLC_OVERLOAD _CLC_DEF float __spirv_ocl_acosh(float x) { - uint ux = __clc_as_uint(x); - - // Arguments greater than 1/sqrt(epsilon) in magnitude are - // approximated by acosh(x) = ln(2) + ln(x) - // For 2.0 <= x <= 1/sqrt(epsilon) the approximation is - // acosh(x) = ln(x + sqrt(x*x-1)) */ - int high = ux > 0x46000000U; - int med = ux > 0x40000000U; - - float w = x - 1.0f; - float s = w * w + 2.0f * w; - float t = x * x - 1.0f; - float r = __spirv_ocl_sqrt(med ? t : s) + (med ? x : w); - float v = (high ? x : r) - (med ? 1.0f : 0.0f); - float z = __spirv_ocl_log1p(v) + (high ? 0x1.62e430p-1f : 0.0f); - - z = ux >= PINFBITPATT_SP32 ? x : z; - z = x < 1.0f ? __clc_as_float(QNANBITPATT_SP32) : z; - - return z; -} - -_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, float, __spirv_ocl_acosh, float) - -#ifdef cl_khr_fp64 -#pragma OPENCL EXTENSION cl_khr_fp64 : enable - -_CLC_OVERLOAD _CLC_DEF double __spirv_ocl_acosh(double x) { - const double recrteps = - 0x1.6a09e667f3bcdp+26; // 1/sqrt(eps) = 9.49062656242515593767e+07 - // log2_lead and log2_tail sum to an extra-precise version of log(2) - const double log2_lead = 0x1.62e42ep-1; - const double log2_tail = 0x1.efa39ef35793cp-25; - - // Handle x >= 128 here - int xlarge = x > recrteps; - double r = x + __spirv_ocl_sqrt(__spirv_ocl_fma(x, x, -1.0)); - r = xlarge ? x : r; - - int xexp; - double r1, r2; - __clc_ep_log(r, &xexp, &r1, &r2); - - double dxexp = xexp + xlarge; - r1 = __spirv_ocl_fma(dxexp, log2_lead, r1); - r2 = __spirv_ocl_fma(dxexp, log2_tail, r2); - - double ret1 = r1 + r2; - - // Handle 1 < x < 128 here - // We compute the value - // t = x - 1.0 + sqrt(2.0*(x - 1.0) + (x - 1.0)*(x - 1.0)) - // using simulated quad precision. - double t = x - 1.0; - double u1 = t * 2.0; - - // (t,0) * (t,0) -> (v1, v2) - double v1 = t * t; - double v2 = __spirv_ocl_fma(t, t, -v1); - - // (u1,0) + (v1,v2) -> (w1,w2) - r = u1 + v1; - double s = (((u1 - r) + v1) + v2); - double w1 = r + s; - double w2 = (r - w1) + s; - - // sqrt(w1,w2) -> (u1,u2) - double p1 = __spirv_ocl_sqrt(w1); - double a1 = p1 * p1; - double a2 = __spirv_ocl_fma(p1, p1, -a1); - double temp = (((w1 - a1) - a2) + w2); - double p2 = MATH_DIVIDE(temp * 0.5, p1); - u1 = p1 + p2; - double u2 = (p1 - u1) + p2; - - // (u1,u2) + (t,0) -> (r1,r2) - r = u1 + t; - s = ((u1 - r) + t) + u2; - // r1 = r + s; - // r2 = (r - r1) + s; - // t = r1 + r2; - t = r + s; - - // For arguments 1.13 <= x <= 1.5 the log1p function is good enough - double ret2 = __spirv_ocl_log1p(t); - - ulong ux = __clc_as_ulong(x); - double ret = x >= 128.0 ? ret1 : ret2; - - ret = ux >= 0x7FF0000000000000 ? x : ret; - ret = x == 1.0 ? 0.0 : ret; - ret = - (ux & SIGNBIT_DP64) != 0UL || x < 1.0 ? __clc_as_double(QNANBITPATT_DP64) : ret; - - return ret; -} - -_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, __spirv_ocl_acosh, double) - -#endif - -#ifdef cl_khr_fp16 - -#pragma OPENCL EXTENSION cl_khr_fp16 : enable - -_CLC_DEFINE_UNARY_BUILTIN_SCALARIZE(half, __spirv_ocl_acosh, __builtin_acoshf, half) - -#endif +#define FUNCTION __spirv_ocl_acosh +#define __CLC_FUNCTION(x) __clc_acosh +#define __CLC_BODY +#include diff --git a/libclc/libspirv/lib/generic/math/acospi.cl b/libclc/libspirv/lib/generic/math/acospi.cl index 294915f983ece..6caaaf1dfc671 100644 --- a/libclc/libspirv/lib/generic/math/acospi.cl +++ b/libclc/libspirv/lib/generic/math/acospi.cl @@ -6,179 +6,10 @@ // //===----------------------------------------------------------------------===// +#include #include -#include -#include - -_CLC_OVERLOAD _CLC_DEF float __spirv_ocl_acospi(float x) { - // Computes arccos(x). - // The argument is first reduced by noting that arccos(x) - // is invalid for abs(x) > 1. For denormal and small - // arguments arccos(x) = pi/2 to machine accuracy. - // Remaining argument ranges are handled as follows. - // For abs(x) <= 0.5 use - // arccos(x) = pi/2 - arcsin(x) - // = pi/2 - (x + x^3*R(x^2)) - // where R(x^2) is a rational minimax approximation to - // (arcsin(x) - x)/x^3. - // For abs(x) > 0.5 exploit the identity: - // arccos(x) = pi - 2*arcsin(sqrt(1-x)/2) - // together with the above rational approximation, and - // reconstruct the terms carefully. - - // Some constants and split constants. - const float pi = 3.1415926535897933e+00f; - const float piby2_head = 1.5707963267948965580e+00f; /* 0x3ff921fb54442d18 */ - const float piby2_tail = 6.12323399573676603587e-17f; /* 0x3c91a62633145c07 */ - - uint ux = __clc_as_uint(x); - uint aux = ux & ~SIGNBIT_SP32; - int xneg = ux != aux; - int xexp = (int)(aux >> EXPSHIFTBITS_SP32) - EXPBIAS_SP32; - - float y = __clc_as_float(aux); - - // transform if |x| >= 0.5 - int transform = xexp >= -1; - - float y2 = y * y; - float yt = 0.5f * (1.0f - y); - float r = transform ? yt : y2; - - // Use a rational approximation for [0.0, 0.5] - float a = __spirv_ocl_mad( - r, - __spirv_ocl_mad(r, - __spirv_ocl_mad(r, -0.00396137437848476485201154797087F, - -0.0133819288943925804214011424456F), - -0.0565298683201845211985026327361F), - 0.184161606965100694821398249421F); - float b = __spirv_ocl_mad(r, -0.836411276854206731913362287293F, - 1.10496961524520294485512696706F); - float u = r * MATH_DIVIDE(a, b); - - float s = __spirv_ocl_sqrt(r); - y = s; - float s1 = __clc_as_float(__clc_as_uint(s) & 0xffff0000); - float c = MATH_DIVIDE(r - s1 * s1, s + s1); - // float rettn = 1.0f - MATH_DIVIDE(2.0f * (s + (y * u - piby2_tail)), pi); - float rettn = - 1.0f - MATH_DIVIDE(2.0f * (s + __spirv_ocl_mad(y, u, -piby2_tail)), pi); - // float rettp = MATH_DIVIDE(2.0F * s1 + (2.0F * c + 2.0F * y * u), pi); - float rettp = MATH_DIVIDE(2.0f * (s1 + __spirv_ocl_mad(y, u, c)), pi); - float rett = xneg ? rettn : rettp; - // float ret = MATH_DIVIDE(piby2_head - (x - (piby2_tail - x * u)), pi); - float ret = - MATH_DIVIDE(piby2_head - (x - __spirv_ocl_mad(x, -u, piby2_tail)), pi); - - ret = transform ? rett : ret; - ret = aux > 0x3f800000U ? __clc_as_float(QNANBITPATT_SP32) : ret; - ret = ux == 0x3f800000U ? 0.0f : ret; - ret = ux == 0xbf800000U ? 1.0f : ret; - ret = xexp < -26 ? 0.5f : ret; - return ret; -} - -_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, float, __spirv_ocl_acospi, float) - -#ifdef cl_khr_fp64 -#pragma OPENCL EXTENSION cl_khr_fp64 : enable - -_CLC_OVERLOAD _CLC_DEF double __spirv_ocl_acospi(double x) { - // Computes arccos(x). - // The argument is first reduced by noting that arccos(x) - // is invalid for abs(x) > 1. For denormal and small - // arguments arccos(x) = pi/2 to machine accuracy. - // Remaining argument ranges are handled as follows. - // For abs(x) <= 0.5 use - // arccos(x) = pi/2 - arcsin(x) - // = pi/2 - (x + x^3*R(x^2)) - // where R(x^2) is a rational minimax approximation to - // (arcsin(x) - x)/x^3. - // For abs(x) > 0.5 exploit the identity: - // arccos(x) = pi - 2*arcsin(sqrt(1-x)/2) - // together with the above rational approximation, and - // reconstruct the terms carefully. - - const double pi = 0x1.921fb54442d18p+1; - const double piby2_tail = 6.12323399573676603587e-17; /* 0x3c91a62633145c07 */ - - double y = __spirv_ocl_fabs(x); - int xneg = __clc_as_int2(x).hi < 0; - int xexp = (__clc_as_int2(y).hi >> 20) - EXPBIAS_DP64; - - // abs(x) >= 0.5 - int transform = xexp >= -1; - - // Transform y into the range [0,0.5) - double r1 = 0.5 * (1.0 - y); - double s = __spirv_ocl_sqrt(r1); - double r = y * y; - r = transform ? r1 : r; - y = transform ? s : y; - - // Use a rational approximation for [0.0, 0.5] - double un = __spirv_ocl_fma( - r, - __spirv_ocl_fma( - r, - __spirv_ocl_fma( - r, - __spirv_ocl_fma( - r, - __spirv_ocl_fma(r, 0.0000482901920344786991880522822991, - 0.00109242697235074662306043804220), - -0.0549989809235685841612020091328), - 0.275558175256937652532686256258), - -0.445017216867635649900123110649), - 0.227485835556935010735943483075); - - double ud = __spirv_ocl_fma( - r, - __spirv_ocl_fma( - r, - __spirv_ocl_fma(r, - __spirv_ocl_fma(r, 0.105869422087204370341222318533, - -0.943639137032492685763471240072), - 2.76568859157270989520376345954), - -3.28431505720958658909889444194), - 1.36491501334161032038194214209); - - double u = r * MATH_DIVIDE(un, ud); - - // Reconstruct acos carefully in transformed region - double res1 = __spirv_ocl_fma( - -2.0, MATH_DIVIDE(s + __spirv_ocl_fma(y, u, -piby2_tail), pi), 1.0); - double s1 = __clc_as_double(__clc_as_ulong(s) & 0xffffffff00000000UL); - double c = MATH_DIVIDE(__spirv_ocl_fma(-s1, s1, r), s + s1); - double res2 = MATH_DIVIDE( - __spirv_ocl_fma(2.0, s1, __spirv_ocl_fma(2.0, c, 2.0 * y * u)), pi); - res1 = xneg ? res1 : res2; - res2 = 0.5 - __spirv_ocl_fma(x, u, x) / pi; - res1 = transform ? res1 : res2; - - const double qnan = __clc_as_double(QNANBITPATT_DP64); - res2 = x == 1.0 ? 0.0 : qnan; - res2 = x == -1.0 ? 1.0 : res2; - res1 = xexp >= 0 ? res2 : res1; - res1 = xexp < -56 ? 0.5 : res1; - - return res1; -} - -_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, __spirv_ocl_acospi, double) - -#endif - -#ifdef cl_khr_fp16 -#pragma OPENCL EXTENSION cl_khr_fp16 : enable - -_CLC_OVERLOAD _CLC_DEF half __spirv_ocl_acospi(half x) { - float t = x; - return __spirv_ocl_acospi(t); -} - -_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, half, __spirv_ocl_acospi, half) - -#endif +#define FUNCTION __spirv_ocl_acospi +#define __CLC_FUNCTION(x) __clc_acospi +#define __CLC_BODY +#include diff --git a/libclc/libspirv/lib/generic/math/asin.cl b/libclc/libspirv/lib/generic/math/asin.cl index 05c61fb3b8a4a..e88939ed33b77 100644 --- a/libclc/libspirv/lib/generic/math/asin.cl +++ b/libclc/libspirv/lib/generic/math/asin.cl @@ -6,7 +6,10 @@ // //===----------------------------------------------------------------------===// +#include #include -#define __CLC_BODY +#define FUNCTION __spirv_ocl_asin +#define __CLC_FUNCTION(x) __clc_asin +#define __CLC_BODY #include diff --git a/libclc/libspirv/lib/generic/math/asin.inc b/libclc/libspirv/lib/generic/math/asin.inc deleted file mode 100644 index f32aca0fb7c5c..0000000000000 --- a/libclc/libspirv/lib/generic/math/asin.inc +++ /dev/null @@ -1,22 +0,0 @@ -//===----------------------------------------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#if __CLC_FPSIZE == 64 -#define __CLC_CONST(x) x -#elif __CLC_FPSIZE == 32 -#define __CLC_CONST(x) x##f -#elif __CLC_FPSIZE == 16 -#define __CLC_CONST(x) x##h -#endif - -_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __spirv_ocl_asin(__CLC_GENTYPE x) { - return __spirv_ocl_atan2( - x, __spirv_ocl_sqrt((__CLC_GENTYPE)__CLC_CONST(1.0) - (x * x))); -} - -#undef __CLC_CONST diff --git a/libclc/libspirv/lib/generic/math/asinh.cl b/libclc/libspirv/lib/generic/math/asinh.cl index 5c69aa537a284..aca02861e4387 100644 --- a/libclc/libspirv/lib/generic/math/asinh.cl +++ b/libclc/libspirv/lib/generic/math/asinh.cl @@ -6,366 +6,10 @@ // //===----------------------------------------------------------------------===// +#include #include -#include "ep_log.h" -#include -#include - -_CLC_OVERLOAD _CLC_DEF float __spirv_ocl_asinh(float x) { - uint ux = __clc_as_uint(x); - uint ax = ux & EXSIGNBIT_SP32; - uint xsgn = ax ^ ux; - - // |x| <= 2 - float t = x * x; - float a = __spirv_ocl_mad( - t, - __spirv_ocl_mad( - t, - __spirv_ocl_mad(t, - __spirv_ocl_mad(t, -1.177198915954942694e-4f, - -4.162727710583425360e-2f), - -5.063201055468483248e-1f), - -1.480204186473758321f), - -1.152965835871758072f); - float b = __spirv_ocl_mad( - t, - __spirv_ocl_mad( - t, - __spirv_ocl_mad(t, - __spirv_ocl_mad(t, 6.284381367285534560e-2f, - 1.260024978680227945f), - 6.582362487198468066f), - 11.99423176003939087f), - 6.917795026025976739f); - - float q = MATH_DIVIDE(a, b); - float z1 = __spirv_ocl_mad(x * t, q, x); - - // |x| > 2 - - // Arguments greater than 1/sqrt(epsilon) in magnitude are - // approximated by asinh(x) = ln(2) + ln(abs(x)), with sign of x - // Arguments such that 4.0 <= abs(x) <= 1/sqrt(epsilon) are - // approximated by asinhf(x) = ln(abs(x) + sqrt(x*x+1)) - // with the sign of x (see Abramowitz and Stegun 4.6.20) - - float absx = __clc_as_float(ax); - int hi = ax > 0x46000000U; - float y = MATH_SQRT(absx * absx + 1.0f) + absx; - y = hi ? absx : y; - float r = __spirv_ocl_log(y) + (hi ? 0x1.62e430p-1f : 0.0f); - float z2 = __clc_as_float(xsgn | __clc_as_uint(r)); - - float z = ax <= 0x40000000 ? z1 : z2; - z = ax < 0x39800000U || ax >= PINFBITPATT_SP32 ? x : z; - - return z; -} - -_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, float, __spirv_ocl_asinh, float) - -#ifdef cl_khr_fp64 -#pragma OPENCL EXTENSION cl_khr_fp64 : enable - -#define NA0 -0.12845379283524906084997e0 -#define NA1 -0.21060688498409799700819e0 -#define NA2 -0.10188951822578188309186e0 -#define NA3 -0.13891765817243625541799e-1 -#define NA4 -0.10324604871728082428024e-3 - -#define DA0 0.77072275701149440164511e0 -#define DA1 0.16104665505597338100747e1 -#define DA2 0.11296034614816689554875e1 -#define DA3 0.30079351943799465092429e0 -#define DA4 0.235224464765951442265117e-1 - -#define NB0 -0.12186605129448852495563e0 -#define NB1 -0.19777978436593069928318e0 -#define NB2 -0.94379072395062374824320e-1 -#define NB3 -0.12620141363821680162036e-1 -#define NB4 -0.903396794842691998748349e-4 - -#define DB0 0.73119630776696495279434e0 -#define DB1 0.15157170446881616648338e1 -#define DB2 0.10524909506981282725413e1 -#define DB3 0.27663713103600182193817e0 -#define DB4 0.21263492900663656707646e-1 - -#define NC0 -0.81210026327726247622500e-1 -#define NC1 -0.12327355080668808750232e0 -#define NC2 -0.53704925162784720405664e-1 -#define NC3 -0.63106739048128554465450e-2 -#define NC4 -0.35326896180771371053534e-4 - -#define DC0 0.48726015805581794231182e0 -#define DC1 0.95890837357081041150936e0 -#define DC2 0.62322223426940387752480e0 -#define DC3 0.15028684818508081155141e0 -#define DC4 0.10302171620320141529445e-1 - -#define ND0 -0.4638179204422665073e-1 -#define ND1 -0.7162729496035415183e-1 -#define ND2 -0.3247795155696775148e-1 -#define ND3 -0.4225785421291932164e-2 -#define ND4 -0.3808984717603160127e-4 -#define ND5 0.8023464184964125826e-6 - -#define DD0 0.2782907534642231184e0 -#define DD1 0.5549945896829343308e0 -#define DD2 0.3700732511330698879e0 -#define DD3 0.9395783438240780722e-1 -#define DD4 0.7200057974217143034e-2 - -#define NE0 -0.121224194072430701e-4 -#define NE1 -0.273145455834305218e-3 -#define NE2 -0.152866982560895737e-2 -#define NE3 -0.292231744584913045e-2 -#define NE4 -0.174670900236060220e-2 -#define NE5 -0.891754209521081538e-12 - -#define DE0 0.499426632161317606e-4 -#define DE1 0.139591210395547054e-2 -#define DE2 0.107665231109108629e-1 -#define DE3 0.325809818749873406e-1 -#define DE4 0.415222526655158363e-1 -#define DE5 0.186315628774716763e-1 - -#define NF0 -0.195436610112717345e-4 -#define NF1 -0.233315515113382977e-3 -#define NF2 -0.645380957611087587e-3 -#define NF3 -0.478948863920281252e-3 -#define NF4 -0.805234112224091742e-12 -#define NF5 0.246428598194879283e-13 - -#define DF0 0.822166621698664729e-4 -#define DF1 0.135346265620413852e-2 -#define DF2 0.602739242861830658e-2 -#define DF3 0.972227795510722956e-2 -#define DF4 0.510878800983771167e-2 - -#define NG0 -0.209689451648100728e-6 -#define NG1 -0.219252358028695992e-5 -#define NG2 -0.551641756327550939e-5 -#define NG3 -0.382300259826830258e-5 -#define NG4 -0.421182121910667329e-17 -#define NG5 0.492236019998237684e-19 - -#define DG0 0.889178444424237735e-6 -#define DG1 0.131152171690011152e-4 -#define DG2 0.537955850185616847e-4 -#define DG3 0.814966175170941864e-4 -#define DG4 0.407786943832260752e-4 - -#define NH0 -0.178284193496441400e-6 -#define NH1 -0.928734186616614974e-6 -#define NH2 -0.923318925566302615e-6 -#define NH3 -0.776417026702577552e-19 -#define NH4 0.290845644810826014e-21 - -#define DH0 0.786694697277890964e-6 -#define DH1 0.685435665630965488e-5 -#define DH2 0.153780175436788329e-4 -#define DH3 0.984873520613417917e-5 - -#define NI0 -0.538003743384069117e-10 -#define NI1 -0.273698654196756169e-9 -#define NI2 -0.268129826956403568e-9 -#define NI3 -0.804163374628432850e-29 - -#define DI0 0.238083376363471960e-9 -#define DI1 0.203579344621125934e-8 -#define DI2 0.450836980450693209e-8 -#define DI3 0.286005148753497156e-8 - -_CLC_OVERLOAD _CLC_DEF double __spirv_ocl_asinh(double x) { - const double rteps = 0x1.6a09e667f3bcdp-27; - const double recrteps = 0x1.6a09e667f3bcdp+26; - - // log2_lead and log2_tail sum to an extra-precise version of log(2) - const double log2_lead = 0x1.62e42ep-1; - const double log2_tail = 0x1.efa39ef35793cp-25; - - ulong ux = __clc_as_ulong(x); - ulong ax = ux & ~SIGNBIT_DP64; - double absx = __clc_as_double(ax); - - double t = x * x; - double pn, tn, pd, td; - - // XXX we are betting here that we can evaluate 8 pairs of - // polys faster than we can grab 12 coefficients from a table - // This also uses fewer registers - - // |x| >= 8 - pn = __spirv_ocl_fma(t, __spirv_ocl_fma(t, __spirv_ocl_fma(t, NI3, NI2), NI1), - NI0); - pd = __spirv_ocl_fma(t, __spirv_ocl_fma(t, __spirv_ocl_fma(t, DI3, DI2), DI1), - DI0); - - tn = __spirv_ocl_fma( - t, - __spirv_ocl_fma(t, __spirv_ocl_fma(t, __spirv_ocl_fma(t, NH4, NH3), NH2), - NH1), - NH0); - td = __spirv_ocl_fma(t, __spirv_ocl_fma(t, __spirv_ocl_fma(t, DH3, DH2), DH1), - DH0); - pn = absx < 8.0 ? tn : pn; - pd = absx < 8.0 ? td : pd; - - tn = __spirv_ocl_fma( - t, - __spirv_ocl_fma( - t, - __spirv_ocl_fma( - t, __spirv_ocl_fma(t, __spirv_ocl_fma(t, NG5, NG4), NG3), NG2), - NG1), - NG0); - td = __spirv_ocl_fma( - t, - __spirv_ocl_fma(t, __spirv_ocl_fma(t, __spirv_ocl_fma(t, DG4, DG3), DG2), - DG1), - DG0); - pn = absx < 4.0 ? tn : pn; - pd = absx < 4.0 ? td : pd; - - tn = __spirv_ocl_fma( - t, - __spirv_ocl_fma( - t, - __spirv_ocl_fma( - t, __spirv_ocl_fma(t, __spirv_ocl_fma(t, NF5, NF4), NF3), NF2), - NF1), - NF0); - td = __spirv_ocl_fma( - t, - __spirv_ocl_fma(t, __spirv_ocl_fma(t, __spirv_ocl_fma(t, DF4, DF3), DF2), - DF1), - DF0); - pn = absx < 2.0 ? tn : pn; - pd = absx < 2.0 ? td : pd; - - tn = __spirv_ocl_fma( - t, - __spirv_ocl_fma( - t, - __spirv_ocl_fma( - t, __spirv_ocl_fma(t, __spirv_ocl_fma(t, NE5, NE4), NE3), NE2), - NE1), - NE0); - td = __spirv_ocl_fma( - t, - __spirv_ocl_fma( - t, - __spirv_ocl_fma( - t, __spirv_ocl_fma(t, __spirv_ocl_fma(t, DE5, DE4), DE3), DE2), - DE1), - DE0); - pn = absx < 1.5 ? tn : pn; - pd = absx < 1.5 ? td : pd; - - tn = __spirv_ocl_fma( - t, - __spirv_ocl_fma( - t, - __spirv_ocl_fma( - t, __spirv_ocl_fma(t, __spirv_ocl_fma(t, ND5, ND4), ND3), ND2), - ND1), - ND0); - td = __spirv_ocl_fma( - t, - __spirv_ocl_fma(t, __spirv_ocl_fma(t, __spirv_ocl_fma(t, DD4, DD3), DD2), - DD1), - DD0); - pn = absx <= 1.0 ? tn : pn; - pd = absx <= 1.0 ? td : pd; - - tn = __spirv_ocl_fma( - t, - __spirv_ocl_fma(t, __spirv_ocl_fma(t, __spirv_ocl_fma(t, NC4, NC3), NC2), - NC1), - NC0); - td = __spirv_ocl_fma( - t, - __spirv_ocl_fma(t, __spirv_ocl_fma(t, __spirv_ocl_fma(t, DC4, DC3), DC2), - DC1), - DC0); - pn = absx < 0.75 ? tn : pn; - pd = absx < 0.75 ? td : pd; - - tn = __spirv_ocl_fma( - t, - __spirv_ocl_fma(t, __spirv_ocl_fma(t, __spirv_ocl_fma(t, NB4, NB3), NB2), - NB1), - NB0); - td = __spirv_ocl_fma( - t, - __spirv_ocl_fma(t, __spirv_ocl_fma(t, __spirv_ocl_fma(t, DB4, DB3), DB2), - DB1), - DB0); - pn = absx < 0.5 ? tn : pn; - pd = absx < 0.5 ? td : pd; - - tn = __spirv_ocl_fma( - t, - __spirv_ocl_fma(t, __spirv_ocl_fma(t, __spirv_ocl_fma(t, NA4, NA3), NA2), - NA1), - NA0); - td = __spirv_ocl_fma( - t, - __spirv_ocl_fma(t, __spirv_ocl_fma(t, __spirv_ocl_fma(t, DA4, DA3), DA2), - DA1), - DA0); - pn = absx < 0.25 ? tn : pn; - pd = absx < 0.25 ? td : pd; - - double pq = MATH_DIVIDE(pn, pd); - - // |x| <= 1 - double result1 = __spirv_ocl_fma(absx * t, pq, absx); - - // Other ranges - int xout = absx <= 32.0 | absx > recrteps; - double y = absx + __spirv_ocl_sqrt(__spirv_ocl_fma(absx, absx, 1.0)); - y = xout ? absx : y; - - double r1, r2; - int xexp; - __clc_ep_log(y, &xexp, &r1, &r2); - - double dxexp = (double)(xexp + xout); - r1 = __spirv_ocl_fma(dxexp, log2_lead, r1); - r2 = __spirv_ocl_fma(dxexp, log2_tail, r2); - - // 1 < x <= 32 - double v2 = (pq + 0.25) / t; - double r = v2 + r1; - double s = ((r1 - r) + v2) + r2; - double v1 = r + s; - v2 = (r - v1) + s; - double result2 = v1 + v2; - - // x > 32 - double result3 = r1 + r2; - - double ret = absx > 1.0 ? result2 : result1; - ret = absx > 32.0 ? result3 : ret; - ret = x < 0.0 ? -ret : ret; - - // NaN, +-Inf, or x small enough that asinh(x) = x - ret = ax >= PINFBITPATT_DP64 || absx < rteps ? x : ret; - return ret; -} - -_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, __spirv_ocl_asinh, double) - -#endif - -#ifdef cl_khr_fp16 - -#pragma OPENCL EXTENSION cl_khr_fp16 : enable - -_CLC_DEFINE_UNARY_BUILTIN_SCALARIZE(half, __spirv_ocl_asinh, __builtin_asinhf, half) - -#endif +#define FUNCTION __spirv_ocl_asinh +#define __CLC_FUNCTION(x) __clc_asinh +#define __CLC_BODY +#include diff --git a/libclc/libspirv/lib/generic/math/asinpi.cl b/libclc/libspirv/lib/generic/math/asinpi.cl index b71782651d62b..f15e29da3b3b4 100644 --- a/libclc/libspirv/lib/generic/math/asinpi.cl +++ b/libclc/libspirv/lib/generic/math/asinpi.cl @@ -6,171 +6,10 @@ // //===----------------------------------------------------------------------===// +#include #include -#include -#include - -_CLC_OVERLOAD _CLC_DEF float __spirv_ocl_asinpi(float x) { - // Computes arcsin(x). - // The argument is first reduced by noting that arcsin(x) - // is invalid for abs(x) > 1 and arcsin(-x) = -arcsin(x). - // For denormal and small arguments arcsin(x) = x to machine - // accuracy. Remaining argument ranges are handled as follows. - // For abs(x) <= 0.5 use - // arcsin(x) = x + x^3*R(x^2) - // where R(x^2) is a rational minimax approximation to - // (arcsin(x) - x)/x^3. - // For abs(x) > 0.5 exploit the identity: - // arcsin(x) = pi/2 - 2*arcsin(sqrt(1-x)/2) - // together with the above rational approximation, and - // reconstruct the terms carefully. - - const float pi = 3.1415926535897933e+00f; - const float piby2_tail = 7.5497894159e-08F; /* 0x33a22168 */ - const float hpiby2_head = 7.8539812565e-01F; /* 0x3f490fda */ - - uint ux = __clc_as_uint(x); - uint aux = ux & EXSIGNBIT_SP32; - uint xs = ux ^ aux; - float shalf = __clc_as_float(xs | __clc_as_uint(0.5f)); - - int xexp = (int)(aux >> EXPSHIFTBITS_SP32) - EXPBIAS_SP32; - - float y = __clc_as_float(aux); - - // abs(x) >= 0.5 - int transform = xexp >= -1; - - float y2 = y * y; - float rt = 0.5f * (1.0f - y); - float r = transform ? rt : y2; - - // Use a rational approximation for [0.0, 0.5] - float a = __spirv_ocl_mad( - r, - __spirv_ocl_mad(r, - __spirv_ocl_mad(r, -0.00396137437848476485201154797087F, - -0.0133819288943925804214011424456F), - -0.0565298683201845211985026327361F), - 0.184161606965100694821398249421F); - float b = __spirv_ocl_mad(r, -0.836411276854206731913362287293F, - 1.10496961524520294485512696706F); - float u = r * MATH_DIVIDE(a, b); - - float s = MATH_SQRT(r); - float s1 = __clc_as_float(__clc_as_uint(s) & 0xffff0000); - float c = MATH_DIVIDE(__spirv_ocl_mad(-s1, s1, r), s + s1); - float p = - __spirv_ocl_mad(2.0f * s, u, -__spirv_ocl_mad(c, -2.0f, piby2_tail)); - float q = __spirv_ocl_mad(s1, -2.0f, hpiby2_head); - float vt = hpiby2_head - (p - q); - float v = __spirv_ocl_mad(y, u, y); - v = transform ? vt : v; - v = MATH_DIVIDE(v, pi); - float xbypi = MATH_DIVIDE(x, pi); - - float ret = __clc_as_float(xs | __clc_as_uint(v)); - ret = aux > 0x3f800000U ? __clc_as_float(QNANBITPATT_SP32) : ret; - ret = aux == 0x3f800000U ? shalf : ret; - ret = xexp < -14 ? xbypi : ret; - - return ret; -} - -_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, float, __spirv_ocl_asinpi, float) - -#ifdef cl_khr_fp64 -#pragma OPENCL EXTENSION cl_khr_fp64 : enable - -_CLC_OVERLOAD _CLC_DEF double __spirv_ocl_asinpi(double x) { - // Computes arcsin(x). - // The argument is first reduced by noting that arcsin(x) - // is invalid for abs(x) > 1 and arcsin(-x) = -arcsin(x). - // For denormal and small arguments arcsin(x) = x to machine - // accuracy. Remaining argument ranges are handled as follows. - // For abs(x) <= 0.5 use - // arcsin(x) = x + x^3*R(x^2) - // where R(x^2) is a rational minimax approximation to - // (arcsin(x) - x)/x^3. - // For abs(x) > 0.5 exploit the identity: - // arcsin(x) = pi/2 - 2*arcsin(sqrt(1-x)/2) - // together with the above rational approximation, and - // reconstruct the terms carefully. - - const double pi = 0x1.921fb54442d18p+1; - const double piby2_tail = 6.1232339957367660e-17; /* 0x3c91a62633145c07 */ - const double hpiby2_head = 7.8539816339744831e-01; /* 0x3fe921fb54442d18 */ - - double y = __spirv_ocl_fabs(x); - int xneg = __clc_as_int2(x).hi < 0; - int xexp = (__clc_as_int2(y).hi >> 20) - EXPBIAS_DP64; - - // abs(x) >= 0.5 - int transform = xexp >= -1; - - double rt = 0.5 * (1.0 - y); - double y2 = y * y; - double r = transform ? rt : y2; - - // Use a rational approximation for [0.0, 0.5] - double un = __spirv_ocl_fma( - r, - __spirv_ocl_fma( - r, - __spirv_ocl_fma( - r, - __spirv_ocl_fma( - r, - __spirv_ocl_fma(r, 0.0000482901920344786991880522822991, - 0.00109242697235074662306043804220), - -0.0549989809235685841612020091328), - 0.275558175256937652532686256258), - -0.445017216867635649900123110649), - 0.227485835556935010735943483075); - - double ud = __spirv_ocl_fma( - r, - __spirv_ocl_fma( - r, - __spirv_ocl_fma(r, - __spirv_ocl_fma(r, 0.105869422087204370341222318533, - -0.943639137032492685763471240072), - 2.76568859157270989520376345954), - -3.28431505720958658909889444194), - 1.36491501334161032038194214209); - - double u = r * MATH_DIVIDE(un, ud); - - // Reconstruct asin carefully in transformed region - double s = __spirv_ocl_sqrt(r); - double sh = __clc_as_double(__clc_as_ulong(s) & 0xffffffff00000000UL); - double c = MATH_DIVIDE(__spirv_ocl_fma(-sh, sh, r), s + sh); - double p = __spirv_ocl_fma(2.0 * s, u, -__spirv_ocl_fma(-2.0, c, piby2_tail)); - double q = __spirv_ocl_fma(-2.0, sh, hpiby2_head); - double vt = hpiby2_head - (p - q); - double v = __spirv_ocl_fma(y, u, y); - v = transform ? vt : v; - - v = xexp < -28 ? y : v; - v = MATH_DIVIDE(v, pi); - v = xexp >= 0 ? __clc_as_double(QNANBITPATT_DP64) : v; - v = y == 1.0 ? 0.5 : v; - return xneg ? -v : v; -} - -_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, __spirv_ocl_asinpi, double) - -#endif - -#ifdef cl_khr_fp16 -#pragma OPENCL EXTENSION cl_khr_fp16 : enable - -_CLC_OVERLOAD _CLC_DEF half __spirv_ocl_asinpi(half x) { - float t = x; - return __spirv_ocl_asinpi(t); -} - -_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, half, __spirv_ocl_asinpi, half) - -#endif +#define FUNCTION __spirv_ocl_asinpi +#define __CLC_FUNCTION(x) __clc_asinpi +#define __CLC_BODY +#include diff --git a/libclc/libspirv/lib/generic/math/atan.cl b/libclc/libspirv/lib/generic/math/atan.cl index a1185c762b890..9ccd1d8b2ee61 100644 --- a/libclc/libspirv/lib/generic/math/atan.cl +++ b/libclc/libspirv/lib/generic/math/atan.cl @@ -6,178 +6,10 @@ // //===----------------------------------------------------------------------===// +#include #include -#include -#include - -_CLC_OVERLOAD _CLC_DEF float __spirv_ocl_atan(float x) { - const float piby2 = 1.5707963267948966f; // 0x3ff921fb54442d18 - - uint ux = __clc_as_uint(x); - uint aux = ux & EXSIGNBIT_SP32; - uint sx = ux ^ aux; - - float spiby2 = __clc_as_float(sx | __clc_as_uint(piby2)); - - float v = __clc_as_float(aux); - - // Return for NaN - float ret = x; - - // 2^26 <= |x| <= Inf => atan(x) is close to piby2 - ret = aux <= PINFBITPATT_SP32 ? spiby2 : ret; - - // Reduce arguments 2^-19 <= |x| < 2^26 - - // 39/16 <= x < 2^26 - x = -MATH_RECIP(v); - float c = 1.57079632679489655800f; // atan(infinity) - - // 19/16 <= x < 39/16 - int l = aux < 0x401c0000; - float xx = MATH_DIVIDE(v - 1.5f, __spirv_ocl_mad(v, 1.5f, 1.0f)); - x = l ? xx : x; - c = l ? 9.82793723247329054082e-01f : c; // atan(1.5) - - // 11/16 <= x < 19/16 - l = aux < 0x3f980000U; - xx = MATH_DIVIDE(v - 1.0f, 1.0f + v); - x = l ? xx : x; - c = l ? 7.85398163397448278999e-01f : c; // atan(1) - - // 7/16 <= x < 11/16 - l = aux < 0x3f300000; - xx = MATH_DIVIDE(__spirv_ocl_mad(v, 2.0f, -1.0f), 2.0f + v); - x = l ? xx : x; - c = l ? 4.63647609000806093515e-01f : c; // atan(0.5) - - // 2^-19 <= x < 7/16 - l = aux < 0x3ee00000; - x = l ? v : x; - c = l ? 0.0f : c; - - // Core approximation: Remez(2,2) on [-7/16,7/16] - - float s = x * x; - float a = - __spirv_ocl_mad(s, - __spirv_ocl_mad(s, 0.470677934286149214138357545549e-2f, - 0.192324546402108583211697690500f), - 0.296528598819239217902158651186f); - - float b = - __spirv_ocl_mad(s, - __spirv_ocl_mad(s, 0.299309699959659728404442796915f, - 0.111072499995399550138837673349e1f), - 0.889585796862432286486651434570f); - - float q = x * s * MATH_DIVIDE(a, b); - - float z = c - (q - x); - float zs = __clc_as_float(sx | __clc_as_uint(z)); - - ret = aux < 0x4c800000 ? zs : ret; - - // |x| < 2^-19 - ret = aux < 0x36000000 ? __clc_as_float(ux) : ret; - return ret; -} - -_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, float, __spirv_ocl_atan, float); - -#ifdef cl_khr_fp64 - -#pragma OPENCL EXTENSION cl_khr_fp64 : enable - -_CLC_OVERLOAD _CLC_DEF double __spirv_ocl_atan(double x) { - const double piby2 = 1.5707963267948966e+00; // 0x3ff921fb54442d18 - - double v = __spirv_ocl_fabs(x); - - // 2^56 > v > 39/16 - double a = -1.0; - double b = v; - // (chi + clo) = arctan(infinity) - double chi = 1.57079632679489655800e+00; - double clo = 6.12323399573676480327e-17; - - double ta = v - 1.5; - double tb = 1.0 + 1.5 * v; - int l = v <= 0x1.38p+1; // 39/16 > v > 19/16 - a = l ? ta : a; - b = l ? tb : b; - // (chi + clo) = arctan(1.5) - chi = l ? 9.82793723247329054082e-01 : chi; - clo = l ? 1.39033110312309953701e-17 : clo; - - ta = v - 1.0; - tb = 1.0 + v; - l = v <= 0x1.3p+0; // 19/16 > v > 11/16 - a = l ? ta : a; - b = l ? tb : b; - // (chi + clo) = arctan(1.) - chi = l ? 7.85398163397448278999e-01 : chi; - clo = l ? 3.06161699786838240164e-17 : clo; - - ta = 2.0 * v - 1.0; - tb = 2.0 + v; - l = v <= 0x1.6p-1; // 11/16 > v > 7/16 - a = l ? ta : a; - b = l ? tb : b; - // (chi + clo) = arctan(0.5) - chi = l ? 4.63647609000806093515e-01 : chi; - clo = l ? 2.26987774529616809294e-17 : clo; - - l = v <= 0x1.cp-2; // v < 7/16 - a = l ? v : a; - b = l ? 1.0 : b; - ; - chi = l ? 0.0 : chi; - clo = l ? 0.0 : clo; - - // Core approximation: Remez(4,4) on [-7/16,7/16] - double r = a / b; - double s = r * r; - double qn = __spirv_ocl_fma( - s, - __spirv_ocl_fma( - s, - __spirv_ocl_fma(s, - __spirv_ocl_fma(s, 0.142316903342317766e-3, - 0.304455919504853031e-1), - 0.220638780716667420e0), - 0.447677206805497472e0), - 0.268297920532545909e0); - - double qd = __spirv_ocl_fma( - s, - __spirv_ocl_fma( - s, - __spirv_ocl_fma(s, - __spirv_ocl_fma(s, 0.389525873944742195e-1, - 0.424602594203847109e0), - 0.141254259931958921e1), - 0.182596787737507063e1), - 0.804893761597637733e0); - - double q = r * s * qn / qd; - r = chi - ((q - clo) - r); - - double z = __spirv_IsNan(x) ? x : piby2; - z = v <= 0x1.0p+56 ? r : z; - z = v < 0x1.0p-26 ? v : z; - return x == v ? z : -z; -} - -_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, __spirv_ocl_atan, double); - -#endif // cl_khr_fp64 - -#ifdef cl_khr_fp16 - -#pragma OPENCL EXTENSION cl_khr_fp16 : enable - -_CLC_DEFINE_UNARY_BUILTIN_SCALARIZE(half, __spirv_ocl_atan, __builtin_atanf16, half) - -#endif +#define FUNCTION __spirv_ocl_atan +#define __CLC_FUNCTION(x) __clc_atan +#define __CLC_BODY +#include diff --git a/libclc/libspirv/lib/generic/math/atanh.cl b/libclc/libspirv/lib/generic/math/atanh.cl index 1a5a4f71da2e0..40f3bad0b80ba 100644 --- a/libclc/libspirv/lib/generic/math/atanh.cl +++ b/libclc/libspirv/lib/generic/math/atanh.cl @@ -6,119 +6,10 @@ // //===----------------------------------------------------------------------===// +#include #include -#include -#include - -_CLC_OVERLOAD _CLC_DEF float __spirv_ocl_atanh(float x) { - uint ux = __clc_as_uint(x); - uint ax = ux & EXSIGNBIT_SP32; - uint xs = ux ^ ax; - - // |x| > 1 or NaN - float z = __clc_as_float(QNANBITPATT_SP32); - - // |x| == 1 - float t = __clc_as_float(xs | PINFBITPATT_SP32); - z = ax == 0x3f800000U ? t : z; - - // 1/2 <= |x| < 1 - t = __clc_as_float(ax); - t = MATH_DIVIDE(2.0f * t, 1.0f - t); - t = 0.5f * __spirv_ocl_log1p(t); - t = __clc_as_float(xs | __clc_as_uint(t)); - z = ax < 0x3f800000U ? t : z; - - // |x| < 1/2 - t = x * x; - float a = - __spirv_ocl_mad(__spirv_ocl_mad(0.92834212715e-2f, t, -0.28120347286e0f), - t, 0.39453629046e0f); - float b = - __spirv_ocl_mad(__spirv_ocl_mad(0.45281890445e0f, t, -0.15537744551e1f), - t, 0.11836088638e1f); - float p = MATH_DIVIDE(a, b); - t = __spirv_ocl_mad(x * t, p, x); - z = ax < 0x3f000000 ? t : z; - - // |x| < 2^-13 - z = ax < 0x39000000U ? x : z; - - return z; -} - -_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, float, __spirv_ocl_atanh, float) - -#ifdef cl_khr_fp64 -#pragma OPENCL EXTENSION cl_khr_fp64 : enable - -_CLC_OVERLOAD _CLC_DEF double __spirv_ocl_atanh(double x) { - double absx = __spirv_ocl_fabs(x); - - double ret = - absx == 1.0 ? __clc_as_double(PINFBITPATT_DP64) : __clc_as_double(QNANBITPATT_DP64); - - // |x| >= 0.5 - // Note that atanh(x) = 0.5 * ln((1+x)/(1-x)) - // For greater accuracy we use - // ln((1+x)/(1-x)) = ln(1 + 2x/(1-x)) = log1p(2x/(1-x)). - double r = 0.5 * __spirv_ocl_log1p(2.0 * absx / (1.0 - absx)); - ret = absx < 1.0 ? r : ret; - - r = -ret; - ret = x < 0.0 ? r : ret; - - // Arguments up to 0.5 in magnitude are - // approximated by a [5,5] minimax polynomial - double t = x * x; - - double pn = __spirv_ocl_fma( - t, - __spirv_ocl_fma( - t, - __spirv_ocl_fma( - t, - __spirv_ocl_fma(t, - __spirv_ocl_fma(t, -0.10468158892753136958e-3, - 0.28728638600548514553e-1), - -0.28180210961780814148e0), - 0.88468142536501647470e0), - -0.11028356797846341457e1), - 0.47482573589747356373e0); - - double pd = __spirv_ocl_fma( - t, - __spirv_ocl_fma( - t, - __spirv_ocl_fma( - t, - __spirv_ocl_fma(t, - __spirv_ocl_fma(t, -0.35861554370169537512e-1, - 0.49561196555503101989e0), - -0.22608883748988489342e1), - 0.45414700626084508355e1), - -0.41631933639693546274e1), - 0.14244772076924206909e1); - - r = __spirv_ocl_fma(x * t, pn / pd, x); - ret = absx < 0.5 ? r : ret; - - return ret; -} - -_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, __spirv_ocl_atanh, double) - -#endif - -#ifdef cl_khr_fp16 -#pragma OPENCL EXTENSION cl_khr_fp16 : enable - -_CLC_OVERLOAD _CLC_DEF half __spirv_ocl_atanh(half x) { - float t = x; - return __spirv_ocl_atanh(t); -} - -_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, half, __spirv_ocl_atanh, half) - -#endif +#define FUNCTION __spirv_ocl_atanh +#define __CLC_FUNCTION(x) __clc_atanh +#define __CLC_BODY +#include diff --git a/libclc/libspirv/lib/generic/math/atanpi.cl b/libclc/libspirv/lib/generic/math/atanpi.cl index c1557ffc83b7c..ae86758102eb3 100644 --- a/libclc/libspirv/lib/generic/math/atanpi.cl +++ b/libclc/libspirv/lib/generic/math/atanpi.cl @@ -6,184 +6,10 @@ // //===----------------------------------------------------------------------===// +#include #include -#include -#include - -_CLC_OVERLOAD _CLC_DEF float __spirv_ocl_atanpi(float x) { - const float pi = 3.1415926535897932f; - - uint ux = __clc_as_uint(x); - uint aux = ux & EXSIGNBIT_SP32; - uint sx = ux ^ aux; - - float xbypi = MATH_DIVIDE(x, pi); - float shalf = __clc_as_float(sx | __clc_as_uint(0.5f)); - - float v = __clc_as_float(aux); - - // Return for NaN - float ret = x; - - // 2^26 <= |x| <= Inf => atan(x) is close to piby2 - ret = aux <= PINFBITPATT_SP32 ? shalf : ret; - - // Reduce arguments 2^-19 <= |x| < 2^26 - - // 39/16 <= x < 2^26 - x = -MATH_RECIP(v); - float c = 1.57079632679489655800f; // atan(infinity) - - // 19/16 <= x < 39/16 - int l = aux < 0x401c0000; - float xx = MATH_DIVIDE(v - 1.5f, __spirv_ocl_mad(v, 1.5f, 1.0f)); - x = l ? xx : x; - c = l ? 9.82793723247329054082e-01f : c; // atan(1.5) - - // 11/16 <= x < 19/16 - l = aux < 0x3f980000U; - xx = MATH_DIVIDE(v - 1.0f, 1.0f + v); - x = l ? xx : x; - c = l ? 7.85398163397448278999e-01f : c; // atan(1) - - // 7/16 <= x < 11/16 - l = aux < 0x3f300000; - xx = MATH_DIVIDE(__spirv_ocl_mad(v, 2.0f, -1.0f), 2.0f + v); - x = l ? xx : x; - c = l ? 4.63647609000806093515e-01f : c; // atan(0.5) - - // 2^-19 <= x < 7/16 - l = aux < 0x3ee00000; - x = l ? v : x; - c = l ? 0.0f : c; - - // Core approximation: Remez(2,2) on [-7/16,7/16] - - float s = x * x; - float a = - __spirv_ocl_mad(s, - __spirv_ocl_mad(s, 0.470677934286149214138357545549e-2f, - 0.192324546402108583211697690500f), - 0.296528598819239217902158651186f); - - float b = - __spirv_ocl_mad(s, - __spirv_ocl_mad(s, 0.299309699959659728404442796915f, - 0.111072499995399550138837673349e1f), - 0.889585796862432286486651434570f); - - float q = x * s * MATH_DIVIDE(a, b); - - float z = c - (q - x); - z = MATH_DIVIDE(z, pi); - float zs = __clc_as_float(sx | __clc_as_uint(z)); - - ret = aux < 0x4c800000 ? zs : ret; - - // |x| < 2^-19 - ret = aux < 0x36000000 ? xbypi : ret; - return ret; -} - -_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, float, __spirv_ocl_atanpi, float) - -#ifdef cl_khr_fp64 -#pragma OPENCL EXTENSION cl_khr_fp64 : enable - -_CLC_OVERLOAD _CLC_DEF double __spirv_ocl_atanpi(double x) { - const double pi = 0x1.921fb54442d18p+1; - - double v = __spirv_ocl_fabs(x); - - // 2^56 > v > 39/16 - double a = -1.0; - double b = v; - // (chi + clo) = arctan(infinity) - double chi = 1.57079632679489655800e+00; - double clo = 6.12323399573676480327e-17; - - double ta = v - 1.5; - double tb = 1.0 + 1.5 * v; - int l = v <= 0x1.38p+1; // 39/16 > v > 19/16 - a = l ? ta : a; - b = l ? tb : b; - // (chi + clo) = arctan(1.5) - chi = l ? 9.82793723247329054082e-01 : chi; - clo = l ? 1.39033110312309953701e-17 : clo; - - ta = v - 1.0; - tb = 1.0 + v; - l = v <= 0x1.3p+0; // 19/16 > v > 11/16 - a = l ? ta : a; - b = l ? tb : b; - // (chi + clo) = arctan(1.) - chi = l ? 7.85398163397448278999e-01 : chi; - clo = l ? 3.06161699786838240164e-17 : clo; - - ta = 2.0 * v - 1.0; - tb = 2.0 + v; - l = v <= 0x1.6p-1; // 11/16 > v > 7/16 - a = l ? ta : a; - b = l ? tb : b; - // (chi + clo) = arctan(0.5) - chi = l ? 4.63647609000806093515e-01 : chi; - clo = l ? 2.26987774529616809294e-17 : clo; - - l = v <= 0x1.cp-2; // v < 7/16 - a = l ? v : a; - b = l ? 1.0 : b; - ; - chi = l ? 0.0 : chi; - clo = l ? 0.0 : clo; - - // Core approximation: Remez(4,4) on [-7/16,7/16] - double r = a / b; - double s = r * r; - double qn = __spirv_ocl_fma( - s, - __spirv_ocl_fma( - s, - __spirv_ocl_fma(s, - __spirv_ocl_fma(s, 0.142316903342317766e-3, - 0.304455919504853031e-1), - 0.220638780716667420e0), - 0.447677206805497472e0), - 0.268297920532545909e0); - - double qd = __spirv_ocl_fma( - s, - __spirv_ocl_fma( - s, - __spirv_ocl_fma(s, - __spirv_ocl_fma(s, 0.389525873944742195e-1, - 0.424602594203847109e0), - 0.141254259931958921e1), - 0.182596787737507063e1), - 0.804893761597637733e0); - - double q = r * s * qn / qd; - r = (chi - ((q - clo) - r)) / pi; - double vp = v / pi; - - double z = __spirv_IsNan(x) ? x : 0.5; - z = v <= 0x1.0p+56 ? r : z; - z = v < 0x1.0p-26 ? vp : z; - return x == v ? z : -z; -} - -_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, __spirv_ocl_atanpi, double) - -#endif - -#ifdef cl_khr_fp16 -#pragma OPENCL EXTENSION cl_khr_fp16 : enable - -_CLC_OVERLOAD _CLC_DEF half __spirv_ocl_atanpi(half x) { - float t = x; - return __spirv_ocl_atanpi(t); -} - -_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, half, __spirv_ocl_atanpi, half) - -#endif +#define FUNCTION __spirv_ocl_atanpi +#define __CLC_FUNCTION(x) __clc_atanpi +#define __CLC_BODY +#include diff --git a/libclc/libspirv/lib/generic/math/erf.cl b/libclc/libspirv/lib/generic/math/erf.cl index 1654f2761e3c2..61f2763930d32 100644 --- a/libclc/libspirv/lib/generic/math/erf.cl +++ b/libclc/libspirv/lib/generic/math/erf.cl @@ -6,545 +6,10 @@ // //===----------------------------------------------------------------------===// +#include #include -#include -#include - -/* - * ==================================================== - * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. - * - * Developed at SunPro, a Sun Microsystems, Inc. business. - * Permission to use, copy, modify, and distribute this - * software is freely granted, provided that this notice - * is preserved. - * ==================================================== - */ - -#define erx 8.4506291151e-01f /* 0x3f58560b */ - -// Coefficients for approximation to erf on [00.84375] - -#define efx 1.2837916613e-01f /* 0x3e0375d4 */ -#define efx8 1.0270333290e+00f /* 0x3f8375d4 */ - -#define pp0 1.2837916613e-01f /* 0x3e0375d4 */ -#define pp1 -3.2504209876e-01f /* 0xbea66beb */ -#define pp2 -2.8481749818e-02f /* 0xbce9528f */ -#define pp3 -5.7702702470e-03f /* 0xbbbd1489 */ -#define pp4 -2.3763017452e-05f /* 0xb7c756b1 */ -#define qq1 3.9791721106e-01f /* 0x3ecbbbce */ -#define qq2 6.5022252500e-02f /* 0x3d852a63 */ -#define qq3 5.0813062117e-03f /* 0x3ba68116 */ -#define qq4 1.3249473704e-04f /* 0x390aee49 */ -#define qq5 -3.9602282413e-06f /* 0xb684e21a */ - -// Coefficients for approximation to erf in [0.843751.25] - -#define pa0 -2.3621185683e-03f /* 0xbb1acdc6 */ -#define pa1 4.1485610604e-01f /* 0x3ed46805 */ -#define pa2 -3.7220788002e-01f /* 0xbebe9208 */ -#define pa3 3.1834661961e-01f /* 0x3ea2fe54 */ -#define pa4 -1.1089469492e-01f /* 0xbde31cc2 */ -#define pa5 3.5478305072e-02f /* 0x3d1151b3 */ -#define pa6 -2.1663755178e-03f /* 0xbb0df9c0 */ -#define qa1 1.0642088205e-01f /* 0x3dd9f331 */ -#define qa2 5.4039794207e-01f /* 0x3f0a5785 */ -#define qa3 7.1828655899e-02f /* 0x3d931ae7 */ -#define qa4 1.2617121637e-01f /* 0x3e013307 */ -#define qa5 1.3637083583e-02f /* 0x3c5f6e13 */ -#define qa6 1.1984500103e-02f /* 0x3c445aa3 */ - -// Coefficients for approximation to erfc in [1.251/0.35] - -#define ra0 -9.8649440333e-03f /* 0xbc21a093 */ -#define ra1 -6.9385856390e-01f /* 0xbf31a0b7 */ -#define ra2 -1.0558626175e+01f /* 0xc128f022 */ -#define ra3 -6.2375331879e+01f /* 0xc2798057 */ -#define ra4 -1.6239666748e+02f /* 0xc322658c */ -#define ra5 -1.8460508728e+02f /* 0xc3389ae7 */ -#define ra6 -8.1287437439e+01f /* 0xc2a2932b */ -#define ra7 -9.8143291473e+00f /* 0xc11d077e */ -#define sa1 1.9651271820e+01f /* 0x419d35ce */ -#define sa2 1.3765776062e+02f /* 0x4309a863 */ -#define sa3 4.3456588745e+02f /* 0x43d9486f */ -#define sa4 6.4538726807e+02f /* 0x442158c9 */ -#define sa5 4.2900814819e+02f /* 0x43d6810b */ -#define sa6 1.0863500214e+02f /* 0x42d9451f */ -#define sa7 6.5702495575e+00f /* 0x40d23f7c */ -#define sa8 -6.0424413532e-02f /* 0xbd777f97 */ - -// Coefficients for approximation to erfc in [1/.3528] - -#define rb0 -9.8649431020e-03f /* 0xbc21a092 */ -#define rb1 -7.9928326607e-01f /* 0xbf4c9dd4 */ -#define rb2 -1.7757955551e+01f /* 0xc18e104b */ -#define rb3 -1.6063638306e+02f /* 0xc320a2ea */ -#define rb4 -6.3756646729e+02f /* 0xc41f6441 */ -#define rb5 -1.0250950928e+03f /* 0xc480230b */ -#define rb6 -4.8351919556e+02f /* 0xc3f1c275 */ -#define sb1 3.0338060379e+01f /* 0x41f2b459 */ -#define sb2 3.2579251099e+02f /* 0x43a2e571 */ -#define sb3 1.5367296143e+03f /* 0x44c01759 */ -#define sb4 3.1998581543e+03f /* 0x4547fdbb */ -#define sb5 2.5530502930e+03f /* 0x451f90ce */ -#define sb6 4.7452853394e+02f /* 0x43ed43a7 */ -#define sb7 -2.2440952301e+01f /* 0xc1b38712 */ - -_CLC_OVERLOAD _CLC_DEF float __spirv_ocl_erf(float x) { - int hx = __clc_as_uint(x); - int ix = hx & 0x7fffffff; - float absx = __clc_as_float(ix); - - float x2 = absx * absx; - float t = 1.0f / x2; - float tt = absx - 1.0f; - t = absx < 1.25f ? tt : t; - t = absx < 0.84375f ? x2 : t; - - float u, v, tu, tv; - - // |x| < 6 - u = __spirv_ocl_mad( - t, - __spirv_ocl_mad( - t, - __spirv_ocl_mad( - t, - __spirv_ocl_mad( - t, __spirv_ocl_mad(t, __spirv_ocl_mad(t, rb6, rb5), rb4), - rb3), - rb2), - rb1), - rb0); - v = __spirv_ocl_mad( - t, - __spirv_ocl_mad( - t, - __spirv_ocl_mad( - t, - __spirv_ocl_mad( - t, __spirv_ocl_mad(t, __spirv_ocl_mad(t, sb7, sb6), sb5), - sb4), - sb3), - sb2), - sb1); - - tu = __spirv_ocl_mad( - t, - __spirv_ocl_mad( - t, - __spirv_ocl_mad( - t, - __spirv_ocl_mad( - t, - __spirv_ocl_mad( - t, __spirv_ocl_mad(t, __spirv_ocl_mad(t, ra7, ra6), ra5), - ra4), - ra3), - ra2), - ra1), - ra0); - tv = __spirv_ocl_mad( - t, - __spirv_ocl_mad( - t, - __spirv_ocl_mad( - t, - __spirv_ocl_mad( - t, - __spirv_ocl_mad( - t, __spirv_ocl_mad(t, __spirv_ocl_mad(t, sa8, sa7), sa6), - sa5), - sa4), - sa3), - sa2), - sa1); - u = absx < 0x1.6db6dcp+1f ? tu : u; - v = absx < 0x1.6db6dcp+1f ? tv : v; - - tu = __spirv_ocl_mad( - t, - __spirv_ocl_mad( - t, - __spirv_ocl_mad( - t, - __spirv_ocl_mad( - t, __spirv_ocl_mad(t, __spirv_ocl_mad(t, pa6, pa5), pa4), - pa3), - pa2), - pa1), - pa0); - tv = __spirv_ocl_mad( - t, - __spirv_ocl_mad( - t, - __spirv_ocl_mad( - t, __spirv_ocl_mad(t, __spirv_ocl_mad(t, qa6, qa5), qa4), qa3), - qa2), - qa1); - u = absx < 1.25f ? tu : u; - v = absx < 1.25f ? tv : v; - - tu = __spirv_ocl_mad( - t, - __spirv_ocl_mad(t, __spirv_ocl_mad(t, __spirv_ocl_mad(t, pp4, pp3), pp2), - pp1), - pp0); - tv = __spirv_ocl_mad( - t, - __spirv_ocl_mad(t, __spirv_ocl_mad(t, __spirv_ocl_mad(t, qq5, qq4), qq3), - qq2), - qq1); - u = absx < 0.84375f ? tu : u; - v = absx < 0.84375f ? tv : v; - - v = __spirv_ocl_mad(t, v, 1.0f); - float q = MATH_DIVIDE(u, v); - - float ret = 1.0f; - - // |x| < 6 - float z = __clc_as_float(ix & 0xfffff000); - float r = __spirv_ocl_exp(__spirv_ocl_mad(-z, z, -0.5625f)) * - __spirv_ocl_exp(__spirv_ocl_mad(z - absx, z + absx, q)); - r = 1.0f - MATH_DIVIDE(r, absx); - ret = absx < 6.0f ? r : ret; - - r = erx + q; - ret = absx < 1.25f ? r : ret; - - ret = __clc_as_float((hx & 0x80000000) | __clc_as_int(ret)); - - r = __spirv_ocl_mad(x, q, x); - ret = absx < 0.84375f ? r : ret; - - // Prevent underflow - r = 0.125f * __spirv_ocl_mad(8.0f, x, efx8 * x); - ret = absx < 0x1.0p-28f ? r : ret; - - ret = __spirv_IsNan(x) ? x : ret; - - return ret; -} - -_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, float, __spirv_ocl_erf, float); - -#ifdef cl_khr_fp64 - -#pragma OPENCL EXTENSION cl_khr_fp64 : enable - -/* - * ==================================================== - * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. - * - * Developed at SunPro, a Sun Microsystems, Inc. business. - * Permission to use, copy, modify, and distribute this - * software is freely granted, provided that this notice - * is preserved. - * ==================================================== - */ - -/* double erf(double x) - * double erfc(double x) - * x - * 2 |\ - * erf(x) = --------- | exp(-t*t)dt - * sqrt(pi) \| - * 0 - * - * erfc(x) = 1-erf(x) - * Note that - * erf(-x) = -erf(x) - * erfc(-x) = 2 - erfc(x) - * - * Method: - * 1. For |x| in [0, 0.84375] - * erf(x) = x + x*R(x^2) - * erfc(x) = 1 - erf(x) if x in [-.84375,0.25] - * = 0.5 + ((0.5-x)-x*R) if x in [0.25,0.84375] - * where R = P/Q where P is an odd poly of degree 8 and - * Q is an odd poly of degree 10. - * -57.90 - * | R - (erf(x)-x)/x | <= 2 - * - * - * Remark. The formula is derived by noting - * erf(x) = (2/sqrt(pi))*(x - x^3/3 + x^5/10 - x^7/42 + ....) - * and that - * 2/sqrt(pi) = 1.128379167095512573896158903121545171688 - * is close to one. The interval is chosen because the fix - * point of erf(x) is near 0.6174 (i.e., erf(x)=x when x is - * near 0.6174), and by some experiment, 0.84375 is chosen to - * guarantee the error is less than one ulp for erf. - * - * 2. For |x| in [0.84375,1.25], let s = |x| - 1, and - * c = 0.84506291151 rounded to single (24 bits) - * erf(x) = sign(x) * (c + P1(s)/Q1(s)) - * erfc(x) = (1-c) - P1(s)/Q1(s) if x > 0 - * 1+(c+P1(s)/Q1(s)) if x < 0 - * |P1/Q1 - (erf(|x|)-c)| <= 2**-59.06 - * Remark: here we use the taylor series expansion at x=1. - * erf(1+s) = erf(1) + s*Poly(s) - * = 0.845.. + P1(s)/Q1(s) - * That is, we use rational approximation to approximate - * erf(1+s) - (c = (single)0.84506291151) - * Note that |P1/Q1|< 0.078 for x in [0.84375,1.25] - * where - * P1(s) = degree 6 poly in s - * Q1(s) = degree 6 poly in s - * - * 3. For x in [1.25,1/0.35(~2.857143)], - * erfc(x) = (1/x)*exp(-x*x-0.5625+R1/S1) - * erf(x) = 1 - erfc(x) - * where - * R1(z) = degree 7 poly in z, (z=1/x^2) - * S1(z) = degree 8 poly in z - * - * 4. For x in [1/0.35,28] - * erfc(x) = (1/x)*exp(-x*x-0.5625+R2/S2) if x > 0 - * = 2.0 - (1/x)*exp(-x*x-0.5625+R2/S2) if -6 x >= 28 - * erf(x) = sign(x) *(1 - tiny) (raise inexact) - * erfc(x) = tiny*tiny (raise underflow) if x > 0 - * = 2 - tiny if x<0 - * - * 7. Special case: - * erf(0) = 0, erf(inf) = 1, erf(-inf) = -1, - * erfc(0) = 1, erfc(inf) = 0, erfc(-inf) = 2, - * erfc/erf(NaN) is NaN - */ - -#define AU0 -9.86494292470009928597e-03 -#define AU1 -7.99283237680523006574e-01 -#define AU2 -1.77579549177547519889e+01 -#define AU3 -1.60636384855821916062e+02 -#define AU4 -6.37566443368389627722e+02 -#define AU5 -1.02509513161107724954e+03 -#define AU6 -4.83519191608651397019e+02 - -#define AV1 3.03380607434824582924e+01 -#define AV2 3.25792512996573918826e+02 -#define AV3 1.53672958608443695994e+03 -#define AV4 3.19985821950859553908e+03 -#define AV5 2.55305040643316442583e+03 -#define AV6 4.74528541206955367215e+02 -#define AV7 -2.24409524465858183362e+01 - -#define BU0 -9.86494403484714822705e-03 -#define BU1 -6.93858572707181764372e-01 -#define BU2 -1.05586262253232909814e+01 -#define BU3 -6.23753324503260060396e+01 -#define BU4 -1.62396669462573470355e+02 -#define BU5 -1.84605092906711035994e+02 -#define BU6 -8.12874355063065934246e+01 -#define BU7 -9.81432934416914548592e+00 - -#define BV1 1.96512716674392571292e+01 -#define BV2 1.37657754143519042600e+02 -#define BV3 4.34565877475229228821e+02 -#define BV4 6.45387271733267880336e+02 -#define BV5 4.29008140027567833386e+02 -#define BV6 1.08635005541779435134e+02 -#define BV7 6.57024977031928170135e+00 -#define BV8 -6.04244152148580987438e-02 - -#define CU0 -2.36211856075265944077e-03 -#define CU1 4.14856118683748331666e-01 -#define CU2 -3.72207876035701323847e-01 -#define CU3 3.18346619901161753674e-01 -#define CU4 -1.10894694282396677476e-01 -#define CU5 3.54783043256182359371e-02 -#define CU6 -2.16637559486879084300e-03 - -#define CV1 1.06420880400844228286e-01 -#define CV2 5.40397917702171048937e-01 -#define CV3 7.18286544141962662868e-02 -#define CV4 1.26171219808761642112e-01 -#define CV5 1.36370839120290507362e-02 -#define CV6 1.19844998467991074170e-02 - -#define DU0 1.28379167095512558561e-01 -#define DU1 -3.25042107247001499370e-01 -#define DU2 -2.84817495755985104766e-02 -#define DU3 -5.77027029648944159157e-03 -#define DU4 -2.37630166566501626084e-05 - -#define DV1 3.97917223959155352819e-01 -#define DV2 6.50222499887672944485e-02 -#define DV3 5.08130628187576562776e-03 -#define DV4 1.32494738004321644526e-04 -#define DV5 -3.96022827877536812320e-06 - -_CLC_OVERLOAD _CLC_DEF double __spirv_ocl_erf(double y) { - double x = __spirv_ocl_fabs(y); - double x2 = x * x; - double xm1 = x - 1.0; - - // Poly variable - double t = 1.0 / x2; - t = x < 1.25 ? xm1 : t; - t = x < 0.84375 ? x2 : t; - - double u, ut, v, vt; - - // Evaluate rational poly - // XXX We need to see of we can grab 16 coefficents from a table - // faster than evaluating 3 of the poly pairs - // if (x < 6.0) - u = __spirv_ocl_fma( - t, - __spirv_ocl_fma( - t, - __spirv_ocl_fma( - t, - __spirv_ocl_fma( - t, __spirv_ocl_fma(t, __spirv_ocl_fma(t, AU6, AU5), AU4), - AU3), - AU2), - AU1), - AU0); - v = __spirv_ocl_fma( - t, - __spirv_ocl_fma( - t, - __spirv_ocl_fma( - t, - __spirv_ocl_fma( - t, __spirv_ocl_fma(t, __spirv_ocl_fma(t, AV7, AV6), AV5), - AV4), - AV3), - AV2), - AV1); - - ut = __spirv_ocl_fma( - t, - __spirv_ocl_fma( - t, - __spirv_ocl_fma( - t, - __spirv_ocl_fma( - t, - __spirv_ocl_fma( - t, __spirv_ocl_fma(t, __spirv_ocl_fma(t, BU7, BU6), BU5), - BU4), - BU3), - BU2), - BU1), - BU0); - vt = __spirv_ocl_fma( - t, - __spirv_ocl_fma( - t, - __spirv_ocl_fma( - t, - __spirv_ocl_fma( - t, - __spirv_ocl_fma( - t, __spirv_ocl_fma(t, __spirv_ocl_fma(t, BV8, BV7), BV6), - BV5), - BV4), - BV3), - BV2), - BV1); - u = x < 0x1.6db6ep+1 ? ut : u; - v = x < 0x1.6db6ep+1 ? vt : v; - - ut = __spirv_ocl_fma( - t, - __spirv_ocl_fma( - t, - __spirv_ocl_fma( - t, - __spirv_ocl_fma( - t, __spirv_ocl_fma(t, __spirv_ocl_fma(t, CU6, CU5), CU4), - CU3), - CU2), - CU1), - CU0); - vt = __spirv_ocl_fma( - t, - __spirv_ocl_fma( - t, - __spirv_ocl_fma( - t, __spirv_ocl_fma(t, __spirv_ocl_fma(t, CV6, CV5), CV4), CV3), - CV2), - CV1); - u = x < 1.25 ? ut : u; - v = x < 1.25 ? vt : v; - - ut = __spirv_ocl_fma( - t, - __spirv_ocl_fma(t, __spirv_ocl_fma(t, __spirv_ocl_fma(t, DU4, DU3), DU2), - DU1), - DU0); - vt = __spirv_ocl_fma( - t, - __spirv_ocl_fma(t, __spirv_ocl_fma(t, __spirv_ocl_fma(t, DV5, DV4), DV3), - DV2), - DV1); - u = x < 0.84375 ? ut : u; - v = x < 0.84375 ? vt : v; - - v = __spirv_ocl_fma(t, v, 1.0); - - // Compute rational approximation - double q = u / v; - - // Compute results - double z = __clc_as_double(__clc_as_long(x) & 0xffffffff00000000L); - double r = - __spirv_ocl_exp(-z * z - 0.5625) * __spirv_ocl_exp((z - x) * (z + x) + q); - r = 1.0 - r / x; - - double ret = x < 6.0 ? r : 1.0; - - r = 8.45062911510467529297e-01 + q; - ret = x < 1.25 ? r : ret; - - q = x < 0x1.0p-28 ? 1.28379167095512586316e-01 : q; - - r = __spirv_ocl_fma(x, q, x); - ret = x < 0.84375 ? r : ret; - - ret = __spirv_IsNan(x) ? x : ret; - - return y < 0.0 ? -ret : ret; -} - -_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, __spirv_ocl_erf, double); - -#endif - -#ifdef cl_khr_fp16 - -#pragma OPENCL EXTENSION cl_khr_fp16 : enable - -_CLC_DEFINE_UNARY_BUILTIN_SCALARIZE(half, __spirv_ocl_erf, __builtin_erff, half) - -#endif +#define FUNCTION __spirv_ocl_erf +#define __CLC_FUNCTION(x) __clc_erf +#define __CLC_BODY +#include diff --git a/libclc/libspirv/lib/generic/math/erfc.cl b/libclc/libspirv/lib/generic/math/erfc.cl index f270448e3394c..c6caaac08d34e 100644 --- a/libclc/libspirv/lib/generic/math/erfc.cl +++ b/libclc/libspirv/lib/generic/math/erfc.cl @@ -6,554 +6,10 @@ // //===----------------------------------------------------------------------===// +#include #include -#include -#include - -/* - * ==================================================== - * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. - * - * Developed at SunPro, a Sun Microsystems, Inc. business. - * Permission to use, copy, modify, and distribute this - * software is freely granted, provided that this notice - * is preserved. - * ==================================================== - */ - -#define erx_f 8.4506291151e-01f /* 0x3f58560b */ - -// Coefficients for approximation to erf on [00.84375] - -#define efx 1.2837916613e-01f /* 0x3e0375d4 */ -#define efx8 1.0270333290e+00f /* 0x3f8375d4 */ - -#define pp0 1.2837916613e-01f /* 0x3e0375d4 */ -#define pp1 -3.2504209876e-01f /* 0xbea66beb */ -#define pp2 -2.8481749818e-02f /* 0xbce9528f */ -#define pp3 -5.7702702470e-03f /* 0xbbbd1489 */ -#define pp4 -2.3763017452e-05f /* 0xb7c756b1 */ -#define qq1 3.9791721106e-01f /* 0x3ecbbbce */ -#define qq2 6.5022252500e-02f /* 0x3d852a63 */ -#define qq3 5.0813062117e-03f /* 0x3ba68116 */ -#define qq4 1.3249473704e-04f /* 0x390aee49 */ -#define qq5 -3.9602282413e-06f /* 0xb684e21a */ - -// Coefficients for approximation to erf in [0.843751.25] - -#define pa0 -2.3621185683e-03f /* 0xbb1acdc6 */ -#define pa1 4.1485610604e-01f /* 0x3ed46805 */ -#define pa2 -3.7220788002e-01f /* 0xbebe9208 */ -#define pa3 3.1834661961e-01f /* 0x3ea2fe54 */ -#define pa4 -1.1089469492e-01f /* 0xbde31cc2 */ -#define pa5 3.5478305072e-02f /* 0x3d1151b3 */ -#define pa6 -2.1663755178e-03f /* 0xbb0df9c0 */ -#define qa1 1.0642088205e-01f /* 0x3dd9f331 */ -#define qa2 5.4039794207e-01f /* 0x3f0a5785 */ -#define qa3 7.1828655899e-02f /* 0x3d931ae7 */ -#define qa4 1.2617121637e-01f /* 0x3e013307 */ -#define qa5 1.3637083583e-02f /* 0x3c5f6e13 */ -#define qa6 1.1984500103e-02f /* 0x3c445aa3 */ - -// Coefficients for approximation to erfc in [1.251/0.35] - -#define ra0 -9.8649440333e-03f /* 0xbc21a093 */ -#define ra1 -6.9385856390e-01f /* 0xbf31a0b7 */ -#define ra2 -1.0558626175e+01f /* 0xc128f022 */ -#define ra3 -6.2375331879e+01f /* 0xc2798057 */ -#define ra4 -1.6239666748e+02f /* 0xc322658c */ -#define ra5 -1.8460508728e+02f /* 0xc3389ae7 */ -#define ra6 -8.1287437439e+01f /* 0xc2a2932b */ -#define ra7 -9.8143291473e+00f /* 0xc11d077e */ -#define sa1 1.9651271820e+01f /* 0x419d35ce */ -#define sa2 1.3765776062e+02f /* 0x4309a863 */ -#define sa3 4.3456588745e+02f /* 0x43d9486f */ -#define sa4 6.4538726807e+02f /* 0x442158c9 */ -#define sa5 4.2900814819e+02f /* 0x43d6810b */ -#define sa6 1.0863500214e+02f /* 0x42d9451f */ -#define sa7 6.5702495575e+00f /* 0x40d23f7c */ -#define sa8 -6.0424413532e-02f /* 0xbd777f97 */ - -// Coefficients for approximation to erfc in [1/.3528] - -#define rb0 -9.8649431020e-03f /* 0xbc21a092 */ -#define rb1 -7.9928326607e-01f /* 0xbf4c9dd4 */ -#define rb2 -1.7757955551e+01f /* 0xc18e104b */ -#define rb3 -1.6063638306e+02f /* 0xc320a2ea */ -#define rb4 -6.3756646729e+02f /* 0xc41f6441 */ -#define rb5 -1.0250950928e+03f /* 0xc480230b */ -#define rb6 -4.8351919556e+02f /* 0xc3f1c275 */ -#define sb1 3.0338060379e+01f /* 0x41f2b459 */ -#define sb2 3.2579251099e+02f /* 0x43a2e571 */ -#define sb3 1.5367296143e+03f /* 0x44c01759 */ -#define sb4 3.1998581543e+03f /* 0x4547fdbb */ -#define sb5 2.5530502930e+03f /* 0x451f90ce */ -#define sb6 4.7452853394e+02f /* 0x43ed43a7 */ -#define sb7 -2.2440952301e+01f /* 0xc1b38712 */ - -_CLC_OVERLOAD _CLC_DEF float __spirv_ocl_erfc(float x) { - int hx = __clc_as_int(x); - int ix = hx & 0x7fffffff; - float absx = __clc_as_float(ix); - - // Argument for polys - float x2 = absx * absx; - float t = 1.0f / x2; - float tt = absx - 1.0f; - t = absx < 1.25f ? tt : t; - t = absx < 0.84375f ? x2 : t; - - // Evaluate polys - float tu, tv, u, v; - - u = __spirv_ocl_mad( - t, - __spirv_ocl_mad( - t, - __spirv_ocl_mad( - t, - __spirv_ocl_mad( - t, __spirv_ocl_mad(t, __spirv_ocl_mad(t, rb6, rb5), rb4), - rb3), - rb2), - rb1), - rb0); - v = __spirv_ocl_mad( - t, - __spirv_ocl_mad( - t, - __spirv_ocl_mad( - t, - __spirv_ocl_mad( - t, __spirv_ocl_mad(t, __spirv_ocl_mad(t, sb7, sb6), sb5), - sb4), - sb3), - sb2), - sb1); - - tu = __spirv_ocl_mad( - t, - __spirv_ocl_mad( - t, - __spirv_ocl_mad( - t, - __spirv_ocl_mad( - t, - __spirv_ocl_mad( - t, __spirv_ocl_mad(t, __spirv_ocl_mad(t, ra7, ra6), ra5), - ra4), - ra3), - ra2), - ra1), - ra0); - tv = __spirv_ocl_mad( - t, - __spirv_ocl_mad( - t, - __spirv_ocl_mad( - t, - __spirv_ocl_mad( - t, - __spirv_ocl_mad( - t, __spirv_ocl_mad(t, __spirv_ocl_mad(t, sa8, sa7), sa6), - sa5), - sa4), - sa3), - sa2), - sa1); - u = absx < 0x1.6db6dap+1f ? tu : u; - v = absx < 0x1.6db6dap+1f ? tv : v; - - tu = __spirv_ocl_mad( - t, - __spirv_ocl_mad( - t, - __spirv_ocl_mad( - t, - __spirv_ocl_mad( - t, __spirv_ocl_mad(t, __spirv_ocl_mad(t, pa6, pa5), pa4), - pa3), - pa2), - pa1), - pa0); - tv = __spirv_ocl_mad( - t, - __spirv_ocl_mad( - t, - __spirv_ocl_mad( - t, __spirv_ocl_mad(t, __spirv_ocl_mad(t, qa6, qa5), qa4), qa3), - qa2), - qa1); - u = absx < 1.25f ? tu : u; - v = absx < 1.25f ? tv : v; - - tu = __spirv_ocl_mad( - t, - __spirv_ocl_mad(t, __spirv_ocl_mad(t, __spirv_ocl_mad(t, pp4, pp3), pp2), - pp1), - pp0); - tv = __spirv_ocl_mad( - t, - __spirv_ocl_mad(t, __spirv_ocl_mad(t, __spirv_ocl_mad(t, qq5, qq4), qq3), - qq2), - qq1); - u = absx < 0.84375f ? tu : u; - v = absx < 0.84375f ? tv : v; - - v = __spirv_ocl_mad(t, v, 1.0f); - - float q = MATH_DIVIDE(u, v); - - float ret = 0.0f; - - float z = __clc_as_float(ix & 0xfffff000); - float r = __spirv_ocl_exp(__spirv_ocl_mad(-z, z, -0.5625f)) * - __spirv_ocl_exp(__spirv_ocl_mad(z - absx, z + absx, q)); - r = MATH_DIVIDE(r, absx); - t = 2.0f - r; - r = x < 0.0f ? t : r; - ret = absx < 28.0f ? r : ret; - - r = 1.0f - erx_f - q; - t = erx_f + q + 1.0f; - r = x < 0.0f ? t : r; - ret = absx < 1.25f ? r : ret; - - r = 0.5f - __spirv_ocl_mad(x, q, x - 0.5f); - ret = absx < 0.84375f ? r : ret; - - ret = x < -6.0f ? 2.0f : ret; - - ret = __spirv_IsNan(x) ? x : ret; - - return ret; -} - -_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, float, __spirv_ocl_erfc, float); - -#ifdef cl_khr_fp64 - -#pragma OPENCL EXTENSION cl_khr_fp64 : enable - -/* - * ==================================================== - * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. - * - * Developed at SunPro, a Sun Microsystems, Inc. business. - * Permission to use, copy, modify, and distribute this - * software is freely granted, provided that this notice - * is preserved. - * ==================================================== - */ - -/* double erf(double x) - * double erfc(double x) - * x - * 2 |\ - * erf(x) = --------- | exp(-t*t)dt - * sqrt(pi) \| - * 0 - * - * erfc(x) = 1-erf(x) - * Note that - * erf(-x) = -erf(x) - * erfc(-x) = 2 - erfc(x) - * - * Method: - * 1. For |x| in [0, 0.84375] - * erf(x) = x + x*R(x^2) - * erfc(x) = 1 - erf(x) if x in [-.84375,0.25] - * = 0.5 + ((0.5-x)-x*R) if x in [0.25,0.84375] - * where R = P/Q where P is an odd poly of degree 8 and - * Q is an odd poly of degree 10. - * -57.90 - * | R - (erf(x)-x)/x | <= 2 - * - * - * Remark. The formula is derived by noting - * erf(x) = (2/sqrt(pi))*(x - x^3/3 + x^5/10 - x^7/42 + ....) - * and that - * 2/sqrt(pi) = 1.128379167095512573896158903121545171688 - * is close to one. The interval is chosen because the fix - * point of erf(x) is near 0.6174 (i.e., erf(x)=x when x is - * near 0.6174), and by some experiment, 0.84375 is chosen to - * guarantee the error is less than one ulp for erf. - * - * 2. For |x| in [0.84375,1.25], let s = |x| - 1, and - * c = 0.84506291151 rounded to single (24 bits) - * erf(x) = sign(x) * (c + P1(s)/Q1(s)) - * erfc(x) = (1-c) - P1(s)/Q1(s) if x > 0 - * 1+(c+P1(s)/Q1(s)) if x < 0 - * |P1/Q1 - (erf(|x|)-c)| <= 2**-59.06 - * Remark: here we use the taylor series expansion at x=1. - * erf(1+s) = erf(1) + s*Poly(s) - * = 0.845.. + P1(s)/Q1(s) - * That is, we use rational approximation to approximate - * erf(1+s) - (c = (single)0.84506291151) - * Note that |P1/Q1|< 0.078 for x in [0.84375,1.25] - * where - * P1(s) = degree 6 poly in s - * Q1(s) = degree 6 poly in s - * - * 3. For x in [1.25,1/0.35(~2.857143)], - * erfc(x) = (1/x)*exp(-x*x-0.5625+R1/S1) - * erf(x) = 1 - erfc(x) - * where - * R1(z) = degree 7 poly in z, (z=1/x^2) - * S1(z) = degree 8 poly in z - * - * 4. For x in [1/0.35,28] - * erfc(x) = (1/x)*exp(-x*x-0.5625+R2/S2) if x > 0 - * = 2.0 - (1/x)*exp(-x*x-0.5625+R2/S2) if -6 x >= 28 - * erf(x) = sign(x) *(1 - tiny) (raise inexact) - * erfc(x) = tiny*tiny (raise underflow) if x > 0 - * = 2 - tiny if x<0 - * - * 7. Special case: - * erf(0) = 0, erf(inf) = 1, erf(-inf) = -1, - * erfc(0) = 1, erfc(inf) = 0, erfc(-inf) = 2, - * erfc/erf(NaN) is NaN - */ - -#define AU0 -9.86494292470009928597e-03 -#define AU1 -7.99283237680523006574e-01 -#define AU2 -1.77579549177547519889e+01 -#define AU3 -1.60636384855821916062e+02 -#define AU4 -6.37566443368389627722e+02 -#define AU5 -1.02509513161107724954e+03 -#define AU6 -4.83519191608651397019e+02 - -#define AV0 3.03380607434824582924e+01 -#define AV1 3.25792512996573918826e+02 -#define AV2 1.53672958608443695994e+03 -#define AV3 3.19985821950859553908e+03 -#define AV4 2.55305040643316442583e+03 -#define AV5 4.74528541206955367215e+02 -#define AV6 -2.24409524465858183362e+01 - -#define BU0 -9.86494403484714822705e-03 -#define BU1 -6.93858572707181764372e-01 -#define BU2 -1.05586262253232909814e+01 -#define BU3 -6.23753324503260060396e+01 -#define BU4 -1.62396669462573470355e+02 -#define BU5 -1.84605092906711035994e+02 -#define BU6 -8.12874355063065934246e+01 -#define BU7 -9.81432934416914548592e+00 - -#define BV0 1.96512716674392571292e+01 -#define BV1 1.37657754143519042600e+02 -#define BV2 4.34565877475229228821e+02 -#define BV3 6.45387271733267880336e+02 -#define BV4 4.29008140027567833386e+02 -#define BV5 1.08635005541779435134e+02 -#define BV6 6.57024977031928170135e+00 -#define BV7 -6.04244152148580987438e-02 - -#define CU0 -2.36211856075265944077e-03 -#define CU1 4.14856118683748331666e-01 -#define CU2 -3.72207876035701323847e-01 -#define CU3 3.18346619901161753674e-01 -#define CU4 -1.10894694282396677476e-01 -#define CU5 3.54783043256182359371e-02 -#define CU6 -2.16637559486879084300e-03 - -#define CV0 1.06420880400844228286e-01 -#define CV1 5.40397917702171048937e-01 -#define CV2 7.18286544141962662868e-02 -#define CV3 1.26171219808761642112e-01 -#define CV4 1.36370839120290507362e-02 -#define CV5 1.19844998467991074170e-02 - -#define DU0 1.28379167095512558561e-01 -#define DU1 -3.25042107247001499370e-01 -#define DU2 -2.84817495755985104766e-02 -#define DU3 -5.77027029648944159157e-03 -#define DU4 -2.37630166566501626084e-05 - -#define DV0 3.97917223959155352819e-01 -#define DV1 6.50222499887672944485e-02 -#define DV2 5.08130628187576562776e-03 -#define DV3 1.32494738004321644526e-04 -#define DV4 -3.96022827877536812320e-06 - -_CLC_OVERLOAD _CLC_DEF double __spirv_ocl_erfc(double x) { - long lx = __clc_as_long(x); - long ax = lx & 0x7fffffffffffffffL; - double absx = __clc_as_double(ax); - int xneg = lx != ax; - - // Poly arg - double x2 = x * x; - double xm1 = absx - 1.0; - double t = 1.0 / x2; - t = absx < 1.25 ? xm1 : t; - t = absx < 0.84375 ? x2 : t; - - // Evaluate rational poly - // XXX Need to evaluate if we can grab the 14 coefficients from a - // table faster than evaluating 3 pairs of polys - double tu, tv, u, v; - - // |x| < 28 - u = __spirv_ocl_fma( - t, - __spirv_ocl_fma( - t, - __spirv_ocl_fma( - t, - __spirv_ocl_fma( - t, __spirv_ocl_fma(t, __spirv_ocl_fma(t, AU6, AU5), AU4), - AU3), - AU2), - AU1), - AU0); - v = __spirv_ocl_fma( - t, - __spirv_ocl_fma( - t, - __spirv_ocl_fma( - t, - __spirv_ocl_fma( - t, __spirv_ocl_fma(t, __spirv_ocl_fma(t, AV6, AV5), AV4), - AV3), - AV2), - AV1), - AV0); - - tu = __spirv_ocl_fma( - t, - __spirv_ocl_fma( - t, - __spirv_ocl_fma( - t, - __spirv_ocl_fma( - t, - __spirv_ocl_fma( - t, __spirv_ocl_fma(t, __spirv_ocl_fma(t, BU7, BU6), BU5), - BU4), - BU3), - BU2), - BU1), - BU0); - tv = __spirv_ocl_fma( - t, - __spirv_ocl_fma( - t, - __spirv_ocl_fma( - t, - __spirv_ocl_fma( - t, - __spirv_ocl_fma( - t, __spirv_ocl_fma(t, __spirv_ocl_fma(t, BV7, BV6), BV5), - BV4), - BV3), - BV2), - BV1), - BV0); - u = absx < 0x1.6db6dp+1 ? tu : u; - v = absx < 0x1.6db6dp+1 ? tv : v; - - tu = __spirv_ocl_fma( - t, - __spirv_ocl_fma( - t, - __spirv_ocl_fma( - t, - __spirv_ocl_fma( - t, __spirv_ocl_fma(t, __spirv_ocl_fma(t, CU6, CU5), CU4), - CU3), - CU2), - CU1), - CU0); - tv = __spirv_ocl_fma( - t, - __spirv_ocl_fma( - t, - __spirv_ocl_fma( - t, __spirv_ocl_fma(t, __spirv_ocl_fma(t, CV5, CV4), CV3), CV2), - CV1), - CV0); - u = absx < 1.25 ? tu : u; - v = absx < 1.25 ? tv : v; - - tu = __spirv_ocl_fma( - t, - __spirv_ocl_fma(t, __spirv_ocl_fma(t, __spirv_ocl_fma(t, DU4, DU3), DU2), - DU1), - DU0); - tv = __spirv_ocl_fma( - t, - __spirv_ocl_fma(t, __spirv_ocl_fma(t, __spirv_ocl_fma(t, DV4, DV3), DV2), - DV1), - DV0); - u = absx < 0.84375 ? tu : u; - v = absx < 0.84375 ? tv : v; - - v = __spirv_ocl_fma(t, v, 1.0); - double q = u / v; - - // Evaluate return value - - // |x| < 28 - double z = __clc_as_double(ax & 0xffffffff00000000UL); - double ret = __spirv_ocl_exp(-z * z - 0.5625) * - __spirv_ocl_exp((z - absx) * (z + absx) + q) / absx; - t = 2.0 - ret; - ret = xneg ? t : ret; - - const double erx = 8.45062911510467529297e-01; - z = erx + q + 1.0; - t = 1.0 - erx - q; - t = xneg ? z : t; - ret = absx < 1.25 ? t : ret; - - // z = 1.0 - fma(x, q, x); - // t = 0.5 - fma(x, q, x - 0.5); - // t = xneg == 1 | absx < 0.25 ? z : t; - t = __spirv_ocl_fma(-x, q, 1.0 - x); - ret = absx < 0.84375 ? t : ret; - - ret = x >= 28.0 ? 0.0 : ret; - ret = x <= -6.0 ? 2.0 : ret; - ret = ax > 0x7ff0000000000000UL ? x : ret; - - return ret; -} - -_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, __spirv_ocl_erfc, double); - -#endif - -#ifdef cl_khr_fp16 - -#pragma OPENCL EXTENSION cl_khr_fp16 : enable - -_CLC_DEFINE_UNARY_BUILTIN_SCALARIZE(half, __spirv_ocl_erfc, __builtin_erfcf, half) - -#endif +#define FUNCTION __spirv_ocl_erfc +#define __CLC_FUNCTION(x) __clc_erfc +#define __CLC_BODY +#include diff --git a/libclc/libspirv/lib/generic/math/fdim.cl b/libclc/libspirv/lib/generic/math/fdim.cl index 1db809555760a..d838ba31ff69d 100644 --- a/libclc/libspirv/lib/generic/math/fdim.cl +++ b/libclc/libspirv/lib/generic/math/fdim.cl @@ -6,19 +6,10 @@ // //===----------------------------------------------------------------------===// +#include #include -#include - -#define __CLC_BODY +#define FUNCTION __spirv_ocl_fdim +#define __CLC_FUNCTION(x) __clc_fdim +#define __CLC_BODY #include - -#include - -#ifdef cl_khr_fp16 - -#pragma OPENCL EXTENSION cl_khr_fp16 : enable - -_CLC_DEFINE_BINARY_BUILTIN(half, __spirv_ocl_fdim, __builtin_fdimf, half, half) - -#endif diff --git a/libclc/libspirv/lib/generic/math/fdim.inc b/libclc/libspirv/lib/generic/math/fdim.inc deleted file mode 100644 index f3521a7d74914..0000000000000 --- a/libclc/libspirv/lib/generic/math/fdim.inc +++ /dev/null @@ -1,62 +0,0 @@ -//===----------------------------------------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#if __CLC_FPSIZE == 32 -#ifdef __CLC_SCALAR -_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __spirv_ocl_fdim(__CLC_GENTYPE x, - __CLC_GENTYPE y) { - if (__builtin_isnan(x) || __builtin_isnan(y)) - return __clc_as_float(QNANBITPATT_SP32); - return __spirv_ocl_fmax(x - y, 0.0f); -} -#define __CLC_FDIM_VEC(width) \ - _CLC_OVERLOAD _CLC_DEF float##width __spirv_ocl_fdim(float##width x, \ - float##width y) { \ - /* Determine if x or y is NaN. */ \ - /* Vector true is -1, i.e. all-bits-set, and NaN==NaN is false. */ \ - /* If either is NaN, then ~((x==x) & (y==y)) will be 0 (e.g. ~(-1)), as \ - * will n. */ \ - int##width n = ~((x == x) & (y == y)) & QNANBITPATT_SP32; \ - /* Calculate x-y if x>y, otherwise positive 0, again taking */ \ - /* advantage of vector true being all-bits-set. */ \ - int##width r = (x > y) & __clc_as_int##width(x - y); \ - return __clc_as_float##width(n | r); \ - } -__CLC_FDIM_VEC(2) -__CLC_FDIM_VEC(3) -__CLC_FDIM_VEC(4) -__CLC_FDIM_VEC(8) -__CLC_FDIM_VEC(16) -#undef __CLC_FDIM_VEC -#endif -#endif - -#if __CLC_FPSIZE == 64 -#ifdef __CLC_SCALAR -_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __spirv_ocl_fdim(__CLC_GENTYPE x, - private __CLC_GENTYPE y) { - long n = -(__spirv_IsNan(x) | __spirv_IsNan(y)) & QNANBITPATT_DP64; - long r = -(x > y) & __clc_as_long(x - y); - return __clc_as_double(n | r); -} -#define __CLC_FDIM_VEC(width) \ - _CLC_OVERLOAD _CLC_DEF double##width __spirv_ocl_fdim(double##width x, \ - double##width y) { \ - /* See comment in float implementation for explanation. */ \ - long##width n = ~((x == x) & (y == y)) & QNANBITPATT_DP64; \ - long##width r = (x > y) & __clc_as_long##width(x - y); \ - return __clc_as_double##width(n | r); \ - } -__CLC_FDIM_VEC(2) -__CLC_FDIM_VEC(3) -__CLC_FDIM_VEC(4) -__CLC_FDIM_VEC(8) -__CLC_FDIM_VEC(16) -#undef __CLC_FDIM_VEC -#endif -#endif diff --git a/libclc/libspirv/lib/generic/math/fma.cl b/libclc/libspirv/lib/generic/math/fma.cl index 79ea02e7b2e18..086b6693d3724 100644 --- a/libclc/libspirv/lib/generic/math/fma.cl +++ b/libclc/libspirv/lib/generic/math/fma.cl @@ -6,22 +6,10 @@ // //===----------------------------------------------------------------------===// -#include #include #include -_CLC_DEFINE_TERNARY_BUILTIN(float, __spirv_ocl_fma, __clc_fma, float, float, float) - -#ifdef cl_khr_fp64 -#pragma OPENCL EXTENSION cl_khr_fp64 : enable - -_CLC_DEFINE_TERNARY_BUILTIN(double, __spirv_ocl_fma, __clc_fma, double, double, double) - -#endif - -#ifdef cl_khr_fp16 -#pragma OPENCL EXTENSION cl_khr_fp16 : enable - -_CLC_DEFINE_TERNARY_BUILTIN(half, __spirv_ocl_fma, __clc_fma, half, half, half) - -#endif +#define FUNCTION __spirv_ocl_fma +#define __CLC_FUNCTION(x) __clc_fma +#define __CLC_BODY +#include diff --git a/libclc/libspirv/lib/generic/math/fmax.cl b/libclc/libspirv/lib/generic/math/fmax.cl index 0aaf62a859825..4279485189dd7 100644 --- a/libclc/libspirv/lib/generic/math/fmax.cl +++ b/libclc/libspirv/lib/generic/math/fmax.cl @@ -6,34 +6,10 @@ // //===----------------------------------------------------------------------===// -#include +#include #include -_CLC_DEFINE_BINARY_BUILTIN(float, __spirv_ocl_fmax, __builtin_fmaxf, float, float); - -#ifdef cl_khr_fp64 - -#pragma OPENCL EXTENSION cl_khr_fp64 : enable - -_CLC_DEFINE_BINARY_BUILTIN(double, __spirv_ocl_fmax, __builtin_fmax, double, double); - -#endif - -#ifdef cl_khr_fp16 - -#pragma OPENCL EXTENSION cl_khr_fp16 : enable - -_CLC_DEF _CLC_OVERLOAD half __spirv_ocl_fmax(half x, half y) -{ - if (__spirv_IsNan(x)) - return y; - if (__spirv_IsNan(y)) - return x; - return (x < y) ? y : x; -} -_CLC_BINARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, half, __spirv_ocl_fmax, half, half) - -#endif - -#define __CLC_BODY +#define FUNCTION __spirv_ocl_fmax +#define __CLC_FUNCTION(x) __clc_fmax +#define __CLC_BODY #include diff --git a/libclc/libspirv/lib/generic/math/fmax.inc b/libclc/libspirv/lib/generic/math/fmax.inc deleted file mode 100644 index deb76d95e2a56..0000000000000 --- a/libclc/libspirv/lib/generic/math/fmax.inc +++ /dev/null @@ -1,37 +0,0 @@ -//===----------------------------------------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#if !defined(__CLC_SCALAR) - -_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __spirv_ocl_fmax(__CLC_GENTYPE x, - float y) { - return __spirv_ocl_fmax(x, (__CLC_GENTYPE)((__CLC_SCALAR_GENTYPE)y)); -} - -#ifdef cl_khr_fp64 - -#pragma OPENCL EXTENSION cl_khr_fp64 : enable - -_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __spirv_ocl_fmax(__CLC_GENTYPE x, - double y) { - return __spirv_ocl_fmax(x, (__CLC_GENTYPE)((__CLC_SCALAR_GENTYPE)y)); -} - -#endif // ifdef cl_khr_fp64 - -#ifdef cl_khr_fp16 - -#pragma OPENCL EXTENSION cl_khr_fp16 : enable - -_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __spirv_ocl_fmax(__CLC_GENTYPE x, half y) { - return __spirv_ocl_fmax(x, (__CLC_GENTYPE)((__CLC_SCALAR_GENTYPE)y)); -} - -#endif // ifdef cl_khr_fp16 - -#endif // !defined(__CLC_SCALAR) diff --git a/libclc/libspirv/lib/generic/math/fmin.cl b/libclc/libspirv/lib/generic/math/fmin.cl index e43832a59a912..ccc4cdc5bf798 100644 --- a/libclc/libspirv/lib/generic/math/fmin.cl +++ b/libclc/libspirv/lib/generic/math/fmin.cl @@ -6,34 +6,10 @@ // //===----------------------------------------------------------------------===// +#include #include -#include - -_CLC_DEFINE_BINARY_BUILTIN(float, __spirv_ocl_fmin, __builtin_fminf, float, float); - -#ifdef cl_khr_fp64 - -#pragma OPENCL EXTENSION cl_khr_fp64 : enable - -_CLC_DEFINE_BINARY_BUILTIN(double, __spirv_ocl_fmin, __builtin_fmin, double, double); - -#endif -#ifdef cl_khr_fp16 - -#pragma OPENCL EXTENSION cl_khr_fp16 : enable - -_CLC_DEF _CLC_OVERLOAD half __spirv_ocl_fmin(half x, half y) -{ - if (__spirv_IsNan(x)) - return y; - if (__spirv_IsNan(y)) - return x; - return (y < x) ? y : x; -} -_CLC_BINARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, half, __spirv_ocl_fmin, half, half) - -#endif - -#define __CLC_BODY +#define FUNCTION __spirv_ocl_fmin +#define __CLC_FUNCTION(x) __clc_fmin +#define __CLC_BODY #include diff --git a/libclc/libspirv/lib/generic/math/fmin.inc b/libclc/libspirv/lib/generic/math/fmin.inc deleted file mode 100644 index 6b4fe899bd03e..0000000000000 --- a/libclc/libspirv/lib/generic/math/fmin.inc +++ /dev/null @@ -1,37 +0,0 @@ -//===----------------------------------------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#if !defined(__CLC_SCALAR) - -_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __spirv_ocl_fmin(__CLC_GENTYPE x, - float y) { - return __spirv_ocl_fmin(x, (__CLC_GENTYPE)((__CLC_SCALAR_GENTYPE)y)); -} - -#ifdef cl_khr_fp64 - -#pragma OPENCL EXTENSION cl_khr_fp64 : enable - -_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __spirv_ocl_fmin(__CLC_GENTYPE x, - double y) { - return __spirv_ocl_fmin(x, (__CLC_GENTYPE)((__CLC_SCALAR_GENTYPE)y)); -} - -#endif // ifdef cl_khr_fp64 - -#ifdef cl_khr_fp16 - -#pragma OPENCL EXTENSION cl_khr_fp16 : enable - -_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __spirv_ocl_fmin(__CLC_GENTYPE x, half y) { - return __spirv_ocl_fmin(x, (__CLC_GENTYPE)((__CLC_SCALAR_GENTYPE)y)); -} - -#endif // ifdef cl_khr_fp16 - -#endif // !defined(__CLC_SCALAR) diff --git a/libclc/libspirv/lib/generic/math/fract.cl b/libclc/libspirv/lib/generic/math/fract.cl index cc2da2c00189a..4b4e1d52bcfdb 100644 --- a/libclc/libspirv/lib/generic/math/fract.cl +++ b/libclc/libspirv/lib/generic/math/fract.cl @@ -6,9 +6,10 @@ // //===----------------------------------------------------------------------===// -#include -#include +#include #include -#define __CLC_BODY +#define FUNCTION __spirv_ocl_fract +#define __CLC_FUNCTION(x) __clc_fract +#define __CLC_BODY #include diff --git a/libclc/libspirv/lib/generic/math/fract.inc b/libclc/libspirv/lib/generic/math/fract.inc deleted file mode 100644 index 01e61c6b975f4..0000000000000 --- a/libclc/libspirv/lib/generic/math/fract.inc +++ /dev/null @@ -1,45 +0,0 @@ -//===----------------------------------------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#if __CLC_FPSIZE == 64 -#define MIN_CONSTANT 0x1.fffffffffffffp-1 -#define ZERO (__CLC_GENTYPE)0.0 -#elif __CLC_FPSIZE == 32 -#define MIN_CONSTANT 0x1.fffffep-1f -#define ZERO (__CLC_GENTYPE)0.0f -#elif __CLC_FPSIZE == 16 -#define MIN_CONSTANT 0x1.ffcp-1h -#define ZERO (__CLC_GENTYPE)0.0h -#endif - -_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE -__spirv_ocl_fract(__CLC_GENTYPE x, __private __CLC_GENTYPE *iptr) { - *iptr = __spirv_ocl_floor(x); - __CLC_GENTYPE r = __spirv_ocl_fmin(x - *iptr, MIN_CONSTANT); - r = __CLC_CONVERT_BIT_INTN(__spirv_IsInf(x)) ? ZERO : r; - r = __CLC_CONVERT_BIT_INTN(__spirv_IsNan(x)) ? x : r; - return r; -} - -#define FRACT_DEF(addrspace) \ - _CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __spirv_ocl_fract( \ - __CLC_GENTYPE x, addrspace __CLC_GENTYPE *iptr) { \ - __private __CLC_GENTYPE private_iptr; \ - __CLC_GENTYPE ret = __spirv_ocl_fract(x, &private_iptr); \ - *iptr = private_iptr; \ - return ret; \ - } - -FRACT_DEF(local); -FRACT_DEF(global); -#if _CLC_DISTINCT_GENERIC_AS_SUPPORTED -FRACT_DEF(generic); -#endif - -#undef MIN_CONSTANT -#undef ZERO diff --git a/libclc/libspirv/lib/generic/math/frexp.inc b/libclc/libspirv/lib/generic/math/frexp.inc deleted file mode 100644 index dd7f7aef02c67..0000000000000 --- a/libclc/libspirv/lib/generic/math/frexp.inc +++ /dev/null @@ -1,47 +0,0 @@ -//===----------------------------------------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#if __CLC_FPSIZE == 32 -_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE -__spirv_ocl_frexp(__CLC_GENTYPE x, __CLC_ADDRESS_SPACE __CLC_INTN *ep) { - __CLC_INTN i = __CLC_AS_INTN(x); - __CLC_INTN ai = i & 0x7fffffff; - __CLC_INTN d = ai > 0 & ai < 0x00800000; - /* scale subnormal by 2^26 without multiplying */ - __CLC_GENTYPE s = __CLC_AS_GENTYPE(ai | 0x0d800000) - 0x1.0p-100f; - ai = __spirv_ocl_select(ai, __CLC_AS_INTN(s), d); - __CLC_INTN e = - (ai >> 23) - 126 - __spirv_ocl_select((__CLC_INTN)0, (__CLC_INTN)26, d); - __CLC_INTN t = ai == (__CLC_INTN)0 | e == (__CLC_INTN)129; - i = (i & (__CLC_INTN)0x80000000) | (__CLC_INTN)0x3f000000 | (ai & 0x007fffff); - *ep = __spirv_ocl_select(e, (__CLC_INTN)0, t); - return __spirv_ocl_select(__CLC_AS_GENTYPE(i), x, t); -} -#endif - -#if __CLC_FPSIZE == 64 - -_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE -__spirv_ocl_frexp(__CLC_GENTYPE x, __CLC_ADDRESS_SPACE __CLC_INTN *ep) { - __CLC_LONGN i = __CLC_AS_LONGN(x); - __CLC_LONGN ai = i & 0x7fffffffffffffffL; - __CLC_LONGN d = ai > 0 & ai < 0x0010000000000000L; - // scale subnormal by 2^54 without multiplying - __CLC_GENTYPE s = __CLC_AS_GENTYPE(ai | 0x0370000000000000L) - 0x1.0p-968; - ai = __spirv_ocl_select(ai, __CLC_AS_LONGN(s), d); - __CLC_LONGN e = (ai >> 52) - (__CLC_LONGN)1022 - - __spirv_ocl_select((__CLC_LONGN)0, (__CLC_LONGN)54, d); - __CLC_LONGN t = ai == 0 | e == 1025; - i = (i & (__CLC_LONGN)0x8000000000000000L) | - (__CLC_LONGN)0x3fe0000000000000L | - (ai & (__CLC_LONGN)0x000fffffffffffffL); - *ep = __CLC_CONVERT_INTN(__spirv_ocl_select(e, 0L, t)); - return __spirv_ocl_select(__CLC_AS_GENTYPE(i), x, t); -} - -#endif diff --git a/libclc/libspirv/lib/generic/math/ilogb.cl b/libclc/libspirv/lib/generic/math/ilogb.cl index f1805c6a5aa26..477cc71b3a4a6 100644 --- a/libclc/libspirv/lib/generic/math/ilogb.cl +++ b/libclc/libspirv/lib/generic/math/ilogb.cl @@ -6,52 +6,10 @@ // //===----------------------------------------------------------------------===// +#include #include -#include -#include - -_CLC_OVERLOAD _CLC_DEF int __spirv_ocl_ilogb(float x) { - uint ux = __clc_as_uint(x); - uint ax = ux & EXSIGNBIT_SP32; - int rs = -118 - (int)__spirv_ocl_clz(ux & MANTBITS_SP32); - int r = (int)(ax >> EXPSHIFTBITS_SP32) - EXPBIAS_SP32; - r = ax < 0x00800000U ? rs : r; - r = ax > EXPBITS_SP32 || ax == 0 ? 0x80000000 : r; - r = ax == EXPBITS_SP32 ? 0x7fffffff : r; - return r; -} - -_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, int, __spirv_ocl_ilogb, float); - -#ifdef cl_khr_fp64 -#pragma OPENCL EXTENSION cl_khr_fp64 : enable - -_CLC_OVERLOAD _CLC_DEF int __spirv_ocl_ilogb(double x) { - ulong ux = __clc_as_ulong(x); - ulong ax = ux & ~SIGNBIT_DP64; - int r = (int)(ax >> EXPSHIFTBITS_DP64) - EXPBIAS_DP64; - int rs = -1011 - (int)__spirv_ocl_clz(ax & MANTBITS_DP64); - r = ax < 0x0010000000000000UL ? rs : r; - r = ax > 0x7ff0000000000000UL || ax == 0UL ? 0x80000000 : r; - r = ax == 0x7ff0000000000000UL ? 0x7fffffff : r; - return r; -} - -_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, int, __spirv_ocl_ilogb, double); - -#endif // cl_khr_fp64 - -#ifdef cl_khr_fp16 - -#pragma OPENCL EXTENSION cl_khr_fp16 : enable - -_CLC_DEF _CLC_OVERLOAD int __spirv_ocl_ilogb(half x) { - float f = x; - return __spirv_ocl_ilogb(f); -} - -_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, int, __spirv_ocl_ilogb, half) - - -#endif +#define FUNCTION __spirv_ocl_ilogb +#define __CLC_FUNCTION(x) __clc_ilogb +#define __CLC_BODY +#include diff --git a/libclc/libspirv/lib/generic/math/ldexp.inc b/libclc/libspirv/lib/generic/math/ldexp.inc deleted file mode 100644 index 2a2caed3c7a36..0000000000000 --- a/libclc/libspirv/lib/generic/math/ldexp.inc +++ /dev/null @@ -1,20 +0,0 @@ -//===----------------------------------------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -// TODO: Enable half precision when ldexp is implemented. -#if __CLC_FPSIZE > 16 - -#ifndef __CLC_SCALAR - -_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __spirv_ocl_ldexp(__CLC_GENTYPE x, int n) { - return __spirv_ocl_ldexp(x, (__CLC_INTN)n); -} - -#endif - -#endif diff --git a/libclc/libspirv/lib/generic/math/lgamma.cl b/libclc/libspirv/lib/generic/math/lgamma.cl index 68cc9d249ccfc..c26588255b26f 100644 --- a/libclc/libspirv/lib/generic/math/lgamma.cl +++ b/libclc/libspirv/lib/generic/math/lgamma.cl @@ -6,37 +6,11 @@ // //===----------------------------------------------------------------------===// -#include +#include #include -_CLC_OVERLOAD _CLC_DEF float __spirv_ocl_lgamma(float x) { - int s; - return __spirv_ocl_lgamma_r(x, &s); -} +#define FUNCTION __spirv_ocl_lgamma +#define __CLC_FUNCTION(x) __clc_lgamma +#define __CLC_BODY -_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, float, __spirv_ocl_lgamma, float) - -#ifdef cl_khr_fp64 -#pragma OPENCL EXTENSION cl_khr_fp64 : enable - -_CLC_OVERLOAD _CLC_DEF double __spirv_ocl_lgamma(double x) { - int s; - return __spirv_ocl_lgamma_r(x, &s); -} - -_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, __spirv_ocl_lgamma, double) - -#endif - -#ifdef cl_khr_fp16 - -#pragma OPENCL EXTENSION cl_khr_fp16 : enable - -_CLC_OVERLOAD _CLC_DEF half __spirv_ocl_lgamma(half x) { - int s; - return __spirv_ocl_lgamma_r(x, &s); -} - -_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, half, __spirv_ocl_lgamma, half) - -#endif +#include diff --git a/libclc/libspirv/lib/generic/math/lgamma_r.cl b/libclc/libspirv/lib/generic/math/lgamma_r.cl index 688c1d315d40b..759fd6ed5a2ba 100644 --- a/libclc/libspirv/lib/generic/math/lgamma_r.cl +++ b/libclc/libspirv/lib/generic/math/lgamma_r.cl @@ -6,662 +6,10 @@ // //===----------------------------------------------------------------------===// +#include #include -#include -#include - -/* - * ==================================================== - * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. - * - * Developed at SunPro, a Sun Microsystems, Inc. business. - * Permission to use, copy, modify, and distribute this - * software is freely granted, provided that this notice - * is preserved. - * ==================================================== - */ - -#define pi_f 3.1415927410e+00f /* 0x40490fdb */ - -#define a0_f 7.7215664089e-02f /* 0x3d9e233f */ -#define a1_f 3.2246702909e-01f /* 0x3ea51a66 */ -#define a2_f 6.7352302372e-02f /* 0x3d89f001 */ -#define a3_f 2.0580807701e-02f /* 0x3ca89915 */ -#define a4_f 7.3855509982e-03f /* 0x3bf2027e */ -#define a5_f 2.8905137442e-03f /* 0x3b3d6ec6 */ -#define a6_f 1.1927076848e-03f /* 0x3a9c54a1 */ -#define a7_f 5.1006977446e-04f /* 0x3a05b634 */ -#define a8_f 2.2086278477e-04f /* 0x39679767 */ -#define a9_f 1.0801156895e-04f /* 0x38e28445 */ -#define a10_f 2.5214456400e-05f /* 0x37d383a2 */ -#define a11_f 4.4864096708e-05f /* 0x383c2c75 */ - -#define tc_f 1.4616321325e+00f /* 0x3fbb16c3 */ - -#define tf_f -1.2148628384e-01f /* 0xbdf8cdcd */ -/* tt -(tail of tf) */ -#define tt_f 6.6971006518e-09f /* 0x31e61c52 */ - -#define t0_f 4.8383611441e-01f /* 0x3ef7b95e */ -#define t1_f -1.4758771658e-01f /* 0xbe17213c */ -#define t2_f 6.4624942839e-02f /* 0x3d845a15 */ -#define t3_f -3.2788541168e-02f /* 0xbd064d47 */ -#define t4_f 1.7970675603e-02f /* 0x3c93373d */ -#define t5_f -1.0314224288e-02f /* 0xbc28fcfe */ -#define t6_f 6.1005386524e-03f /* 0x3bc7e707 */ -#define t7_f -3.6845202558e-03f /* 0xbb7177fe */ -#define t8_f 2.2596477065e-03f /* 0x3b141699 */ -#define t9_f -1.4034647029e-03f /* 0xbab7f476 */ -#define t10_f 8.8108185446e-04f /* 0x3a66f867 */ -#define t11_f -5.3859531181e-04f /* 0xba0d3085 */ -#define t12_f 3.1563205994e-04f /* 0x39a57b6b */ -#define t13_f -3.1275415677e-04f /* 0xb9a3f927 */ -#define t14_f 3.3552918467e-04f /* 0x39afe9f7 */ - -#define u0_f -7.7215664089e-02f /* 0xbd9e233f */ -#define u1_f 6.3282704353e-01f /* 0x3f2200f4 */ -#define u2_f 1.4549225569e+00f /* 0x3fba3ae7 */ -#define u3_f 9.7771751881e-01f /* 0x3f7a4bb2 */ -#define u4_f 2.2896373272e-01f /* 0x3e6a7578 */ -#define u5_f 1.3381091878e-02f /* 0x3c5b3c5e */ - -#define v1_f 2.4559779167e+00f /* 0x401d2ebe */ -#define v2_f 2.1284897327e+00f /* 0x4008392d */ -#define v3_f 7.6928514242e-01f /* 0x3f44efdf */ -#define v4_f 1.0422264785e-01f /* 0x3dd572af */ -#define v5_f 3.2170924824e-03f /* 0x3b52d5db */ - -#define s0_f -7.7215664089e-02f /* 0xbd9e233f */ -#define s1_f 2.1498242021e-01f /* 0x3e5c245a */ -#define s2_f 3.2577878237e-01f /* 0x3ea6cc7a */ -#define s3_f 1.4635047317e-01f /* 0x3e15dce6 */ -#define s4_f 2.6642270386e-02f /* 0x3cda40e4 */ -#define s5_f 1.8402845599e-03f /* 0x3af135b4 */ -#define s6_f 3.1947532989e-05f /* 0x3805ff67 */ - -#define r1_f 1.3920053244e+00f /* 0x3fb22d3b */ -#define r2_f 7.2193557024e-01f /* 0x3f38d0c5 */ -#define r3_f 1.7193385959e-01f /* 0x3e300f6e */ -#define r4_f 1.8645919859e-02f /* 0x3c98bf54 */ -#define r5_f 7.7794247773e-04f /* 0x3a4beed6 */ -#define r6_f 7.3266842264e-06f /* 0x36f5d7bd */ - -#define w0_f 4.1893854737e-01f /* 0x3ed67f1d */ -#define w1_f 8.3333335817e-02f /* 0x3daaaaab */ -#define w2_f -2.7777778450e-03f /* 0xbb360b61 */ -#define w3_f 7.9365057172e-04f /* 0x3a500cfd */ -#define w4_f -5.9518753551e-04f /* 0xba1c065c */ -#define w5_f 8.3633989561e-04f /* 0x3a5b3dd2 */ -#define w6_f -1.6309292987e-03f /* 0xbad5c4e8 */ - -_CLC_OVERLOAD _CLC_DEF float __spirv_ocl_lgamma_r(float x, private int *signp) { - int hx = __clc_as_int(x); - int ix = hx & 0x7fffffff; - float absx = __clc_as_float(ix); - - if (ix >= 0x7f800000) { - *signp = 1; - return x; - } - - if (absx < 0x1.0p-70f) { - *signp = hx < 0 ? -1 : 1; - return -__spirv_ocl_log(absx); - } - - float r; - - if (absx == 1.0f | absx == 2.0f) - r = 0.0f; - - else if (absx < 2.0f) { - float y = 2.0f - absx; - int i = 0; - - int c = absx < 0x1.bb4c30p+0f; - float yt = absx - tc_f; - y = c ? yt : y; - i = c ? 1 : i; - - c = absx < 0x1.3b4c40p+0f; - yt = absx - 1.0f; - y = c ? yt : y; - i = c ? 2 : i; - - r = -__spirv_ocl_log(absx); - yt = 1.0f - absx; - c = absx <= 0x1.ccccccp-1f; - r = c ? r : 0.0f; - y = c ? yt : y; - i = c ? 0 : i; - - c = absx < 0x1.769440p-1f; - yt = absx - (tc_f - 1.0f); - y = c ? yt : y; - i = c ? 1 : i; - - c = absx < 0x1.da6610p-3f; - y = c ? absx : y; - i = c ? 2 : i; - - float z, w, p1, p2, p3, p; - switch (i) { - case 0: - z = y * y; - p1 = __spirv_ocl_mad( - z, - __spirv_ocl_mad( - z, - __spirv_ocl_mad( - z, __spirv_ocl_mad(z, __spirv_ocl_mad(z, a10_f, a8_f), a6_f), - a4_f), - a2_f), - a0_f); - p2 = - z * - __spirv_ocl_mad( - z, - __spirv_ocl_mad( - z, - __spirv_ocl_mad( - z, - __spirv_ocl_mad(z, __spirv_ocl_mad(z, a11_f, a9_f), a7_f), - a5_f), - a3_f), - a1_f); - p = __spirv_ocl_mad(y, p1, p2); - r += __spirv_ocl_mad(y, -0.5f, p); - break; - case 1: - z = y * y; - w = z * y; - p1 = __spirv_ocl_mad( - w, - __spirv_ocl_mad( - w, __spirv_ocl_mad(w, __spirv_ocl_mad(w, t12_f, t9_f), t6_f), - t3_f), - t0_f); - p2 = __spirv_ocl_mad( - w, - __spirv_ocl_mad( - w, __spirv_ocl_mad(w, __spirv_ocl_mad(w, t13_f, t10_f), t7_f), - t4_f), - t1_f); - p3 = __spirv_ocl_mad( - w, - __spirv_ocl_mad( - w, __spirv_ocl_mad(w, __spirv_ocl_mad(w, t14_f, t11_f), t8_f), - t5_f), - t2_f); - p = __spirv_ocl_mad( - z, p1, -__spirv_ocl_mad(w, -__spirv_ocl_mad(y, p3, p2), tt_f)); - r += tf_f + p; - break; - case 2: - p1 = y * - __spirv_ocl_mad( - y, - __spirv_ocl_mad( - y, - __spirv_ocl_mad( - y, - __spirv_ocl_mad(y, __spirv_ocl_mad(y, u5_f, u4_f), u3_f), - u2_f), - u1_f), - u0_f); - p2 = __spirv_ocl_mad( - y, - __spirv_ocl_mad( - y, - __spirv_ocl_mad( - y, __spirv_ocl_mad(y, __spirv_ocl_mad(y, v5_f, v4_f), v3_f), - v2_f), - v1_f), - 1.0f); - r += __spirv_ocl_mad(y, -0.5f, MATH_DIVIDE(p1, p2)); - break; - } - } else if (absx < 8.0f) { - int i = (int)absx; - float y = absx - (float)i; - float p = y * __spirv_ocl_mad( - y, - __spirv_ocl_mad( - y, - __spirv_ocl_mad( - y, - __spirv_ocl_mad( - y, - __spirv_ocl_mad( - y, __spirv_ocl_mad(y, s6_f, s5_f), s4_f), - s3_f), - s2_f), - s1_f), - s0_f); - float q = __spirv_ocl_mad( - y, - __spirv_ocl_mad( - y, - __spirv_ocl_mad( - y, - __spirv_ocl_mad( - y, __spirv_ocl_mad(y, __spirv_ocl_mad(y, r6_f, r5_f), r4_f), - r3_f), - r2_f), - r1_f), - 1.0f); - r = __spirv_ocl_mad(y, 0.5f, MATH_DIVIDE(p, q)); - - float y6 = y + 6.0f; - float y5 = y + 5.0f; - float y4 = y + 4.0f; - float y3 = y + 3.0f; - float y2 = y + 2.0f; - - float z = 1.0f; - z *= i > 6 ? y6 : 1.0f; - z *= i > 5 ? y5 : 1.0f; - z *= i > 4 ? y4 : 1.0f; - z *= i > 3 ? y3 : 1.0f; - z *= i > 2 ? y2 : 1.0f; - - r += __spirv_ocl_log(z); - } else if (absx < 0x1.0p+58f) { - float z = 1.0f / absx; - float y = z * z; - float w = __spirv_ocl_mad( - z, - __spirv_ocl_mad( - y, - __spirv_ocl_mad( - y, - __spirv_ocl_mad( - y, __spirv_ocl_mad(y, __spirv_ocl_mad(y, w6_f, w5_f), w4_f), - w3_f), - w2_f), - w1_f), - w0_f); - r = __spirv_ocl_mad(absx - 0.5f, __spirv_ocl_log(absx) - 1.0f, w); - } else - // 2**58 <= x <= Inf - r = absx * (__spirv_ocl_log(absx) - 1.0f); - - int s = 1; - - if (x < 0.0f) { - float t = __spirv_ocl_sinpi(x); - r = __spirv_ocl_log(pi_f / __spirv_ocl_fabs(t * x)) - r; - r = t == 0.0f ? __clc_as_float(PINFBITPATT_SP32) : r; - s = t < 0.0f ? -1 : s; - } - - *signp = s; - return r; -} - -_CLC_V_V_VP_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, float, __spirv_ocl_lgamma_r, - float, private, int) - -#ifdef cl_khr_fp64 -#pragma OPENCL EXTENSION cl_khr_fp64 : enable -// ==================================================== -// Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. -// -// Developed at SunPro, a Sun Microsystems, Inc. business. -// Permission to use, copy, modify, and distribute this -// software is freely granted, provided that this notice -// is preserved. -// ==================================================== - -// lgamma_r(x, i) -// Reentrant version of the logarithm of the Gamma function -// with user provide pointer for the sign of Gamma(x). -// -// Method: -// 1. Argument Reduction for 0 < x <= 8 -// Since gamma(1+s)=s*gamma(s), for x in [0,8], we may -// reduce x to a number in [1.5,2.5] by -// lgamma(1+s) = log(s) + lgamma(s) -// for example, -// lgamma(7.3) = log(6.3) + lgamma(6.3) -// = log(6.3*5.3) + lgamma(5.3) -// = log(6.3*5.3*4.3*3.3*2.3) + lgamma(2.3) -// 2. Polynomial approximation of lgamma around its -// minimun ymin=1.461632144968362245 to maintain monotonicity. -// On [ymin-0.23, ymin+0.27] (i.e., [1.23164,1.73163]), use -// Let z = x-ymin; -// lgamma(x) = -1.214862905358496078218 + z^2*poly(z) -// where -// poly(z) is a 14 degree polynomial. -// 2. Rational approximation in the primary interval [2,3] -// We use the following approximation: -// s = x-2.0; -// lgamma(x) = 0.5*s + s*P(s)/Q(s) -// with accuracy -// |P/Q - (lgamma(x)-0.5s)| < 2**-61.71 -// Our algorithms are based on the following observation -// -// zeta(2)-1 2 zeta(3)-1 3 -// lgamma(2+s) = s*(1-Euler) + --------- * s - --------- * s + ... -// 2 3 -// -// where Euler = 0.5771... is the Euler constant, which is very -// close to 0.5. -// -// 3. For x>=8, we have -// lgamma(x)~(x-0.5)log(x)-x+0.5*log(2pi)+1/(12x)-1/(360x**3)+.... -// (better formula: -// lgamma(x)~(x-0.5)*(log(x)-1)-.5*(log(2pi)-1) + ...) -// Let z = 1/x, then we approximation -// f(z) = lgamma(x) - (x-0.5)(log(x)-1) -// by -// 3 5 11 -// w = w0 + w1*z + w2*z + w3*z + ... + w6*z -// where -// |w - f(z)| < 2**-58.74 -// -// 4. For negative x, since (G is gamma function) -// -x*G(-x)*G(x) = pi/sin(pi*x), -// we have -// G(x) = pi/(sin(pi*x)*(-x)*G(-x)) -// since G(-x) is positive, sign(G(x)) = sign(sin(pi*x)) for x<0 -// Hence, for x<0, signgam = sign(sin(pi*x)) and -// lgamma(x) = log(|Gamma(x)|) -// = log(pi/(|x*sin(pi*x)|)) - lgamma(-x); -// Note: one should avoid compute pi*(-x) directly in the -// computation of sin(pi*(-x)). -// -// 5. Special Cases -// lgamma(2+s) ~ s*(1-Euler) for tiny s -// lgamma(1)=lgamma(2)=0 -// lgamma(x) ~ -log(x) for tiny x -// lgamma(0) = lgamma(inf) = inf -// lgamma(-integer) = +-inf -// -#define pi 3.14159265358979311600e+00 /* 0x400921FB, 0x54442D18 */ - -#define a0 7.72156649015328655494e-02 /* 0x3FB3C467, 0xE37DB0C8 */ -#define a1 3.22467033424113591611e-01 /* 0x3FD4A34C, 0xC4A60FAD */ -#define a2 6.73523010531292681824e-02 /* 0x3FB13E00, 0x1A5562A7 */ -#define a3 2.05808084325167332806e-02 /* 0x3F951322, 0xAC92547B */ -#define a4 7.38555086081402883957e-03 /* 0x3F7E404F, 0xB68FEFE8 */ -#define a5 2.89051383673415629091e-03 /* 0x3F67ADD8, 0xCCB7926B */ -#define a6 1.19270763183362067845e-03 /* 0x3F538A94, 0x116F3F5D */ -#define a7 5.10069792153511336608e-04 /* 0x3F40B6C6, 0x89B99C00 */ -#define a8 2.20862790713908385557e-04 /* 0x3F2CF2EC, 0xED10E54D */ -#define a9 1.08011567247583939954e-04 /* 0x3F1C5088, 0x987DFB07 */ -#define a10 2.52144565451257326939e-05 /* 0x3EFA7074, 0x428CFA52 */ -#define a11 4.48640949618915160150e-05 /* 0x3F07858E, 0x90A45837 */ - -#define tc 1.46163214496836224576e+00 /* 0x3FF762D8, 0x6356BE3F */ -#define tf -1.21486290535849611461e-01 /* 0xBFBF19B9, 0xBCC38A42 */ -#define tt -3.63867699703950536541e-18 /* 0xBC50C7CA, 0xA48A971F */ - -#define t0 4.83836122723810047042e-01 /* 0x3FDEF72B, 0xC8EE38A2 */ -#define t1 -1.47587722994593911752e-01 /* 0xBFC2E427, 0x8DC6C509 */ -#define t2 6.46249402391333854778e-02 /* 0x3FB08B42, 0x94D5419B */ -#define t3 -3.27885410759859649565e-02 /* 0xBFA0C9A8, 0xDF35B713 */ -#define t4 1.79706750811820387126e-02 /* 0x3F9266E7, 0x970AF9EC */ -#define t5 -1.03142241298341437450e-02 /* 0xBF851F9F, 0xBA91EC6A */ -#define t6 6.10053870246291332635e-03 /* 0x3F78FCE0, 0xE370E344 */ -#define t7 -3.68452016781138256760e-03 /* 0xBF6E2EFF, 0xB3E914D7 */ -#define t8 2.25964780900612472250e-03 /* 0x3F6282D3, 0x2E15C915 */ -#define t9 -1.40346469989232843813e-03 /* 0xBF56FE8E, 0xBF2D1AF1 */ -#define t10 8.81081882437654011382e-04 /* 0x3F4CDF0C, 0xEF61A8E9 */ -#define t11 -5.38595305356740546715e-04 /* 0xBF41A610, 0x9C73E0EC */ -#define t12 3.15632070903625950361e-04 /* 0x3F34AF6D, 0x6C0EBBF7 */ -#define t13 -3.12754168375120860518e-04 /* 0xBF347F24, 0xECC38C38 */ -#define t14 3.35529192635519073543e-04 /* 0x3F35FD3E, 0xE8C2D3F4 */ - -#define u0 -7.72156649015328655494e-02 /* 0xBFB3C467, 0xE37DB0C8 */ -#define u1 6.32827064025093366517e-01 /* 0x3FE4401E, 0x8B005DFF */ -#define u2 1.45492250137234768737e+00 /* 0x3FF7475C, 0xD119BD6F */ -#define u3 9.77717527963372745603e-01 /* 0x3FEF4976, 0x44EA8450 */ -#define u4 2.28963728064692451092e-01 /* 0x3FCD4EAE, 0xF6010924 */ -#define u5 1.33810918536787660377e-02 /* 0x3F8B678B, 0xBF2BAB09 */ - -#define v1 2.45597793713041134822e+00 /* 0x4003A5D7, 0xC2BD619C */ -#define v2 2.12848976379893395361e+00 /* 0x40010725, 0xA42B18F5 */ -#define v3 7.69285150456672783825e-01 /* 0x3FE89DFB, 0xE45050AF */ -#define v4 1.04222645593369134254e-01 /* 0x3FBAAE55, 0xD6537C88 */ -#define v5 3.21709242282423911810e-03 /* 0x3F6A5ABB, 0x57D0CF61 */ - -#define s0_d -7.72156649015328655494e-02 /* 0xBFB3C467, 0xE37DB0C8 */ -#define s1_d 2.14982415960608852501e-01 /* 0x3FCB848B, 0x36E20878 */ -#define s2_d 3.25778796408930981787e-01 /* 0x3FD4D98F, 0x4F139F59 */ -#define s3_d 1.46350472652464452805e-01 /* 0x3FC2BB9C, 0xBEE5F2F7 */ -#define s4_d 2.66422703033638609560e-02 /* 0x3F9B481C, 0x7E939961 */ -#define s5_d 1.84028451407337715652e-03 /* 0x3F5E26B6, 0x7368F239 */ -#define s6_d 3.19475326584100867617e-05 /* 0x3F00BFEC, 0xDD17E945 */ - -#define r1 1.39200533467621045958e+00 /* 0x3FF645A7, 0x62C4AB74 */ -#define r2 7.21935547567138069525e-01 /* 0x3FE71A18, 0x93D3DCDC */ -#define r3 1.71933865632803078993e-01 /* 0x3FC601ED, 0xCCFBDF27 */ -#define r4 1.86459191715652901344e-02 /* 0x3F9317EA, 0x742ED475 */ -#define r5 7.77942496381893596434e-04 /* 0x3F497DDA, 0xCA41A95B */ -#define r6 7.32668430744625636189e-06 /* 0x3EDEBAF7, 0xA5B38140 */ - -#define w0 4.18938533204672725052e-01 /* 0x3FDACFE3, 0x90C97D69 */ -#define w1 8.33333333333329678849e-02 /* 0x3FB55555, 0x5555553B */ -#define w2 -2.77777777728775536470e-03 /* 0xBF66C16C, 0x16B02E5C */ -#define w3 7.93650558643019558500e-04 /* 0x3F4A019F, 0x98CF38B6 */ -#define w4 -5.95187557450339963135e-04 /* 0xBF4380CB, 0x8C0FE741 */ -#define w5 8.36339918996282139126e-04 /* 0x3F4B67BA, 0x4CDAD5D1 */ -#define w6 -1.63092934096575273989e-03 /* 0xBF5AB89D, 0x0B9E43E4 */ - -_CLC_OVERLOAD _CLC_DEF double __spirv_ocl_lgamma_r(double x, private int *ip) { - ulong ux = __clc_as_ulong(x); - ulong ax = ux & EXSIGNBIT_DP64; - double absx = __clc_as_double(ax); - - if (ax >= 0x7ff0000000000000UL) { - // +-Inf, NaN - *ip = 1; - return absx; - } - - if (absx < 0x1.0p-70) { - *ip = ax == ux ? 1 : -1; - return -__spirv_ocl_log(absx); - } - - // Handle rest of range - double r; - - if (absx < 2.0) { - int i = 0; - double y = 2.0 - absx; - - int c = absx < 0x1.bb4c3p+0; - double t = absx - tc; - i = c ? 1 : i; - y = c ? t : y; - - c = absx < 0x1.3b4c4p+0; - t = absx - 1.0; - i = c ? 2 : i; - y = c ? t : y; - - c = absx <= 0x1.cccccp-1; - t = -__spirv_ocl_log(absx); - r = c ? t : 0.0; - t = 1.0 - absx; - i = c ? 0 : i; - y = c ? t : y; - - c = absx < 0x1.76944p-1; - t = absx - (tc - 1.0); - i = c ? 1 : i; - y = c ? t : y; - - c = absx < 0x1.da661p-3; - i = c ? 2 : i; - y = c ? absx : y; - - double p, q; - - switch (i) { - case 0: - p = __spirv_ocl_fma( - y, - __spirv_ocl_fma( - y, __spirv_ocl_fma(y, __spirv_ocl_fma(y, a11, a10), a9), a8), - a7); - p = __spirv_ocl_fma( - y, - __spirv_ocl_fma(y, __spirv_ocl_fma(y, __spirv_ocl_fma(y, p, a6), a5), - a4), - a3); - p = __spirv_ocl_fma(y, __spirv_ocl_fma(y, __spirv_ocl_fma(y, p, a2), a1), - a0); - r = __spirv_ocl_fma(y, p - 0.5, r); - break; - case 1: - p = __spirv_ocl_fma( - y, - __spirv_ocl_fma( - y, __spirv_ocl_fma(y, __spirv_ocl_fma(y, t14, t13), t12), t11), - t10); - p = __spirv_ocl_fma( - y, - __spirv_ocl_fma( - y, - __spirv_ocl_fma( - y, __spirv_ocl_fma(y, __spirv_ocl_fma(y, p, t9), t8), t7), - t6), - t5); - p = __spirv_ocl_fma( - y, - __spirv_ocl_fma( - y, - __spirv_ocl_fma( - y, __spirv_ocl_fma(y, __spirv_ocl_fma(y, p, t4), t3), t2), - t1), - t0); - p = __spirv_ocl_fma(y * y, p, -tt); - r += (tf + p); - break; - case 2: - p = y * __spirv_ocl_fma( - y, - __spirv_ocl_fma( - y, - __spirv_ocl_fma( - y, __spirv_ocl_fma(y, __spirv_ocl_fma(y, u5, u4), u3), - u2), - u1), - u0); - q = __spirv_ocl_fma( - y, - __spirv_ocl_fma( - y, - __spirv_ocl_fma( - y, __spirv_ocl_fma(y, __spirv_ocl_fma(y, v5, v4), v3), v2), - v1), - 1.0); - r += __spirv_ocl_fma(-0.5, y, p / q); - } - } else if (absx < 8.0) { - int i = (int)(long)absx; - double y = absx - (double)i; - double p = - y * - __spirv_ocl_fma( - y, - __spirv_ocl_fma( - y, - __spirv_ocl_fma( - y, - __spirv_ocl_fma( - y, __spirv_ocl_fma(y, __spirv_ocl_fma(y, s6_d, s5_d), s4_d), - s3_d), - s2_d), - s1_d), - s0_d); - double q = __spirv_ocl_fma( - y, - __spirv_ocl_fma( - y, - __spirv_ocl_fma( - y, - __spirv_ocl_fma( - y, __spirv_ocl_fma(y, __spirv_ocl_fma(y, r6, r5), r4), r3), - r2), - r1), - 1.0); - r = __spirv_ocl_fma(0.5, y, p / q); - double z = 1.0; - // lgamma(1+s) = log(s) + lgamma(s) - double y6 = y + 6.0; - double y5 = y + 5.0; - double y4 = y + 4.0; - double y3 = y + 3.0; - double y2 = y + 2.0; - z *= i > 6 ? y6 : 1.0; - z *= i > 5 ? y5 : 1.0; - z *= i > 4 ? y4 : 1.0; - z *= i > 3 ? y3 : 1.0; - z *= i > 2 ? y2 : 1.0; - r += __spirv_ocl_log(z); - } else { - double z = 1.0 / absx; - double z2 = z * z; - double w = __spirv_ocl_fma( - z, - __spirv_ocl_fma( - z2, - __spirv_ocl_fma( - z2, - __spirv_ocl_fma( - z2, __spirv_ocl_fma(z2, __spirv_ocl_fma(z2, w6, w5), w4), - w3), - w2), - w1), - w0); - r = (absx - 0.5) * (__spirv_ocl_log(absx) - 1.0) + w; - } - - if (x < 0.0) { - double t = __spirv_ocl_sinpi(x); - r = __spirv_ocl_log(pi / __spirv_ocl_fabs(t * x)) - r; - r = t == 0.0 ? __clc_as_double(PINFBITPATT_DP64) : r; - *ip = t < 0.0 ? -1 : 1; - } else - *ip = 1; - - return r; -} - -_CLC_V_V_VP_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, __spirv_ocl_lgamma_r, - double, private, int) -#endif - -#ifdef cl_khr_fp16 -#pragma OPENCL EXTENSION cl_khr_fp16 : enable - -_CLC_OVERLOAD _CLC_DEF half __spirv_ocl_lgamma_r(half x, private int *signp) { - return __spirv_ocl_lgamma_r((float)x, signp); -} - -_CLC_V_V_VP_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, half, __spirv_ocl_lgamma_r, half, - private, int) - -#endif - -#define __CLC_ADDRSPACE global -#define __CLC_BODY -#include -#undef __CLC_ADDRSPACE - -#define __CLC_ADDRSPACE local -#define __CLC_BODY -#include -#undef __CLC_ADDRSPACE - -#if _CLC_DISTINCT_GENERIC_AS_SUPPORTED -#define __CLC_ADDRSPACE generic -#define __CLC_BODY +#define FUNCTION __spirv_ocl_lgamma_r +#define __CLC_FUNCTION(x) __clc_lgamma_r +#define __CLC_BODY #include -#undef __CLC_ADDRSPACE -#endif diff --git a/libclc/libspirv/lib/generic/math/lgamma_r.inc b/libclc/libspirv/lib/generic/math/lgamma_r.inc deleted file mode 100644 index b4c12c9390785..0000000000000 --- a/libclc/libspirv/lib/generic/math/lgamma_r.inc +++ /dev/null @@ -1,15 +0,0 @@ -//===----------------------------------------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE -__spirv_ocl_lgamma_r(__CLC_GENTYPE x, __CLC_ADDRSPACE __CLC_INTN *iptr) { - __private __CLC_INTN private_iptr; - __CLC_GENTYPE ret = __spirv_ocl_lgamma_r(x, &private_iptr); - *iptr = private_iptr; - return ret; -} diff --git a/libclc/libspirv/lib/generic/math/log.cl b/libclc/libspirv/lib/generic/math/log.cl index a08681a8bbcd8..0b94736b4676d 100644 --- a/libclc/libspirv/lib/generic/math/log.cl +++ b/libclc/libspirv/lib/generic/math/log.cl @@ -6,37 +6,11 @@ // //===----------------------------------------------------------------------===// -#include +#include #include -/* - *log(x) = log2(x) * (1/log2(e)) - */ +#define FUNCTION __spirv_ocl_log +#define __CLC_FUNCTION(x) __clc_log +#define __CLC_BODY -_CLC_OVERLOAD _CLC_DEF float __spirv_ocl_log(float x) -{ - return __spirv_ocl_log2(x) * (1.0f / M_LOG2E_F); -} - -_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, float, __spirv_ocl_log, float); - -#ifdef cl_khr_fp64 - -#pragma OPENCL EXTENSION cl_khr_fp64 : enable - -_CLC_OVERLOAD _CLC_DEF double __spirv_ocl_log(double x) -{ - return __spirv_ocl_log2(x) * (1.0 / M_LOG2E); -} - -_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, __spirv_ocl_log, double); - -#endif // cl_khr_fp64 - -#ifdef cl_khr_fp16 - -#pragma OPENCL EXTENSION cl_khr_fp16 : enable - -_CLC_DEFINE_UNARY_BUILTIN_SCALARIZE(half, __spirv_ocl_log, __builtin_logf16, half) - -#endif +#include diff --git a/libclc/libspirv/lib/generic/math/log10.cl b/libclc/libspirv/lib/generic/math/log10.cl index b04362de7ddd1..400717f776553 100644 --- a/libclc/libspirv/lib/generic/math/log10.cl +++ b/libclc/libspirv/lib/generic/math/log10.cl @@ -6,29 +6,11 @@ // //===----------------------------------------------------------------------===// +#include #include -#include -#include +#define FUNCTION __spirv_ocl_log10 +#define __CLC_FUNCTION(x) __clc_log10 +#define __CLC_BODY -#ifdef cl_khr_fp64 -#pragma OPENCL EXTENSION cl_khr_fp64 : enable -#endif // cl_khr_fp64 - -#define COMPILING_LOG10 -#include "log_base.h" -#undef COMPILING_LOG10 - -_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, float, __spirv_ocl_log10, float); - -#ifdef cl_khr_fp64 -_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, __spirv_ocl_log10, double); -#endif // cl_khr_fp64 - -#ifdef cl_khr_fp16 - -#pragma OPENCL EXTENSION cl_khr_fp16 : enable - -_CLC_DEFINE_UNARY_BUILTIN_SCALARIZE(half, __spirv_ocl_log10, __builtin_log10f16, half) - -#endif +#include diff --git a/libclc/libspirv/lib/generic/math/log2.cl b/libclc/libspirv/lib/generic/math/log2.cl index 5f7489fa8b153..0a6405efdc0e7 100644 --- a/libclc/libspirv/lib/generic/math/log2.cl +++ b/libclc/libspirv/lib/generic/math/log2.cl @@ -6,29 +6,11 @@ // //===----------------------------------------------------------------------===// +#include #include -#include -#include +#define FUNCTION __spirv_ocl_log2 +#define __CLC_FUNCTION(x) __clc_log2 +#define __CLC_BODY -#ifdef cl_khr_fp64 -#pragma OPENCL EXTENSION cl_khr_fp64 : enable -#endif // cl_khr_fp64 - -#define COMPILING_LOG2 -#include "log_base.h" -#undef COMPILING_LOG2 - -_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, float, __spirv_ocl_log2, float); - -#ifdef cl_khr_fp64 -_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, __spirv_ocl_log2, double); -#endif // cl_khr_fp64 - -#ifdef cl_khr_fp16 - -#pragma OPENCL EXTENSION cl_khr_fp16 : enable - -_CLC_DEFINE_UNARY_BUILTIN_SCALARIZE(half, __spirv_ocl_log2, __builtin_log2f16, half) - -#endif +#include diff --git a/libclc/libspirv/lib/generic/math/log_base.h b/libclc/libspirv/lib/generic/math/log_base.h deleted file mode 100644 index 94c76e563c97d..0000000000000 --- a/libclc/libspirv/lib/generic/math/log_base.h +++ /dev/null @@ -1,324 +0,0 @@ -/* - * Copyright (c) 2014,2015 Advanced Micro Devices, Inc. - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - * THE SOFTWARE. - */ - -#include - -/* - Algorithm: - - Based on: - Ping-Tak Peter Tang - "Table-driven implementation of the logarithm function in IEEE - floating-point arithmetic" - ACM Transactions on Mathematical Software (TOMS) - Volume 16, Issue 4 (December 1990) - - - x very close to 1.0 is handled differently, for x everywhere else - a brief explanation is given below - - x = (2^m)*A - x = (2^m)*(G+g) with (1 <= G < 2) and (g <= 2^(-8)) - x = (2^m)*2*(G/2+g/2) - x = (2^m)*2*(F+f) with (0.5 <= F < 1) and (f <= 2^(-9)) - - Y = (2^(-1))*(2^(-m))*(2^m)*A - Now, range of Y is: 0.5 <= Y < 1 - - F = 0x80 + (first 7 mantissa bits) + (8th mantissa bit) - Now, range of F is: 128 <= F <= 256 - F = F / 256 - Now, range of F is: 0.5 <= F <= 1 - - f = -(Y-F), with (f <= 2^(-9)) - - log(x) = m*log(2) + log(2) + log(F-f) - log(x) = m*log(2) + log(2) + log(F) + log(1-(f/F)) - log(x) = m*log(2) + log(2*F) + log(1-r) - - r = (f/F), with (r <= 2^(-8)) - r = f*(1/F) with (1/F) precomputed to avoid division - - log(x) = m*log(2) + log(G) - poly - - log(G) is precomputed - poly = (r + (r^2)/2 + (r^3)/3 + (r^4)/4) + (r^5)/5)) - - log(2) and log(G) need to be maintained in extra precision - to avoid losing precision in the calculations - - - For x close to 1.0, we employ the following technique to - ensure faster convergence. - - log(x) = log((1+s)/(1-s)) = 2*s + (2/3)*s^3 + (2/5)*s^5 + (2/7)*s^7 - x = ((1+s)/(1-s)) - x = 1 + r - s = r/(2+r) - -*/ - -_CLC_OVERLOAD _CLC_DEF float -#if defined(COMPILING_LOG2) -__spirv_ocl_log2(float x) -#elif defined(COMPILING_LOG10) -__spirv_ocl_log10(float x) -#else -__spirv_ocl_log(float x) -#endif -{ - -#if defined(COMPILING_LOG2) - const float LOG2E = 0x1.715476p+0f; // 1.4426950408889634 - const float LOG2E_HEAD = 0x1.700000p+0f; // 1.4375 - const float LOG2E_TAIL = 0x1.547652p-8f; // 0.00519504072 -#elif defined(COMPILING_LOG10) - const float LOG10E = 0x1.bcb7b2p-2f; // 0.43429448190325182 - const float LOG10E_HEAD = 0x1.bc0000p-2f; // 0.43359375 - const float LOG10E_TAIL = 0x1.6f62a4p-11f; // 0.0007007319 - const float LOG10_2_HEAD = 0x1.340000p-2f; // 0.30078125 - const float LOG10_2_TAIL = 0x1.04d426p-12f; // 0.000248745637 -#else - const float LOG2_HEAD = 0x1.62e000p-1f; // 0.693115234 - const float LOG2_TAIL = 0x1.0bfbe8p-15f; // 0.0000319461833 -#endif - - uint xi = __clc_as_uint(x); - uint ax = xi & EXSIGNBIT_SP32; - - // Calculations for |x-1| < 2^-4 - float r = x - 1.0f; - int near1 = __spirv_ocl_fabs(r) < 0x1.0p-4f; - float u2 = MATH_DIVIDE(r, 2.0f + r); - float corr = u2 * r; - float u = u2 + u2; - float v = u * u; - float znear1, z1, z2; - - // 2/(5 * 2^5), 2/(3 * 2^3) - z2 = __spirv_ocl_mad( - u, __spirv_ocl_mad(v, 0x1.99999ap-7f, 0x1.555556p-4f) * v, -corr); - -#if defined(COMPILING_LOG2) - z1 = __clc_as_float(__clc_as_int(r) & 0xffff0000); - z2 = z2 + (r - z1); - znear1 = __spirv_ocl_mad( - z1, LOG2E_HEAD, - __spirv_ocl_mad(z2, LOG2E_HEAD, - __spirv_ocl_mad(z1, LOG2E_TAIL, z2 * LOG2E_TAIL))); -#elif defined(COMPILING_LOG10) - z1 = __clc_as_float(__clc_as_int(r) & 0xffff0000); - z2 = z2 + (r - z1); - znear1 = __spirv_ocl_mad( - z1, LOG10E_HEAD, - __spirv_ocl_mad(z2, LOG10E_HEAD, - __spirv_ocl_mad(z1, LOG10E_TAIL, z2 * LOG10E_TAIL))); -#else - znear1 = z2 + r; -#endif - - // Calculations for x not near 1 - int m = (int)(xi >> EXPSHIFTBITS_SP32) - EXPBIAS_SP32; - - // Normalize subnormal - uint xis = __clc_as_uint(__clc_as_float(xi | 0x3f800000) - 1.0f); - int ms = (int)(xis >> EXPSHIFTBITS_SP32) - 253; - int c = m == -127; - m = c ? ms : m; - uint xin = c ? xis : xi; - - float mf = (float)m; - uint indx = (xin & 0x007f0000) + ((xin & 0x00008000) << 1); - - // F - Y - float f = __clc_as_float(0x3f000000 | indx) - - __clc_as_float(0x3f000000 | (xin & MANTBITS_SP32)); - - indx = indx >> 16; - r = f * USE_TABLE(log_inv_tbl, indx); - - // 1/3, 1/2 - float poly = - __spirv_ocl_mad(__spirv_ocl_mad(r, 0x1.555556p-2f, 0.5f), r * r, r); - -#if defined(COMPILING_LOG2) - float2 tv = USE_TABLE(log2_tbl, indx); - z1 = tv.s0 + mf; - z2 = __spirv_ocl_mad(poly, -LOG2E, tv.s1); -#elif defined(COMPILING_LOG10) - float2 tv = USE_TABLE(log10_tbl, indx); - z1 = __spirv_ocl_mad(mf, LOG10_2_HEAD, tv.s0); - z2 = __spirv_ocl_mad(poly, -LOG10E, mf * LOG10_2_TAIL) + tv.s1; -#else - float2 tv = USE_TABLE(log_tbl, indx); - z1 = __spirv_ocl_mad(mf, LOG2_HEAD, tv.s0); - z2 = __spirv_ocl_mad(mf, LOG2_TAIL, -poly) + tv.s1; -#endif - - float z = z1 + z2; - z = near1 ? znear1 : z; - - // Corner cases - z = ax >= PINFBITPATT_SP32 ? x : z; - z = xi != ax ? __clc_as_float(QNANBITPATT_SP32) : z; - z = ax == 0 ? __clc_as_float(NINFBITPATT_SP32) : z; - - return z; -} - -#ifdef cl_khr_fp64 - -_CLC_OVERLOAD _CLC_DEF double -#if defined(COMPILING_LOG2) -__spirv_ocl_log2(double x) -#elif defined(COMPILING_LOG10) -__spirv_ocl_log10(double x) -#else -__spirv_ocl_log(double x) -#endif -{ - -#ifndef COMPILING_LOG2 - // log2_lead and log2_tail sum to an extra-precise version of ln(2) - const double log2_lead = 6.93147122859954833984e-01; /* 0x3fe62e42e0000000 */ - const double log2_tail = 5.76999904754328540596e-08; /* 0x3e6efa39ef35793c */ -#endif - -#if defined(COMPILING_LOG10) - // log10e_lead and log10e_tail sum to an extra-precision version of log10(e) - // (19 bits in lead) - const double log10e_lead = - 4.34293746948242187500e-01; /* 0x3fdbcb7800000000 */ - const double log10e_tail = - 7.3495500964015109100644e-7; /* 0x3ea8a93728719535 */ -#elif defined(COMPILING_LOG2) - // log2e_lead and log2e_tail sum to an extra-precision version of log2(e) (19 - // bits in lead) - const double log2e_lead = 1.44269180297851562500E+00; /* 0x3FF7154400000000 */ - const double log2e_tail = 3.23791044778235969970E-06; /* 0x3ECB295C17F0BBBE */ -#endif - - // log_thresh1 = 9.39412117004394531250e-1 = 0x3fee0faa00000000 - // log_thresh2 = 1.06449508666992187500 = 0x3ff1082c00000000 - const double log_thresh1 = 0x1.e0faap-1; - const double log_thresh2 = 0x1.1082cp+0; - - int is_near = x >= log_thresh1 & x <= log_thresh2; - - // Near 1 code - double r = x - 1.0; - double u = r / (2.0 + r); - double correction = r * u; - u = u + u; - double v = u * u; - double r1 = r; - - const double ca_1 = 8.33333333333317923934e-02; /* 0x3fb55555555554e6 */ - const double ca_2 = 1.25000000037717509602e-02; /* 0x3f89999999bac6d4 */ - const double ca_3 = 2.23213998791944806202e-03; /* 0x3f62492307f1519f */ - const double ca_4 = 4.34887777707614552256e-04; /* 0x3f3c8034c85dfff0 */ - - double r2 = __spirv_ocl_fma( - u * v, - __spirv_ocl_fma( - v, __spirv_ocl_fma(v, __spirv_ocl_fma(v, ca_4, ca_3), ca_2), ca_1), - -correction); - -#if defined(COMPILING_LOG10) - r = r1; - r1 = __clc_as_double(__clc_as_ulong(r1) & 0xffffffff00000000); - r2 = r2 + (r - r1); - double ret_near = __spirv_ocl_fma( - log10e_lead, r1, - __spirv_ocl_fma(log10e_lead, r2, - __spirv_ocl_fma(log10e_tail, r1, log10e_tail * r2))); -#elif defined(COMPILING_LOG2) - r = r1; - r1 = __clc_as_double(__clc_as_ulong(r1) & 0xffffffff00000000); - r2 = r2 + (r - r1); - double ret_near = __spirv_ocl_fma( - log2e_lead, r1, - __spirv_ocl_fma(log2e_lead, r2, - __spirv_ocl_fma(log2e_tail, r1, log2e_tail * r2))); -#else - double ret_near = r1 + r2; -#endif - - // This is the far from 1 code - - // Deal with subnormal - ulong ux = __clc_as_ulong(x); - ulong uxs = - __clc_as_ulong(__clc_as_double(0x03d0000000000000UL | ux) - 0x1.0p-962); - int c = ux < IMPBIT_DP64; - ux = c ? uxs : ux; - int expadjust = c ? 60 : 0; - - int xexp = ((__clc_as_int2(ux).hi >> 20) & 0x7ff) - EXPBIAS_DP64 - expadjust; - double f = __clc_as_double(HALFEXPBITS_DP64 | (ux & MANTBITS_DP64)); - int index = __clc_as_int2(ux).hi >> 13; - index = ((0x80 | (index & 0x7e)) >> 1) + (index & 0x1); - - double z1 = USE_TABLE(ln_tbl_lo, (index - 64)); - double q = USE_TABLE(ln_tbl_hi, (index - 64)); - - double f1 = index * 0x1.0p-7; - double f2 = f - f1; - u = f2 / __spirv_ocl_fma(f2, 0.5, f1); - v = u * u; - - const double cb_1 = 8.33333333333333593622e-02; /* 0x3fb5555555555557 */ - const double cb_2 = 1.24999999978138668903e-02; /* 0x3f89999999865ede */ - const double cb_3 = 2.23219810758559851206e-03; /* 0x3f6249423bd94741 */ - - double poly = v * __spirv_ocl_fma(v, __spirv_ocl_fma(v, cb_3, cb_2), cb_1); - double z2 = q + __spirv_ocl_fma(u, poly, u); - - double dxexp = (double)xexp; -#if defined(COMPILING_LOG10) - // Add xexp * log(2) to z1,z2 to get log(x) - r1 = __spirv_ocl_fma(dxexp, log2_lead, z1); - r2 = __spirv_ocl_fma(dxexp, log2_tail, z2); - double ret_far = __spirv_ocl_fma( - log10e_lead, r1, - __spirv_ocl_fma(log10e_lead, r2, - __spirv_ocl_fma(log10e_tail, r1, log10e_tail * r2))); -#elif defined(COMPILING_LOG2) - r1 = __spirv_ocl_fma(log2e_lead, z1, dxexp); - r2 = __spirv_ocl_fma(log2e_lead, z2, - __spirv_ocl_fma(log2e_tail, z1, log2e_tail * z2)); - double ret_far = r1 + r2; -#else - r1 = __spirv_ocl_fma(dxexp, log2_lead, z1); - r2 = __spirv_ocl_fma(dxexp, log2_tail, z2); - double ret_far = r1 + r2; -#endif - - double ret = is_near ? ret_near : ret_far; - - ret = __spirv_IsInf(x) ? __clc_as_double(PINFBITPATT_DP64) : ret; - ret = __spirv_IsNan(x) || (x < 0.0) ? __clc_as_double(QNANBITPATT_DP64) : ret; - ret = x == 0.0 ? __clc_as_double(NINFBITPATT_DP64) : ret; - return ret; -} - -#endif // cl_khr_fp64 diff --git a/libclc/libspirv/lib/generic/math/logb.cl b/libclc/libspirv/lib/generic/math/logb.cl index 3d0e58da201fe..54606ad5a50c4 100644 --- a/libclc/libspirv/lib/generic/math/logb.cl +++ b/libclc/libspirv/lib/generic/math/logb.cl @@ -6,43 +6,11 @@ // //===----------------------------------------------------------------------===// +#include #include -#include -#include +#define FUNCTION __spirv_ocl_logb +#define __CLC_FUNCTION(x) __clc_logb +#define __CLC_BODY -_CLC_OVERLOAD _CLC_DEF float __spirv_ocl_logb(float x) { - int ax = __clc_as_int(x) & EXSIGNBIT_SP32; - float s = -118 - __spirv_ocl_clz(ax); - float r = (ax >> EXPSHIFTBITS_SP32) - EXPBIAS_SP32; - r = ax >= PINFBITPATT_SP32 ? __clc_as_float(ax) : r; - r = ax < 0x00800000 ? s : r; - r = ax == 0 ? __clc_as_float(NINFBITPATT_SP32) : r; - return r; -} - -_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, float, __spirv_ocl_logb, float); - -#ifdef cl_khr_fp64 -#pragma OPENCL EXTENSION cl_khr_fp64 : enable - -_CLC_OVERLOAD _CLC_DEF double __spirv_ocl_logb(double x) { - long ax = __clc_as_long(x) & EXSIGNBIT_DP64; - double s = -1011L - __spirv_ocl_clz(ax); - double r = (int) (ax >> EXPSHIFTBITS_DP64) - EXPBIAS_DP64; - r = ax >= PINFBITPATT_DP64 ? __clc_as_double(ax) : r; - r = ax < 0x0010000000000000L ? s : r; - r = ax == 0L ? __clc_as_double(NINFBITPATT_DP64) : r; - return r; -} - -_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, __spirv_ocl_logb, double) -#endif - -#ifdef cl_khr_fp16 - -#pragma OPENCL EXTENSION cl_khr_fp16 : enable - -_CLC_DEFINE_UNARY_BUILTIN_SCALARIZE(half, __spirv_ocl_logb, __builtin_logbf, half) - -#endif +#include diff --git a/libclc/libspirv/lib/generic/math/mad.cl b/libclc/libspirv/lib/generic/math/mad.cl index a2b6b64223664..22f951eda7048 100644 --- a/libclc/libspirv/lib/generic/math/mad.cl +++ b/libclc/libspirv/lib/generic/math/mad.cl @@ -6,22 +6,10 @@ // //===----------------------------------------------------------------------===// -#include #include #include -_CLC_DEFINE_TERNARY_BUILTIN(float, __spirv_ocl_mad, __clc_mad, float, float, float) - -#ifdef cl_khr_fp64 -#pragma OPENCL EXTENSION cl_khr_fp64 : enable - -_CLC_DEFINE_TERNARY_BUILTIN(double, __spirv_ocl_mad, __clc_mad, double, double, double) - -#endif - -#ifdef cl_khr_fp16 -#pragma OPENCL EXTENSION cl_khr_fp16 : enable - -_CLC_DEFINE_TERNARY_BUILTIN(half, __spirv_ocl_mad, __clc_mad, half, half, half) - -#endif +#define FUNCTION __spirv_ocl_mad +#define __CLC_FUNCTION(x) __clc_mad +#define __CLC_BODY +#include diff --git a/libclc/libspirv/lib/generic/math/maxmag.cl b/libclc/libspirv/lib/generic/math/maxmag.cl index 5714c61ca7ab9..4c981d6caaadf 100644 --- a/libclc/libspirv/lib/generic/math/maxmag.cl +++ b/libclc/libspirv/lib/generic/math/maxmag.cl @@ -6,9 +6,10 @@ // //===----------------------------------------------------------------------===// -#include -#include +#include #include -#define __CLC_BODY +#define FUNCTION __spirv_ocl_maxmag +#define __CLC_FUNCTION(x) __clc_maxmag +#define __CLC_BODY #include diff --git a/libclc/libspirv/lib/generic/math/maxmag.inc b/libclc/libspirv/lib/generic/math/maxmag.inc deleted file mode 100644 index 44cf21cfda992..0000000000000 --- a/libclc/libspirv/lib/generic/math/maxmag.inc +++ /dev/null @@ -1,20 +0,0 @@ -//===----------------------------------------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __spirv_ocl_maxmag(__CLC_GENTYPE x, - __CLC_GENTYPE y) { - const __CLC_GENTYPE res = - __spirv_ocl_select(y, x, - __CLC_CONVERT_BIT_INTN(__spirv_FOrdGreaterThan( - __spirv_ocl_fabs(x), __spirv_ocl_fabs(y)))); - return __spirv_ocl_select( - res, __spirv_ocl_fmax(x, y), - __CLC_CONVERT_BIT_INTN( - (__spirv_IsNan(x) | __spirv_IsNan(y) | - __spirv_FOrdEqual(__spirv_ocl_fabs(x), __spirv_ocl_fabs(y))))); -} diff --git a/libclc/libspirv/lib/generic/math/minmag.cl b/libclc/libspirv/lib/generic/math/minmag.cl index 7fb773c6fdf9a..a900ce8d6014c 100644 --- a/libclc/libspirv/lib/generic/math/minmag.cl +++ b/libclc/libspirv/lib/generic/math/minmag.cl @@ -6,9 +6,10 @@ // //===----------------------------------------------------------------------===// -#include -#include +#include #include -#define __CLC_BODY +#define FUNCTION __spirv_ocl_minmag +#define __CLC_FUNCTION(x) __clc_minmag +#define __CLC_BODY #include diff --git a/libclc/libspirv/lib/generic/math/minmag.inc b/libclc/libspirv/lib/generic/math/minmag.inc deleted file mode 100644 index 9be181f718bff..0000000000000 --- a/libclc/libspirv/lib/generic/math/minmag.inc +++ /dev/null @@ -1,20 +0,0 @@ -//===----------------------------------------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __spirv_ocl_minmag(__CLC_GENTYPE x, - __CLC_GENTYPE y) { - const __CLC_GENTYPE res = - __spirv_ocl_select(y, x, - __CLC_CONVERT_BIT_INTN(__spirv_FOrdLessThan( - __spirv_ocl_fabs(x), __spirv_ocl_fabs(y)))); - return __spirv_ocl_select( - res, __spirv_ocl_fmin(x, y), - __CLC_CONVERT_BIT_INTN( - (__spirv_IsNan(x) | __spirv_IsNan(y) | - __spirv_FOrdEqual(__spirv_ocl_fabs(x), __spirv_ocl_fabs(y))))); -} diff --git a/libclc/libspirv/lib/generic/math/modf.cl b/libclc/libspirv/lib/generic/math/modf.cl index 2202af1d7e5a3..303defe821798 100644 --- a/libclc/libspirv/lib/generic/math/modf.cl +++ b/libclc/libspirv/lib/generic/math/modf.cl @@ -6,10 +6,10 @@ // //===----------------------------------------------------------------------===// +#include #include -#include -#include - -#define __CLC_BODY +#define FUNCTION __spirv_ocl_modf +#define __CLC_FUNCTION(x) __clc_modf +#define __CLC_BODY #include diff --git a/libclc/libspirv/lib/generic/math/modf.inc b/libclc/libspirv/lib/generic/math/modf.inc deleted file mode 100644 index b17779ad34735..0000000000000 --- a/libclc/libspirv/lib/generic/math/modf.inc +++ /dev/null @@ -1,49 +0,0 @@ -//===----------------------------------------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#if __CLC_FPSIZE == 64 -#define ZERO 0.0 -#define __CLC_CONVERT_NATN __CLC_XCONCAT(__spirv_SConvert_Rlong, __CLC_VECSIZE) -#elif __CLC_FPSIZE == 32 -#define ZERO 0.0f -#define __CLC_CONVERT_NATN __CLC_XCONCAT(__spirv_SConvert_Rint, __CLC_VECSIZE) -#elif __CLC_FPSIZE == 16 -#define ZERO 0.0h -#define __CLC_CONVERT_NATN __CLC_XCONCAT(__spirv_SConvert_Rshort, __CLC_VECSIZE) -#endif - -#ifdef __CLC_SCALAR -#undef __CLC_CONVERT_NATN -#define __CLC_CONVERT_NATN -#endif - -_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE -__spirv_ocl_modf(__CLC_GENTYPE x, __private __CLC_GENTYPE *iptr) { - *iptr = __spirv_ocl_trunc(x); - return __spirv_ocl_copysign( - __CLC_CONVERT_NATN(__spirv_IsInf(x)) ? ZERO : x - *iptr, x); -} - -#define MODF_DEF(addrspace) \ - _CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __spirv_ocl_modf( \ - __CLC_GENTYPE x, addrspace __CLC_GENTYPE *iptr) { \ - __private __CLC_GENTYPE private_iptr; \ - __CLC_GENTYPE ret = __spirv_ocl_modf(x, &private_iptr); \ - *iptr = private_iptr; \ - return ret; \ - } - -MODF_DEF(local); -MODF_DEF(global); - -#if _CLC_DISTINCT_GENERIC_AS_SUPPORTED -MODF_DEF(generic); -#endif - -#undef __CLC_CONVERT_NATN -#undef ZERO diff --git a/libclc/libspirv/lib/generic/math/native_divide.cl b/libclc/libspirv/lib/generic/math/native_divide.cl index 8331a8daf3012..65c3b6c40662d 100644 --- a/libclc/libspirv/lib/generic/math/native_divide.cl +++ b/libclc/libspirv/lib/generic/math/native_divide.cl @@ -6,8 +6,11 @@ // //===----------------------------------------------------------------------===// +#include #include -#define __CLC_BODY #define __FLOAT_ONLY +#define FUNCTION __spirv_ocl_native_divide +#define __CLC_FUNCTION(x) __clc_native_divide +#define __CLC_BODY #include diff --git a/libclc/libspirv/lib/generic/math/native_divide.inc b/libclc/libspirv/lib/generic/math/native_divide.inc deleted file mode 100644 index 2ea25a0f390f5..0000000000000 --- a/libclc/libspirv/lib/generic/math/native_divide.inc +++ /dev/null @@ -1,12 +0,0 @@ -//===----------------------------------------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE -__spirv_ocl_native_divide(__CLC_GENTYPE x, __CLC_GENTYPE y) { - return x / y; -} diff --git a/libclc/libspirv/lib/generic/math/native_exp10.cl b/libclc/libspirv/lib/generic/math/native_exp10.cl index 626590fb1d568..6b87f577264c7 100644 --- a/libclc/libspirv/lib/generic/math/native_exp10.cl +++ b/libclc/libspirv/lib/generic/math/native_exp10.cl @@ -6,8 +6,11 @@ // //===----------------------------------------------------------------------===// +#include #include -#define __CLC_BODY #define __FLOAT_ONLY +#define FUNCTION __spirv_ocl_native_exp10 +#define __CLC_FUNCTION(x) __clc_native_exp10 +#define __CLC_BODY #include diff --git a/libclc/libspirv/lib/generic/math/native_exp10.inc b/libclc/libspirv/lib/generic/math/native_exp10.inc deleted file mode 100644 index f6b369a1e9719..0000000000000 --- a/libclc/libspirv/lib/generic/math/native_exp10.inc +++ /dev/null @@ -1,12 +0,0 @@ -//===----------------------------------------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE -__spirv_ocl_native_exp10(__CLC_GENTYPE val) { - return __spirv_ocl_native_exp2(val * M_LOG210_F); -} diff --git a/libclc/libspirv/lib/generic/math/native_powr.cl b/libclc/libspirv/lib/generic/math/native_powr.cl index dfa219bbb70a5..345cfa737413a 100644 --- a/libclc/libspirv/lib/generic/math/native_powr.cl +++ b/libclc/libspirv/lib/generic/math/native_powr.cl @@ -6,8 +6,11 @@ // //===----------------------------------------------------------------------===// +#include #include -#define __CLC_BODY #define __FLOAT_ONLY +#define FUNCTION __spirv_ocl_native_powr +#define __CLC_FUNCTION(x) __clc_native_powr +#define __CLC_BODY #include diff --git a/libclc/libspirv/lib/generic/math/native_powr.inc b/libclc/libspirv/lib/generic/math/native_powr.inc deleted file mode 100644 index 0ca35ef7ee979..0000000000000 --- a/libclc/libspirv/lib/generic/math/native_powr.inc +++ /dev/null @@ -1,14 +0,0 @@ -//===----------------------------------------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __spirv_ocl_native_powr(__CLC_GENTYPE x, - __CLC_GENTYPE y) { - // x^y == 2^{log2 x^y} == 2^{y * log2 x} - // for x < 0 propagate nan created by log2 - return __spirv_ocl_native_exp2(y * __spirv_ocl_native_log2(x)); -} diff --git a/libclc/libspirv/lib/generic/math/native_recip.cl b/libclc/libspirv/lib/generic/math/native_recip.cl index d8cfc45ec9ea9..e5ea023458a31 100644 --- a/libclc/libspirv/lib/generic/math/native_recip.cl +++ b/libclc/libspirv/lib/generic/math/native_recip.cl @@ -6,8 +6,11 @@ // //===----------------------------------------------------------------------===// +#include #include -#define __CLC_BODY #define __FLOAT_ONLY +#define FUNCTION __spirv_ocl_native_recip +#define __CLC_FUNCTION(x) __clc_native_recip +#define __CLC_BODY #include diff --git a/libclc/libspirv/lib/generic/math/native_recip.inc b/libclc/libspirv/lib/generic/math/native_recip.inc deleted file mode 100644 index 371c67dd03a10..0000000000000 --- a/libclc/libspirv/lib/generic/math/native_recip.inc +++ /dev/null @@ -1,12 +0,0 @@ -//===----------------------------------------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE -__spirv_ocl_native_recip(__CLC_GENTYPE val) { - return 1.0f / val; -} diff --git a/libclc/libspirv/lib/generic/math/native_rsqrt.cl b/libclc/libspirv/lib/generic/math/native_rsqrt.cl index b83a9f398de8a..9bc56f0c6f9ad 100644 --- a/libclc/libspirv/lib/generic/math/native_rsqrt.cl +++ b/libclc/libspirv/lib/generic/math/native_rsqrt.cl @@ -6,8 +6,11 @@ // //===----------------------------------------------------------------------===// +#include #include -#define __CLC_BODY #define __FLOAT_ONLY +#define FUNCTION __spirv_ocl_native_rsqrt +#define __CLC_FUNCTION(x) __clc_native_rsqrt +#define __CLC_BODY #include diff --git a/libclc/libspirv/lib/generic/math/native_rsqrt.inc b/libclc/libspirv/lib/generic/math/native_rsqrt.inc deleted file mode 100644 index 91076d06f8ee2..0000000000000 --- a/libclc/libspirv/lib/generic/math/native_rsqrt.inc +++ /dev/null @@ -1,12 +0,0 @@ -//===----------------------------------------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE -__spirv_ocl_native_rsqrt(__CLC_GENTYPE val) { - return 1.0f / __spirv_ocl_native_sqrt(val); -} diff --git a/libclc/libspirv/lib/generic/math/native_tan.cl b/libclc/libspirv/lib/generic/math/native_tan.cl index f35ca96d09d6d..8abf6228fa619 100644 --- a/libclc/libspirv/lib/generic/math/native_tan.cl +++ b/libclc/libspirv/lib/generic/math/native_tan.cl @@ -6,8 +6,11 @@ // //===----------------------------------------------------------------------===// +#include #include -#define __CLC_BODY #define __FLOAT_ONLY +#define FUNCTION __spirv_ocl_native_tan +#define __CLC_FUNCTION(x) __clc_native_tan +#define __CLC_BODY #include diff --git a/libclc/libspirv/lib/generic/math/native_tan.inc b/libclc/libspirv/lib/generic/math/native_tan.inc deleted file mode 100644 index 07fbded42af89..0000000000000 --- a/libclc/libspirv/lib/generic/math/native_tan.inc +++ /dev/null @@ -1,11 +0,0 @@ -//===----------------------------------------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __spirv_ocl_native_tan(__CLC_GENTYPE val) { - return __spirv_ocl_native_sin(val) / __spirv_ocl_native_cos(val); -} diff --git a/libclc/libspirv/lib/generic/math/pow.inc b/libclc/libspirv/lib/generic/math/pow.inc deleted file mode 100644 index 5fc56571caff1..0000000000000 --- a/libclc/libspirv/lib/generic/math/pow.inc +++ /dev/null @@ -1,14 +0,0 @@ -//===----------------------------------------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include - -_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __spirv_ocl_pow(__CLC_GENTYPE x, - __CLC_GENTYPE y) { - return __clc_pow(x, y); -} diff --git a/libclc/libspirv/lib/generic/math/pown.inc b/libclc/libspirv/lib/generic/math/pown.inc deleted file mode 100644 index 3fd7c0799a407..0000000000000 --- a/libclc/libspirv/lib/generic/math/pown.inc +++ /dev/null @@ -1,12 +0,0 @@ -//===----------------------------------------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __spirv_ocl_pown(__CLC_GENTYPE x, - __CLC_INTN y) { - return __clc_pown(x, y); -} diff --git a/libclc/libspirv/lib/generic/math/rsqrt.cl b/libclc/libspirv/lib/generic/math/rsqrt.cl index 7dfbeceeb05c5..c599928be17bf 100644 --- a/libclc/libspirv/lib/generic/math/rsqrt.cl +++ b/libclc/libspirv/lib/generic/math/rsqrt.cl @@ -6,36 +6,11 @@ // //===----------------------------------------------------------------------===// +#include #include -#include +#define FUNCTION __spirv_ocl_rsqrt +#define __CLC_FUNCTION(x) __clc_rsqrt +#define __CLC_BODY -_CLC_OVERLOAD _CLC_DEF float __spirv_ocl_rsqrt(float x) { - return 1.0f / __spirv_ocl_sqrt(x); -} - -_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, float, __spirv_ocl_rsqrt, float); - -#ifdef cl_khr_fp64 - -#pragma OPENCL EXTENSION cl_khr_fp64 : enable - -_CLC_OVERLOAD _CLC_DEF double __spirv_ocl_rsqrt(double x) { - return 1.0 / __spirv_ocl_sqrt(x); -} - -_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, __spirv_ocl_rsqrt, double); - -#endif - -#ifdef cl_khr_fp16 - -#pragma OPENCL EXTENSION cl_khr_fp16 : enable - -_CLC_OVERLOAD _CLC_DEF half __spirv_ocl_rsqrt(half x) { - return 1.0f / __spirv_ocl_sqrt((float)x); -} - -_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, half, __spirv_ocl_rsqrt, half); - -#endif +#include diff --git a/libclc/libspirv/lib/generic/math/sincos.inc b/libclc/libspirv/lib/generic/math/sincos.inc deleted file mode 100644 index 72cbf789ebb6d..0000000000000 --- a/libclc/libspirv/lib/generic/math/sincos.inc +++ /dev/null @@ -1,24 +0,0 @@ -//===----------------------------------------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#define __CLC_DECLARE_SINCOS(ADDRSPACE, TYPE) \ - _CLC_OVERLOAD _CLC_DEF TYPE __spirv_ocl_sincos(TYPE x, \ - ADDRSPACE TYPE *cosval) { \ - *cosval = __spirv_ocl_cos(x); \ - return __spirv_ocl_sin(x); \ - } - -__CLC_DECLARE_SINCOS(global, __CLC_GENTYPE) -__CLC_DECLARE_SINCOS(local, __CLC_GENTYPE) -__CLC_DECLARE_SINCOS(private, __CLC_GENTYPE) - -#if _CLC_DISTINCT_GENERIC_AS_SUPPORTED -__CLC_DECLARE_SINCOS(generic, __CLC_GENTYPE) -#endif - -#undef __CLC_DECLARE_SINCOS diff --git a/libclc/libspirv/lib/generic/math/sincospiF_piby4.h b/libclc/libspirv/lib/generic/math/sincospiF_piby4.h deleted file mode 100644 index 8a9d1836e9832..0000000000000 --- a/libclc/libspirv/lib/generic/math/sincospiF_piby4.h +++ /dev/null @@ -1,62 +0,0 @@ -/* - * Copyright (c) 2014 Advanced Micro Devices, Inc. - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - * THE SOFTWARE. - */ - -// Evaluate single precisions in and cos of value in interval [-pi/4, pi/4] -_CLC_INLINE float2 __libclc__sincosf_piby4(float x) { - // Taylor series for sin(x) is x - x^3/3! + x^5/5! - x^7/7! ... - // = x * (1 - x^2/3! + x^4/5! - x^6/7! ... - // = x * f(w) - // where w = x*x and f(w) = (1 - w/3! + w^2/5! - w^3/7! ... - // We use a minimax approximation of (f(w) - 1) / w - // because this produces an expansion in even powers of x. - - // Taylor series for cos(x) is 1 - x^2/2! + x^4/4! - x^6/6! ... - // = f(w) - // where w = x*x and f(w) = (1 - w/2! + w^2/4! - w^3/6! ... - // We use a minimax approximation of (f(w) - 1 + w/2) / (w*w) - // because this produces an expansion in even powers of x. - - const float sc1 = -0.166666666638608441788607926e0F; - const float sc2 = 0.833333187633086262120839299e-2F; - const float sc3 = -0.198400874359527693921333720e-3F; - const float sc4 = 0.272500015145584081596826911e-5F; - - const float cc1 = 0.41666666664325175238031e-1F; - const float cc2 = -0.13888887673175665567647e-2F; - const float cc3 = 0.24800600878112441958053e-4F; - const float cc4 = -0.27301013343179832472841e-6F; - - float x2 = x * x; - - float2 ret; - ret.x = __spirv_ocl_mad( - x * x2, - __spirv_ocl_mad( - x2, __spirv_ocl_mad(x2, __spirv_ocl_mad(x2, sc4, sc3), sc2), sc1), - x); - ret.y = __spirv_ocl_mad( - x2 * x2, - __spirv_ocl_mad( - x2, __spirv_ocl_mad(x2, __spirv_ocl_mad(x2, cc4, cc3), cc2), cc1), - __spirv_ocl_mad(x2, -0.5f, 1.0f)); - return ret; -} diff --git a/libclc/libspirv/lib/generic/math/sqrt.cl b/libclc/libspirv/lib/generic/math/sqrt.cl index b67d26cbe5cfc..288e6479639f1 100644 --- a/libclc/libspirv/lib/generic/math/sqrt.cl +++ b/libclc/libspirv/lib/generic/math/sqrt.cl @@ -9,7 +9,7 @@ #include #include -#define FUNCTION spirv_ocl_sqrt +#define FUNCTION __spirv_ocl_sqrt #define __CLC_FUNCTION(x) __clc_sqrt #define __CLC_BODY diff --git a/libclc/libspirv/lib/generic/math/tables.cl b/libclc/libspirv/lib/generic/math/tables.cl deleted file mode 100644 index b1d00f1923648..0000000000000 --- a/libclc/libspirv/lib/generic/math/tables.cl +++ /dev/null @@ -1,30 +0,0 @@ -//===----------------------------------------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include - -#include - -DECLARE_TABLE(uchar, PIBITS_TBL, ) = { - 224, 241, 27, 193, 12, 88, 33, 116, 53, 126, 196, 126, 237, 175, - 169, 75, 74, 41, 222, 231, 28, 244, 236, 197, 151, 175, 31, - 235, 158, 212, 181, 168, 127, 121, 154, 253, 24, 61, 221, 38, - 44, 159, 60, 251, 217, 180, 125, 180, 41, 104, 45, 70, 188, - 188, 63, 96, 22, 120, 255, 95, 226, 127, 236, 160, 228, 247, - 46, 126, 17, 114, 210, 231, 76, 13, 230, 88, 71, 230, 4, 249, - 125, 209, 154, 192, 113, 166, 19, 18, 237, 186, 212, 215, 8, - 162, 251, 156, 166, 196, 114, 172, 119, 248, 115, 72, 70, 39, - 168, 187, 36, 25, 128, 75, 55, 9, 233, 184, 145, 220, 134, 21, - 239, 122, 175, 142, 69, 249, 7, 65, 14, 241, 100, 86, 138, 109, - 3, 119, 211, 212, 71, 95, 157, 240, 167, 84, 16, 57, 185, 13, - 230, 139, 2, 0, 0, 0, 0, 0, 0, 0 -}; - -uint4 TABLE_MANGLE(pibits_tbl)(size_t idx) { - return *(__constant uint4 *)(PIBITS_TBL + idx); -} diff --git a/libclc/libspirv/lib/generic/math/tgamma.cl b/libclc/libspirv/lib/generic/math/tgamma.cl index 1d8362039acf7..e837171996e78 100644 --- a/libclc/libspirv/lib/generic/math/tgamma.cl +++ b/libclc/libspirv/lib/generic/math/tgamma.cl @@ -6,63 +6,11 @@ // //===----------------------------------------------------------------------===// +#include #include -#include -#include +#define FUNCTION __spirv_ocl_tgamma +#define __CLC_FUNCTION(x) __clc_tgamma +#define __CLC_BODY -_CLC_OVERLOAD _CLC_DEF float __spirv_ocl_tgamma(float x) { - const float pi = 3.1415926535897932384626433832795f; - float ax = __spirv_ocl_fabs(x); - float lg = __spirv_ocl_lgamma(ax); - float g = __spirv_ocl_exp(lg); - - if (x < 0.0f) { - float z = __spirv_ocl_sinpi(x); - g = g * ax * z; - g = pi / g; - g = g == 0 ? __clc_as_float(PINFBITPATT_SP32) : g; - g = z == 0 ? __clc_as_float(QNANBITPATT_SP32) : g; - } - - return g; -} - -_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, float, __spirv_ocl_tgamma, float); - -#ifdef cl_khr_fp64 - -#pragma OPENCL EXTENSION cl_khr_fp64 : enable - -_CLC_OVERLOAD _CLC_DEF double __spirv_ocl_tgamma(double x) { - const double pi = 3.1415926535897932384626433832795; - double ax = __spirv_ocl_fabs(x); - double lg = __spirv_ocl_lgamma(ax); - double g = __spirv_ocl_exp(lg); - - if (x < 0.0) { - double z = __spirv_ocl_sinpi(x); - g = g * ax * z; - g = pi / g; - g = g == 0 ? __clc_as_double(PINFBITPATT_DP64) : g; - g = z == 0 ? __clc_as_double(QNANBITPATT_DP64) : g; - } - - return g; -} - -_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, __spirv_ocl_tgamma, - double); - -#endif - -#ifdef cl_khr_fp16 -#pragma OPENCL EXTENSION cl_khr_fp16 : enable - -_CLC_DEF _CLC_OVERLOAD half __spirv_ocl_tgamma(half x) { - return __spirv_ocl_tgamma((float)x); -} - -_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, half, __spirv_ocl_tgamma, half) - -#endif +#include diff --git a/libclc/libspirv/lib/ptx-nvidiacl/SOURCES b/libclc/libspirv/lib/ptx-nvidiacl/SOURCES index cf9a5a2c51152..74466057d6e90 100644 --- a/libclc/libspirv/lib/ptx-nvidiacl/SOURCES +++ b/libclc/libspirv/lib/ptx-nvidiacl/SOURCES @@ -36,7 +36,6 @@ math/hypot.cl math/ilogb.cl math/ldexp.cl math/lgamma.cl -math/log.cl math/log10.cl math/log1p.cl math/log2.cl @@ -65,8 +64,6 @@ math/rsqrt.cl math/sin.cl math/sincos.cl math/sinh.cl -math/sinpi.cl -math/sqrt.cl math/tan.cl math/tanh.cl math/tgamma.cl