From d5d7ef26e21beac7b39ef8d15762ddc3708a20b7 Mon Sep 17 00:00:00 2001 From: jinge90 Date: Fri, 28 Oct 2022 08:59:58 +0800 Subject: [PATCH 1/6] add test for bf16 to fp32 conversions. Signed-off-by: jinge90 --- SYCL/DeviceLib/imf_bf16_to_fp32.cpp | 53 +++++++++++++++++++++++++++++ 1 file changed, 53 insertions(+) create mode 100644 SYCL/DeviceLib/imf_bf16_to_fp32.cpp diff --git a/SYCL/DeviceLib/imf_bf16_to_fp32.cpp b/SYCL/DeviceLib/imf_bf16_to_fp32.cpp new file mode 100644 index 0000000000..fb2ca561d8 --- /dev/null +++ b/SYCL/DeviceLib/imf_bf16_to_fp32.cpp @@ -0,0 +1,53 @@ +// RUN: %clangxx -fsycl %s -o %t.out +// RUN: %CPU_RUN_PLACEHOLDER %t.out +// RUN: %ACC_RUN_PLACEHOLDER %t.out +// RUN: %GPU_RUN_PLACEHOLDER %t.out + +// RUN: %clangxx -fsycl -fno-builtin -fsycl-device-lib-jit-link %s -o %t.out +// RUN: %CPU_RUN_PLACEHOLDER %t.out +// RUN: %ACC_RUN_PLACEHOLDER %t.out +// RUN: %GPU_RUN_PLACEHOLDER %t.out +// +// UNSUPPORTED: cuda || hip + +#include "imf_utils.hpp" + +extern "C" { +float __imf_bfloat162float(uint16_t); +} + +int main() { + sycl::queue device_queue(sycl::default_selector_v); + std::cout << "Running on " + << device_queue.get_device().get_info() + << "\n"; + + { + std::initializer_list input_vals = { + 0x0, // +0 + 0x8000, // -0 + 0x1, // min positive subnormal + 0x7F, // max positive subnormal + 0x5A, // positive subnormal + 0x8001, // max negative subnormal + 0x807F, // min negative subnormal + 0x805A, // negative subnormal + 0x7F80, // +inf + 0xFF80, // -inf + 0x2E05, // positive normal + 0x7E5A, // positive normal + 0xAE44, // negative normal + 0xFF84, // negative normal + 0x7F7F, // max positive normal + 0xFF7F, // min negative normal + }; + + std::initializer_list ref_vals = { + 0x0, 0x80000000, 0x10000, 0x7F0000, 0x5A0000, 0x80010000, + 0x807F0000, 0x805A0000, 0x7F800000, 0xFF800000, 0x2E050000, + 0x7E5A0000, 0xAE440000, 0xFF840000, 0x7F7F0000, 0xFF7F0000 + }; + + test(device_queue, input_vals, ref_vals, F(__imf_bfloat162float)); + } +} From a09591e4ea03fd729e0cb7d9d37782549191e7c0 Mon Sep 17 00:00:00 2001 From: jinge90 Date: Mon, 7 Nov 2022 14:39:49 +0800 Subject: [PATCH 2/6] add tests for fp32<====>bf16 conversions Signed-off-by: jinge90 --- SYCL/DeviceLib/imf_bf16_to_fp32.cpp | 4 +- SYCL/DeviceLib/imf_fp32_to_bf16.cpp | 79 +++++++++++++++++++++++++++++ SYCL/DeviceLib/imf_utils.hpp | 3 ++ 3 files changed, 85 insertions(+), 1 deletion(-) create mode 100644 SYCL/DeviceLib/imf_fp32_to_bf16.cpp diff --git a/SYCL/DeviceLib/imf_bf16_to_fp32.cpp b/SYCL/DeviceLib/imf_bf16_to_fp32.cpp index fb2ca561d8..43ae246501 100644 --- a/SYCL/DeviceLib/imf_bf16_to_fp32.cpp +++ b/SYCL/DeviceLib/imf_bf16_to_fp32.cpp @@ -10,6 +10,8 @@ // // UNSUPPORTED: cuda || hip +// All __imf_* bf16 functions are implemented via fp32 emulation, so we don't +// need to check whether underlying device supports bf16 or not. #include "imf_utils.hpp" extern "C" { @@ -48,6 +50,6 @@ int main() { 0x7E5A0000, 0xAE440000, 0xFF840000, 0x7F7F0000, 0xFF7F0000 }; - test(device_queue, input_vals, ref_vals, F(__imf_bfloat162float)); + test(device_queue, input_vals, ref_vals, FT(uint32_t, __imf_bfloat162float)); } } diff --git a/SYCL/DeviceLib/imf_fp32_to_bf16.cpp b/SYCL/DeviceLib/imf_fp32_to_bf16.cpp new file mode 100644 index 0000000000..4a5a68cbcd --- /dev/null +++ b/SYCL/DeviceLib/imf_fp32_to_bf16.cpp @@ -0,0 +1,79 @@ +// RUN: %clangxx -fsycl %s -o %t.out +// RUN: %CPU_RUN_PLACEHOLDER %t.out +// RUN: %ACC_RUN_PLACEHOLDER %t.out +// RUN: %GPU_RUN_PLACEHOLDER %t.out + +// RUN: %clangxx -fsycl -fno-builtin -fsycl-device-lib-jit-link %s -o %t.out +// RUN: %CPU_RUN_PLACEHOLDER %t.out +// RUN: %ACC_RUN_PLACEHOLDER %t.out +// RUN: %GPU_RUN_PLACEHOLDER %t.out +// +// UNSUPPORTED: cuda || hip + +// All __imf_* bf16 functions are implemented via fp32 emulation, so we don't +// need to check whether underlying device supports bf16 or not. +#include "imf_utils.hpp" + +extern "C" { +uint16_t __imf_float2bfloat16(float); +uint16_t __imf_float2bfloat16_rd(float); +uint16_t __imf_float2bfloat16_rn(float); +uint16_t __imf_float2bfloat16_ru(float); +uint16_t __imf_float2bfloat16_rz(float); +} + +int main() { + sycl::queue device_queue(sycl::default_selector_v); + std::cout << "Running on " + << device_queue.get_device().get_info() + << "\n"; + + { + std::initializer_list input_vals = { + 0x0, // +0 + 0x80000000, // -0 + 0x1, // min positive subnormal + 0x7FFFFF, // max positive subnormal + 0x5A6BFC, // positive subnormal + 0x80000001, // max negative subnormal + 0x807FFFFF, // min negative subnormal + 0x805A6FED, // negative subnormal + 0x7F800000, // +inf + 0xFF800000, // -inf + 0x2E05CBA9, // positive normal + 0x7E5A8935, // positive normal + 0xAE4411FC, // negative normal + 0xFA84C773, // negative normal + 0x7F7FFFFF, // max positive normal + 0x765FCEED, // positive normal + 0xFF7FFFFF, // min negative normal + 0xAC763561, // negative normal + }; + + std::initializer_list ref_vals = { + 0x0, 0x8000, 0x0, 0x80, 0x5a, 0x8000, 0x8080, 0x805A, 0x7F80, + 0xFF80, 0x2E06, 0x7E5B, 0xAE44, 0xFA85, 0x7F80, 0x7660, 0xFF80, 0xAC76}; + + std::initializer_list ref_vals_rd = { + 0x0, 0x8000, 0x0, 0x7F, 0x5A, 0x8001, 0x8080, 0x805B, 0x7F80, + 0xFF80, 0x2E05, 0x7E5A, 0xAE45, 0xFA85, 0x7F7F, 0x765F, 0xFF80, 0xAC77}; + + std::initializer_list ref_vals_ru = { + 0x0, 0x8000, 0x1, 0x80, 0x5B, 0x8000, 0x807F, 0x805A, 0x7F80, + 0xFF80, 0x2E06, 0x7E5B, 0xAE44, 0xFA84, 0x7F80, 0x7660, 0xFF7F, 0xAC76}; + + std::initializer_list ref_vals_rz = { + 0x0, 0x8000, 0x0, 0x7F, 0x5A, 0x8000, 0x807F, 0x805A, 0x7F80, + 0xFF80, 0x2E05, 0x7E5A, 0xAE44, 0xFA84, 0x7F7F, 0x765F, 0xFF7F, 0xAC76}; + + test(device_queue, input_vals, ref_vals, FT1(float, __imf_float2bfloat16)); + test(device_queue, input_vals, ref_vals_rd, + FT1(float, __imf_float2bfloat16_rd)); + test(device_queue, input_vals, ref_vals, + FT1(float, __imf_float2bfloat16_rn)); + test(device_queue, input_vals, ref_vals_ru, + FT1(float, __imf_float2bfloat16_ru)); + test(device_queue, input_vals, ref_vals_rz, + FT1(float, __imf_float2bfloat16_rz)); + } +} diff --git a/SYCL/DeviceLib/imf_utils.hpp b/SYCL/DeviceLib/imf_utils.hpp index 36c88ea11f..c7f3f672ef 100644 --- a/SYCL/DeviceLib/imf_utils.hpp +++ b/SYCL/DeviceLib/imf_utils.hpp @@ -163,6 +163,9 @@ void test3(sycl::queue &q, std::initializer_list Input1, #define F(Name) [](auto x) { return (Name)(x); } #define FT(T, Name) [](auto x) { return __builtin_bit_cast(T, (Name)(x)); } +// Used for float2bf16 tests, all uint32_t input converted to float, then pass +// to __imf_float2bfloat16* utils. +#define FT1(T, Name) [](auto x) { return (Name)(__builtin_bit_cast(T, x)); } #define F2(Name) [](auto x, auto y) { return (Name)(x, y); } #define F3(Name) [](auto x, auto y, auto z) { return (Name)(x, y, z); } #if defined(__SPIR__) From 8bacbfaef5d244f0a177d30450ad89d6e7230a69 Mon Sep 17 00:00:00 2001 From: jinge90 Date: Wed, 9 Nov 2022 21:33:44 +0800 Subject: [PATCH 3/6] fix clang-format issue Signed-off-by: jinge90 --- SYCL/DeviceLib/imf_bf16_to_fp32.cpp | 42 ++++++++++++++--------------- 1 file changed, 21 insertions(+), 21 deletions(-) diff --git a/SYCL/DeviceLib/imf_bf16_to_fp32.cpp b/SYCL/DeviceLib/imf_bf16_to_fp32.cpp index 43ae246501..747864c742 100644 --- a/SYCL/DeviceLib/imf_bf16_to_fp32.cpp +++ b/SYCL/DeviceLib/imf_bf16_to_fp32.cpp @@ -26,30 +26,30 @@ int main() { { std::initializer_list input_vals = { - 0x0, // +0 - 0x8000, // -0 - 0x1, // min positive subnormal - 0x7F, // max positive subnormal - 0x5A, // positive subnormal - 0x8001, // max negative subnormal - 0x807F, // min negative subnormal - 0x805A, // negative subnormal - 0x7F80, // +inf - 0xFF80, // -inf - 0x2E05, // positive normal - 0x7E5A, // positive normal - 0xAE44, // negative normal - 0xFF84, // negative normal - 0x7F7F, // max positive normal - 0xFF7F, // min negative normal + 0x0, // +0 + 0x8000, // -0 + 0x1, // min positive subnormal + 0x7F, // max positive subnormal + 0x5A, // positive subnormal + 0x8001, // max negative subnormal + 0x807F, // min negative subnormal + 0x805A, // negative subnormal + 0x7F80, // +inf + 0xFF80, // -inf + 0x2E05, // positive normal + 0x7E5A, // positive normal + 0xAE44, // negative normal + 0xFF84, // negative normal + 0x7F7F, // max positive normal + 0xFF7F, // min negative normal }; std::initializer_list ref_vals = { - 0x0, 0x80000000, 0x10000, 0x7F0000, 0x5A0000, 0x80010000, - 0x807F0000, 0x805A0000, 0x7F800000, 0xFF800000, 0x2E050000, - 0x7E5A0000, 0xAE440000, 0xFF840000, 0x7F7F0000, 0xFF7F0000 - }; + 0x0, 0x80000000, 0x10000, 0x7F0000, 0x5A0000, 0x80010000, + 0x807F0000, 0x805A0000, 0x7F800000, 0xFF800000, 0x2E050000, 0x7E5A0000, + 0xAE440000, 0xFF840000, 0x7F7F0000, 0xFF7F0000}; - test(device_queue, input_vals, ref_vals, FT(uint32_t, __imf_bfloat162float)); + test(device_queue, input_vals, ref_vals, + FT(uint32_t, __imf_bfloat162float)); } } From c5f8b1011bb6f86d39d21e1f454dc293adea1ac9 Mon Sep 17 00:00:00 2001 From: jinge90 Date: Thu, 24 Nov 2022 09:43:10 +0800 Subject: [PATCH 4/6] Use public APIs to test bf16 libdevice funcs Signed-off-by: jinge90 --- SYCL/DeviceLib/imf_fp32_to_bf16.cpp | 23 +++++++++-------------- SYCL/DeviceLib/imf_utils.hpp | 4 ++++ 2 files changed, 13 insertions(+), 14 deletions(-) diff --git a/SYCL/DeviceLib/imf_fp32_to_bf16.cpp b/SYCL/DeviceLib/imf_fp32_to_bf16.cpp index 4a5a68cbcd..eceb1b3b0b 100644 --- a/SYCL/DeviceLib/imf_fp32_to_bf16.cpp +++ b/SYCL/DeviceLib/imf_fp32_to_bf16.cpp @@ -13,15 +13,9 @@ // All __imf_* bf16 functions are implemented via fp32 emulation, so we don't // need to check whether underlying device supports bf16 or not. #include "imf_utils.hpp" - -extern "C" { -uint16_t __imf_float2bfloat16(float); -uint16_t __imf_float2bfloat16_rd(float); -uint16_t __imf_float2bfloat16_rn(float); -uint16_t __imf_float2bfloat16_ru(float); -uint16_t __imf_float2bfloat16_rz(float); -} - +#include +using sycl_bfloat16 = sycl::ext::oneapi::experimental::bfloat16; +namespace sycl_imf = sycl::ext::intel::math; int main() { sycl::queue device_queue(sycl::default_selector_v); std::cout << "Running on " @@ -66,14 +60,15 @@ int main() { 0x0, 0x8000, 0x0, 0x7F, 0x5A, 0x8000, 0x807F, 0x805A, 0x7F80, 0xFF80, 0x2E05, 0x7E5A, 0xAE44, 0xFA84, 0x7F7F, 0x765F, 0xFF7F, 0xAC76}; - test(device_queue, input_vals, ref_vals, FT1(float, __imf_float2bfloat16)); + test(device_queue, input_vals, ref_vals, + FT12(float, uint16_t, sycl_imf::float2bfloat16)); test(device_queue, input_vals, ref_vals_rd, - FT1(float, __imf_float2bfloat16_rd)); + FT12(float, uint16_t, sycl_imf::float2bfloat16_rd)); test(device_queue, input_vals, ref_vals, - FT1(float, __imf_float2bfloat16_rn)); + FT12(float, uint16_t, sycl_imf::float2bfloat16_rn)); test(device_queue, input_vals, ref_vals_ru, - FT1(float, __imf_float2bfloat16_ru)); + FT12(float, uint16_t, sycl_imf::float2bfloat16_ru)); test(device_queue, input_vals, ref_vals_rz, - FT1(float, __imf_float2bfloat16_rz)); + FT12(float, uint16_t, sycl_imf::float2bfloat16_rz)); } } diff --git a/SYCL/DeviceLib/imf_utils.hpp b/SYCL/DeviceLib/imf_utils.hpp index c7f3f672ef..13790076d3 100644 --- a/SYCL/DeviceLib/imf_utils.hpp +++ b/SYCL/DeviceLib/imf_utils.hpp @@ -166,6 +166,10 @@ void test3(sycl::queue &q, std::initializer_list Input1, // Used for float2bf16 tests, all uint32_t input converted to float, then pass // to __imf_float2bfloat16* utils. #define FT1(T, Name) [](auto x) { return (Name)(__builtin_bit_cast(T, x)); } +#define FT12(T1, T2, Name) \ + [](auto x) { \ + return __builtin_bit_cast(T2, (Name)(__builtin_bit_cast(T1, x))); \ + } #define F2(Name) [](auto x, auto y) { return (Name)(x, y); } #define F3(Name) [](auto x, auto y, auto z) { return (Name)(x, y, z); } #if defined(__SPIR__) From c33722b852d7c1c3c9b58cc1bc054a27d864e871 Mon Sep 17 00:00:00 2001 From: jinge90 Date: Thu, 24 Nov 2022 16:07:34 +0800 Subject: [PATCH 5/6] add tests for imf bf16 comparison func Signed-off-by: jinge90 --- SYCL/DeviceLib/imf_bf16_comp_test.cpp | 235 ++++++++++++++++++++++++++ SYCL/DeviceLib/imf_utils.hpp | 9 + 2 files changed, 244 insertions(+) create mode 100644 SYCL/DeviceLib/imf_bf16_comp_test.cpp diff --git a/SYCL/DeviceLib/imf_bf16_comp_test.cpp b/SYCL/DeviceLib/imf_bf16_comp_test.cpp new file mode 100644 index 0000000000..af8e610ab9 --- /dev/null +++ b/SYCL/DeviceLib/imf_bf16_comp_test.cpp @@ -0,0 +1,235 @@ +// RUN: %clangxx -fsycl %s -o %t.out +// RUN: %CPU_RUN_PLACEHOLDER %t.out +// RUN: %ACC_RUN_PLACEHOLDER %t.out +// RUN: %GPU_RUN_PLACEHOLDER %t.out + +// RUN: %clangxx -fsycl -fno-builtin -fsycl-device-lib-jit-link %s -o %t.out +// RUN: %CPU_RUN_PLACEHOLDER %t.out +// RUN: %ACC_RUN_PLACEHOLDER %t.out +// RUN: %GPU_RUN_PLACEHOLDER %t.out +// +// UNSUPPORTED: cuda || hip + +#include "imf_utils.hpp" +#include +using sycl_bfloat16 = sycl::ext::oneapi::experimental::bfloat16; +namespace sycl_imf = sycl::ext::intel::math; +int main() { + sycl::queue device_queue(sycl::default_selector_v); + std::cout << "Running on " + << device_queue.get_device().get_info() + << "\n"; + + // hisnan test + { + std::initializer_list input_vals = {0x0, 0x1, 0x7A, + 0x7F80, 0x7FC0, 0x7FC5}; + std::initializer_list ref_vals = {false, false, false, + false, true, true}; + test(device_queue, input_vals, ref_vals, + FT1(sycl_bfloat16, sycl_imf::hisnan)); + } + + // hisinf + { + std::initializer_list input_vals = { + 0x0, 0x1, 0x7A, 0x7F80, 0x7FC0, 0x7FC5, 0xFF80, 0xAE44, 0xFF84}; + std::initializer_list ref_vals = {false, false, false, true, false, + false, true, false, false}; + test(device_queue, input_vals, ref_vals, + FT1(sycl_bfloat16, sycl_imf::hisinf)); + } + + // heq + { + std::initializer_list input_vals1 = { + 0x0, 0x1, 0x7A, 0x7F81, 0x7FC0, 0x7FC5, 0xFF80, 0xAE44, 0xFF74}; + std::initializer_list input_vals2 = { + 0x1, 0x1, 0x7C, 0x7F80, 0x7FC0, 0x7FC5, 0xFF80, 0xAE44, 0xFF84}; + std::initializer_list ref_vals = {false, true, false, false, false, + false, true, true, false}; + test2(device_queue, input_vals1, input_vals2, ref_vals, + FT2(sycl_bfloat16, sycl_imf::heq)); + } + + // hequ + { + std::initializer_list input_vals1 = { + 0x0, 0x1, 0x7A, 0x7F81, 0x7FC0, 0x7FC5, 0xFF80, 0xAE44, 0xFF74, 0x7FC2}; + std::initializer_list input_vals2 = { + 0x1, 0x1, 0x7C, 0x7F80, 0x7FC0, 0x7FC9, 0xFF80, 0xAE44, 0xFF84, 0xAAEC}; + std::initializer_list ref_vals = {false, true, false, true, true, + true, true, true, false, true}; + test2(device_queue, input_vals1, input_vals2, ref_vals, + FT2(sycl_bfloat16, sycl_imf::hequ)); + } + + // hne + { + std::initializer_list input_vals1 = { + 0x0, 0x1, 0x7A, 0x7F81, 0x7FC0, 0x7FC5, 0xFF80, 0xAE44, 0xFF74}; + std::initializer_list input_vals2 = { + 0x1, 0x1, 0x7C, 0x7F80, 0x7FC0, 0x7FC5, 0xFF80, 0xAE44, 0xFF84}; + std::initializer_list ref_vals = {true, false, true, false, false, + false, false, false, false}; + test2(device_queue, input_vals1, input_vals2, ref_vals, + FT2(sycl_bfloat16, sycl_imf::hne)); + } + + // hneu + { + std::initializer_list input_vals1 = { + 0x0, 0x1, 0x7A, 0x7F81, 0x7FC0, 0x7FC5, 0xFF80, 0xAE44, 0xFF74, 0x7FC2}; + std::initializer_list input_vals2 = { + 0x1, 0x1, 0x7C, 0x7F80, 0x7FC0, 0x7FC9, 0xFF80, 0xAE44, 0xFF84, 0xAAEC}; + std::initializer_list ref_vals = {true, false, true, true, true, + true, false, false, true, true}; + test2(device_queue, input_vals1, input_vals2, ref_vals, + FT2(sycl_bfloat16, sycl_imf::hneu)); + } + + // hge + { + std::initializer_list input_vals1 = { + 0x1, 0x1, 0x7A, 0x7F81, 0x7FC0, 0x7FC5, 0xFF80, 0x5000, 0xAF74, 0x71C2}; + std::initializer_list input_vals2 = { + 0x0, 0x1, 0x7C, 0x7F80, 0x7FC0, 0x7FC9, 0xFF80, 0x6000, 0xAF84, 0xAAEC}; + std::initializer_list ref_vals = {true, true, false, false, false, + false, true, false, true, true}; + test2(device_queue, input_vals1, input_vals2, ref_vals, + FT2(sycl_bfloat16, sycl_imf::hge)); + } + + // hgeu + { + std::initializer_list input_vals1 = { + 0x1, 0x1, 0x7A, 0x7F81, 0x7FC0, 0x7FC5, 0xFF80, 0x5000, 0xAF74, 0x71C2}; + std::initializer_list input_vals2 = { + 0x0, 0x1, 0x7C, 0x7F80, 0x7FC0, 0x7FC9, 0xFF80, 0x6000, 0xAF84, 0xAAEC}; + std::initializer_list ref_vals = {true, true, false, true, true, + true, true, false, true, true}; + test2(device_queue, input_vals1, input_vals2, ref_vals, + FT2(sycl_bfloat16, sycl_imf::hgeu)); + } + + // hgt + { + std::initializer_list input_vals1 = { + 0x1, 0x1, 0x7A, 0x7F81, 0x7FC0, 0x7FC5, 0xFF80, 0x5000, 0xAF74, 0x71C2}; + std::initializer_list input_vals2 = { + 0x0, 0x1, 0x7C, 0x7F80, 0x7FC0, 0x7FC9, 0xFF80, 0x6000, 0xAF84, 0xAAEC}; + std::initializer_list ref_vals = {true, false, false, false, false, + false, false, false, true, true}; + test2(device_queue, input_vals1, input_vals2, ref_vals, + FT2(sycl_bfloat16, sycl_imf::hgt)); + } + + // hgtu + { + std::initializer_list input_vals1 = { + 0x1, 0x1, 0x7A, 0x7F81, 0x7FC0, 0x7FC5, 0xFF80, 0x5000, 0xAF74, 0x71C2}; + std::initializer_list input_vals2 = { + 0x0, 0x1, 0x7C, 0x7F80, 0x7FC0, 0x7FC9, 0xFF80, 0x6000, 0xAF84, 0xAAEC}; + std::initializer_list ref_vals = {true, false, false, true, true, + true, false, false, true, true}; + test2(device_queue, input_vals1, input_vals2, ref_vals, + FT2(sycl_bfloat16, sycl_imf::hgtu)); + } + + // hle + { + std::initializer_list input_vals1 = { + 0x1, 0x1, 0x7A, 0x7F81, 0x7FC0, 0x7FC5, 0xFF80, 0x5000, 0xAF74, 0x71C2}; + std::initializer_list input_vals2 = { + 0x0, 0x1, 0x7C, 0x7F80, 0x7FC0, 0x7FC9, 0xFF80, 0x6000, 0xAF84, 0xAAEC}; + std::initializer_list ref_vals = {false, true, true, false, false, + false, true, true, false, false}; + test2(device_queue, input_vals1, input_vals2, ref_vals, + FT2(sycl_bfloat16, sycl_imf::hle)); + } + + // hleu + { + std::initializer_list input_vals1 = { + 0x1, 0x1, 0x7A, 0x7F81, 0x7FC0, 0x7FC5, 0xFF80, 0x5000, 0xAF74, 0x71C2}; + std::initializer_list input_vals2 = { + 0x0, 0x1, 0x7C, 0x7F80, 0x7FC0, 0x7FC9, 0xFF80, 0x6000, 0xAF84, 0xAAEC}; + std::initializer_list ref_vals = {false, true, true, true, true, + true, true, true, false, false}; + test2(device_queue, input_vals1, input_vals2, ref_vals, + FT2(sycl_bfloat16, sycl_imf::hleu)); + } + + // hlt + { + std::initializer_list input_vals1 = { + 0x1, 0x1, 0x7A, 0x7F81, 0x7FC0, 0x7FC5, 0xFF80, 0x5000, 0xAF74, 0x71C2}; + std::initializer_list input_vals2 = { + 0x0, 0x1, 0x7C, 0x7F80, 0x7FC0, 0x7FC9, 0xFF80, 0x6000, 0xAF84, 0xAAEC}; + std::initializer_list ref_vals = {false, false, true, false, false, + false, false, true, false, false}; + test2(device_queue, input_vals1, input_vals2, ref_vals, + FT2(sycl_bfloat16, sycl_imf::hlt)); + } + + // hltu + { + std::initializer_list input_vals1 = { + 0x1, 0x1, 0x7A, 0x7F81, 0x7FC0, 0x7FC5, 0xFF80, 0x5000, 0xAF74, 0x71C2}; + std::initializer_list input_vals2 = { + 0x0, 0x1, 0x7C, 0x7F80, 0x7FC0, 0x7FC9, 0xFF80, 0x6000, 0xAF84, 0xAAEC}; + std::initializer_list ref_vals = {false, false, true, true, true, + true, false, true, false, false}; + test2(device_queue, input_vals1, input_vals2, ref_vals, + FT2(sycl_bfloat16, sycl_imf::hltu)); + } + + // hmax + { + std::initializer_list input_vals1 = { + 0x1, 0xAF84, 0x8000, 0x7FC0, 0x1123, 0x7FCC, 0x7F80, 0x2E05}; + std::initializer_list input_vals2 = { + 0x2, 0x4044, 0x0, 0xAAAA, 0x7FC8, 0x7FC8, 0x7EEE, 0x2E55}; + std::initializer_list ref_vals = {0x2, 0x4044, 0x0, 0xAAAA, + 0x1123, 0x7FC0, 0x7F80, 0x2E55}; + test2(device_queue, input_vals1, input_vals2, ref_vals, + FT22(sycl_bfloat16, uint16_t, sycl_imf::hmax)); + } + + // hmax_nan + { + std::initializer_list input_vals1 = { + 0x1, 0xAF84, 0x8000, 0x7FC0, 0x1123, 0x7FCC, 0x7F80, 0x2E05}; + std::initializer_list input_vals2 = { + 0x2, 0x4044, 0x0, 0xAAAA, 0x7FC8, 0x7FC8, 0x7EEE, 0x2E55}; + std::initializer_list ref_vals = {0x2, 0x4044, 0x0, 0x7FC0, + 0x7FC0, 0x7FC0, 0x7F80, 0x2E55}; + test2(device_queue, input_vals1, input_vals2, ref_vals, + FT22(sycl_bfloat16, uint16_t, sycl_imf::hmax_nan)); + } + + // hmin + { + std::initializer_list input_vals1 = { + 0x1, 0xAF84, 0x8000, 0x7FC0, 0x1123, 0x7FCC, 0x7F80, 0x2E05}; + std::initializer_list input_vals2 = { + 0x2, 0x4044, 0x0, 0xAAAA, 0x7FC8, 0x7FC8, 0x7EEE, 0x2E55}; + std::initializer_list ref_vals = {0x1, 0xAF84, 0x8000, 0xAAAA, + 0x1123, 0x7FC0, 0x7EEE, 0x2E05}; + test2(device_queue, input_vals1, input_vals2, ref_vals, + FT22(sycl_bfloat16, uint16_t, sycl_imf::hmin)); + } + + // hmin_nan + { + std::initializer_list input_vals1 = { + 0x1, 0xAF84, 0x8000, 0x7FC0, 0x1123, 0x7FCC, 0x7F80, 0x2E05}; + std::initializer_list input_vals2 = { + 0x2, 0x4044, 0x0, 0xAAAA, 0x7FC8, 0x7FC8, 0x7EEE, 0x2E55}; + std::initializer_list ref_vals = {0x1, 0xAF84, 0x8000, 0x7FC0, + 0x7FC0, 0x7FC0, 0x7EEE, 0x2E05}; + test2(device_queue, input_vals1, input_vals2, ref_vals, + FT22(sycl_bfloat16, uint16_t, sycl_imf::hmin_nan)); + } + return 0; +} diff --git a/SYCL/DeviceLib/imf_utils.hpp b/SYCL/DeviceLib/imf_utils.hpp index 13790076d3..8ecf78d360 100644 --- a/SYCL/DeviceLib/imf_utils.hpp +++ b/SYCL/DeviceLib/imf_utils.hpp @@ -171,6 +171,15 @@ void test3(sycl::queue &q, std::initializer_list Input1, return __builtin_bit_cast(T2, (Name)(__builtin_bit_cast(T1, x))); \ } #define F2(Name) [](auto x, auto y) { return (Name)(x, y); } +#define FT2(T, Name) \ + [](auto x, auto y) { \ + return (Name)(__builtin_bit_cast(T, x), __builtin_bit_cast(T, y)); \ + } +#define FT22(T1, T2, Name) \ + [](auto x, auto y) { \ + return __builtin_bit_cast( \ + T2, (Name)(__builtin_bit_cast(T1, x), __builtin_bit_cast(T1, y))); \ + } #define F3(Name) [](auto x, auto y, auto z) { return (Name)(x, y, z); } #if defined(__SPIR__) #define F_Half1(Name) \ From 09f4b34557a995ec5931c912cc9aa1083b22a531 Mon Sep 17 00:00:00 2001 From: jinge90 Date: Thu, 24 Nov 2022 16:18:43 +0800 Subject: [PATCH 6/6] use C++ APIs to test bfloat16 to fp32 conversion Signed-off-by: jinge90 --- SYCL/DeviceLib/imf_bf16_to_fp32.cpp | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/SYCL/DeviceLib/imf_bf16_to_fp32.cpp b/SYCL/DeviceLib/imf_bf16_to_fp32.cpp index 747864c742..6f0356216d 100644 --- a/SYCL/DeviceLib/imf_bf16_to_fp32.cpp +++ b/SYCL/DeviceLib/imf_bf16_to_fp32.cpp @@ -13,11 +13,9 @@ // All __imf_* bf16 functions are implemented via fp32 emulation, so we don't // need to check whether underlying device supports bf16 or not. #include "imf_utils.hpp" - -extern "C" { -float __imf_bfloat162float(uint16_t); -} - +#include +using sycl_bfloat16 = sycl::ext::oneapi::experimental::bfloat16; +namespace sycl_imf = sycl::ext::intel::math; int main() { sycl::queue device_queue(sycl::default_selector_v); std::cout << "Running on " @@ -50,6 +48,6 @@ int main() { 0xAE440000, 0xFF840000, 0x7F7F0000, 0xFF7F0000}; test(device_queue, input_vals, ref_vals, - FT(uint32_t, __imf_bfloat162float)); + FT12(sycl_bfloat16, uint32_t, sycl_imf::bfloat162float)); } }