diff --git a/SYCL/BFloat16/bfloat16_fmax_fmin.cpp b/SYCL/BFloat16/bfloat16_fmax_fmin.cpp new file mode 100644 index 0000000000..c2aaeccb8a --- /dev/null +++ b/SYCL/BFloat16/bfloat16_fmax_fmin.cpp @@ -0,0 +1,78 @@ +// UNSUPPORTED: hip +// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple -fsycl-device-code-split=per_kernel %s -o %t.out +// Currently the feature is supported only on CPU and GPU, natively or by +// software emulation. +// RUN: %CPU_RUN_PLACEHOLDER %t.out +// RUN: %GPU_RUN_PLACEHOLDER %t.out +// RUNx: %ACC_RUN_PLACEHOLDER %t.out + +//==------- bfloat16_fmax_fmin.cpp - SYCL bfloat16 test for fmax, fmin------==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===---------------------------------------------------------------------===// + +#include "bfloat16_test_utils.hpp" + +int main() { + sycl::queue DeviceQueue(sycl::default_selector_v); + std::cout << "Running on " + << DeviceQueue.get_device().get_info() + << "\n"; + { + std::initializer_list InputVec1 = { + 0x0, 0x8000, 0x1, 0x7FC0, 0x7FC4, 0x7F80, 0xFF80, 0xFE84}; + std::initializer_list InputVec2 = { + 0x0, 0x0, 0x2, 0xAAAA, 0x7FCD, 0x7F7F, 0xFF7F, 0xAE44}; + std::initializer_list MaxResVec = { + 0x0, 0x0, 0x2, 0xAAAA, 0x7FC0, 0x7F80, 0xFF7F, 0xAE44}; + std::initializer_list MinResVec = { + 0x0, 0x8000, 0x1, 0xAAAA, 0x7FC0, 0x7F7F, 0xFF80, 0xFE84}; + + test_host2(InputVec1, InputVec2, MaxResVec, + BF16_FUNC2(sycl::ext::oneapi::experimental::fmax)); + test_device2(DeviceQueue, InputVec1, InputVec2, MaxResVec, + BF16_FUNC2(sycl::ext::oneapi::experimental::fmax)); + + test_host2_marray<1>(InputVec1, InputVec2, MaxResVec, + FUNC2(sycl::ext::oneapi::experimental::fmax)); + test_host2_marray<2>(InputVec1, InputVec2, MaxResVec, + FUNC2(sycl::ext::oneapi::experimental::fmax)); + test_host2_marray<3>(InputVec1, InputVec2, MaxResVec, + FUNC2(sycl::ext::oneapi::experimental::fmax)); + test_host2_marray<4>(InputVec1, InputVec2, MaxResVec, + FUNC2(sycl::ext::oneapi::experimental::fmax)); + test_device2_marray<1>(DeviceQueue, InputVec1, InputVec2, MaxResVec, + FUNC2(sycl::ext::oneapi::experimental::fmax)); + test_device2_marray<2>(DeviceQueue, InputVec1, InputVec2, MaxResVec, + FUNC2(sycl::ext::oneapi::experimental::fmax)); + test_device2_marray<3>(DeviceQueue, InputVec1, InputVec2, MaxResVec, + FUNC2(sycl::ext::oneapi::experimental::fmax)); + test_device2_marray<4>(DeviceQueue, InputVec1, InputVec2, MaxResVec, + FUNC2(sycl::ext::oneapi::experimental::fmax)); + + test_host2(InputVec1, InputVec2, MinResVec, + BF16_FUNC2(sycl::ext::oneapi::experimental::fmin)); + test_device2(DeviceQueue, InputVec1, InputVec2, MinResVec, + BF16_FUNC2(sycl::ext::oneapi::experimental::fmin)); + + test_host2_marray<1>(InputVec1, InputVec2, MinResVec, + FUNC2(sycl::ext::oneapi::experimental::fmin)); + test_host2_marray<2>(InputVec1, InputVec2, MinResVec, + FUNC2(sycl::ext::oneapi::experimental::fmin)); + test_host2_marray<3>(InputVec1, InputVec2, MinResVec, + FUNC2(sycl::ext::oneapi::experimental::fmin)); + test_host2_marray<4>(InputVec1, InputVec2, MinResVec, + FUNC2(sycl::ext::oneapi::experimental::fmin)); + test_device2_marray<1>(DeviceQueue, InputVec1, InputVec2, MinResVec, + FUNC2(sycl::ext::oneapi::experimental::fmin)); + test_device2_marray<2>(DeviceQueue, InputVec1, InputVec2, MinResVec, + FUNC2(sycl::ext::oneapi::experimental::fmin)); + test_device2_marray<3>(DeviceQueue, InputVec1, InputVec2, MinResVec, + FUNC2(sycl::ext::oneapi::experimental::fmin)); + test_device2_marray<4>(DeviceQueue, InputVec1, InputVec2, MinResVec, + FUNC2(sycl::ext::oneapi::experimental::fmin)); + } +} diff --git a/SYCL/BFloat16/bfloat16_test_utils.hpp b/SYCL/BFloat16/bfloat16_test_utils.hpp new file mode 100644 index 0000000000..cecd80dee4 --- /dev/null +++ b/SYCL/BFloat16/bfloat16_test_utils.hpp @@ -0,0 +1,165 @@ +#pragma once +#include +#include +#include +#include +#include +#include + +using Bfloat16StorageT = sycl::ext::oneapi::detail::Bfloat16StorageT; + +template +void test_host1(std::initializer_list Input, + std::initializer_list RefOutput, FuncTy Func, + int Line = __builtin_LINE()) { + auto Size = Input.size(); + assert(RefOutput.size() == Size); + + for (int i = 0; i < Size; ++i) { + auto Expected = *(std::begin(RefOutput) + i); + auto Res = Func(*(std::begin(Input) + i)); + if (Expected == Res) + continue; + + std::cout << "Mismatch at line " << Line << "[" << i << "]: " << Res + << " != " << Expected << std::endl; + assert(false); + } +} + +template +void test_host2(std::initializer_list Input1, + std::initializer_list Input2, + std::initializer_list RefOutput, FuncTy Func, + int Line = __builtin_LINE()) { + auto Size = Input1.size(); + assert((RefOutput.size() == Size) && (Input2.size() == Size)); + + for (int i = 0; i < Size; ++i) { + auto Expected = *(std::begin(RefOutput) + i); + auto Res = Func(*(std::begin(Input1) + i), *(std::begin(Input2) + i)); + if (Expected == Res) + continue; + + std::cout << "Mismatch at line " << Line << "[" << i << "]: " << Res + << " != " << Expected << std::endl; + assert(false); + } +} + +template +void test_host2_marray(std::initializer_list Input1, + std::initializer_list Input2, + std::initializer_list RefOutput, FuncTy Func, + int Line = __builtin_LINE()) { + auto Size = Input1.size(); + assert((RefOutput.size() == Size) && (Input2.size() == Size)); + sycl::marray MAInput1; + sycl::marray MAInput2; + for (size_t i = 0; i < N; i++) { + MAInput1[i] = + sycl::ext::oneapi::detail::bitsToBfloat16(*(std::begin(Input1) + i)); + MAInput2[i] = + sycl::ext::oneapi::detail::bitsToBfloat16(*(std::begin(Input2) + i)); + } + + sycl::marray res = Func(MAInput1, MAInput2); + for (int i = 0; i < N; ++i) { + auto Expected = *(std::begin(RefOutput) + i); + auto Res = sycl::ext::oneapi::detail::bfloat16ToBits(res[i]); + if (Expected == Res) + continue; + + std::cout << "Mismatch at line " << Line << "[" << i << "]: " << Res + << " != " << Expected << std::endl; + assert(false); + } +} + +template +void test_device2(sycl::queue &Q, std::initializer_list Input1, + std::initializer_list Input2, + std::initializer_list RefOutput, FuncTy Func, + int Line = __builtin_LINE()) { + auto Size = Input1.size(); + assert(Size == Input2.size()); + assert(RefOutput.size() == Size); + + sycl::buffer InBuf1(Size); + sycl::buffer InBuf2(Size); + { + sycl::host_accessor InAcc1(InBuf1, sycl::write_only); + sycl::host_accessor InAcc2(InBuf2, sycl::write_only); + for (int i = 0; i < Size; ++i) { + InAcc1[i] = *(std::begin(Input1) + i); + InAcc2[i] = *(std::begin(Input2) + i); + } + } + + sycl::buffer OutBuf(Size); + Q.submit([&](sycl::handler &CGH) { + sycl::accessor InAcc1(InBuf1, CGH, sycl::read_only); + sycl::accessor InAcc2(InBuf2, CGH, sycl::read_only); + sycl::accessor OutAcc(OutBuf, CGH, sycl::write_only); + CGH.parallel_for(Size, [=](sycl::id<1> Id) { + OutAcc[Id] = Func(InAcc1[Id], InAcc2[Id]); + }); + }).wait(); + + sycl::host_accessor Acc(OutBuf, sycl::read_only); + for (int i = 0; i < Size; ++i) { + auto Expected = *(std::begin(RefOutput) + i); + if (Expected == Acc[i]) + continue; + std::cout << "Mismatch at line " << Line << "[" << i << "]: " << Acc[i] + << " != " << Expected << ", input idx was " << i << std::endl; + assert(false); + } +} + +template +void test_device2_marray(sycl::queue &Q, std::initializer_list Input1, + std::initializer_list Input2, + std::initializer_list RefOutput, FuncTy Func, + int Line = __builtin_LINE()) { + auto Size = Input1.size(); + assert(Size == Input2.size()); + assert(RefOutput.size() == Size); + + sycl::marray MAInput1; + sycl::marray MAInput2; + for (size_t i = 0; i < N; i++) { + MAInput1[i] = + sycl::ext::oneapi::detail::bitsToBfloat16(*(std::begin(Input1) + i)); + MAInput2[i] = + sycl::ext::oneapi::detail::bitsToBfloat16(*(std::begin(Input2) + i)); + } + + sycl::marray res; + { + sycl::buffer, 1> OutBuf(&res, + 1); + Q.submit([&](sycl::handler &CGH) { + sycl::accessor OutAcc(OutBuf, CGH, sycl::write_only); + CGH.single_task([=]() { OutAcc[0] = Func(MAInput1, MAInput2); }); + }).wait(); + } + + for (int i = 0; i < N; ++i) { + auto Expected = *(std::begin(RefOutput) + i); + auto Res = sycl::ext::oneapi::detail::bfloat16ToBits(res[i]); + if (Expected == Res) + continue; + std::cout << "Mismatch at line " << Line << "[" << i << "]: " << Res + << " != " << Expected << ", input idx was " << i << std::endl; + assert(false); + } +} + +#define FUNC2(Name) [](auto x, auto y) { return (Name)(x, y); } +#define BF16_FUNC2(Name) \ + [](Bfloat16StorageT x, Bfloat16StorageT y) { \ + return sycl::ext::oneapi::detail::bfloat16ToBits( \ + (Name)(sycl::ext::oneapi::detail::bitsToBfloat16(x), \ + sycl::ext::oneapi::detail::bitsToBfloat16(y))); \ + }