2525#include < cstring>
2626#include < iostream>
2727#include < type_traits>
28- #if defined(__F16C__)
29- #include < x86intrin.h>
30- #endif
3128
3229namespace svs {
3330namespace float16 {
3431namespace detail {
3532
// Reinterpret the object representation of a `float` as a `uint32_t`.
//
// The bytes of `x` are copied verbatim; no numeric conversion takes place.
// `std::bit_cast` is used when the standard library provides it. Otherwise the
// bytes are copied with `std::memcpy`, which is the well-defined way to type-pun
// before C++20.
inline uint32_t bitcast_float_to_uint32(const float x) {
    static_assert(sizeof(float) == sizeof(uint32_t));
#if defined(__cpp_lib_bit_cast)
    return std::bit_cast<uint32_t>(x);
#else
    uint32_t bits;
    std::memcpy(&bits, &x, sizeof(bits));
    return bits;
#endif
}
4337
// Reinterpret the object representation of a `uint32_t` as a `float`.
//
// The bytes of `x` are copied verbatim; no numeric conversion takes place.
// `std::bit_cast` is used when the standard library provides it. Otherwise the
// bytes are copied with `std::memcpy`, which is the well-defined way to type-pun
// before C++20.
inline float bitcast_uint32_to_float(const uint32_t x) {
    static_assert(sizeof(float) == sizeof(uint32_t));
#if defined(__cpp_lib_bit_cast)
    return std::bit_cast<float>(x);
#else
    float value;
    std::memcpy(&value, &x, sizeof(value));
    return value;
#endif
}
5042
5143// reference:
@@ -72,38 +64,12 @@ inline uint16_t float_to_float16_untyped_slow(const float x) {
7264 0x7FFF ; // sign : normalized : denormalized : saturate
7365}
7466
75- // If the processor is new enough, we can use hardware intrinsics to perform the conversion
76- // without using bit-level manipulation.
77- //
78- // Here, we check if the `F16C` set is enabled and if so, we define the intrinsic based
79- // conversion functions.
80- //
81- // The entry point for users of the conversion is `*_to_*_untyped`, which will dispatch
82- // to either the slow or fast version, depending on the architecture.
83- #if defined(__F16C__)
84- inline float float16_to_float_untyped_fast (const uint16_t x) {
85- auto converted = _mm_cvtph_ps (_mm_set1_epi16 (std::bit_cast<int16_t >(x)));
86- return _mm_cvtss_f32 (converted);
87- }
88- inline uint16_t float_to_float16_untyped_fast (const float x) {
89- auto converted = _mm_cvtps_ph (__m128{x}, _MM_FROUND_NO_EXC);
90- return _mm_extract_epi16 (converted, 0 );
91- }
92-
93- inline float float16_to_float_untyped (const uint16_t x) {
94- return float16_to_float_untyped_fast (x);
95- }
96- inline uint16_t float_to_float16_untyped (const float x) {
97- return float_to_float16_untyped_fast (x);
98- }
99- #else
10067inline float float16_to_float_untyped (const uint16_t x) {
10168 return float16_to_float_untyped_slow (x);
10269}
10370inline uint16_t float_to_float16_untyped (const float x) {
10471 return float_to_float16_untyped_slow (x);
10572}
106- #endif
10773} // namespace detail
10874
10975// On GCC - we need to add this attribute so that Float16 members can appear inside
0 commit comments