@@ -2599,38 +2599,14 @@ HWY_API VFromD<DN> ReorderDemote2To(DN dn, V a, V b) {
2599
2599
2600
2600
// Diff hunk for the PromoteTo overload that takes a vector of uint16_t lanes
// (Vec<Rebind<uint16_t, D>>) and returns VFromD<D> for the tag `df`.
//
// Removed lines (prefixed `-`): promoted the input to u32, OR-ed each lane with
// 0x4b000000 (the float bit pattern of 2^23), reinterpreted the result as float
// and subtracted the same constant reinterpreted as float, so the only
// floating-point operation was a subtraction.
// NOTE(review): the removed comment talks about "8 bits" and [ 0 .. 255 ],
// while this overload's input lanes are uint16_t.
//
// Added lines (prefixed `+`): promote to u32 via PromoteTo(du32, v) and pass the
// result to ConvertTo(df, ...).
// NOTE(review): the removed code was motivated by avoiding UCVTF; presumably the
// replacement uses the regular integer-to-float conversion - confirm the
// performance impact is acceptable.
template <class D , HWY_IF_F32_D(D)>
2601
2601
HWY_API VFromD<D> PromoteTo (D df, Vec<Rebind<uint16_t , D>> v) {
2602
- const RebindToUnsigned<decltype (df)> du32;
2603
-
2604
- // Floats have 23 bits of mantissa.
2605
- // We want least significant 8 bits to be shifted to [ 0 .. 255 ], therefore need to add 2^23
2606
- // See this page for details: https://www.h-schmidt.net/FloatConverter/IEEE754.html
2607
- // If you want output floats in [ 0 .. 255.0 / 256.0 ] interval, change into 2^15 = 0x47000000
2608
- constexpr uint32_t offsetValue = 0x4b000000 ;
2609
- // Check disassembly & verify your compiler has moved this initialization outside the loop
2610
- const auto offsetInt = Set (du32, offsetValue);
2611
- // Bitwise is probably slightly faster than addition, delivers same results for our input
2612
- auto u32 = PromoteTo (du32, v);
2613
- u32 = Or (u32 , offsetInt);
2614
- // The only FP operation required is subtraction, hopefully faster than UCVTF
2615
- return Sub (BitCast (df, u32 ), BitCast (df, offsetInt));
2602
+ const RebindToUnsigned<decltype (df)> du32;
2603
+ return ConvertTo (df, PromoteTo (du32, v));
2616
2604
}
2617
2605
2618
2606
// Diff hunk for the PromoteTo overload that takes a vector of uint8_t lanes
// (Vec<Rebind<uint8_t, D>>) and returns VFromD<D> for the tag `df`.
//
// Removed lines (prefixed `-`): promoted the input to u32, OR-ed each lane with
// 0x4b000000 (the float bit pattern of 2^23), reinterpreted the result as float
// and subtracted the same constant reinterpreted as float. Because the input
// fits in the low mantissa bits, the OR yields the float 2^23 + x and the
// subtraction leaves x.
//
// Added lines (prefixed `+`): promote to u32 via PromoteTo(du32, v) and pass the
// result to ConvertTo(df, ...).
// NOTE(review): the removed code was motivated by avoiding UCVTF; presumably the
// replacement uses the regular integer-to-float conversion - confirm the
// performance impact is acceptable.
template <class D , HWY_IF_F32_D(D)>
2619
2607
HWY_API VFromD<D> PromoteTo (D df, Vec<Rebind<uint8_t , D>> v) {
2620
- const RebindToUnsigned<decltype (df)> du32;
2621
-
2622
- // Floats have 23 bits of mantissa.
2623
- // We want least significant 8 bits to be shifted to [ 0 .. 255 ], therefore need to add 2^23
2624
- // See this page for details: https://www.h-schmidt.net/FloatConverter/IEEE754.html
2625
- // If you want output floats in [ 0 .. 255.0 / 256.0 ] interval, change into 2^15 = 0x47000000
2626
- constexpr uint32_t offsetValue = 0x4b000000 ;
2627
- // Check disassembly & verify your compiler has moved this initialization outside the loop
2628
- const auto offsetInt = Set (du32, offsetValue);
2629
- // Bitwise is probably slightly faster than addition, delivers same results for our input
2630
- auto u32 = PromoteTo (du32, v);
2631
- u32 = Or (u32 , offsetInt);
2632
- // The only FP operation required is subtraction, hopefully faster than UCVTF
2633
- return Sub (BitCast (df, u32 ), BitCast (df, offsetInt));
2608
+ const RebindToUnsigned<decltype (df)> du32;
2609
+ return ConvertTo (df, PromoteTo (du32, v));
2634
2610
}
2635
2611
2636
2612
template <class D , HWY_IF_U8_D(D)>
0 commit comments