@@ -31,14 +31,14 @@ inline decltype(auto) vector_extract64(T a, int idx, int sign) {
3131 return sign ? (v2int32){a[idx], a[idx + 1 ]} : (v2uint32){a[idx], a[idx + 1 ]};
3232}
3333
// Diff hunk: the two hand-written vector_broadcast64 overloads (lines marked
// '-') are replaced by the VECTOR_BROADCAST64_FUNC macro (lines marked '+'),
// which is then expanded once for (v16int32, v2int32) and once for
// (v16uint32, v2uint32).
// Every variant returns a 16-element brace list that repeats b[0], b[1]
// eight times.
// NOTE(review): as rendered here there is whitespace between the macro name
// and '(' on the define line; a function-like macro needs '(' immediately
// after the name. This may be a rendering artefact - confirm against the
// real source.
34- inline v16int32 vector_broadcast64 (v2int32 b) {
35- return {b[ 0 ], b[ 1 ], b[ 0 ], b[ 1 ], b[ 0 ], b[ 1 ], b[ 0 ], b[ 1 ],
36- b[0 ], b[1 ], b[0 ], b[1 ], b[0 ], b[1 ], b[0 ], b[1 ]};
37- }
38- inline v16uint32 vector_broadcast64 (v2uint32 b) {
39- return {b[ 0 ], b[ 1 ], b[ 0 ], b[ 1 ], b[ 0 ], b[ 1 ], b[ 0 ], b[ 1 ],
40- b[ 0 ], b[ 1 ], b[ 0 ], b[ 1 ], b[ 0 ], b[ 1 ], b[ 0 ], b[ 1 ]};
41- }
34+ # define VECTOR_BROADCAST64_FUNC ( outType, inType ) \
35+ inline outType vector_broadcast64 (inType b) { \
36+ return { b[0 ], b[1 ], b[0 ], b[1 ], b[0 ], b[1 ], b[0 ], b[1 ], \
37+ b[ 0 ], b[ 1 ], b[ 0 ], b[ 1 ], b[ 0 ], b[ 1 ], b[ 0 ], b[ 1 ]}; \
38+ }
39+
40+ VECTOR_BROADCAST64_FUNC (v16int32, v2int32)
41+ VECTOR_BROADCAST64_FUNC(v16uint32, v2uint32)
4242
4343INTRINSIC(v128int4)
4444shiftx(v128int4 a, v128int4 b, int step, unsigned int shift) {
@@ -1354,10 +1354,10 @@ INTRINSIC(void *) extract_address(v16int32 v, int idx) {
13541354}
13551355// broadcast from scalar (alternative syntax to broadcast to vector)
// broadcast_s8: the '+' line widens the parameter from char to int and casts
// it back to char before subtracting v64int8{0}.
// NOTE(review): presumably the scalar-minus-vector expression replicates the
// scalar into every lane - confirm against the compiler's vector rules.
13561356INTRINSIC (v64int8)
1357- broadcast_s8(char b) { return b - v64int8{0 }; }
1357+ broadcast_s8(int b) { return ( char ) b - v64int8{0 }; }
13581358
// broadcast_s16: the '+' line takes int instead of short and narrows with an
// explicit (short) cast before subtracting v32int16{0}.
// NOTE(review): lane replication is assumed to come from scalar/vector
// arithmetic - confirm.
13591359INTRINSIC (v32int16)
1360- broadcast_s16(short b) { return b - v32int16{0 }; }
1360+ broadcast_s16(int b) { return ( short ) b - v32int16{0 }; }
13611361
// broadcast_s32: unchanged context; returns the int argument minus
// v16int32{0}, with no narrowing cast.
13621362INTRINSIC (v16int32)
13631363broadcast_s32(int b) { return b - v16int32{0 }; }
@@ -1369,10 +1369,10 @@ INTRINSIC(v16int32)
13691369broadcast_v2s32(v2int32 b) { return vector_broadcast64 (b); }
13701370
// broadcast_u8: the '+' line takes unsigned int instead of unsigned char and
// narrows with an explicit (unsigned char) cast before subtracting
// v64uint8{0}.
13711371INTRINSIC (v64uint8)
1372- broadcast_u8(unsigned char b) { return b - v64uint8{0 }; }
1372+ broadcast_u8(unsigned int b) { return ( unsigned char ) b - v64uint8{0 }; }
13731373
// broadcast_u16: the '+' line takes unsigned int instead of unsigned short
// and narrows with an explicit (unsigned short) cast before subtracting
// v32uint16{0}.
13741374INTRINSIC (v32uint16)
1375- broadcast_u16(unsigned short b) { return b - v32uint16{0 }; }
1375+ broadcast_u16(unsigned int b) { return ( unsigned short ) b - v32uint16{0 }; }
13761376
// broadcast_u32: unchanged context; returns the unsigned int argument minus
// v16uint32{0}.
13771377INTRINSIC (v16uint32)
13781378broadcast_u32(unsigned int b) { return b - v16uint32{0 }; }
@@ -1402,10 +1402,10 @@ broadcast_c32 (cint32 b) { return vector_broadcast64(b);}
14021402
14031403// broadcast to vector (alternative syntax to broadcast from scalar)
// broadcast_to_v128int4(v2int4): the '-' line subtracted v128int4{0} from b
// directly; the '+' line C-style casts b to char, then int, and forwards to
// broadcast_s8.
// NOTE(review): broadcast_s8 is declared with return type v64int8 while this
// function returns v128int4; the conversion is not visible here - confirm.
14041404INTRINSIC (v128int4)
1405- broadcast_to_v128int4(v2int4 b) { return b - v128int4{ 0 } ; }
1405+ broadcast_to_v128int4(v2int4 b) { return broadcast_s8 (( int )( char )b) ; }
14061406
// broadcast_to_v128int4(v4int4): forwards to broadcast_s16; the '+' line adds
// an outer (int) cast around the existing (short) cast, matching
// broadcast_s16's new int parameter.
14071407INTRINSIC (v128int4)
1408- broadcast_to_v128int4(v4int4 b) { return broadcast_s16 ((short )b); }
1408+ broadcast_to_v128int4(v4int4 b) { return broadcast_s16 ((int )( short )b); }
14091409
// broadcast_to_v128int4(v8int4): unchanged context; C-style casts b to int
// and forwards to broadcast_s32.
14101410INTRINSIC (v128int4)
14111411broadcast_to_v128int4(v8int4 b) { return broadcast_s32 ((int )b); }
@@ -1414,10 +1414,10 @@ INTRINSIC(v128int4)
14141414broadcast_to_v128int4(v16int4 b) { return vector_broadcast64 ((v2int32)b); }
14151415
// broadcast_to_v64int8 (scalar): the '-' line took char and subtracted
// v64int8{0} inline; the '+' line takes int and delegates to broadcast_s8.
// A separate int overload with the same body is removed further down in this
// diff.
14161416INTRINSIC (v64int8)
1417- broadcast_to_v64int8(char b) { return b - v64int8{ 0 } ; }
1417+ broadcast_to_v64int8(int b) { return broadcast_s8 (b) ; }
14181418
// broadcast_to_v64int8(v2int8): forwards to broadcast_s16; the '+' line wraps
// the existing (short) cast in an (int) cast.
// NOTE(review): broadcast_s16 is declared to return v32int16 while this
// function returns v64int8; the conversion is not visible here - confirm.
14191419INTRINSIC (v64int8)
1420- broadcast_to_v64int8(v2int8 b) { return broadcast_s16 ((short )b); }
1420+ broadcast_to_v64int8(v2int8 b) { return broadcast_s16 ((int )( short )b); }
14211421
// broadcast_to_v64int8(v4int8): unchanged context; C-style casts b to int and
// forwards to broadcast_s32.
14221422INTRINSIC (v64int8)
14231423broadcast_to_v64int8(v4int8 b) { return broadcast_s32 ((int )b); }
@@ -1426,7 +1426,7 @@ INTRINSIC(v64int8)
14261426broadcast_to_v64int8(v8int8 b) { return vector_broadcast64 ((v2int32)b); }
14271427
// broadcast_to_v32int16 (scalar): the '-' line took short and subtracted
// v32int16{0} inline; the '+' line takes int and delegates to broadcast_s16.
14281428INTRINSIC (v32int16)
1429- broadcast_to_v32int16(short b) { return b - v32int16{ 0 } ; }
1429+ broadcast_to_v32int16(int b) { return broadcast_s16 (b) ; }
14301430
// broadcast_to_v32int16(v2int16): unchanged context; C-style casts b to int
// and forwards to broadcast_s32.
14311431INTRINSIC (v32int16)
14321432broadcast_to_v32int16(v2int16 b) { return broadcast_s32 ((int )b); }
@@ -1435,7 +1435,7 @@ INTRINSIC(v32int16)
14351435broadcast_to_v32int16(v4int16 b) { return vector_broadcast64 ((v2int32)b); }
14361436
// broadcast_to_v16int32 (scalar): signature unchanged; the '+' line replaces
// the inline "b - v16int32{0}" expression with a call to broadcast_s32, whose
// body is that same expression.
14371437INTRINSIC (v16int32)
1438- broadcast_to_v16int32(int b) { return b - v16int32{ 0 } ; }
1438+ broadcast_to_v16int32(int b) { return broadcast_s32 (b) ; }
14391439
// broadcast_to_v16int32(mask64): unchanged context; C-style casts the mask64
// argument to v2int32 and forwards to vector_broadcast64.
14401440INTRINSIC (v16int32)
14411441broadcast_to_v16int32(mask64 b) { return vector_broadcast64 ((v2int32)b); }
@@ -1444,10 +1444,14 @@ INTRINSIC(v16int32)
14441444broadcast_to_v16int32(v2int32 b) { return vector_broadcast64 (b); }
14451445
// broadcast_to_v128uint4(v2uint4): the '-' line subtracted v128uint4{0} from
// b directly; the '+' lines C-style cast b to unsigned char, then unsigned
// int, and forward to broadcast_u8.
// NOTE(review): broadcast_u8 is declared to return v64uint8 while this
// function returns v128uint4; the conversion is not visible here - confirm.
14461446INTRINSIC (v128uint4)
1447- broadcast_to_v128uint4(v2uint4 b) { return b - v128uint4{0 }; }
1447+ broadcast_to_v128uint4(v2uint4 b) {
1448+ return broadcast_u8 ((unsigned int )(unsigned char )b);
1449+ }
14481450
// broadcast_to_v128uint4(v4uint4): forwards to broadcast_u16; the '+' lines
// wrap the existing (unsigned short) cast in an (unsigned int) cast and split
// the body over several lines.
14491451INTRINSIC (v128uint4)
1450- broadcast_to_v128uint4(v4uint4 b) { return broadcast_u16 ((unsigned short )b); }
1452+ broadcast_to_v128uint4(v4uint4 b) {
1453+ return broadcast_u16 ((unsigned int )(unsigned short )b);
1454+ }
14511455
// broadcast_to_v128uint4(v8uint4): unchanged context; C-style casts b to
// unsigned int and forwards to broadcast_u32.
14521456INTRINSIC (v128uint4)
14531457broadcast_to_v128uint4(v8uint4 b) { return broadcast_u32 ((unsigned int )b); }
@@ -1456,10 +1460,12 @@ INTRINSIC(v128uint4)
14561460broadcast_to_v128uint4(v16uint4 b) { return vector_broadcast64 ((v2uint32)b); }
14571461
// broadcast_to_v64uint8 (scalar): the '-' line took unsigned char and
// subtracted v64uint8{0} inline; the '+' line takes unsigned int and
// delegates to broadcast_u8.
14581462INTRINSIC (v64uint8)
1459- broadcast_to_v64uint8(unsigned char b) { return b - v64uint8{ 0 } ; }
1463+ broadcast_to_v64uint8(unsigned int b) { return broadcast_u8 (b) ; }
14601464
// broadcast_to_v64uint8(v2uint8): forwards to broadcast_u16; the '+' lines
// wrap the existing (unsigned short) cast in an (unsigned int) cast.
// NOTE(review): broadcast_u16 is declared to return v32uint16 while this
// function returns v64uint8; the conversion is not visible here - confirm.
14611465INTRINSIC (v64uint8)
1462- broadcast_to_v64uint8(v2uint8 b) { return broadcast_u16 ((unsigned short )b); }
1466+ broadcast_to_v64uint8(v2uint8 b) {
1467+ return broadcast_u16 ((unsigned int )(unsigned short )b);
1468+ }
14631469
// broadcast_to_v64uint8(v4uint8): unchanged context; C-style casts b to
// unsigned int and forwards to broadcast_u32.
14641470INTRINSIC (v64uint8)
14651471broadcast_to_v64uint8(v4uint8 b) { return broadcast_u32 ((unsigned int )b); }
@@ -1468,7 +1474,7 @@ INTRINSIC(v64uint8)
14681474broadcast_to_v64uint8(v8uint8 b) { return vector_broadcast64 ((v2uint32)b); }
14691475
// broadcast_to_v32uint16 (scalar): the '-' line took unsigned short and
// subtracted v32uint16{0} inline; the '+' line takes unsigned int and
// delegates to broadcast_u16.
14701476INTRINSIC (v32uint16)
1471- broadcast_to_v32uint16(unsigned short b) { return b - v32uint16{ 0 } ; }
1477+ broadcast_to_v32uint16(unsigned int b) { return broadcast_u16 (b) ; }
14721478
// broadcast_to_v32uint16(v2uint16): unchanged context; C-style casts b to
// unsigned int and forwards to broadcast_u32.
14731479INTRINSIC (v32uint16)
14741480broadcast_to_v32uint16(v2uint16 b) { return broadcast_u32 ((unsigned int )b); }
@@ -1477,7 +1483,7 @@ INTRINSIC(v32uint16)
14771483broadcast_to_v32uint16(v4uint16 b) { return vector_broadcast64 ((v2uint32)b); }
14781484
// broadcast_to_v16uint32 (scalar): signature unchanged; the '+' line replaces
// the inline "b - v16uint32{0}" expression with a call to broadcast_u32,
// whose body is that same expression.
14791485INTRINSIC (v16uint32)
1480- broadcast_to_v16uint32(unsigned int b) { return b - v16uint32{ 0 } ; }
1486+ broadcast_to_v16uint32(unsigned int b) { return broadcast_u32 (b) ; }
14811487
// broadcast_to_v16uint32(mask64): unchanged context; C-style casts the mask64
// argument to v2uint32 and forwards to vector_broadcast64.
14821488INTRINSIC (v16uint32)
14831489broadcast_to_v16uint32(mask64 b) { return vector_broadcast64 ((v2uint32)b); }
@@ -1530,15 +1536,6 @@ broadcast_to_v16float(v2float b) {
15301536 return broadcast_s64 (as_mask64);
15311537}
15321538
// Removed at this position by the diff: three zero-argument helpers that
// forward the literals 0 or 1 to broadcast_to_v32bfloat16 /
// broadcast_to_v16float. Functions with the same names are added again later
// in the diff; there broadcast_one_to_v32bfloat16 calls
// broadcast_one_bfloat16() instead of broadcast_to_v32bfloat16(1).
1533- INTRINSIC (v32bfloat16)
1534- broadcast_zero_to_v32bfloat16() { return broadcast_to_v32bfloat16 (0 ); }
1535-
1536- INTRINSIC (v32bfloat16)
1537- broadcast_one_to_v32bfloat16() { return broadcast_to_v32bfloat16 (1 ); }
1538-
1539- INTRINSIC (v16float)
1540- broadcast_one_to_v16float() { return broadcast_to_v16float (1 ); }
1541-
1541-
15421539// Right-most insertion (left shift)
15431540INTRINSIC (v64int8) shiftl_elem(v64int8 v, int s) {
15441541 return shift_bytes (v, broadcast_s8 (s), 1 );
@@ -1644,13 +1641,16 @@ INTRINSIC(v16uint32) broadcast_one_to_v16uint32() {
// broadcast_one_bfloat16: unchanged context; returns broadcast_bfloat16(1).
16441641INTRINSIC (v32bfloat16) broadcast_one_bfloat16() {
16451642 return broadcast_bfloat16 (1 );
16461643}
// broadcast_one_to_v32bfloat16 (added here): thin wrapper that returns
// broadcast_one_bfloat16(). The copy removed earlier in this diff called
// broadcast_to_v32bfloat16(1) instead.
1644+ INTRINSIC (v32bfloat16)
1645+ broadcast_one_to_v32bfloat16() { return broadcast_one_bfloat16 (); }
1646+
// broadcast_one_to_v16float (added here): returns broadcast_to_v16float(1).
// Same body as the copy removed earlier in this diff; only its position in
// the file changes.
1647+ INTRINSIC (v16float)
1648+ broadcast_one_to_v16float() { return broadcast_to_v16float (1 ); }
16471649
// Region disabled with "#if 0", so none of it is compiled. The diff drops
// broadcast_one_float from this region and keeps the two complex variants.
16481650#if 0
16491651INTRINSIC(v16cint16) broadcast_one_c16() { return broadcast_c16(1); }
16501652
16511653INTRINSIC(v8cint32) broadcast_one_c32() { return broadcast_c32(1); }
1652-
1653- INTRINSIC(v16float) broadcast_one_float() { return broadcast_float(1); }
16541654#endif
16551655
16561656// broadcast value zero(0) to all vector lanes
@@ -1708,13 +1708,9 @@ INTRINSIC(v16uint32) broadcast_zero_to_v16uint32() { return broadcast_u32(0); }
17081708 broadcast_zero_bfloat16 () {
17091709 return broadcast_bfloat16 (0 );
17101710}
// broadcast_zero_to_v32bfloat16 (added here): returns
// broadcast_to_v32bfloat16(0). Same body as the copy removed earlier in this
// diff; only its position in the file changes.
1711+ INTRINSIC (v32bfloat16)
1712+ broadcast_zero_to_v32bfloat16() { return broadcast_to_v32bfloat16 (0 ); }
17111713
// Removed by this diff: the deprecated broadcast_zero_float wrapper, which
// returned broadcast_float(0) and whose deprecation text pointed callers to
// broadcast_zero_to_v16float.
1712- [[deprecated(
1713- " Function 'broadcast_zero_float' is deprecated. Please use the "
1714- " 'broadcast_zero_to_v16float' variant instead." )]] INTRINSIC(v16float)
1715- broadcast_zero_float () {
1716- return broadcast_float (0 );
1717- }
// broadcast_zero_to_v16float: unchanged context; returns broadcast_float(0).
17181714INTRINSIC (v16float) broadcast_zero_to_v16float() { return broadcast_float (0 ); }
17191715
17201716#if 0
@@ -1791,25 +1787,19 @@ broadcast_elem(v16float v, int idx) {
17911787 return vector_broadcast64 (ext_v2int32 (v, idx, 0 ));
17921788}
17931789
// Removed by this diff: the int overload of broadcast_to_v64int8 defined at
// this position. An int overload forwarding to broadcast_s8 is introduced
// earlier in the diff, where it replaces the former char overload.
1794- INTRINSIC (v64int8)
1795- broadcast_to_v64int8(int b) { return broadcast_s8 ((int )b); }
1796-
// broadcast_zero_to_v16acc64: returns a value-initialised v16acc64{}.
17971790INTRINSIC (v16acc64) broadcast_zero_to_v16acc64() { return v16acc64{}; }
1798-
// clr16: deprecated wrapper that forwards to broadcast_zero_to_v16acc64().
// NOTE(review): the deprecation text names 'clr' rather than 'clr16' -
// confirm the wording is intentional.
17991791[[deprecated(" Function 'clr' is deprecated. Please use the 'broadcast_zero_to' "
18001792 " variant instead." )]] INTRINSIC(v16acc64) clr16() {
18011793 return broadcast_zero_to_v16acc64 ();
18021794}
18031795
// broadcast_zero_to_v32acc64: returns a value-initialised v32acc64{}.
18041796INTRINSIC (v32acc64) broadcast_zero_to_v32acc64() { return v32acc64{}; }
1805-
// clr32: deprecated wrapper that forwards to broadcast_zero_to_v32acc64().
// NOTE(review): the deprecation text names 'clr' rather than 'clr32' -
// confirm the wording is intentional.
18061797[[deprecated(" Function 'clr' is deprecated. Please use the 'broadcast_zero_to' "
18071798 " variant instead." )]] INTRINSIC(v32acc64) clr32() {
18081799 return broadcast_zero_to_v32acc64 ();
18091800}
18101801
// broadcast_zero_to_v64acc32: returns a value-initialised v64acc32{}.
18111802INTRINSIC (v64acc32) broadcast_zero_to_v64acc32() { return v64acc32{}; }
1812-
18131803[[deprecated(" Function 'clr' is deprecated. Please use the 'broadcast_zero_to' "
18141804 " variant instead." )]] INTRINSIC(v64acc32) clr64() {
18151805 return broadcast_zero_to_v64acc32 ();
0 commit comments