Skip to content

Commit 526ab7c

Browse files
[AIE2P] Fix broadcast intrinsics.
Parent commit: 6167e9f. This commit: 526ab7c.

File tree

2 files changed: 37 additions and 55 deletions.

clang/lib/Headers/aie2p_aie_api_compat.h

Lines changed: 0 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -327,16 +327,8 @@ inline __attribute__((always_inline)) v16cint16 shift_bytes(v16cint16 , v16cint1
327327
inline __attribute__((always_inline)) v8cint32 shift_bytes(v8cint32 , v8cint32 , unsigned int );
328328
inline __attribute__((always_inline)) v16cint16 shift(v16cint16 , v16cint16 , unsigned int );
329329
inline __attribute__((always_inline)) v8cint32 shift(v8cint32 , v8cint32 , unsigned int );
330-
inline __attribute__((always_inline)) v64int8 broadcast_s8(int );
331-
inline __attribute__((always_inline)) v32int16 broadcast_s16(int );
332-
inline __attribute__((always_inline)) v64uint8 broadcast_u8(unsigned int );
333-
inline __attribute__((always_inline)) v32uint16 broadcast_u16(unsigned int );
334330
inline __attribute__((always_inline)) v16cint16 broadcast_c16(cint16 );
335331
inline __attribute__((always_inline)) v8cint32 broadcast_c32(cint32 );
336-
inline __attribute__((always_inline)) v32int16 broadcast_to_v32int16(int );
337-
inline __attribute__((always_inline)) v64uint8 broadcast_to_v64uint8(unsigned int );
338-
inline __attribute__((always_inline)) v32uint16 broadcast_to_v32uint16(unsigned int );
339-
inline __attribute__((always_inline)) v16uint32 broadcast_to_v16uint32(v2uint32 );
340332
inline __attribute__((always_inline)) v16cint16 broadcast_to_v16cint16(cint16 );
341333
inline __attribute__((always_inline)) v16cint16 broadcast_to_v16cint16(v2cint16 );
342334
inline __attribute__((always_inline)) v8cint32 broadcast_to_v8cint32(cint32 );

clang/lib/Headers/aie2p_scl2vec.h

Lines changed: 37 additions & 47 deletions
Original file line number | Diff line number | Diff line change
@@ -31,14 +31,14 @@ inline decltype(auto) vector_extract64(T a, int idx, int sign) {
3131
return sign ? (v2int32){a[idx], a[idx + 1]} : (v2uint32){a[idx], a[idx + 1]};
3232
}
3333

34-
inline v16int32 vector_broadcast64(v2int32 b) {
35-
return {b[0], b[1], b[0], b[1], b[0], b[1], b[0], b[1],
36-
b[0], b[1], b[0], b[1], b[0], b[1], b[0], b[1]};
37-
}
38-
inline v16uint32 vector_broadcast64(v2uint32 b) {
39-
return {b[0], b[1], b[0], b[1], b[0], b[1], b[0], b[1],
40-
b[0], b[1], b[0], b[1], b[0], b[1], b[0], b[1]};
41-
}
34+
#define VECTOR_BROADCAST64_FUNC(outType, inType) \
35+
inline outType vector_broadcast64(inType b) { \
36+
return {b[0], b[1], b[0], b[1], b[0], b[1], b[0], b[1], \
37+
b[0], b[1], b[0], b[1], b[0], b[1], b[0], b[1]}; \
38+
}
39+
40+
VECTOR_BROADCAST64_FUNC(v16int32, v2int32)
41+
VECTOR_BROADCAST64_FUNC(v16uint32, v2uint32)
4242

4343
INTRINSIC(v128int4)
4444
shiftx(v128int4 a, v128int4 b, int step, unsigned int shift) {
@@ -1354,10 +1354,10 @@ INTRINSIC(void *) extract_address(v16int32 v, int idx) {
13541354
}
13551355
// broadcast from scalar (alternative syntax to broadcast to vector)
13561356
INTRINSIC(v64int8)
1357-
broadcast_s8(char b) { return b - v64int8{0}; }
1357+
broadcast_s8(int b) { return (char)b - v64int8{0}; }
13581358

13591359
INTRINSIC(v32int16)
1360-
broadcast_s16(short b) { return b - v32int16{0}; }
1360+
broadcast_s16(int b) { return (short)b - v32int16{0}; }
13611361

13621362
INTRINSIC(v16int32)
13631363
broadcast_s32(int b) { return b - v16int32{0}; }
@@ -1369,10 +1369,10 @@ INTRINSIC(v16int32)
13691369
broadcast_v2s32(v2int32 b) { return vector_broadcast64(b); }
13701370

13711371
INTRINSIC(v64uint8)
1372-
broadcast_u8(unsigned char b) { return b - v64uint8{0}; }
1372+
broadcast_u8(unsigned int b) { return (unsigned char)b - v64uint8{0}; }
13731373

13741374
INTRINSIC(v32uint16)
1375-
broadcast_u16(unsigned short b) { return b - v32uint16{0}; }
1375+
broadcast_u16(unsigned int b) { return (unsigned short)b - v32uint16{0}; }
13761376

13771377
INTRINSIC(v16uint32)
13781378
broadcast_u32(unsigned int b) { return b - v16uint32{0}; }
@@ -1402,10 +1402,10 @@ broadcast_c32 (cint32 b) { return vector_broadcast64(b);}
14021402

14031403
// broadcast to vector (alternative syntax to broadcast from scalar)
14041404
INTRINSIC(v128int4)
1405-
broadcast_to_v128int4(v2int4 b) { return b - v128int4{0}; }
1405+
broadcast_to_v128int4(v2int4 b) { return broadcast_s8((int)(char)b); }
14061406

14071407
INTRINSIC(v128int4)
1408-
broadcast_to_v128int4(v4int4 b) { return broadcast_s16((short)b); }
1408+
broadcast_to_v128int4(v4int4 b) { return broadcast_s16((int)(short)b); }
14091409

14101410
INTRINSIC(v128int4)
14111411
broadcast_to_v128int4(v8int4 b) { return broadcast_s32((int)b); }
@@ -1414,10 +1414,10 @@ INTRINSIC(v128int4)
14141414
broadcast_to_v128int4(v16int4 b) { return vector_broadcast64((v2int32)b); }
14151415

14161416
INTRINSIC(v64int8)
1417-
broadcast_to_v64int8(char b) { return b - v64int8{0}; }
1417+
broadcast_to_v64int8(int b) { return broadcast_s8(b); }
14181418

14191419
INTRINSIC(v64int8)
1420-
broadcast_to_v64int8(v2int8 b) { return broadcast_s16((short)b); }
1420+
broadcast_to_v64int8(v2int8 b) { return broadcast_s16((int)(short)b); }
14211421

14221422
INTRINSIC(v64int8)
14231423
broadcast_to_v64int8(v4int8 b) { return broadcast_s32((int)b); }
@@ -1426,7 +1426,7 @@ INTRINSIC(v64int8)
14261426
broadcast_to_v64int8(v8int8 b) { return vector_broadcast64((v2int32)b); }
14271427

14281428
INTRINSIC(v32int16)
1429-
broadcast_to_v32int16(short b) { return b - v32int16{0}; }
1429+
broadcast_to_v32int16(int b) { return broadcast_s16(b); }
14301430

14311431
INTRINSIC(v32int16)
14321432
broadcast_to_v32int16(v2int16 b) { return broadcast_s32((int)b); }
@@ -1435,7 +1435,7 @@ INTRINSIC(v32int16)
14351435
broadcast_to_v32int16(v4int16 b) { return vector_broadcast64((v2int32)b); }
14361436

14371437
INTRINSIC(v16int32)
1438-
broadcast_to_v16int32(int b) { return b - v16int32{0}; }
1438+
broadcast_to_v16int32(int b) { return broadcast_s32(b); }
14391439

14401440
INTRINSIC(v16int32)
14411441
broadcast_to_v16int32(mask64 b) { return vector_broadcast64((v2int32)b); }
@@ -1444,10 +1444,14 @@ INTRINSIC(v16int32)
14441444
broadcast_to_v16int32(v2int32 b) { return vector_broadcast64(b); }
14451445

14461446
INTRINSIC(v128uint4)
1447-
broadcast_to_v128uint4(v2uint4 b) { return b - v128uint4{0}; }
1447+
broadcast_to_v128uint4(v2uint4 b) {
1448+
return broadcast_u8((unsigned int)(unsigned char)b);
1449+
}
14481450

14491451
INTRINSIC(v128uint4)
1450-
broadcast_to_v128uint4(v4uint4 b) { return broadcast_u16((unsigned short)b); }
1452+
broadcast_to_v128uint4(v4uint4 b) {
1453+
return broadcast_u16((unsigned int)(unsigned short)b);
1454+
}
14511455

14521456
INTRINSIC(v128uint4)
14531457
broadcast_to_v128uint4(v8uint4 b) { return broadcast_u32((unsigned int)b); }
@@ -1456,10 +1460,12 @@ INTRINSIC(v128uint4)
14561460
broadcast_to_v128uint4(v16uint4 b) { return vector_broadcast64((v2uint32)b); }
14571461

14581462
INTRINSIC(v64uint8)
1459-
broadcast_to_v64uint8(unsigned char b) { return b - v64uint8{0}; }
1463+
broadcast_to_v64uint8(unsigned int b) { return broadcast_u8(b); }
14601464

14611465
INTRINSIC(v64uint8)
1462-
broadcast_to_v64uint8(v2uint8 b) { return broadcast_u16((unsigned short)b); }
1466+
broadcast_to_v64uint8(v2uint8 b) {
1467+
return broadcast_u16((unsigned int)(unsigned short)b);
1468+
}
14631469

14641470
INTRINSIC(v64uint8)
14651471
broadcast_to_v64uint8(v4uint8 b) { return broadcast_u32((unsigned int)b); }
@@ -1468,7 +1474,7 @@ INTRINSIC(v64uint8)
14681474
broadcast_to_v64uint8(v8uint8 b) { return vector_broadcast64((v2uint32)b); }
14691475

14701476
INTRINSIC(v32uint16)
1471-
broadcast_to_v32uint16(unsigned short b) { return b - v32uint16{0}; }
1477+
broadcast_to_v32uint16(unsigned int b) { return broadcast_u16(b); }
14721478

14731479
INTRINSIC(v32uint16)
14741480
broadcast_to_v32uint16(v2uint16 b) { return broadcast_u32((unsigned int)b); }
@@ -1477,7 +1483,7 @@ INTRINSIC(v32uint16)
14771483
broadcast_to_v32uint16(v4uint16 b) { return vector_broadcast64((v2uint32)b); }
14781484

14791485
INTRINSIC(v16uint32)
1480-
broadcast_to_v16uint32(unsigned int b) { return b - v16uint32{0}; }
1486+
broadcast_to_v16uint32(unsigned int b) { return broadcast_u32(b); }
14811487

14821488
INTRINSIC(v16uint32)
14831489
broadcast_to_v16uint32(mask64 b) { return vector_broadcast64((v2uint32)b); }
@@ -1530,15 +1536,6 @@ broadcast_to_v16float(v2float b) {
15301536
return broadcast_s64(as_mask64);
15311537
}
15321538

1533-
INTRINSIC(v32bfloat16)
1534-
broadcast_zero_to_v32bfloat16() { return broadcast_to_v32bfloat16(0); }
1535-
1536-
INTRINSIC(v32bfloat16)
1537-
broadcast_one_to_v32bfloat16() { return broadcast_to_v32bfloat16(1); }
1538-
1539-
INTRINSIC(v16float)
1540-
broadcast_one_to_v16float() { return broadcast_to_v16float(1); }
1541-
15421539
// Right-most insertion (left shift)
15431540
INTRINSIC(v64int8) shiftl_elem(v64int8 v, int s) {
15441541
return shift_bytes(v, broadcast_s8(s), 1);
@@ -1644,13 +1641,16 @@ INTRINSIC(v16uint32) broadcast_one_to_v16uint32() {
16441641
INTRINSIC(v32bfloat16) broadcast_one_bfloat16() {
16451642
return broadcast_bfloat16(1);
16461643
}
1644+
INTRINSIC(v32bfloat16)
1645+
broadcast_one_to_v32bfloat16() { return broadcast_one_bfloat16(); }
1646+
1647+
INTRINSIC(v16float)
1648+
broadcast_one_to_v16float() { return broadcast_to_v16float(1); }
16471649

16481650
#if 0
16491651
INTRINSIC(v16cint16) broadcast_one_c16() { return broadcast_c16(1); }
16501652

16511653
INTRINSIC(v8cint32) broadcast_one_c32() { return broadcast_c32(1); }
1652-
1653-
INTRINSIC(v16float) broadcast_one_float() { return broadcast_float(1); }
16541654
#endif
16551655

16561656
// broadcast value zero(0) to all vector lanes
@@ -1708,13 +1708,9 @@ INTRINSIC(v16uint32) broadcast_zero_to_v16uint32() { return broadcast_u32(0); }
17081708
broadcast_zero_bfloat16() {
17091709
return broadcast_bfloat16(0);
17101710
}
1711+
INTRINSIC(v32bfloat16)
1712+
broadcast_zero_to_v32bfloat16() { return broadcast_to_v32bfloat16(0); }
17111713

1712-
[[deprecated(
1713-
"Function 'broadcast_zero_float' is deprecated. Please use the "
1714-
"'broadcast_zero_to_v16float' variant instead.")]] INTRINSIC(v16float)
1715-
broadcast_zero_float() {
1716-
return broadcast_float(0);
1717-
}
17181714
INTRINSIC(v16float) broadcast_zero_to_v16float() { return broadcast_float(0); }
17191715

17201716
#if 0
@@ -1791,25 +1787,19 @@ broadcast_elem(v16float v, int idx) {
17911787
return vector_broadcast64(ext_v2int32(v, idx, 0));
17921788
}
17931789

1794-
INTRINSIC(v64int8)
1795-
broadcast_to_v64int8(int b) { return broadcast_s8((int)b); }
1796-
17971790
INTRINSIC(v16acc64) broadcast_zero_to_v16acc64() { return v16acc64{}; }
1798-
17991791
[[deprecated("Function 'clr' is deprecated. Please use the 'broadcast_zero_to' "
18001792
"variant instead.")]] INTRINSIC(v16acc64) clr16() {
18011793
return broadcast_zero_to_v16acc64();
18021794
}
18031795

18041796
INTRINSIC(v32acc64) broadcast_zero_to_v32acc64() { return v32acc64{}; }
1805-
18061797
[[deprecated("Function 'clr' is deprecated. Please use the 'broadcast_zero_to' "
18071798
"variant instead.")]] INTRINSIC(v32acc64) clr32() {
18081799
return broadcast_zero_to_v32acc64();
18091800
}
18101801

18111802
INTRINSIC(v64acc32) broadcast_zero_to_v64acc32() { return v64acc32{}; }
1812-
18131803
[[deprecated("Function 'clr' is deprecated. Please use the 'broadcast_zero_to' "
18141804
"variant instead.")]] INTRINSIC(v64acc32) clr64() {
18151805
return broadcast_zero_to_v64acc32();

0 commit comments

Comments
 (0)