diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp index 3cd923c0ba058..bfc961df95dc8 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp @@ -18,12 +18,14 @@ #include "WebAssemblySubtarget.h" #include "WebAssemblyTargetMachine.h" #include "WebAssemblyUtilities.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/CodeGen/CallingConvLower.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineJumpTableInfo.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/SDPatternMatch.h" #include "llvm/CodeGen/SelectionDAG.h" #include "llvm/CodeGen/SelectionDAGNodes.h" #include "llvm/IR/DiagnosticInfo.h" @@ -3214,20 +3216,26 @@ static SDValue performTruncateCombine(SDNode *N, static SDValue performBitcastCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) { + using namespace llvm::SDPatternMatch; auto &DAG = DCI.DAG; SDLoc DL(N); SDValue Src = N->getOperand(0); EVT VT = N->getValueType(0); EVT SrcVT = Src.getValueType(); - // bitcast to iN + bool Vectorizable = DCI.isBeforeLegalize() && VT.isScalarInteger() && + SrcVT.isFixedLengthVector() && + SrcVT.getScalarType() == MVT::i1; + + if (!Vectorizable) + return SDValue(); + + unsigned NumElts = SrcVT.getVectorNumElements(); + EVT Width = MVT::getIntegerVT(128 / NumElts); + + // bitcast to iN, where N = 2, 4, 8, 16 (legal) // ==> bitmask - if (DCI.isBeforeLegalize() && VT.isScalarInteger() && - SrcVT.isFixedLengthVector() && SrcVT.getScalarType() == MVT::i1) { - unsigned NumElts = SrcVT.getVectorNumElements(); - if (NumElts != 2 && NumElts != 4 && NumElts != 8 && NumElts != 16) - return SDValue(); - EVT Width = MVT::getIntegerVT(128 / NumElts); + if (NumElts == 2 || NumElts == 4 || NumElts == 8 || NumElts == 16) { return DAG.getZExtOrTrunc( 
DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, MVT::i32, {DAG.getConstant(Intrinsic::wasm_bitmask, DL, MVT::i32), @@ -3236,6 +3244,57 @@ static SDValue performBitcastCombine(SDNode *N, DL, VT); } + // bitcast (setcc ...) to concat iN, where N = 32 and 64 (illegal) + if (NumElts == 32 || NumElts == 64) { + // Strategy: We will setcc them separately in v16i1 + // Bitcast them to i16, extend them to either i32 or i64. + // Add them together, shifting left one by one. + SDValue Concat, SetCCVector; + ISD::CondCode SetCond; + + if (!sd_match(N, m_BitCast(m_c_SetCC(m_Value(Concat), + m_VectorVT(m_Value(SetCCVector)), + m_CondCode(SetCond))))) + return SDValue(); + // COMMITTED at this point, SDValue() if match fails. + if (Concat.getOpcode() != ISD::CONCAT_VECTORS) + return SDValue(); + // CHECK IF VECTOR is a constant, i.e. all values are the same + if (!ISD::isBuildVectorOfConstantSDNodes(SetCCVector.getNode())) + return SDValue(); + + SmallVector Vec; + for (SDValue Const : SetCCVector->ops()) { + Vec.push_back(Const); + if (Vec.size() >= 16) + break; + } + + // Build our own version of splat Vector. + SDValue SplitSetCCVec = DAG.getBuildVector(MVT::v16i8, DL, Vec); + + SmallVector VectorsToShuffle; + for (SDValue V : Concat->ops()) + VectorsToShuffle.push_back(DAG.getBitcast( + MVT::i16, DAG.getSetCC(DL, MVT::v16i1, V, SplitSetCCVec, SetCond))); + + MVT ReturnType = VectorsToShuffle.size() == 2 ? 
MVT::i32 : MVT::i64; + SDValue ReturningInteger = DAG.getConstant(0, DL, ReturnType); + + for (SDValue V : VectorsToShuffle) { + ReturningInteger = DAG.getNode( + ISD::SHL, DL, ReturnType, + {DAG.getShiftAmountConstant(16, ReturnType, DL), ReturningInteger}); + + SDValue ExtendedV = DAG.getZExtOrTrunc(V, DL, ReturnType); + ReturningInteger = + DAG.getNode(ISD::ADD, DL, ReturnType, {ReturningInteger, ExtendedV}); + } + + // ReturningInteger->print(llvm::errs()); + return ReturningInteger; + } + return SDValue(); } diff --git a/llvm/test/CodeGen/WebAssembly/simd-illegal-bitmask.ll b/llvm/test/CodeGen/WebAssembly/simd-illegal-bitmask.ll new file mode 100644 index 0000000000000..58152afbfcb5a --- /dev/null +++ b/llvm/test/CodeGen/WebAssembly/simd-illegal-bitmask.ll @@ -0,0 +1,548 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc < %s -O3 -verify-machineinstrs -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -mattr=+simd128 | FileCheck %s + + +target triple = "wasm64" + + +define i16 @legal_bitcast_v16i8(<16 x i8> %x) { +; CHECK-LABEL: legal_bitcast_v16i8: +; CHECK: .functype legal_bitcast_v16i8 (v128) -> (i32) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: v128.const $push0=, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16 +; CHECK-NEXT: i8x16.eq $push1=, $0, $pop0 +; CHECK-NEXT: i8x16.bitmask $push2=, $pop1 +; CHECK-NEXT: return $pop2 + %z = icmp eq <16 x i8> %x, splat (i8 16) + %res = bitcast <16 x i1> %z to i16 + ret i16 %res +} + +define i32 @optimize_illegal_bitcast_v32i8(<32 x i8> %x) { +; CHECK-LABEL: optimize_illegal_bitcast_v32i8: +; CHECK: .functype optimize_illegal_bitcast_v32i8 (v128, v128) -> (i32) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: i32.const $push2=, 16 +; CHECK-NEXT: v128.const $push10=, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32 +; CHECK-NEXT: local.tee $push9=, $2=, $pop10 +; CHECK-NEXT: i8x16.eq $push0=, $0, $pop9 +; 
CHECK-NEXT: i8x16.bitmask $push1=, $pop0 +; CHECK-NEXT: i32.const $push8=, 16 +; CHECK-NEXT: i32.add $push3=, $pop1, $pop8 +; CHECK-NEXT: i32.shl $push4=, $pop2, $pop3 +; CHECK-NEXT: i8x16.eq $push5=, $1, $2 +; CHECK-NEXT: i8x16.bitmask $push6=, $pop5 +; CHECK-NEXT: i32.add $push7=, $pop4, $pop6 +; CHECK-NEXT: return $pop7 + %z = icmp eq <32 x i8> %x, splat (i8 32) + %res = bitcast <32 x i1> %z to i32 + ret i32 %res +} + + +define i64 @optimize_illegal_bitcast_v64i8(<64 x i8> %x) { +; CHECK-LABEL: optimize_illegal_bitcast_v64i8: +; CHECK: .functype optimize_illegal_bitcast_v64i8 (v128, v128, v128, v128) -> (i64) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: i64.const $push3=, 16 +; CHECK-NEXT: i64.const $push24=, 16 +; CHECK-NEXT: i64.const $push23=, 16 +; CHECK-NEXT: v128.const $push22=, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64 +; CHECK-NEXT: local.tee $push21=, $4=, $pop22 +; CHECK-NEXT: i8x16.eq $push0=, $0, $pop21 +; CHECK-NEXT: i8x16.bitmask $push1=, $pop0 +; CHECK-NEXT: i64.extend_i32_u $push2=, $pop1 +; CHECK-NEXT: i64.const $push20=, 16 +; CHECK-NEXT: i64.add $push4=, $pop2, $pop20 +; CHECK-NEXT: i64.shl $push5=, $pop23, $pop4 +; CHECK-NEXT: i8x16.eq $push6=, $1, $4 +; CHECK-NEXT: i8x16.bitmask $push7=, $pop6 +; CHECK-NEXT: i64.extend_i32_u $push8=, $pop7 +; CHECK-NEXT: i64.add $push9=, $pop5, $pop8 +; CHECK-NEXT: i64.shl $push10=, $pop24, $pop9 +; CHECK-NEXT: i8x16.eq $push11=, $2, $4 +; CHECK-NEXT: i8x16.bitmask $push12=, $pop11 +; CHECK-NEXT: i64.extend_i32_u $push13=, $pop12 +; CHECK-NEXT: i64.add $push14=, $pop10, $pop13 +; CHECK-NEXT: i64.shl $push15=, $pop3, $pop14 +; CHECK-NEXT: i8x16.eq $push16=, $3, $4 +; CHECK-NEXT: i8x16.bitmask $push17=, $pop16 +; CHECK-NEXT: i64.extend_i32_u $push18=, $pop17 +; CHECK-NEXT: i64.add $push19=, $pop15, $pop18 +; CHECK-NEXT: return $pop19 + %z = icmp eq <64 x i8> %x, splat (i8 64) + %res = bitcast <64 x i1> %z to i64 + ret i64 %res +} + +define i64 @optimize_illegal_bitcast_v64i4(<64 x i4> %x) { +; 
CHECK-LABEL: optimize_illegal_bitcast_v64i4: +; CHECK: .functype optimize_illegal_bitcast_v64i4 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> (i64) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: global.get $push355=, __stack_pointer +; CHECK-NEXT: i64.const $push356=, 16 +; CHECK-NEXT: i64.sub $drop=, $pop355, $pop356 +; CHECK-NEXT: i8x16.splat $push273=, $0 +; CHECK-NEXT: i8x16.replace_lane $push274=, $pop273, 1, $1 +; CHECK-NEXT: i8x16.replace_lane $push275=, $pop274, 2, $2 +; CHECK-NEXT: i8x16.replace_lane $push276=, $pop275, 3, $3 +; CHECK-NEXT: i8x16.replace_lane $push277=, $pop276, 4, $4 +; CHECK-NEXT: i8x16.replace_lane $push278=, $pop277, 5, $5 +; CHECK-NEXT: i8x16.replace_lane $push279=, $pop278, 6, $6 +; CHECK-NEXT: i8x16.replace_lane $push280=, $pop279, 7, $7 +; CHECK-NEXT: i8x16.replace_lane $push281=, $pop280, 8, $8 +; CHECK-NEXT: i8x16.replace_lane $push282=, $pop281, 9, $9 +; CHECK-NEXT: i8x16.replace_lane $push283=, $pop282, 10, $10 +; CHECK-NEXT: i8x16.replace_lane $push284=, $pop283, 11, $11 +; CHECK-NEXT: i8x16.replace_lane $push285=, $pop284, 12, $12 +; CHECK-NEXT: i8x16.replace_lane $push286=, $pop285, 13, $13 +; CHECK-NEXT: i8x16.replace_lane $push287=, $pop286, 14, $14 +; CHECK-NEXT: i8x16.replace_lane $push288=, $pop287, 15, $15 +; CHECK-NEXT: v128.const $push460=, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15 +; CHECK-NEXT: local.tee $push459=, $64=, $pop460 +; CHECK-NEXT: v128.and $push289=, $pop288, $pop459 +; CHECK-NEXT: v128.const $push458=, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 +; CHECK-NEXT: local.tee $push457=, $65=, $pop458 +; CHECK-NEXT: i8x16.eq $push456=, $pop289, $pop457 +; CHECK-NEXT: local.tee $push455=, $66=, $pop456 +; CHECK-NEXT: 
i8x16.extract_lane_u $push290=, $pop455, 0 +; CHECK-NEXT: i32.const $push18=, 1 +; CHECK-NEXT: i32.and $push291=, $pop290, $pop18 +; CHECK-NEXT: i8x16.extract_lane_u $push292=, $66, 1 +; CHECK-NEXT: i32.const $push454=, 1 +; CHECK-NEXT: i32.and $push293=, $pop292, $pop454 +; CHECK-NEXT: i32.const $push453=, 1 +; CHECK-NEXT: i32.shl $push294=, $pop293, $pop453 +; CHECK-NEXT: i32.or $push295=, $pop291, $pop294 +; CHECK-NEXT: i8x16.extract_lane_u $push296=, $66, 2 +; CHECK-NEXT: i32.const $push452=, 1 +; CHECK-NEXT: i32.and $push297=, $pop296, $pop452 +; CHECK-NEXT: i32.const $push121=, 2 +; CHECK-NEXT: i32.shl $push298=, $pop297, $pop121 +; CHECK-NEXT: i32.or $push299=, $pop295, $pop298 +; CHECK-NEXT: i8x16.extract_lane_u $push300=, $66, 3 +; CHECK-NEXT: i32.const $push451=, 1 +; CHECK-NEXT: i32.and $push301=, $pop300, $pop451 +; CHECK-NEXT: i32.const $push126=, 3 +; CHECK-NEXT: i32.shl $push302=, $pop301, $pop126 +; CHECK-NEXT: i32.or $push303=, $pop299, $pop302 +; CHECK-NEXT: i8x16.extract_lane_u $push304=, $66, 4 +; CHECK-NEXT: i32.const $push450=, 1 +; CHECK-NEXT: i32.and $push305=, $pop304, $pop450 +; CHECK-NEXT: i32.const $push131=, 4 +; CHECK-NEXT: i32.shl $push306=, $pop305, $pop131 +; CHECK-NEXT: i32.or $push307=, $pop303, $pop306 +; CHECK-NEXT: i8x16.extract_lane_u $push308=, $66, 5 +; CHECK-NEXT: i32.const $push449=, 1 +; CHECK-NEXT: i32.and $push309=, $pop308, $pop449 +; CHECK-NEXT: i32.const $push136=, 5 +; CHECK-NEXT: i32.shl $push310=, $pop309, $pop136 +; CHECK-NEXT: i32.or $push311=, $pop307, $pop310 +; CHECK-NEXT: i8x16.extract_lane_u $push312=, $66, 6 +; CHECK-NEXT: i32.const $push448=, 1 +; CHECK-NEXT: i32.and $push313=, $pop312, $pop448 +; CHECK-NEXT: i32.const $push141=, 6 +; CHECK-NEXT: i32.shl $push314=, $pop313, $pop141 +; CHECK-NEXT: i32.or $push315=, $pop311, $pop314 +; CHECK-NEXT: i8x16.extract_lane_u $push316=, $66, 7 +; CHECK-NEXT: i32.const $push447=, 1 +; CHECK-NEXT: i32.and $push317=, $pop316, $pop447 +; CHECK-NEXT: i32.const 
$push146=, 7 +; CHECK-NEXT: i32.shl $push318=, $pop317, $pop146 +; CHECK-NEXT: i32.or $push319=, $pop315, $pop318 +; CHECK-NEXT: i8x16.extract_lane_u $push320=, $66, 8 +; CHECK-NEXT: i32.const $push446=, 1 +; CHECK-NEXT: i32.and $push321=, $pop320, $pop446 +; CHECK-NEXT: i32.const $push151=, 8 +; CHECK-NEXT: i32.shl $push322=, $pop321, $pop151 +; CHECK-NEXT: i32.or $push323=, $pop319, $pop322 +; CHECK-NEXT: i8x16.extract_lane_u $push324=, $66, 9 +; CHECK-NEXT: i32.const $push445=, 1 +; CHECK-NEXT: i32.and $push325=, $pop324, $pop445 +; CHECK-NEXT: i32.const $push156=, 9 +; CHECK-NEXT: i32.shl $push326=, $pop325, $pop156 +; CHECK-NEXT: i32.or $push327=, $pop323, $pop326 +; CHECK-NEXT: i8x16.extract_lane_u $push328=, $66, 10 +; CHECK-NEXT: i32.const $push444=, 1 +; CHECK-NEXT: i32.and $push329=, $pop328, $pop444 +; CHECK-NEXT: i32.const $push161=, 10 +; CHECK-NEXT: i32.shl $push330=, $pop329, $pop161 +; CHECK-NEXT: i32.or $push331=, $pop327, $pop330 +; CHECK-NEXT: i8x16.extract_lane_u $push332=, $66, 11 +; CHECK-NEXT: i32.const $push443=, 1 +; CHECK-NEXT: i32.and $push333=, $pop332, $pop443 +; CHECK-NEXT: i32.const $push166=, 11 +; CHECK-NEXT: i32.shl $push334=, $pop333, $pop166 +; CHECK-NEXT: i32.or $push335=, $pop331, $pop334 +; CHECK-NEXT: i8x16.extract_lane_u $push336=, $66, 12 +; CHECK-NEXT: i32.const $push442=, 1 +; CHECK-NEXT: i32.and $push337=, $pop336, $pop442 +; CHECK-NEXT: i32.const $push171=, 12 +; CHECK-NEXT: i32.shl $push338=, $pop337, $pop171 +; CHECK-NEXT: i32.or $push339=, $pop335, $pop338 +; CHECK-NEXT: i8x16.extract_lane_u $push340=, $66, 13 +; CHECK-NEXT: i32.const $push441=, 1 +; CHECK-NEXT: i32.and $push341=, $pop340, $pop441 +; CHECK-NEXT: i32.const $push176=, 13 +; CHECK-NEXT: i32.shl $push342=, $pop341, $pop176 +; CHECK-NEXT: i32.or $push343=, $pop339, $pop342 +; CHECK-NEXT: i8x16.extract_lane_u $push344=, $66, 14 +; CHECK-NEXT: i32.const $push440=, 1 +; CHECK-NEXT: i32.and $push345=, $pop344, $pop440 +; CHECK-NEXT: i32.const $push181=, 14 +; 
CHECK-NEXT: i32.shl $push346=, $pop345, $pop181 +; CHECK-NEXT: i32.or $push347=, $pop343, $pop346 +; CHECK-NEXT: i8x16.extract_lane_u $push348=, $66, 15 +; CHECK-NEXT: i32.const $push185=, 15 +; CHECK-NEXT: i32.shl $push349=, $pop348, $pop185 +; CHECK-NEXT: i32.or $push350=, $pop347, $pop349 +; CHECK-NEXT: i32.const $push188=, 65535 +; CHECK-NEXT: i32.and $push351=, $pop350, $pop188 +; CHECK-NEXT: i8x16.splat $push194=, $16 +; CHECK-NEXT: i8x16.replace_lane $push195=, $pop194, 1, $17 +; CHECK-NEXT: i8x16.replace_lane $push196=, $pop195, 2, $18 +; CHECK-NEXT: i8x16.replace_lane $push197=, $pop196, 3, $19 +; CHECK-NEXT: i8x16.replace_lane $push198=, $pop197, 4, $20 +; CHECK-NEXT: i8x16.replace_lane $push199=, $pop198, 5, $21 +; CHECK-NEXT: i8x16.replace_lane $push200=, $pop199, 6, $22 +; CHECK-NEXT: i8x16.replace_lane $push201=, $pop200, 7, $23 +; CHECK-NEXT: i8x16.replace_lane $push202=, $pop201, 8, $24 +; CHECK-NEXT: i8x16.replace_lane $push203=, $pop202, 9, $25 +; CHECK-NEXT: i8x16.replace_lane $push204=, $pop203, 10, $26 +; CHECK-NEXT: i8x16.replace_lane $push205=, $pop204, 11, $27 +; CHECK-NEXT: i8x16.replace_lane $push206=, $pop205, 12, $28 +; CHECK-NEXT: i8x16.replace_lane $push207=, $pop206, 13, $29 +; CHECK-NEXT: i8x16.replace_lane $push208=, $pop207, 14, $30 +; CHECK-NEXT: i8x16.replace_lane $push209=, $pop208, 15, $31 +; CHECK-NEXT: v128.and $push210=, $pop209, $64 +; CHECK-NEXT: i8x16.eq $push439=, $pop210, $65 +; CHECK-NEXT: local.tee $push438=, $66=, $pop439 +; CHECK-NEXT: i8x16.extract_lane_u $push270=, $pop438, 15 +; CHECK-NEXT: i32.const $push93=, 31 +; CHECK-NEXT: i32.shl $push271=, $pop270, $pop93 +; CHECK-NEXT: i8x16.extract_lane_u $push266=, $66, 14 +; CHECK-NEXT: i32.const $push437=, 1 +; CHECK-NEXT: i32.and $push267=, $pop266, $pop437 +; CHECK-NEXT: i32.const $push89=, 30 +; CHECK-NEXT: i32.shl $push268=, $pop267, $pop89 +; CHECK-NEXT: i8x16.extract_lane_u $push262=, $66, 13 +; CHECK-NEXT: i32.const $push436=, 1 +; CHECK-NEXT: i32.and 
$push263=, $pop262, $pop436 +; CHECK-NEXT: i32.const $push84=, 29 +; CHECK-NEXT: i32.shl $push264=, $pop263, $pop84 +; CHECK-NEXT: i8x16.extract_lane_u $push258=, $66, 12 +; CHECK-NEXT: i32.const $push435=, 1 +; CHECK-NEXT: i32.and $push259=, $pop258, $pop435 +; CHECK-NEXT: i32.const $push79=, 28 +; CHECK-NEXT: i32.shl $push260=, $pop259, $pop79 +; CHECK-NEXT: i8x16.extract_lane_u $push254=, $66, 11 +; CHECK-NEXT: i32.const $push434=, 1 +; CHECK-NEXT: i32.and $push255=, $pop254, $pop434 +; CHECK-NEXT: i32.const $push74=, 27 +; CHECK-NEXT: i32.shl $push256=, $pop255, $pop74 +; CHECK-NEXT: i8x16.extract_lane_u $push250=, $66, 10 +; CHECK-NEXT: i32.const $push433=, 1 +; CHECK-NEXT: i32.and $push251=, $pop250, $pop433 +; CHECK-NEXT: i32.const $push69=, 26 +; CHECK-NEXT: i32.shl $push252=, $pop251, $pop69 +; CHECK-NEXT: i8x16.extract_lane_u $push246=, $66, 9 +; CHECK-NEXT: i32.const $push432=, 1 +; CHECK-NEXT: i32.and $push247=, $pop246, $pop432 +; CHECK-NEXT: i32.const $push64=, 25 +; CHECK-NEXT: i32.shl $push248=, $pop247, $pop64 +; CHECK-NEXT: i8x16.extract_lane_u $push242=, $66, 8 +; CHECK-NEXT: i32.const $push431=, 1 +; CHECK-NEXT: i32.and $push243=, $pop242, $pop431 +; CHECK-NEXT: i32.const $push59=, 24 +; CHECK-NEXT: i32.shl $push244=, $pop243, $pop59 +; CHECK-NEXT: i8x16.extract_lane_u $push238=, $66, 7 +; CHECK-NEXT: i32.const $push430=, 1 +; CHECK-NEXT: i32.and $push239=, $pop238, $pop430 +; CHECK-NEXT: i32.const $push54=, 23 +; CHECK-NEXT: i32.shl $push240=, $pop239, $pop54 +; CHECK-NEXT: i8x16.extract_lane_u $push234=, $66, 6 +; CHECK-NEXT: i32.const $push429=, 1 +; CHECK-NEXT: i32.and $push235=, $pop234, $pop429 +; CHECK-NEXT: i32.const $push49=, 22 +; CHECK-NEXT: i32.shl $push236=, $pop235, $pop49 +; CHECK-NEXT: i8x16.extract_lane_u $push230=, $66, 5 +; CHECK-NEXT: i32.const $push428=, 1 +; CHECK-NEXT: i32.and $push231=, $pop230, $pop428 +; CHECK-NEXT: i32.const $push44=, 21 +; CHECK-NEXT: i32.shl $push232=, $pop231, $pop44 +; CHECK-NEXT: 
i8x16.extract_lane_u $push226=, $66, 4 +; CHECK-NEXT: i32.const $push427=, 1 +; CHECK-NEXT: i32.and $push227=, $pop226, $pop427 +; CHECK-NEXT: i32.const $push39=, 20 +; CHECK-NEXT: i32.shl $push228=, $pop227, $pop39 +; CHECK-NEXT: i8x16.extract_lane_u $push222=, $66, 3 +; CHECK-NEXT: i32.const $push426=, 1 +; CHECK-NEXT: i32.and $push223=, $pop222, $pop426 +; CHECK-NEXT: i32.const $push34=, 19 +; CHECK-NEXT: i32.shl $push224=, $pop223, $pop34 +; CHECK-NEXT: i8x16.extract_lane_u $push218=, $66, 2 +; CHECK-NEXT: i32.const $push425=, 1 +; CHECK-NEXT: i32.and $push219=, $pop218, $pop425 +; CHECK-NEXT: i32.const $push29=, 18 +; CHECK-NEXT: i32.shl $push220=, $pop219, $pop29 +; CHECK-NEXT: i8x16.extract_lane_u $push214=, $66, 1 +; CHECK-NEXT: i32.const $push424=, 1 +; CHECK-NEXT: i32.and $push215=, $pop214, $pop424 +; CHECK-NEXT: i32.const $push24=, 17 +; CHECK-NEXT: i32.shl $push216=, $pop215, $pop24 +; CHECK-NEXT: i8x16.extract_lane_u $push211=, $66, 0 +; CHECK-NEXT: i32.const $push423=, 1 +; CHECK-NEXT: i32.and $push212=, $pop211, $pop423 +; CHECK-NEXT: i32.const $push20=, 16 +; CHECK-NEXT: i32.shl $push213=, $pop212, $pop20 +; CHECK-NEXT: i32.or $push217=, $pop216, $pop213 +; CHECK-NEXT: i32.or $push221=, $pop220, $pop217 +; CHECK-NEXT: i32.or $push225=, $pop224, $pop221 +; CHECK-NEXT: i32.or $push229=, $pop228, $pop225 +; CHECK-NEXT: i32.or $push233=, $pop232, $pop229 +; CHECK-NEXT: i32.or $push237=, $pop236, $pop233 +; CHECK-NEXT: i32.or $push241=, $pop240, $pop237 +; CHECK-NEXT: i32.or $push245=, $pop244, $pop241 +; CHECK-NEXT: i32.or $push249=, $pop248, $pop245 +; CHECK-NEXT: i32.or $push253=, $pop252, $pop249 +; CHECK-NEXT: i32.or $push257=, $pop256, $pop253 +; CHECK-NEXT: i32.or $push261=, $pop260, $pop257 +; CHECK-NEXT: i32.or $push265=, $pop264, $pop261 +; CHECK-NEXT: i32.or $push269=, $pop268, $pop265 +; CHECK-NEXT: i32.or $push272=, $pop271, $pop269 +; CHECK-NEXT: i32.or $push352=, $pop351, $pop272 +; CHECK-NEXT: i64.extend_i32_u $push353=, $pop352 +; 
CHECK-NEXT: i8x16.splat $push96=, $32 +; CHECK-NEXT: i8x16.replace_lane $push97=, $pop96, 1, $33 +; CHECK-NEXT: i8x16.replace_lane $push98=, $pop97, 2, $34 +; CHECK-NEXT: i8x16.replace_lane $push99=, $pop98, 3, $35 +; CHECK-NEXT: i8x16.replace_lane $push100=, $pop99, 4, $36 +; CHECK-NEXT: i8x16.replace_lane $push101=, $pop100, 5, $37 +; CHECK-NEXT: i8x16.replace_lane $push102=, $pop101, 6, $38 +; CHECK-NEXT: i8x16.replace_lane $push103=, $pop102, 7, $39 +; CHECK-NEXT: i8x16.replace_lane $push104=, $pop103, 8, $40 +; CHECK-NEXT: i8x16.replace_lane $push105=, $pop104, 9, $41 +; CHECK-NEXT: i8x16.replace_lane $push106=, $pop105, 10, $42 +; CHECK-NEXT: i8x16.replace_lane $push107=, $pop106, 11, $43 +; CHECK-NEXT: i8x16.replace_lane $push108=, $pop107, 12, $44 +; CHECK-NEXT: i8x16.replace_lane $push109=, $pop108, 13, $45 +; CHECK-NEXT: i8x16.replace_lane $push110=, $pop109, 14, $46 +; CHECK-NEXT: i8x16.replace_lane $push111=, $pop110, 15, $47 +; CHECK-NEXT: v128.and $push112=, $pop111, $64 +; CHECK-NEXT: i8x16.eq $push422=, $pop112, $65 +; CHECK-NEXT: local.tee $push421=, $66=, $pop422 +; CHECK-NEXT: i8x16.extract_lane_u $push113=, $pop421, 0 +; CHECK-NEXT: i32.const $push420=, 1 +; CHECK-NEXT: i32.and $push114=, $pop113, $pop420 +; CHECK-NEXT: i8x16.extract_lane_u $push115=, $66, 1 +; CHECK-NEXT: i32.const $push419=, 1 +; CHECK-NEXT: i32.and $push116=, $pop115, $pop419 +; CHECK-NEXT: i32.const $push418=, 1 +; CHECK-NEXT: i32.shl $push117=, $pop116, $pop418 +; CHECK-NEXT: i32.or $push118=, $pop114, $pop117 +; CHECK-NEXT: i8x16.extract_lane_u $push119=, $66, 2 +; CHECK-NEXT: i32.const $push417=, 1 +; CHECK-NEXT: i32.and $push120=, $pop119, $pop417 +; CHECK-NEXT: i32.const $push416=, 2 +; CHECK-NEXT: i32.shl $push122=, $pop120, $pop416 +; CHECK-NEXT: i32.or $push123=, $pop118, $pop122 +; CHECK-NEXT: i8x16.extract_lane_u $push124=, $66, 3 +; CHECK-NEXT: i32.const $push415=, 1 +; CHECK-NEXT: i32.and $push125=, $pop124, $pop415 +; CHECK-NEXT: i32.const $push414=, 3 +; 
CHECK-NEXT: i32.shl $push127=, $pop125, $pop414 +; CHECK-NEXT: i32.or $push128=, $pop123, $pop127 +; CHECK-NEXT: i8x16.extract_lane_u $push129=, $66, 4 +; CHECK-NEXT: i32.const $push413=, 1 +; CHECK-NEXT: i32.and $push130=, $pop129, $pop413 +; CHECK-NEXT: i32.const $push412=, 4 +; CHECK-NEXT: i32.shl $push132=, $pop130, $pop412 +; CHECK-NEXT: i32.or $push133=, $pop128, $pop132 +; CHECK-NEXT: i8x16.extract_lane_u $push134=, $66, 5 +; CHECK-NEXT: i32.const $push411=, 1 +; CHECK-NEXT: i32.and $push135=, $pop134, $pop411 +; CHECK-NEXT: i32.const $push410=, 5 +; CHECK-NEXT: i32.shl $push137=, $pop135, $pop410 +; CHECK-NEXT: i32.or $push138=, $pop133, $pop137 +; CHECK-NEXT: i8x16.extract_lane_u $push139=, $66, 6 +; CHECK-NEXT: i32.const $push409=, 1 +; CHECK-NEXT: i32.and $push140=, $pop139, $pop409 +; CHECK-NEXT: i32.const $push408=, 6 +; CHECK-NEXT: i32.shl $push142=, $pop140, $pop408 +; CHECK-NEXT: i32.or $push143=, $pop138, $pop142 +; CHECK-NEXT: i8x16.extract_lane_u $push144=, $66, 7 +; CHECK-NEXT: i32.const $push407=, 1 +; CHECK-NEXT: i32.and $push145=, $pop144, $pop407 +; CHECK-NEXT: i32.const $push406=, 7 +; CHECK-NEXT: i32.shl $push147=, $pop145, $pop406 +; CHECK-NEXT: i32.or $push148=, $pop143, $pop147 +; CHECK-NEXT: i8x16.extract_lane_u $push149=, $66, 8 +; CHECK-NEXT: i32.const $push405=, 1 +; CHECK-NEXT: i32.and $push150=, $pop149, $pop405 +; CHECK-NEXT: i32.const $push404=, 8 +; CHECK-NEXT: i32.shl $push152=, $pop150, $pop404 +; CHECK-NEXT: i32.or $push153=, $pop148, $pop152 +; CHECK-NEXT: i8x16.extract_lane_u $push154=, $66, 9 +; CHECK-NEXT: i32.const $push403=, 1 +; CHECK-NEXT: i32.and $push155=, $pop154, $pop403 +; CHECK-NEXT: i32.const $push402=, 9 +; CHECK-NEXT: i32.shl $push157=, $pop155, $pop402 +; CHECK-NEXT: i32.or $push158=, $pop153, $pop157 +; CHECK-NEXT: i8x16.extract_lane_u $push159=, $66, 10 +; CHECK-NEXT: i32.const $push401=, 1 +; CHECK-NEXT: i32.and $push160=, $pop159, $pop401 +; CHECK-NEXT: i32.const $push400=, 10 +; CHECK-NEXT: i32.shl 
$push162=, $pop160, $pop400 +; CHECK-NEXT: i32.or $push163=, $pop158, $pop162 +; CHECK-NEXT: i8x16.extract_lane_u $push164=, $66, 11 +; CHECK-NEXT: i32.const $push399=, 1 +; CHECK-NEXT: i32.and $push165=, $pop164, $pop399 +; CHECK-NEXT: i32.const $push398=, 11 +; CHECK-NEXT: i32.shl $push167=, $pop165, $pop398 +; CHECK-NEXT: i32.or $push168=, $pop163, $pop167 +; CHECK-NEXT: i8x16.extract_lane_u $push169=, $66, 12 +; CHECK-NEXT: i32.const $push397=, 1 +; CHECK-NEXT: i32.and $push170=, $pop169, $pop397 +; CHECK-NEXT: i32.const $push396=, 12 +; CHECK-NEXT: i32.shl $push172=, $pop170, $pop396 +; CHECK-NEXT: i32.or $push173=, $pop168, $pop172 +; CHECK-NEXT: i8x16.extract_lane_u $push174=, $66, 13 +; CHECK-NEXT: i32.const $push395=, 1 +; CHECK-NEXT: i32.and $push175=, $pop174, $pop395 +; CHECK-NEXT: i32.const $push394=, 13 +; CHECK-NEXT: i32.shl $push177=, $pop175, $pop394 +; CHECK-NEXT: i32.or $push178=, $pop173, $pop177 +; CHECK-NEXT: i8x16.extract_lane_u $push179=, $66, 14 +; CHECK-NEXT: i32.const $push393=, 1 +; CHECK-NEXT: i32.and $push180=, $pop179, $pop393 +; CHECK-NEXT: i32.const $push392=, 14 +; CHECK-NEXT: i32.shl $push182=, $pop180, $pop392 +; CHECK-NEXT: i32.or $push183=, $pop178, $pop182 +; CHECK-NEXT: i8x16.extract_lane_u $push184=, $66, 15 +; CHECK-NEXT: i32.const $push391=, 15 +; CHECK-NEXT: i32.shl $push186=, $pop184, $pop391 +; CHECK-NEXT: i32.or $push187=, $pop183, $pop186 +; CHECK-NEXT: i32.const $push390=, 65535 +; CHECK-NEXT: i32.and $push189=, $pop187, $pop390 +; CHECK-NEXT: i8x16.splat $push0=, $48 +; CHECK-NEXT: i8x16.replace_lane $push1=, $pop0, 1, $49 +; CHECK-NEXT: i8x16.replace_lane $push2=, $pop1, 2, $50 +; CHECK-NEXT: i8x16.replace_lane $push3=, $pop2, 3, $51 +; CHECK-NEXT: i8x16.replace_lane $push4=, $pop3, 4, $52 +; CHECK-NEXT: i8x16.replace_lane $push5=, $pop4, 5, $53 +; CHECK-NEXT: i8x16.replace_lane $push6=, $pop5, 6, $54 +; CHECK-NEXT: i8x16.replace_lane $push7=, $pop6, 7, $55 +; CHECK-NEXT: i8x16.replace_lane $push8=, $pop7, 8, $56 
+; CHECK-NEXT: i8x16.replace_lane $push9=, $pop8, 9, $57 +; CHECK-NEXT: i8x16.replace_lane $push10=, $pop9, 10, $58 +; CHECK-NEXT: i8x16.replace_lane $push11=, $pop10, 11, $59 +; CHECK-NEXT: i8x16.replace_lane $push12=, $pop11, 12, $60 +; CHECK-NEXT: i8x16.replace_lane $push13=, $pop12, 13, $61 +; CHECK-NEXT: i8x16.replace_lane $push14=, $pop13, 14, $62 +; CHECK-NEXT: i8x16.replace_lane $push15=, $pop14, 15, $63 +; CHECK-NEXT: v128.and $push16=, $pop15, $64 +; CHECK-NEXT: i8x16.eq $push389=, $pop16, $65 +; CHECK-NEXT: local.tee $push388=, $66=, $pop389 +; CHECK-NEXT: i8x16.extract_lane_u $push92=, $pop388, 15 +; CHECK-NEXT: i32.const $push387=, 31 +; CHECK-NEXT: i32.shl $push94=, $pop92, $pop387 +; CHECK-NEXT: i8x16.extract_lane_u $push87=, $66, 14 +; CHECK-NEXT: i32.const $push386=, 1 +; CHECK-NEXT: i32.and $push88=, $pop87, $pop386 +; CHECK-NEXT: i32.const $push385=, 30 +; CHECK-NEXT: i32.shl $push90=, $pop88, $pop385 +; CHECK-NEXT: i8x16.extract_lane_u $push82=, $66, 13 +; CHECK-NEXT: i32.const $push384=, 1 +; CHECK-NEXT: i32.and $push83=, $pop82, $pop384 +; CHECK-NEXT: i32.const $push383=, 29 +; CHECK-NEXT: i32.shl $push85=, $pop83, $pop383 +; CHECK-NEXT: i8x16.extract_lane_u $push77=, $66, 12 +; CHECK-NEXT: i32.const $push382=, 1 +; CHECK-NEXT: i32.and $push78=, $pop77, $pop382 +; CHECK-NEXT: i32.const $push381=, 28 +; CHECK-NEXT: i32.shl $push80=, $pop78, $pop381 +; CHECK-NEXT: i8x16.extract_lane_u $push72=, $66, 11 +; CHECK-NEXT: i32.const $push380=, 1 +; CHECK-NEXT: i32.and $push73=, $pop72, $pop380 +; CHECK-NEXT: i32.const $push379=, 27 +; CHECK-NEXT: i32.shl $push75=, $pop73, $pop379 +; CHECK-NEXT: i8x16.extract_lane_u $push67=, $66, 10 +; CHECK-NEXT: i32.const $push378=, 1 +; CHECK-NEXT: i32.and $push68=, $pop67, $pop378 +; CHECK-NEXT: i32.const $push377=, 26 +; CHECK-NEXT: i32.shl $push70=, $pop68, $pop377 +; CHECK-NEXT: i8x16.extract_lane_u $push62=, $66, 9 +; CHECK-NEXT: i32.const $push376=, 1 +; CHECK-NEXT: i32.and $push63=, $pop62, $pop376 +; 
CHECK-NEXT: i32.const $push375=, 25 +; CHECK-NEXT: i32.shl $push65=, $pop63, $pop375 +; CHECK-NEXT: i8x16.extract_lane_u $push57=, $66, 8 +; CHECK-NEXT: i32.const $push374=, 1 +; CHECK-NEXT: i32.and $push58=, $pop57, $pop374 +; CHECK-NEXT: i32.const $push373=, 24 +; CHECK-NEXT: i32.shl $push60=, $pop58, $pop373 +; CHECK-NEXT: i8x16.extract_lane_u $push52=, $66, 7 +; CHECK-NEXT: i32.const $push372=, 1 +; CHECK-NEXT: i32.and $push53=, $pop52, $pop372 +; CHECK-NEXT: i32.const $push371=, 23 +; CHECK-NEXT: i32.shl $push55=, $pop53, $pop371 +; CHECK-NEXT: i8x16.extract_lane_u $push47=, $66, 6 +; CHECK-NEXT: i32.const $push370=, 1 +; CHECK-NEXT: i32.and $push48=, $pop47, $pop370 +; CHECK-NEXT: i32.const $push369=, 22 +; CHECK-NEXT: i32.shl $push50=, $pop48, $pop369 +; CHECK-NEXT: i8x16.extract_lane_u $push42=, $66, 5 +; CHECK-NEXT: i32.const $push368=, 1 +; CHECK-NEXT: i32.and $push43=, $pop42, $pop368 +; CHECK-NEXT: i32.const $push367=, 21 +; CHECK-NEXT: i32.shl $push45=, $pop43, $pop367 +; CHECK-NEXT: i8x16.extract_lane_u $push37=, $66, 4 +; CHECK-NEXT: i32.const $push366=, 1 +; CHECK-NEXT: i32.and $push38=, $pop37, $pop366 +; CHECK-NEXT: i32.const $push365=, 20 +; CHECK-NEXT: i32.shl $push40=, $pop38, $pop365 +; CHECK-NEXT: i8x16.extract_lane_u $push32=, $66, 3 +; CHECK-NEXT: i32.const $push364=, 1 +; CHECK-NEXT: i32.and $push33=, $pop32, $pop364 +; CHECK-NEXT: i32.const $push363=, 19 +; CHECK-NEXT: i32.shl $push35=, $pop33, $pop363 +; CHECK-NEXT: i8x16.extract_lane_u $push27=, $66, 2 +; CHECK-NEXT: i32.const $push362=, 1 +; CHECK-NEXT: i32.and $push28=, $pop27, $pop362 +; CHECK-NEXT: i32.const $push361=, 18 +; CHECK-NEXT: i32.shl $push30=, $pop28, $pop361 +; CHECK-NEXT: i8x16.extract_lane_u $push22=, $66, 1 +; CHECK-NEXT: i32.const $push360=, 1 +; CHECK-NEXT: i32.and $push23=, $pop22, $pop360 +; CHECK-NEXT: i32.const $push359=, 17 +; CHECK-NEXT: i32.shl $push25=, $pop23, $pop359 +; CHECK-NEXT: i8x16.extract_lane_u $push17=, $66, 0 +; CHECK-NEXT: i32.const $push358=, 1 
+; CHECK-NEXT: i32.and $push19=, $pop17, $pop358 +; CHECK-NEXT: i32.const $push357=, 16 +; CHECK-NEXT: i32.shl $push21=, $pop19, $pop357 +; CHECK-NEXT: i32.or $push26=, $pop25, $pop21 +; CHECK-NEXT: i32.or $push31=, $pop30, $pop26 +; CHECK-NEXT: i32.or $push36=, $pop35, $pop31 +; CHECK-NEXT: i32.or $push41=, $pop40, $pop36 +; CHECK-NEXT: i32.or $push46=, $pop45, $pop41 +; CHECK-NEXT: i32.or $push51=, $pop50, $pop46 +; CHECK-NEXT: i32.or $push56=, $pop55, $pop51 +; CHECK-NEXT: i32.or $push61=, $pop60, $pop56 +; CHECK-NEXT: i32.or $push66=, $pop65, $pop61 +; CHECK-NEXT: i32.or $push71=, $pop70, $pop66 +; CHECK-NEXT: i32.or $push76=, $pop75, $pop71 +; CHECK-NEXT: i32.or $push81=, $pop80, $pop76 +; CHECK-NEXT: i32.or $push86=, $pop85, $pop81 +; CHECK-NEXT: i32.or $push91=, $pop90, $pop86 +; CHECK-NEXT: i32.or $push95=, $pop94, $pop91 +; CHECK-NEXT: i32.or $push190=, $pop189, $pop95 +; CHECK-NEXT: i64.extend_i32_u $push191=, $pop190 +; CHECK-NEXT: i64.const $push192=, 32 +; CHECK-NEXT: i64.shl $push193=, $pop191, $pop192 +; CHECK-NEXT: i64.or $push354=, $pop353, $pop193 +; CHECK-NEXT: return $pop354 + %z = icmp eq <64 x i4> %x, splat (i4 64) + %res = bitcast <64 x i1> %z to i64 + ret i64 %res +}