diff options
| -rw-r--r-- | llvm/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp | 15 | ||||
| -rw-r--r-- | llvm/lib/Target/SystemZ/SystemZISelLowering.cpp | 37 | ||||
| -rw-r--r-- | llvm/lib/Target/SystemZ/SystemZISelLowering.h | 6 | ||||
| -rw-r--r-- | llvm/lib/Target/SystemZ/SystemZInstrVector.td | 2 | ||||
| -rw-r--r-- | llvm/lib/Target/SystemZ/SystemZOperators.td | 27 | ||||
| -rw-r--r-- | llvm/test/CodeGen/SystemZ/buildvector-00.ll | 36 | ||||
| -rw-r--r-- | llvm/test/CodeGen/SystemZ/vec-const-05.ll | 57 | ||||
| -rw-r--r-- | llvm/test/CodeGen/SystemZ/vec-const-06.ll | 40 |
8 files changed, 101 insertions, 119 deletions
diff --git a/llvm/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp b/llvm/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp index ab29eb12d23..01f39257dae 100644 --- a/llvm/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp +++ b/llvm/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp @@ -11,6 +11,7 @@ //===----------------------------------------------------------------------===// #include "SystemZTargetMachine.h" +#include "SystemZISelLowering.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/CodeGen/SelectionDAGISel.h" #include "llvm/Support/Debug.h" @@ -1526,6 +1527,20 @@ void SystemZDAGToDAGISel::Select(SDNode *Node) { break; } + case ISD::BUILD_VECTOR: { + auto *BVN = cast<BuildVectorSDNode>(Node); + SDLoc DL(Node); + EVT VT = Node->getValueType(0); + uint64_t Mask = 0; + if (SystemZTargetLowering::tryBuildVectorByteMask(BVN, Mask)) { + SDNode *Res = CurDAG->getMachineNode(SystemZ::VGBM, DL, VT, + CurDAG->getTargetConstant(Mask, DL, MVT::i32)); + ReplaceNode(Node, Res); + return; + } + break; + } + case ISD::STORE: { if (tryFoldLoadStoreIntoMemOperand(Node)) return; diff --git a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp index 5aeb5ed2d27..4e9ee7feac6 100644 --- a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp +++ b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp @@ -2510,9 +2510,8 @@ SDValue SystemZTargetLowering::lowerVectorSETCC(SelectionDAG &DAG, break; } if (Invert) { - SDValue Mask = DAG.getNode(SystemZISD::BYTE_MASK, DL, MVT::v16i8, - DAG.getConstant(65535, DL, MVT::i32)); - Mask = DAG.getNode(ISD::BITCAST, DL, VT, Mask); + SDValue Mask = + DAG.getSplatBuildVector(VT, DL, DAG.getConstant(-1, DL, MVT::i64)); Cmp = DAG.getNode(ISD::XOR, DL, VT, Cmp, Mask); } return Cmp; @@ -3330,14 +3329,14 @@ SDValue SystemZTargetLowering::lowerCTPOP(SDValue Op, break; } case 32: { - SDValue Tmp = DAG.getNode(SystemZISD::BYTE_MASK, DL, MVT::v16i8, - DAG.getConstant(0, DL, MVT::i32)); + SDValue Tmp = DAG.getSplatBuildVector(MVT::v16i8, DL, + DAG.getConstant(0, DL, MVT::i32)); Op = DAG.getNode(SystemZISD::VSUM, DL, VT, Op, Tmp); break; } case 64: { - SDValue Tmp = DAG.getNode(SystemZISD::BYTE_MASK, DL, MVT::v16i8, - DAG.getConstant(0, DL, MVT::i32)); + SDValue Tmp = DAG.getSplatBuildVector(MVT::v16i8, DL, + DAG.getConstant(0, DL, MVT::i32)); Op = DAG.getNode(SystemZISD::VSUM, DL, MVT::v4i32, Op, Tmp); Op = DAG.getNode(SystemZISD::VSUM, DL, VT, Op, Tmp); break; @@ -4259,10 +4258,10 @@ static SDValue joinDwords(SelectionDAG &DAG, const SDLoc &DL, SDValue Op0, return DAG.getNode(SystemZISD::JOIN_DWORDS, DL, MVT::v2i64, Op0, Op1); } -// Try to represent constant BUILD_VECTOR node BVN using a -// SystemZISD::BYTE_MASK-style mask. Store the mask value in Mask -// on success. -static bool tryBuildVectorByteMask(BuildVectorSDNode *BVN, uint64_t &Mask) { +// Try to represent constant BUILD_VECTOR node BVN using a BYTE MASK style +// mask. Store the mask value in Mask on success. +bool SystemZTargetLowering:: +tryBuildVectorByteMask(BuildVectorSDNode *BVN, uint64_t &Mask) { EVT ElemVT = BVN->getValueType(0).getVectorElementType(); unsigned BytesPerElement = ElemVT.getStoreSize(); for (unsigned I = 0, E = BVN->getNumOperands(); I != E; ++I) { @@ -4541,13 +4540,11 @@ SDValue SystemZTargetLowering::lowerBUILD_VECTOR(SDValue Op, // Try using VECTOR GENERATE BYTE MASK. This is the architecturally- // preferred way of creating all-zero and all-one vectors so give it // priority over other methods below. - uint64_t Mask = 0; - if (tryBuildVectorByteMask(BVN, Mask)) { - SDValue Op = DAG.getNode( - SystemZISD::BYTE_MASK, DL, MVT::v16i8, - DAG.getConstant(Mask, DL, MVT::i32, false, true /*isOpaque*/)); - return DAG.getNode(ISD::BITCAST, DL, VT, Op); - } + uint64_t Mask; + if (ISD::isBuildVectorAllZeros(Op.getNode()) || + ISD::isBuildVectorAllOnes(Op.getNode()) || + (VT.isInteger() && tryBuildVectorByteMask(BVN, Mask))) + return Op; // Try using some form of replication. APInt SplatBits, SplatUndef; @@ -5027,7 +5024,6 @@ const char *SystemZTargetLowering::getTargetNodeName(unsigned Opcode) const { OPCODE(TBEGIN); OPCODE(TBEGIN_NOFLOAT); OPCODE(TEND); - OPCODE(BYTE_MASK); OPCODE(ROTATE_MASK); OPCODE(REPLICATE); OPCODE(JOIN_DWORDS); @@ -5339,8 +5335,7 @@ SDValue SystemZTargetLowering::combineMERGE( SDValue Op1 = N->getOperand(1); if (Op0.getOpcode() == ISD::BITCAST) Op0 = Op0.getOperand(0); - if (Op0.getOpcode() == SystemZISD::BYTE_MASK && - cast<ConstantSDNode>(Op0.getOperand(0))->getZExtValue() == 0) { + if (ISD::isBuildVectorAllZeros(Op0.getNode())) { // (z_merge_* 0, 0) -> 0. This is mostly useful for using VLLEZF // for v4f32. if (Op1 == N->getOperand(0)) diff --git a/llvm/lib/Target/SystemZ/SystemZISelLowering.h b/llvm/lib/Target/SystemZ/SystemZISelLowering.h index cd0e4c3468b..a40eb4cbc2a 100644 --- a/llvm/lib/Target/SystemZ/SystemZISelLowering.h +++ b/llvm/lib/Target/SystemZ/SystemZISelLowering.h @@ -161,10 +161,6 @@ enum NodeType : unsigned { // Transaction end. Just the chain operand. Returns CC value and chain. TEND, - // Create a vector constant by filling byte N of the result with bit - // 15-N of the single operand. - BYTE_MASK, - // Create a vector constant by replicating an element-sized RISBG-style mask. // The first operand specifies the starting set bit and the second operand // specifies the ending set bit. Both operands count from the MSB of the @@ -515,6 +511,8 @@ public: return true; } + static bool tryBuildVectorByteMask(BuildVectorSDNode *BVN, uint64_t &Mask); + private: const SystemZSubtarget &Subtarget; diff --git a/llvm/lib/Target/SystemZ/SystemZInstrVector.td b/llvm/lib/Target/SystemZ/SystemZInstrVector.td index 82cca0b1217..dd2a0d58cdf 100644 --- a/llvm/lib/Target/SystemZ/SystemZInstrVector.td +++ b/llvm/lib/Target/SystemZ/SystemZInstrVector.td @@ -60,7 +60,7 @@ let Predicates = [FeatureVector] in { // Generate byte mask. def VZERO : InherentVRIa<"vzero", 0xE744, 0>; def VONE : InherentVRIa<"vone", 0xE744, 0xffff>; - def VGBM : UnaryVRIa<"vgbm", 0xE744, z_byte_mask, v128b, imm32zx16>; + def VGBM : UnaryVRIa<"vgbm", 0xE744, null_frag, v128b, imm32zx16>; // Generate mask. def VGM : BinaryVRIbGeneric<"vgm", 0xE746>; diff --git a/llvm/lib/Target/SystemZ/SystemZOperators.td b/llvm/lib/Target/SystemZ/SystemZOperators.td index 4b938ce6099..9914db8651c 100644 --- a/llvm/lib/Target/SystemZ/SystemZOperators.td +++ b/llvm/lib/Target/SystemZ/SystemZOperators.td @@ -286,7 +286,6 @@ def z_vector_insert : SDNode<"ISD::INSERT_VECTOR_ELT", SDT_ZInsertVectorElt>; def z_vector_extract : SDNode<"ISD::EXTRACT_VECTOR_ELT", SDT_ZExtractVectorElt>; -def z_byte_mask : SDNode<"SystemZISD::BYTE_MASK", SDT_ZReplicate>; def z_rotate_mask : SDNode<"SystemZISD::ROTATE_MASK", SDT_ZRotateMask>; def z_replicate : SDNode<"SystemZISD::REPLICATE", SDT_ZReplicate>; def z_join_dwords : SDNode<"SystemZISD::JOIN_DWORDS", SDT_ZJoinDwords>; @@ -708,10 +707,6 @@ class shiftop<SDPatternOperator operator> [(operator node:$val, node:$count), (operator node:$val, (and node:$count, imm32bottom6set))]>; -// Vector representation of all-zeros and all-ones. -def z_vzero : PatFrag<(ops), (bitconvert (v16i8 (z_byte_mask (i32 0))))>; -def z_vones : PatFrag<(ops), (bitconvert (v16i8 (z_byte_mask (i32 65535))))>; - // Load a scalar and replicate it in all elements of a vector. class z_replicate_load<ValueType scalartype, SDPatternOperator load> : PatFrag<(ops node:$addr), @@ -739,13 +734,13 @@ def z_vlef64 : z_vle<f64, load>; // zeroed vector. class z_vllez<ValueType scalartype, SDPatternOperator load, int index> : PatFrag<(ops node:$addr), - (z_vector_insert (z_vzero), + (z_vector_insert (immAllZerosV), (scalartype (load node:$addr)), (i32 index))>; def z_vllezi8 : z_vllez<i32, anyextloadi8, 7>; def z_vllezi16 : z_vllez<i32, anyextloadi16, 3>; def z_vllezi32 : z_vllez<i32, load, 1>; def z_vllezi64 : PatFrags<(ops node:$addr), - [(z_vector_insert (z_vzero), + [(z_vector_insert (immAllZerosV), (i64 (load node:$addr)), (i32 0)), (z_join_dwords (i64 (load node:$addr)), (i64 0))]>; // We use high merges to form a v4f32 from four f32s. Propagating zero @@ -758,11 +753,12 @@ def z_vllezf32 : PatFrag<(ops node:$addr), (bitconvert (v4f32 (scalar_to_vector (f32 (load node:$addr)))))))), - (v2i64 (z_vzero)))>; + (v2i64 + (bitconvert (v4f32 (immAllZerosV)))))>; def z_vllezf64 : PatFrag<(ops node:$addr), (z_merge_high (v2f64 (scalar_to_vector (f64 (load node:$addr)))), - (z_vzero))>; + (immAllZerosV))>; // Similarly for the high element of a zeroed vector. def z_vllezli32 : z_vllez<i32, load, 0>; @@ -773,8 +769,9 @@ def z_vllezlf32 : PatFrag<(ops node:$addr), (z_merge_high (v4f32 (scalar_to_vector (f32 (load node:$addr)))), - (v4f32 (z_vzero))))), - (v2i64 (z_vzero)))>; + (v4f32 (immAllZerosV))))), + (v2i64 + (bitconvert (v4f32 (immAllZerosV)))))>; // Store one element of a vector. class z_vste<ValueType scalartype, SDPatternOperator store> @@ -789,16 +786,16 @@ def z_vstef32 : z_vste<f32, store>; def z_vstef64 : z_vste<f64, store>; // Arithmetic negation on vectors. -def z_vneg : PatFrag<(ops node:$x), (sub (z_vzero), node:$x)>; +def z_vneg : PatFrag<(ops node:$x), (sub (immAllZerosV), node:$x)>; // Bitwise negation on vectors. -def z_vnot : PatFrag<(ops node:$x), (xor node:$x, (z_vones))>; +def z_vnot : PatFrag<(ops node:$x), (xor node:$x, (immAllOnesV))>; // Signed "integer greater than zero" on vectors. -def z_vicmph_zero : PatFrag<(ops node:$x), (z_vicmph node:$x, (z_vzero))>; +def z_vicmph_zero : PatFrag<(ops node:$x), (z_vicmph node:$x, (immAllZerosV))>; // Signed "integer less than zero" on vectors. -def z_vicmpl_zero : PatFrag<(ops node:$x), (z_vicmph (z_vzero), node:$x)>; +def z_vicmpl_zero : PatFrag<(ops node:$x), (z_vicmph (immAllZerosV), node:$x)>; // Integer absolute on vectors. class z_viabs<int shift> diff --git a/llvm/test/CodeGen/SystemZ/buildvector-00.ll b/llvm/test/CodeGen/SystemZ/buildvector-00.ll new file mode 100644 index 00000000000..dc1c0ffd4f6 --- /dev/null +++ b/llvm/test/CodeGen/SystemZ/buildvector-00.ll @@ -0,0 +1,36 @@ +; Test that the dag combiner can understand that some vector operands are +; all-zeros and then optimize the logical operations. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s + +define void @f1() { +; CHECK-LABEL: f1: +; CHECK: vno +; CHECK-NOT: vno + +bb: + %tmp = shufflevector <2 x i64> undef, <2 x i64> undef, <2 x i32> zeroinitializer + br label %bb1 + +bb1: ; preds = %bb + %tmp2 = load i64, i64* undef, align 8 + %tmp3 = insertelement <2 x i64> undef, i64 %tmp2, i32 1 + %tmp4 = icmp ne <2 x i64> undef, zeroinitializer + %tmp5 = xor <2 x i1> %tmp4, zeroinitializer + %tmp6 = xor <2 x i1> zeroinitializer, %tmp5 + %tmp7 = and <2 x i64> %tmp3, %tmp + %tmp8 = icmp ne <2 x i64> %tmp7, zeroinitializer + %tmp9 = xor <2 x i1> zeroinitializer, %tmp8 + %tmp10 = icmp ne <2 x i64> undef, zeroinitializer + %tmp11 = xor <2 x i1> %tmp10, %tmp9 + %tmp12 = and <2 x i1> %tmp6, %tmp11 + %tmp13 = extractelement <2 x i1> %tmp12, i32 0 + br i1 %tmp13, label %bb14, label %bb15 + +bb14: ; preds = %bb1 + store i64 undef, i64* undef, align 8 + br label %bb15 + +bb15: ; preds = %bb14, %bb1 + unreachable +} diff --git a/llvm/test/CodeGen/SystemZ/vec-const-05.ll b/llvm/test/CodeGen/SystemZ/vec-const-05.ll index 55f3cdd5902..719280e9d60 100644 --- a/llvm/test/CodeGen/SystemZ/vec-const-05.ll +++ b/llvm/test/CodeGen/SystemZ/vec-const-05.ll @@ -1,63 +1,28 @@ -; Test vector byte masks, v4f32 version. +; Test vector byte masks, v4f32 version. Only all-zero vectors are handled. ; ; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s ; Test an all-zeros vector. -define <4 x float> @f1() { -; CHECK-LABEL: f1: +define <4 x float> @f0() { +; CHECK-LABEL: f0: ; CHECK: vgbm %v24, 0 ; CHECK: br %r14 ret <4 x float> zeroinitializer } -; Test an all-ones vector. -define <4 x float> @f2() { -; CHECK-LABEL: f2: -; CHECK: vgbm %v24, 65535 -; CHECK: br %r14 - ret <4 x float> <float 0xffffffffe0000000, float 0xffffffffe0000000, - float 0xffffffffe0000000, float 0xffffffffe0000000> -} - -; Test a mixed vector (mask 0xc731). -define <4 x float> @f3() { -; CHECK-LABEL: f3: -; CHECK: vgbm %v24, 50993 -; CHECK: br %r14 - ret <4 x float> <float 0xffffe00000000000, float 0x381fffffe0000000, - float 0x379fffe000000000, float 0x371fe00000000000> -} - -; Test that undefs are treated as zero (mask 0xc031). -define <4 x float> @f4() { -; CHECK-LABEL: f4: -; CHECK: vgbm %v24, 49201 -; CHECK: br %r14 - ret <4 x float> <float 0xffffe00000000000, float undef, - float 0x379fffe000000000, float 0x371fe00000000000> -} - -; Test that we don't use VGBM if one of the bytes is not 0 or 0xff. -define <4 x float> @f5() { -; CHECK-LABEL: f5: -; CHECK-NOT: vgbm +; Test that undefs are treated as zero. +define <4 x float> @f1() { +; CHECK-LABEL: f1: +; CHECK: vgbm %v24, 0 ; CHECK: br %r14 - ret <4 x float> <float 0xffffe00000000000, float 0x381fffffc0000000, - float 0x379fffe000000000, float 0x371fe00000000000> + ret <4 x float> <float zeroinitializer, float undef, + float zeroinitializer, float undef> } ; Test an all-zeros v2f32 that gets promoted to v4f32. -define <2 x float> @f6() { -; CHECK-LABEL: f6: +define <2 x float> @f2() { +; CHECK-LABEL: f2: ; CHECK: vgbm %v24, 0 ; CHECK: br %r14 ret <2 x float> zeroinitializer } - -; Test a mixed v2f32 that gets promoted to v4f32 (mask 0xc700). -define <2 x float> @f7() { -; CHECK-LABEL: f7: -; CHECK: vgbm %v24, 50944 -; CHECK: br %r14 - ret <2 x float> <float 0xffffe00000000000, float 0x381fffffe0000000> -} diff --git a/llvm/test/CodeGen/SystemZ/vec-const-06.ll b/llvm/test/CodeGen/SystemZ/vec-const-06.ll index be53a0581ec..6144e0f4910 100644 --- a/llvm/test/CodeGen/SystemZ/vec-const-06.ll +++ b/llvm/test/CodeGen/SystemZ/vec-const-06.ll @@ -1,43 +1,19 @@ -; Test vector byte masks, v2f64 version. +; Test vector byte masks, v2f64 version. Only all-zero vectors are handled. ; ; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s ; Test an all-zeros vector. -define <2 x double> @f1() { -; CHECK-LABEL: f1: +define <2 x double> @f0() { +; CHECK-LABEL: f0: ; CHECK: vgbm %v24, 0 ; CHECK: br %r14 ret <2 x double> zeroinitializer } -; Test an all-ones vector. -define <2 x double> @f2() { -; CHECK-LABEL: f2: -; CHECK: vgbm %v24, 65535 -; CHECK: br %r14 - ret <2 x double> <double 0xffffffffffffffff, double 0xffffffffffffffff> -} - -; Test a mixed vector (mask 0x8c76). -define <2 x double> @f3() { -; CHECK-LABEL: f3: -; CHECK: vgbm %v24, 35958 -; CHECK: br %r14 - ret <2 x double> <double 0xff000000ffff0000, double 0x00ffffff00ffff00> -} - -; Test that undefs are treated as zero (mask 0x8c00). -define <2 x double> @f4() { -; CHECK-LABEL: f4: -; CHECK: vgbm %v24, 35840 -; CHECK: br %r14 - ret <2 x double> <double 0xff000000ffff0000, double undef> -} - -; Test that we don't use VGBM if one of the bytes is not 0 or 0xff. -define <2 x double> @f5() { -; CHECK-LABEL: f5: -; CHECK-NOT: vgbm +; Test that undefs are treated as zero. +define <2 x double> @f1() { +; CHECK-LABEL: f1: +; CHECK: vgbm %v24, 0 ; CHECK: br %r14 - ret <2 x double> <double 0xfe000000ffff0000, double 0x00ffffff00ffff00> + ret <2 x double> <double zeroinitializer, double undef> } |

