diff options
-rw-r--r-- | llvm/lib/Target/PowerPC/PPCISelLowering.cpp | 11 | ||||
-rw-r--r-- | llvm/lib/Target/PowerPC/PPCISelLowering.h | 4 | ||||
-rw-r--r-- | llvm/lib/Target/PowerPC/PPCInstrInfo.td | 5 | ||||
-rw-r--r-- | llvm/lib/Target/PowerPC/PPCInstrVSX.td | 4 | ||||
-rw-r--r-- | llvm/lib/Target/PowerPC/PPCVSXSwapRemoval.cpp | 20 | ||||
-rw-r--r-- | llvm/test/CodeGen/PowerPC/pr27078.ll | 2 | ||||
-rw-r--r-- | llvm/test/CodeGen/PowerPC/swaps-le-2.ll | 2 | ||||
-rw-r--r-- | llvm/test/CodeGen/PowerPC/vsx-word-splats.ll | 147 |
8 files changed, 186 insertions, 9 deletions
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp index d0f9e8dcc98..e5cd3e30afb 100644 --- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -1014,6 +1014,7 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const { case PPCISD::VMADDFP: return "PPCISD::VMADDFP"; case PPCISD::VNMSUBFP: return "PPCISD::VNMSUBFP"; case PPCISD::VPERM: return "PPCISD::VPERM"; + case PPCISD::XXSPLT: return "PPCISD::XXSPLT"; case PPCISD::CMPB: return "PPCISD::CMPB"; case PPCISD::Hi: return "PPCISD::Hi"; case PPCISD::Lo: return "PPCISD::Lo"; @@ -7419,6 +7420,16 @@ SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, EVT VT = Op.getValueType(); bool isLittleEndian = Subtarget.isLittleEndian(); + if (Subtarget.hasVSX()) { + if (V2.isUndef() && PPC::isSplatShuffleMask(SVOp, 4)) { + int SplatIdx = PPC::getVSPLTImmediate(SVOp, 4, DAG); + SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1); + SDValue Splat = DAG.getNode(PPCISD::XXSPLT, dl, MVT::v4i32, Conv, + DAG.getConstant(SplatIdx, dl, MVT::i32)); + return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Splat); + } + } + if (Subtarget.hasQPX()) { if (VT.getVectorNumElements() != 4) return SDValue(); diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.h b/llvm/lib/Target/PowerPC/PPCISelLowering.h index c097a62712f..b60fa538f3c 100644 --- a/llvm/lib/Target/PowerPC/PPCISelLowering.h +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.h @@ -61,6 +61,10 @@ namespace llvm { /// VPERM, + /// XXSPLT - The PPC VSX splat instructions + /// + XXSPLT, + /// The CMPB instruction (takes two operands of i32 or i64). 
CMPB, diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.td b/llvm/lib/Target/PowerPC/PPCInstrInfo.td index 838889660a3..ad382341ed5 100644 --- a/llvm/lib/Target/PowerPC/PPCInstrInfo.td +++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.td @@ -31,6 +31,10 @@ def SDT_PPCvperm : SDTypeProfile<1, 3, [ SDTCisVT<3, v16i8>, SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2> ]>; +def SDT_PPCVecSplat : SDTypeProfile<1, 2, [ SDTCisVec<0>, + SDTCisVec<1>, SDTCisInt<2> +]>; + def SDT_PPCvcmp : SDTypeProfile<1, 3, [ SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2>, SDTCisVT<3, i32> ]>; @@ -141,6 +145,7 @@ def PPCaddisDtprelHA : SDNode<"PPCISD::ADDIS_DTPREL_HA", SDTIntBinOp>; def PPCaddiDtprelL : SDNode<"PPCISD::ADDI_DTPREL_L", SDTIntBinOp>; def PPCvperm : SDNode<"PPCISD::VPERM", SDT_PPCvperm, []>; +def PPCxxsplt : SDNode<"PPCISD::XXSPLT", SDT_PPCVecSplat, []>; def PPCqvfperm : SDNode<"PPCISD::QVFPERM", SDT_PPCqvfperm, []>; def PPCqvgpci : SDNode<"PPCISD::QVGPCI", SDT_PPCqvgpci, []>; diff --git a/llvm/lib/Target/PowerPC/PPCInstrVSX.td b/llvm/lib/Target/PowerPC/PPCInstrVSX.td index d7f64c97d6c..bc91fb6874b 100644 --- a/llvm/lib/Target/PowerPC/PPCInstrVSX.td +++ b/llvm/lib/Target/PowerPC/PPCInstrVSX.td @@ -775,7 +775,9 @@ let Uses = [RM] in { "xxsldwi $XT, $XA, $XB, $SHW", IIC_VecPerm, []>; def XXSPLTW : XX2Form_2<60, 164, (outs vsrc:$XT), (ins vsrc:$XB, u2imm:$UIM), - "xxspltw $XT, $XB, $UIM", IIC_VecPerm, []>; + "xxspltw $XT, $XB, $UIM", IIC_VecPerm, + [(set v4i32:$XT, + (PPCxxsplt v4i32:$XB, imm32SExt16:$UIM))]>; } // hasSideEffects // SELECT_CC_* - Used to implement the SELECT_CC DAG operation. 
Expanded after diff --git a/llvm/lib/Target/PowerPC/PPCVSXSwapRemoval.cpp b/llvm/lib/Target/PowerPC/PPCVSXSwapRemoval.cpp index 10636b86c3b..a1aa841eccc 100644 --- a/llvm/lib/Target/PowerPC/PPCVSXSwapRemoval.cpp +++ b/llvm/lib/Target/PowerPC/PPCVSXSwapRemoval.cpp @@ -407,9 +407,9 @@ bool PPCVSXSwapRemoval::gatherVectorInstructions() { case PPC::VSPLTB: case PPC::VSPLTH: case PPC::VSPLTW: + case PPC::XXSPLTW: // Splats are lane-sensitive, but we can use special handling - // to adjust the source lane for the splat. This is not yet - // implemented. When it is, we need to uncomment the following: + // to adjust the source lane for the splat. SwapVector[VecIdx].IsSwappable = 1; SwapVector[VecIdx].SpecialHandling = SHValues::SH_SPLAT; break; @@ -515,7 +515,6 @@ bool PPCVSXSwapRemoval::gatherVectorInstructions() { // permute control vectors (for shift values 1, 2, 3). However, // VPERM has a more restrictive register class. case PPC::XXSLDWI: - case PPC::XXSPLTW: break; } } @@ -806,12 +805,21 @@ void PPCVSXSwapRemoval::handleSpecialSwappables(int EntryIdx) { llvm_unreachable("Unexpected splat opcode"); case PPC::VSPLTB: NElts = 16; break; case PPC::VSPLTH: NElts = 8; break; - case PPC::VSPLTW: NElts = 4; break; + case PPC::VSPLTW: + case PPC::XXSPLTW: NElts = 4; break; } - unsigned EltNo = MI->getOperand(1).getImm(); + unsigned EltNo; + if (MI->getOpcode() == PPC::XXSPLTW) + EltNo = MI->getOperand(2).getImm(); + else + EltNo = MI->getOperand(1).getImm(); + EltNo = (EltNo + NElts / 2) % NElts; - MI->getOperand(1).setImm(EltNo); + if (MI->getOpcode() == PPC::XXSPLTW) + MI->getOperand(2).setImm(EltNo); + else + MI->getOperand(1).setImm(EltNo); DEBUG(dbgs() << " Into: "); DEBUG(MI->dump()); diff --git a/llvm/test/CodeGen/PowerPC/pr27078.ll b/llvm/test/CodeGen/PowerPC/pr27078.ll index 324462cf601..b1fdbbde692 100644 --- a/llvm/test/CodeGen/PowerPC/pr27078.ll +++ b/llvm/test/CodeGen/PowerPC/pr27078.ll @@ -9,7 +9,7 @@ define <4 x float> @bar(float* %p, float* %q) { %6 = 
shufflevector <12 x float> %5, <12 x float> undef, <4 x i32> <i32 0, i32 3, i32 6, i32 9> ret <4 x float> %6 -; CHECK: vspltw +; CHECK: xxspltw ; CHECK: vmrghw ; CHECK: vsldoi } diff --git a/llvm/test/CodeGen/PowerPC/swaps-le-2.ll b/llvm/test/CodeGen/PowerPC/swaps-le-2.ll index 08096ed20dd..9d1eb412cba 100644 --- a/llvm/test/CodeGen/PowerPC/swaps-le-2.ll +++ b/llvm/test/CodeGen/PowerPC/swaps-le-2.ll @@ -87,5 +87,5 @@ entry: ; CHECK-LABEL: @ifoo ; CHECK: lxvd2x -; CHECK: vspltw {{[0-9]+}}, {{[0-9]+}}, 0 +; CHECK: xxspltw {{[0-9]+}}, {{[0-9]+}}, 0 ; CHECK: stxvd2x diff --git a/llvm/test/CodeGen/PowerPC/vsx-word-splats.ll b/llvm/test/CodeGen/PowerPC/vsx-word-splats.ll new file mode 100644 index 00000000000..5632011da35 --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/vsx-word-splats.ll @@ -0,0 +1,147 @@ +; RUN: llc -mcpu=pwr7 -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck %s +; RUN: llc -mcpu=pwr7 -mtriple=powerpc64-unknown-linux-gnu < %s | FileCheck %s \ +; RUN: --check-prefix=CHECK-BE + +define <4 x float> @test0f(<4 x float> %a) { +entry: + %0 = bitcast <4 x float> %a to <16 x i8> + %1 = shufflevector <16 x i8> %0, <16 x i8> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3> + %2 = bitcast <16 x i8> %1 to <4 x float> + ret <4 x float> %2 +; CHECK-LABEL: test0f +; CHECK: xxspltw 34, 34, 3 +; CHECK-BE-LABEL: test0f +; CHECK-BE: xxspltw 34, 34, 0 +} + +define <4 x float> @test1f(<4 x float> %a) { +entry: + %0 = bitcast <4 x float> %a to <16 x i8> + %1 = shufflevector <16 x i8> %0, <16 x i8> undef, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7> + %2 = bitcast <16 x i8> %1 to <4 x float> + ret <4 x float> %2 +; CHECK-LABEL: test1f +; CHECK: xxspltw 34, 34, 2 +; CHECK-BE-LABEL: test1f +; CHECK-BE: xxspltw 34, 34, 1 +} + +define <4 x float> @test2f(<4 x float> %a) { +entry: + %0 = bitcast <4 x float> %a to <16 x 
i8> + %1 = shufflevector <16 x i8> %0, <16 x i8> undef, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 8, i32 9, i32 10, i32 11, i32 8, i32 9, i32 10, i32 11, i32 8, i32 9, i32 10, i32 11> + %2 = bitcast <16 x i8> %1 to <4 x float> + ret <4 x float> %2 +; CHECK-LABEL: test2f +; CHECK: xxspltw 34, 34, 1 +; CHECK-BE-LABEL: test2f +; CHECK-BE: xxspltw 34, 34, 2 +} + +define <4 x float> @test3f(<4 x float> %a) { +entry: + %0 = bitcast <4 x float> %a to <16 x i8> + %1 = shufflevector <16 x i8> %0, <16 x i8> undef, <16 x i32> <i32 12, i32 13, i32 14, i32 15, i32 12, i32 13, i32 14, i32 15, i32 12, i32 13, i32 14, i32 15, i32 12, i32 13, i32 14, i32 15> + %2 = bitcast <16 x i8> %1 to <4 x float> + ret <4 x float> %2 +; CHECK-LABEL: test3f +; CHECK: xxspltw 34, 34, 0 +; CHECK-BE-LABEL: test3f +; CHECK-BE: xxspltw 34, 34, 3 +} + +define <4 x i32> @test0si(<4 x i32> %a) { +entry: + %0 = bitcast <4 x i32> %a to <16 x i8> + %1 = shufflevector <16 x i8> %0, <16 x i8> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3> + %2 = bitcast <16 x i8> %1 to <4 x i32> + ret <4 x i32> %2 +; CHECK-LABEL: test0si +; CHECK: xxspltw 34, 34, 3 +; CHECK-BE-LABEL: test0si +; CHECK-BE: xxspltw 34, 34, 0 +} + +define <4 x i32> @test1si(<4 x i32> %a) { +entry: + %0 = bitcast <4 x i32> %a to <16 x i8> + %1 = shufflevector <16 x i8> %0, <16 x i8> undef, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7> + %2 = bitcast <16 x i8> %1 to <4 x i32> + ret <4 x i32> %2 +; CHECK-LABEL: test1si +; CHECK: xxspltw 34, 34, 2 +; CHECK-BE-LABEL: test1si +; CHECK-BE: xxspltw 34, 34, 1 +} + +define <4 x i32> @test2si(<4 x i32> %a) { +entry: + %0 = bitcast <4 x i32> %a to <16 x i8> + %1 = shufflevector <16 x i8> %0, <16 x i8> undef, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 8, i32 9, i32 10, i32 11, i32 8, i32 9, i32 10, i32 11, i32 8, i32 9, i32 10, i32 11> + %2 = 
bitcast <16 x i8> %1 to <4 x i32> + ret <4 x i32> %2 +; CHECK-LABEL: test2si +; CHECK: xxspltw 34, 34, 1 +; CHECK-BE-LABEL: test2si +; CHECK-BE: xxspltw 34, 34, 2 +} + +define <4 x i32> @test3si(<4 x i32> %a) { +entry: + %0 = bitcast <4 x i32> %a to <16 x i8> + %1 = shufflevector <16 x i8> %0, <16 x i8> undef, <16 x i32> <i32 12, i32 13, i32 14, i32 15, i32 12, i32 13, i32 14, i32 15, i32 12, i32 13, i32 14, i32 15, i32 12, i32 13, i32 14, i32 15> + %2 = bitcast <16 x i8> %1 to <4 x i32> + ret <4 x i32> %2 +; CHECK-LABEL: test3si +; CHECK: xxspltw 34, 34, 0 +; CHECK-BE-LABEL: test3si +; CHECK-BE: xxspltw 34, 34, 3 +} + +define <4 x i32> @test0ui(<4 x i32> %a) { +entry: + %0 = bitcast <4 x i32> %a to <16 x i8> + %1 = shufflevector <16 x i8> %0, <16 x i8> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3> + %2 = bitcast <16 x i8> %1 to <4 x i32> + ret <4 x i32> %2 +; CHECK-LABEL: test0ui +; CHECK: xxspltw 34, 34, 3 +; CHECK-BE-LABEL: test0ui +; CHECK-BE: xxspltw 34, 34, 0 +} + +define <4 x i32> @test1ui(<4 x i32> %a) { +entry: + %0 = bitcast <4 x i32> %a to <16 x i8> + %1 = shufflevector <16 x i8> %0, <16 x i8> undef, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7> + %2 = bitcast <16 x i8> %1 to <4 x i32> + ret <4 x i32> %2 +; CHECK-LABEL: test1ui +; CHECK: xxspltw 34, 34, 2 +; CHECK-BE-LABEL: test1ui +; CHECK-BE: xxspltw 34, 34, 1 +} + +define <4 x i32> @test2ui(<4 x i32> %a) { +entry: + %0 = bitcast <4 x i32> %a to <16 x i8> + %1 = shufflevector <16 x i8> %0, <16 x i8> undef, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 8, i32 9, i32 10, i32 11, i32 8, i32 9, i32 10, i32 11, i32 8, i32 9, i32 10, i32 11> + %2 = bitcast <16 x i8> %1 to <4 x i32> + ret <4 x i32> %2 +; CHECK-LABEL: test2ui +; CHECK: xxspltw 34, 34, 1 +; CHECK-BE-LABEL: test2ui +; CHECK-BE: xxspltw 34, 34, 2 +} + +define <4 x i32> @test3ui(<4 x 
i32> %a) { +entry: + %0 = bitcast <4 x i32> %a to <16 x i8> + %1 = shufflevector <16 x i8> %0, <16 x i8> undef, <16 x i32> <i32 12, i32 13, i32 14, i32 15, i32 12, i32 13, i32 14, i32 15, i32 12, i32 13, i32 14, i32 15, i32 12, i32 13, i32 14, i32 15> + %2 = bitcast <16 x i8> %1 to <4 x i32> + ret <4 x i32> %2 +; CHECK-LABEL: test3ui +; CHECK: xxspltw 34, 34, 0 +; CHECK-BE-LABEL: test3ui +; CHECK-BE: xxspltw 34, 34, 3 +} |