diff options
| author | Ehsan Amiri <amehsan@ca.ibm.com> | 2016-10-24 17:31:09 +0000 |
|---|---|---|
| committer | Ehsan Amiri <amehsan@ca.ibm.com> | 2016-10-24 17:31:09 +0000 |
| commit | c90b02cf5035819577fa7defa08902cf63a8b749 (patch) | |
| tree | f8f23b18362f4afe9ec0a7eae6f780ae8111a89c | |
| parent | 47f2616b6a491baa600ae2a1cdca43276be2bf2f (diff) | |
| download | bcm5719-llvm-c90b02cf5035819577fa7defa08902cf63a8b749.tar.gz bcm5719-llvm-c90b02cf5035819577fa7defa08902cf63a8b749.zip | |
[PPC] Generate positive FP zero using xor insn instead of loading from constant area
https://reviews.llvm.org/D23614
Currently, +0.0 is loaded from the constant area. It can instead be generated using
an XOR instruction (xxlxor).
llvm-svn: 284995
| -rw-r--r-- | llvm/lib/Target/PowerPC/PPCISelLowering.cpp | 21 | ||||
| -rw-r--r-- | llvm/lib/Target/PowerPC/PPCISelLowering.h | 1 | ||||
| -rw-r--r-- | llvm/lib/Target/PowerPC/PPCInstrFormats.td | 7 | ||||
| -rw-r--r-- | llvm/lib/Target/PowerPC/PPCInstrInfo.td | 3 | ||||
| -rw-r--r-- | llvm/lib/Target/PowerPC/PPCInstrVSX.td | 11 | ||||
| -rw-r--r-- | llvm/pzero-fp-xored.ll | 71 | ||||
| -rw-r--r-- | llvm/test/CodeGen/PowerPC/crbits.ll | 6 | ||||
| -rw-r--r-- | llvm/test/CodeGen/PowerPC/fast-isel-fcmp-nan.ll | 12 | ||||
| -rw-r--r-- | llvm/test/CodeGen/PowerPC/tail-dup-analyzable-fallthrough.ll | 2 |
9 files changed, 124 insertions, 10 deletions
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp index 55e299cdc9d..d54c76e52c0 100644 --- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -562,6 +562,10 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM, setOperationAction(ISD::BUILD_VECTOR, MVT::v8i16, Custom); setOperationAction(ISD::BUILD_VECTOR, MVT::v4i32, Custom); setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom); + if (Subtarget.hasP8Altivec()) + setOperationAction(ISD::BUILD_VECTOR, MVT::v2i64, Custom); + if (Subtarget.hasVSX()) + setOperationAction(ISD::BUILD_VECTOR, MVT::v2f64, Custom); // Altivec does not contain unordered floating-point compare instructions setCondCodeAction(ISD::SETUO, MVT::v4f32, Expand); @@ -12367,3 +12371,20 @@ void PPCTargetLowering::insertSSPDeclarations(Module &M) const { if (!Subtarget.isTargetLinux()) return TargetLowering::insertSSPDeclarations(M); } + +bool PPCTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const { + + if (!VT.isSimple() || !Subtarget.hasVSX()) + return false; + + switch(VT.getSimpleVT().SimpleTy) { + default: + // For FP types that are currently not supported by PPC backend, return + // false. Examples: f16, f80. 
+ return false; + case MVT::f32: + case MVT::f64: + case MVT::ppcf128: + return Imm.isPosZero(); + } +} diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.h b/llvm/lib/Target/PowerPC/PPCISelLowering.h index 3d4ec27c5a6..2944e99db01 100644 --- a/llvm/lib/Target/PowerPC/PPCISelLowering.h +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.h @@ -769,6 +769,7 @@ namespace llvm { bool useLoadStackGuardNode() const override; void insertSSPDeclarations(Module &M) const override; + bool isFPImmLegal(const APFloat &Imm, EVT VT) const override; private: struct ReuseLoadInfo { SDValue Ptr; diff --git a/llvm/lib/Target/PowerPC/PPCInstrFormats.td b/llvm/lib/Target/PowerPC/PPCInstrFormats.td index 6f100031f14..99689f656c2 100644 --- a/llvm/lib/Target/PowerPC/PPCInstrFormats.td +++ b/llvm/lib/Target/PowerPC/PPCInstrFormats.td @@ -1076,6 +1076,13 @@ class XX3Form_Zero<bits<6> opcode, bits<8> xo, dag OOL, dag IOL, string asmstr, let XB = XT; } +class XX3Form_SetZero<bits<6> opcode, bits<8> xo, dag OOL, dag IOL, string asmstr, + InstrItinClass itin, list<dag> pattern> + : XX3Form<opcode, xo, OOL, IOL, asmstr, itin, pattern> { + let XB = XT; + let XA = XT; +} + class XX3Form_1<bits<6> opcode, bits<8> xo, dag OOL, dag IOL, string asmstr, InstrItinClass itin, list<dag> pattern> : I<opcode, OOL, IOL, asmstr, itin> { diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.td b/llvm/lib/Target/PowerPC/PPCInstrInfo.td index 07d23799465..5f10e0ccd1c 100644 --- a/llvm/lib/Target/PowerPC/PPCInstrInfo.td +++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.td @@ -621,6 +621,9 @@ def s17imm : Operand<i32> { let ParserMatchClass = PPCS17ImmAsmOperand; let DecoderMethod = "decodeSImmOperand<16>"; } + +def fpimm0 : PatLeaf<(fpimm), [{ return N->isExactlyValue(+0.0); }]>; + def PPCDirectBrAsmOperand : AsmOperandClass { let Name = "DirectBr"; let PredicateMethod = "isDirectBr"; let RenderMethod = "addBranchTargetOperands"; diff --git a/llvm/lib/Target/PowerPC/PPCInstrVSX.td b/llvm/lib/Target/PowerPC/PPCInstrVSX.td 
index 5206d5747c4..24411cb684f 100644 --- a/llvm/lib/Target/PowerPC/PPCInstrVSX.td +++ b/llvm/lib/Target/PowerPC/PPCInstrVSX.td @@ -791,6 +791,17 @@ let Uses = [RM] in { "xxlxor $XT, $XT, $XT", IIC_VecGeneral, [(set v4i32:$XT, (v4i32 immAllZerosV))]>; + let isCodeGenOnly = 1 in { + def XXLXORdpz : XX3Form_SetZero<60, 154, + (outs vsfrc:$XT), (ins), + "xxlxor $XT, $XT, $XT", IIC_VecGeneral, + [(set f64:$XT, (fpimm0))]>; + def XXLXORspz : XX3Form_SetZero<60, 154, + (outs vssrc:$XT), (ins), + "xxlxor $XT, $XT, $XT", IIC_VecGeneral, + [(set f32:$XT, (fpimm0))]>; + } + // Permutation Instructions def XXMRGHW : XX3Form<60, 18, (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB), diff --git a/llvm/pzero-fp-xored.ll b/llvm/pzero-fp-xored.ll new file mode 100644 index 00000000000..6504f57a251 --- /dev/null +++ b/llvm/pzero-fp-xored.ll @@ -0,0 +1,71 @@ +; RUN: llc -mtriple=powerpc-unknown-linux-gnu -mattr=+vsx < %s | \ +; RUN: FileCheck %s --implicit-check-not lxvd2x --implicit-check-not lfs +; RUN: llc -mtriple=powerpc-unknown-linux-gnu -mattr=-vsx -mattr=-p8altivec < %s | \ +; RUN: FileCheck %s --check-prefix=CHECK-NVSXP8A --implicit-check-not xxlxor \ +; RUN: --implicit-check-not vxor + +define signext i32 @t1(float %x) local_unnamed_addr #0 { +entry: + %cmp = fcmp ogt float %x, 0.000000e+00 + %tmp = select i1 %cmp, i32 43, i32 11 + ret i32 %tmp + +; CHECK-LABEL: t1: +; CHECK: xxlxor [[REG1:[0-9]+]], [[REG1]], [[REG1]] +; CHECK: fcmpu {{[0-9]+}}, {{[0-9]+}}, [[REG1]] +; CHECK: blr +; CHECK-NVSXP8A: lfs [[REG1:[0-9]+]] +; CHECK-NVSXP8A: fcmpu {{[0-9]+}}, {{[0-9]+}}, [[REG1]] +; CHECK-NVSXP8A: blr +} + +define signext i32 @t2(double %x) local_unnamed_addr #0 { +entry: + %cmp = fcmp ogt double %x, 0.000000e+00 + %tmp = select i1 %cmp, i32 43, i32 11 + ret i32 %tmp + +; CHECK-LABEL: t2: +; CHECK: xxlxor [[REG2:[0-9]+]], [[REG2]], [[REG2]] +; CHECK: xscmpudp {{[0-9]+}}, {{[0-9]+}}, [[REG2]] +; CHECK: blr +; CHECK-NVSXP8A: lfs [[REG2:[0-9]+]] +; CHECK-NVSXP8A: fcmpu {{[0-9]+}}, 
{{[0-9]+}}, [[REG2]] +; CHECK-NVSXP8A: blr +} + +define signext i32 @t3(ppc_fp128 %x) local_unnamed_addr #0 { +entry: + %cmp = fcmp ogt ppc_fp128 %x, 0xM00000000000000000000000000000000 + %tmp = select i1 %cmp, i32 43, i32 11 + ret i32 %tmp + +; CHECK-LABEL: t3: +; CHECK: xxlxor [[REG3:[0-9]+]], [[REG3]], [[REG3]] +; CHECK: fcmpu {{[0-9]+}}, {{[0-9]+}}, [[REG3]] +; CHECK: fcmpu {{[0-9]+}}, {{[0-9]+}}, [[REG3]] +; CHECK: blr +; CHECK-NVSXP8A: lfs [[REG3:[0-9]+]] +; CHECK-NVSXP8A: fcmpu {{[0-9]+}}, {{[0-9]+}}, [[REG3]] +; CHECK-NVSXP8A: blr +} + +define <2 x double> @t4() local_unnamed_addr #0 { + ret <2 x double> zeroinitializer +; CHECK-LABEL: t4: +; CHECK: vxor [[REG4:[0-9]+]], [[REG4]], [[REG4]] +; CHECK: blr +; CHECK-NVSXP8A: lfs [[REG4:[0-9]+]] +; CHECK-NVSXP8A: fmr {{[0-9]+}}, [[REG4:[0-9]+]] +; CHECK-NVSXP8A: blr +} + +define <2 x i64> @t5() local_unnamed_addr #0 { + ret <2 x i64> zeroinitializer +; CHECK-LABEL: t5: +; CHECK: vxor [[REG5:[0-9]+]], [[REG5]], [[REG5]] +; CHECK: blr +; CHECK-NVSXP8A: lvx +; CHECK-NVSXP8A: blr +} + diff --git a/llvm/test/CodeGen/PowerPC/crbits.ll b/llvm/test/CodeGen/PowerPC/crbits.ll index f2f3e6a2b8f..97f02ef31b3 100644 --- a/llvm/test/CodeGen/PowerPC/crbits.ll +++ b/llvm/test/CodeGen/PowerPC/crbits.ll @@ -13,7 +13,7 @@ entry: ; CHECK-LABEL: @test1 ; CHECK-DAG: fcmpu {{[0-9]+}}, 1, 2 ; CHECK-DAG: li [[REG1:[0-9]+]], 1 -; CHECK-DAG: lfs [[REG2:[0-9]+]], +; CHECK-DAG: xxlxor [[REG2:[0-9]+]], [[REG2]], [[REG2]] ; CHECK-DAG: fcmpu {{[0-9]+}}, 2, [[REG2]] ; CHECK: crnor ; CHECK: crnor @@ -33,7 +33,7 @@ entry: ; CHECK-LABEL: @test2 ; CHECK-DAG: fcmpu {{[0-9]+}}, 1, 2 ; CHECK-DAG: li [[REG1:[0-9]+]], 1 -; CHECK-DAG: lfs [[REG2:[0-9]+]], +; CHECK-DAG: xxlxor [[REG2:[0-9]+]], [[REG2]], [[REG2]] ; CHECK-DAG: fcmpu {{[0-9]+}}, 2, [[REG2]] ; CHECK: crnor ; CHECK: crnor @@ -55,7 +55,7 @@ entry: ; CHECK-LABEL: @test3 ; CHECK-DAG: fcmpu {{[0-9]+}}, 1, 2 ; CHECK-DAG: li [[REG1:[0-9]+]], 1 -; CHECK-DAG: lfs [[REG2:[0-9]+]], +; CHECK-DAG: xxlxor 
[[REG2:[0-9]+]], [[REG2]], [[REG2]] ; CHECK-DAG: fcmpu {{[0-9]+}}, 2, [[REG2]] ; CHECK: crnor ; CHECK: crnor diff --git a/llvm/test/CodeGen/PowerPC/fast-isel-fcmp-nan.ll b/llvm/test/CodeGen/PowerPC/fast-isel-fcmp-nan.ll index 4390b938aea..f060395bb24 100644 --- a/llvm/test/CodeGen/PowerPC/fast-isel-fcmp-nan.ll +++ b/llvm/test/CodeGen/PowerPC/fast-isel-fcmp-nan.ll @@ -2,7 +2,7 @@ define i1 @TestULT(double %t0) { ; CHECK-LABEL: TestULT: -; CHECK: mcrf +; CHECK: xscmpudp ; CHECK: blr entry: %t1 = fcmp ult double %t0, 0.000000e+00 @@ -49,7 +49,7 @@ good: define i1 @TestUEQ(double %t0) { ; CHECK-LABEL: TestUEQ: -; CHECK: mcrf +; CHECK: xscmpudp ; CHECK: blr entry: %t1 = fcmp ueq double %t0, 0.000000e+00 @@ -64,7 +64,7 @@ good: define i1 @TestUGT(double %t0) { ; CHECK-LABEL: TestUGT: -; CHECK: mcrf +; CHECK: xscmpudp ; CHECK: blr entry: %t1 = fcmp ugt double %t0, 0.000000e+00 @@ -111,7 +111,7 @@ good: define i1 @TestOLE(double %t0) { ; CHECK-LABEL: TestOLE: -; CHECK: mcrf +; CHECK: xscmpudp ; CHECK: blr entry: %t1 = fcmp ole double %t0, 0.000000e+00 @@ -126,7 +126,7 @@ good: define i1 @TestONE(double %t0) { ; CHECK-LABEL: TestONE: -; CHECK: mcrf +; CHECK: xscmpudp ; CHECK: blr entry: %t1 = fcmp one double %t0, 0.000000e+00 @@ -173,7 +173,7 @@ good: define i1 @TestOGE(double %t0) { ; CHECK-LABEL: TestOGE: -; CHECK: mcrf +; CHECK: xscmpudp ; CHECK: blr entry: %t1 = fcmp oge double %t0, 0.000000e+00 diff --git a/llvm/test/CodeGen/PowerPC/tail-dup-analyzable-fallthrough.ll b/llvm/test/CodeGen/PowerPC/tail-dup-analyzable-fallthrough.ll index 93d47df70db..960e6b61b5b 100644 --- a/llvm/test/CodeGen/PowerPC/tail-dup-analyzable-fallthrough.ll +++ b/llvm/test/CodeGen/PowerPC/tail-dup-analyzable-fallthrough.ll @@ -5,7 +5,7 @@ target triple = "powerpc64le-unknown-linux-gnu" ; Check that the conditional return block of fmax_double3.exit was not ; duplicated into the if.then.i block ; CHECK: # %if.then.i -; CHECK: lxvd2x +; CHECK: xxlxor ; CHECK: stxvd2x ; CHECK-NOT: bclr ; CHECK: 
{{^}}.LBB{{[0-9_]+}}: |

