diff options
author | Nemanja Ivanovic <nemanja.i.ibm@gmail.com> | 2016-09-22 09:52:19 +0000 |
---|---|---|
committer | Nemanja Ivanovic <nemanja.i.ibm@gmail.com> | 2016-09-22 09:52:19 +0000 |
commit | 6e7879c5e6e26c2c6d00618f12af20012eecff41 (patch) | |
tree | e75271eca7c8a4f68a884aad96748b64ab021bb5 /llvm/lib/Target/PowerPC | |
parent | 2ce2ab3a4dc1df6ec87a42aa2baa6741c2456e0c (diff) | |
download | bcm5719-llvm-6e7879c5e6e26c2c6d00618f12af20012eecff41.tar.gz bcm5719-llvm-6e7879c5e6e26c2c6d00618f12af20012eecff41.zip |
[Power9] Add exploitation of non-permuting memory ops
This patch corresponds to review:
https://reviews.llvm.org/D19825
The new lxvx/stxvx instructions do not require the swaps to line the elements
up correctly. In order to select them over the lxvd2x/lxvw4x instructions which
require swaps, the patterns for the old instructions have a predicate that
ensures they won't be selected on Power9 and newer CPUs.
llvm-svn: 282143
Diffstat (limited to 'llvm/lib/Target/PowerPC')
-rw-r--r-- | llvm/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp | 13 | ||||
-rw-r--r-- | llvm/lib/Target/PowerPC/PPCISelLowering.cpp | 12 | ||||
-rw-r--r-- | llvm/lib/Target/PowerPC/PPCInstrInfo.cpp | 8 | ||||
-rw-r--r-- | llvm/lib/Target/PowerPC/PPCInstrVSX.td | 53 | ||||
-rw-r--r-- | llvm/lib/Target/PowerPC/PPCSubtarget.h | 3 |
5 files changed, 68 insertions, 21 deletions
diff --git a/llvm/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp b/llvm/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp index 61ad5705329..8190f31004e 100644 --- a/llvm/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp +++ b/llvm/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp @@ -33,6 +33,11 @@ static cl::opt<bool> FullRegNames("ppc-asm-full-reg-names", cl::Hidden, cl::init(false), cl::desc("Use full register names when printing assembly")); +// Useful for testing purposes. Prints vs{32-63} as v{0-31} respectively. +static cl::opt<bool> +ShowVSRNumsAsVR("ppc-vsr-nums-as-vr", cl::Hidden, cl::init(false), + cl::desc("Prints full register names with vs{32-63} as v{0-31}")); + #define PRINT_ALIAS_INSTR #include "PPCGenAsmWriter.inc" @@ -462,6 +467,14 @@ void PPCInstPrinter::printOperand(const MCInst *MI, unsigned OpNo, const MCOperand &Op = MI->getOperand(OpNo); if (Op.isReg()) { const char *RegName = getRegisterName(Op.getReg()); + if (ShowVSRNumsAsVR) { + unsigned RegNum = Op.getReg(); + if (RegNum >= PPC::VSH0 && RegNum <= PPC::VSH31) + O << 'v' << RegNum - PPC::VSH0; + else + O << RegName; + return; + } // The linux and AIX assembler does not take register prefixes. if (!isDarwinSyntax()) RegName = stripRegisterPrefix(RegName); diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp index cd75474a76a..1d9181b95d1 100644 --- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -10734,10 +10734,11 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N, } // For little endian, VSX stores require generating xxswapd/lxvd2x. + // Not needed on ISA 3.0 based CPUs since we have a non-permuting store. 
EVT VT = N->getOperand(1).getValueType(); if (VT.isSimple()) { MVT StoreVT = VT.getSimpleVT(); - if (Subtarget.hasVSX() && Subtarget.isLittleEndian() && + if (Subtarget.needsSwapsForVSXMemOps() && (StoreVT == MVT::v2f64 || StoreVT == MVT::v2i64 || StoreVT == MVT::v4f32 || StoreVT == MVT::v4i32)) return expandVSXStoreForLE(N, DCI); @@ -10749,9 +10750,10 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N, EVT VT = LD->getValueType(0); // For little endian, VSX loads require generating lxvd2x/xxswapd. + // Not needed on ISA 3.0 based CPUs since we have a non-permuting load. if (VT.isSimple()) { MVT LoadVT = VT.getSimpleVT(); - if (Subtarget.hasVSX() && Subtarget.isLittleEndian() && + if (Subtarget.needsSwapsForVSXMemOps() && (LoadVT == MVT::v2f64 || LoadVT == MVT::v2i64 || LoadVT == MVT::v4f32 || LoadVT == MVT::v4i32)) return expandVSXLoadForLE(N, DCI); @@ -11066,7 +11068,8 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N, break; case ISD::INTRINSIC_W_CHAIN: { // For little endian, VSX loads require generating lxvd2x/xxswapd. - if (Subtarget.hasVSX() && Subtarget.isLittleEndian()) { + // Not needed on ISA 3.0 based CPUs since we have a non-permuting load. + if (Subtarget.needsSwapsForVSXMemOps()) { switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) { default: break; @@ -11079,7 +11082,8 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N, } case ISD::INTRINSIC_VOID: { // For little endian, VSX stores require generating xxswapd/stxvd2x. - if (Subtarget.hasVSX() && Subtarget.isLittleEndian()) { + // Not needed on ISA 3.0 based CPUs since we have a non-permuting store. 
+ if (Subtarget.needsSwapsForVSXMemOps()) { switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) { default: break; diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp index 8cbd71ec048..88915a54403 100644 --- a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp +++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp @@ -273,6 +273,7 @@ unsigned PPCInstrInfo::isLoadFromStackSlot(const MachineInstr &MI, case PPC::RESTORE_CRBIT: case PPC::LVX: case PPC::LXVD2X: + case PPC::LXVX: case PPC::QVLFDX: case PPC::QVLFSXs: case PPC::QVLFDXb: @@ -302,6 +303,7 @@ unsigned PPCInstrInfo::isStoreToStackSlot(const MachineInstr &MI, case PPC::SPILL_CRBIT: case PPC::STVX: case PPC::STXVD2X: + case PPC::STXVX: case PPC::QVSTFDX: case PPC::QVSTFSXs: case PPC::QVSTFDXb: @@ -1008,7 +1010,8 @@ PPCInstrInfo::StoreRegToStackSlot(MachineFunction &MF, FrameIdx)); NonRI = true; } else if (PPC::VSRCRegClass.hasSubClassEq(RC)) { - NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::STXVD2X)) + unsigned Op = Subtarget.hasP9Vector() ? PPC::STXVX : PPC::STXVD2X; + NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(Op)) .addReg(SrcReg, getKillRegState(isKill)), FrameIdx)); @@ -1129,7 +1132,8 @@ bool PPCInstrInfo::LoadRegFromStackSlot(MachineFunction &MF, const DebugLoc &DL, FrameIdx)); NonRI = true; } else if (PPC::VSRCRegClass.hasSubClassEq(RC)) { - NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LXVD2X), DestReg), + unsigned Op = Subtarget.hasP9Vector() ? 
PPC::LXVX : PPC::LXVD2X; + NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(Op), DestReg), FrameIdx)); NonRI = true; } else if (PPC::VSFRCRegClass.hasSubClassEq(RC)) { diff --git a/llvm/lib/Target/PowerPC/PPCInstrVSX.td b/llvm/lib/Target/PowerPC/PPCInstrVSX.td index e229abd5a7e..111d8fc379b 100644 --- a/llvm/lib/Target/PowerPC/PPCInstrVSX.td +++ b/llvm/lib/Target/PowerPC/PPCInstrVSX.td @@ -92,6 +92,7 @@ multiclass XX3Form_Rcr<bits<6> opcode, bits<7> xo, string asmbase, def HasVSX : Predicate<"PPCSubTarget->hasVSX()">; def IsLittleEndian : Predicate<"PPCSubTarget->isLittleEndian()">; def IsBigEndian : Predicate<"!PPCSubTarget->isLittleEndian()">; +def HasOnlySwappingMemOps : Predicate<"!PPCSubTarget->hasP9Vector()">; let Predicates = [HasVSX] in { let AddedComplexity = 400 in { // Prefer VSX patterns over non-VSX patterns. @@ -105,6 +106,7 @@ let Uses = [RM] in { "lxsdx $XT, $src", IIC_LdStLFD, [(set f64:$XT, (load xoaddr:$src))]>; + let Predicates = [HasVSX, HasOnlySwappingMemOps] in def LXVD2X : XX1Form<31, 844, (outs vsrc:$XT), (ins memrr:$src), "lxvd2x $XT, $src", IIC_LdStLFD, @@ -114,6 +116,7 @@ let Uses = [RM] in { (outs vsrc:$XT), (ins memrr:$src), "lxvdsx $XT, $src", IIC_LdStLFD, []>; + let Predicates = [HasVSX, HasOnlySwappingMemOps] in def LXVW4X : XX1Form<31, 780, (outs vsrc:$XT), (ins memrr:$src), "lxvw4x $XT, $src", IIC_LdStLFD, @@ -127,6 +130,7 @@ let Uses = [RM] in { "stxsdx $XT, $dst", IIC_LdStSTFD, [(store f64:$XT, xoaddr:$dst)]>; + let Predicates = [HasVSX, HasOnlySwappingMemOps] in { def STXVD2X : XX1Form<31, 972, (outs), (ins vsrc:$XT, memrr:$dst), "stxvd2x $XT, $dst", IIC_LdStSTFD, @@ -136,7 +140,7 @@ let Uses = [RM] in { (outs), (ins vsrc:$XT, memrr:$dst), "stxvw4x $XT, $dst", IIC_LdStSTFD, [(store v4i32:$XT, xoaddr:$dst)]>; - + } } // mayStore // Add/Mul Instructions @@ -948,18 +952,20 @@ def : Pat<(v2f64 (PPCuvec2fp v4i32:$C, 1)), (v2f64 (XVCVUXWDP (v2i64 (XXMRGLW $C, $C))))>; // Loads. 
-def : Pat<(v2f64 (load xoaddr:$src)), (LXVD2X xoaddr:$src)>; -def : Pat<(v2i64 (load xoaddr:$src)), (LXVD2X xoaddr:$src)>; -def : Pat<(v4i32 (load xoaddr:$src)), (LXVW4X xoaddr:$src)>; -def : Pat<(v2f64 (PPClxvd2x xoaddr:$src)), (LXVD2X xoaddr:$src)>; - -// Stores. -def : Pat<(int_ppc_vsx_stxvd2x v2f64:$rS, xoaddr:$dst), - (STXVD2X $rS, xoaddr:$dst)>; -def : Pat<(store v2i64:$rS, xoaddr:$dst), (STXVD2X $rS, xoaddr:$dst)>; -def : Pat<(int_ppc_vsx_stxvw4x v4i32:$rS, xoaddr:$dst), - (STXVW4X $rS, xoaddr:$dst)>; -def : Pat<(PPCstxvd2x v2f64:$rS, xoaddr:$dst), (STXVD2X $rS, xoaddr:$dst)>; +let Predicates = [HasVSX, HasOnlySwappingMemOps] in { + def : Pat<(v2f64 (load xoaddr:$src)), (LXVD2X xoaddr:$src)>; + def : Pat<(v2i64 (load xoaddr:$src)), (LXVD2X xoaddr:$src)>; + def : Pat<(v4i32 (load xoaddr:$src)), (LXVW4X xoaddr:$src)>; + def : Pat<(v2f64 (PPClxvd2x xoaddr:$src)), (LXVD2X xoaddr:$src)>; + + // Stores. + def : Pat<(int_ppc_vsx_stxvd2x v2f64:$rS, xoaddr:$dst), + (STXVD2X $rS, xoaddr:$dst)>; + def : Pat<(store v2i64:$rS, xoaddr:$dst), (STXVD2X $rS, xoaddr:$dst)>; + def : Pat<(int_ppc_vsx_stxvw4x v4i32:$rS, xoaddr:$dst), + (STXVW4X $rS, xoaddr:$dst)>; + def : Pat<(PPCstxvd2x v2f64:$rS, xoaddr:$dst), (STXVD2X $rS, xoaddr:$dst)>; +} // Permutes. 
def : Pat<(v2f64 (PPCxxswapd v2f64:$src)), (XXPERMDI $src, $src, 2)>; @@ -2185,7 +2191,8 @@ let AddedComplexity = 400, Predicates = [HasP9Vector] in { def LXVB16X : X_XT6_RA5_RB5<31, 876, "lxvb16x", vsrc, []>; // Load Vector Indexed - def LXVX : X_XT6_RA5_RB5<31, 268, "lxvx" , vsrc, []>; + def LXVX : X_XT6_RA5_RB5<31, 268, "lxvx" , vsrc, + [(set v2f64:$XT, (load xoaddr:$src))]>; // Load Vector (Left-justified) with Length def LXVL : X_XT6_RA5_RB5<31, 269, "lxvl" , vsrc, []>; @@ -2221,7 +2228,8 @@ let AddedComplexity = 400, Predicates = [HasP9Vector] in { def STXVB16X : X_XS6_RA5_RB5<31, 1004, "stxvb16x", vsrc, []>; // Store Vector Indexed - def STXVX : X_XS6_RA5_RB5<31, 396, "stxvx" , vsrc, []>; + def STXVX : X_XS6_RA5_RB5<31, 396, "stxvx" , vsrc, + [(store v2f64:$XT, xoaddr:$dst)]>; // Store Vector (Left-justified) with Length def STXVL : X_XS6_RA5_RB5<31, 397, "stxvl" , vsrc, []>; @@ -2282,4 +2290,19 @@ let AddedComplexity = 400, Predicates = [HasP9Vector] in { def : Pat<(v4f32 (insertelt v4f32:$A, f32:$B, 3)), (v4f32 (XXINSERTW v4f32:$A, AlignValues.F32_TO_BE_WORD1, 12))>; } // IsLittleEndian, HasP9Vector + + def : Pat<(v2f64 (load xoaddr:$src)), (LXVX xoaddr:$src)>; + def : Pat<(v2i64 (load xoaddr:$src)), (LXVX xoaddr:$src)>; + def : Pat<(v4f32 (load xoaddr:$src)), (LXVX xoaddr:$src)>; + def : Pat<(v4i32 (load xoaddr:$src)), (LXVX xoaddr:$src)>; + def : Pat<(v4i32 (int_ppc_vsx_lxvw4x xoaddr:$src)), (LXVX xoaddr:$src)>; + def : Pat<(v2f64 (int_ppc_vsx_lxvd2x xoaddr:$src)), (LXVX xoaddr:$src)>; + def : Pat<(store v2f64:$rS, xoaddr:$dst), (STXVX $rS, xoaddr:$dst)>; + def : Pat<(store v2i64:$rS, xoaddr:$dst), (STXVX $rS, xoaddr:$dst)>; + def : Pat<(store v4f32:$rS, xoaddr:$dst), (STXVX $rS, xoaddr:$dst)>; + def : Pat<(store v4i32:$rS, xoaddr:$dst), (STXVX $rS, xoaddr:$dst)>; + def : Pat<(int_ppc_vsx_stxvw4x v4i32:$rS, xoaddr:$dst), + (STXVX $rS, xoaddr:$dst)>; + def : Pat<(int_ppc_vsx_stxvd2x v2f64:$rS, xoaddr:$dst), + (STXVX $rS, xoaddr:$dst)>; } // end 
HasP9Vector, AddedComplexity diff --git a/llvm/lib/Target/PowerPC/PPCSubtarget.h b/llvm/lib/Target/PowerPC/PPCSubtarget.h index f58c7c10c8b..d80a9ad8d34 100644 --- a/llvm/lib/Target/PowerPC/PPCSubtarget.h +++ b/llvm/lib/Target/PowerPC/PPCSubtarget.h @@ -277,6 +277,9 @@ public: bool hasFloat128() const { return HasFloat128; } bool isISA3_0() const { return IsISA3_0; } bool useLongCalls() const { return UseLongCalls; } + bool needsSwapsForVSXMemOps() const { + return hasVSX() && isLittleEndian() && !hasP9Vector(); + } POPCNTDKind hasPOPCNTD() const { return HasPOPCNTD; } |