summaryrefslogtreecommitdiffstats
path: root/llvm/lib
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib')
-rw-r--r--llvm/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp13
-rw-r--r--llvm/lib/Target/PowerPC/PPCISelLowering.cpp12
-rw-r--r--llvm/lib/Target/PowerPC/PPCInstrInfo.cpp8
-rw-r--r--llvm/lib/Target/PowerPC/PPCInstrVSX.td53
-rw-r--r--llvm/lib/Target/PowerPC/PPCSubtarget.h3
5 files changed, 68 insertions, 21 deletions
diff --git a/llvm/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp b/llvm/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp
index 61ad5705329..8190f31004e 100644
--- a/llvm/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp
+++ b/llvm/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp
@@ -33,6 +33,11 @@ static cl::opt<bool>
FullRegNames("ppc-asm-full-reg-names", cl::Hidden, cl::init(false),
cl::desc("Use full register names when printing assembly"));
+// Useful for testing purposes. Prints vs{31-63} as v{0-31} respectively.
+static cl::opt<bool>
+ShowVSRNumsAsVR("ppc-vsr-nums-as-vr", cl::Hidden, cl::init(false),
+ cl::desc("Prints full register names with vs{31-63} as v{0-31}"));
+
#define PRINT_ALIAS_INSTR
#include "PPCGenAsmWriter.inc"
@@ -462,6 +467,14 @@ void PPCInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
const MCOperand &Op = MI->getOperand(OpNo);
if (Op.isReg()) {
const char *RegName = getRegisterName(Op.getReg());
+ if (ShowVSRNumsAsVR) {
+ unsigned RegNum = Op.getReg();
+ if (RegNum >= PPC::VSH0 && RegNum <= PPC::VSH31)
+ O << 'v' << RegNum - PPC::VSH0;
+ else
+ O << RegName;
+ return;
+ }
// The linux and AIX assembler does not take register prefixes.
if (!isDarwinSyntax())
RegName = stripRegisterPrefix(RegName);
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index cd75474a76a..1d9181b95d1 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -10734,10 +10734,11 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
}
// For little endian, VSX stores require generating xxswapd/lxvd2x.
+ // Not needed on ISA 3.0 based CPUs since we have a non-permuting store.
EVT VT = N->getOperand(1).getValueType();
if (VT.isSimple()) {
MVT StoreVT = VT.getSimpleVT();
- if (Subtarget.hasVSX() && Subtarget.isLittleEndian() &&
+ if (Subtarget.needsSwapsForVSXMemOps() &&
(StoreVT == MVT::v2f64 || StoreVT == MVT::v2i64 ||
StoreVT == MVT::v4f32 || StoreVT == MVT::v4i32))
return expandVSXStoreForLE(N, DCI);
@@ -10749,9 +10750,10 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
EVT VT = LD->getValueType(0);
// For little endian, VSX loads require generating lxvd2x/xxswapd.
+ // Not needed on ISA 3.0 based CPUs since we have a non-permuting load.
if (VT.isSimple()) {
MVT LoadVT = VT.getSimpleVT();
- if (Subtarget.hasVSX() && Subtarget.isLittleEndian() &&
+ if (Subtarget.needsSwapsForVSXMemOps() &&
(LoadVT == MVT::v2f64 || LoadVT == MVT::v2i64 ||
LoadVT == MVT::v4f32 || LoadVT == MVT::v4i32))
return expandVSXLoadForLE(N, DCI);
@@ -11066,7 +11068,8 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
break;
case ISD::INTRINSIC_W_CHAIN: {
// For little endian, VSX loads require generating lxvd2x/xxswapd.
- if (Subtarget.hasVSX() && Subtarget.isLittleEndian()) {
+ // Not needed on ISA 3.0 based CPUs since we have a non-permuting load.
+ if (Subtarget.needsSwapsForVSXMemOps()) {
switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) {
default:
break;
@@ -11079,7 +11082,8 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
}
case ISD::INTRINSIC_VOID: {
// For little endian, VSX stores require generating xxswapd/stxvd2x.
- if (Subtarget.hasVSX() && Subtarget.isLittleEndian()) {
+ // Not needed on ISA 3.0 based CPUs since we have a non-permuting store.
+ if (Subtarget.needsSwapsForVSXMemOps()) {
switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) {
default:
break;
diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
index 8cbd71ec048..88915a54403 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
+++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
@@ -273,6 +273,7 @@ unsigned PPCInstrInfo::isLoadFromStackSlot(const MachineInstr &MI,
case PPC::RESTORE_CRBIT:
case PPC::LVX:
case PPC::LXVD2X:
+ case PPC::LXVX:
case PPC::QVLFDX:
case PPC::QVLFSXs:
case PPC::QVLFDXb:
@@ -302,6 +303,7 @@ unsigned PPCInstrInfo::isStoreToStackSlot(const MachineInstr &MI,
case PPC::SPILL_CRBIT:
case PPC::STVX:
case PPC::STXVD2X:
+ case PPC::STXVX:
case PPC::QVSTFDX:
case PPC::QVSTFSXs:
case PPC::QVSTFDXb:
@@ -1008,7 +1010,8 @@ PPCInstrInfo::StoreRegToStackSlot(MachineFunction &MF,
FrameIdx));
NonRI = true;
} else if (PPC::VSRCRegClass.hasSubClassEq(RC)) {
- NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::STXVD2X))
+ unsigned Op = Subtarget.hasP9Vector() ? PPC::STXVX : PPC::STXVD2X;
+ NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(Op))
.addReg(SrcReg,
getKillRegState(isKill)),
FrameIdx));
@@ -1129,7 +1132,8 @@ bool PPCInstrInfo::LoadRegFromStackSlot(MachineFunction &MF, const DebugLoc &DL,
FrameIdx));
NonRI = true;
} else if (PPC::VSRCRegClass.hasSubClassEq(RC)) {
- NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LXVD2X), DestReg),
+ unsigned Op = Subtarget.hasP9Vector() ? PPC::LXVX : PPC::LXVD2X;
+ NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(Op), DestReg),
FrameIdx));
NonRI = true;
} else if (PPC::VSFRCRegClass.hasSubClassEq(RC)) {
diff --git a/llvm/lib/Target/PowerPC/PPCInstrVSX.td b/llvm/lib/Target/PowerPC/PPCInstrVSX.td
index e229abd5a7e..111d8fc379b 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrVSX.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrVSX.td
@@ -92,6 +92,7 @@ multiclass XX3Form_Rcr<bits<6> opcode, bits<7> xo, string asmbase,
def HasVSX : Predicate<"PPCSubTarget->hasVSX()">;
def IsLittleEndian : Predicate<"PPCSubTarget->isLittleEndian()">;
def IsBigEndian : Predicate<"!PPCSubTarget->isLittleEndian()">;
+def HasOnlySwappingMemOps : Predicate<"!PPCSubTarget->hasP9Vector()">;
let Predicates = [HasVSX] in {
let AddedComplexity = 400 in { // Prefer VSX patterns over non-VSX patterns.
@@ -105,6 +106,7 @@ let Uses = [RM] in {
"lxsdx $XT, $src", IIC_LdStLFD,
[(set f64:$XT, (load xoaddr:$src))]>;
+ let Predicates = [HasVSX, HasOnlySwappingMemOps] in
def LXVD2X : XX1Form<31, 844,
(outs vsrc:$XT), (ins memrr:$src),
"lxvd2x $XT, $src", IIC_LdStLFD,
@@ -114,6 +116,7 @@ let Uses = [RM] in {
(outs vsrc:$XT), (ins memrr:$src),
"lxvdsx $XT, $src", IIC_LdStLFD, []>;
+ let Predicates = [HasVSX, HasOnlySwappingMemOps] in
def LXVW4X : XX1Form<31, 780,
(outs vsrc:$XT), (ins memrr:$src),
"lxvw4x $XT, $src", IIC_LdStLFD,
@@ -127,6 +130,7 @@ let Uses = [RM] in {
"stxsdx $XT, $dst", IIC_LdStSTFD,
[(store f64:$XT, xoaddr:$dst)]>;
+ let Predicates = [HasVSX, HasOnlySwappingMemOps] in {
def STXVD2X : XX1Form<31, 972,
(outs), (ins vsrc:$XT, memrr:$dst),
"stxvd2x $XT, $dst", IIC_LdStSTFD,
@@ -136,7 +140,7 @@ let Uses = [RM] in {
(outs), (ins vsrc:$XT, memrr:$dst),
"stxvw4x $XT, $dst", IIC_LdStSTFD,
[(store v4i32:$XT, xoaddr:$dst)]>;
-
+ }
} // mayStore
// Add/Mul Instructions
@@ -948,18 +952,20 @@ def : Pat<(v2f64 (PPCuvec2fp v4i32:$C, 1)),
(v2f64 (XVCVUXWDP (v2i64 (XXMRGLW $C, $C))))>;
// Loads.
-def : Pat<(v2f64 (load xoaddr:$src)), (LXVD2X xoaddr:$src)>;
-def : Pat<(v2i64 (load xoaddr:$src)), (LXVD2X xoaddr:$src)>;
-def : Pat<(v4i32 (load xoaddr:$src)), (LXVW4X xoaddr:$src)>;
-def : Pat<(v2f64 (PPClxvd2x xoaddr:$src)), (LXVD2X xoaddr:$src)>;
-
-// Stores.
-def : Pat<(int_ppc_vsx_stxvd2x v2f64:$rS, xoaddr:$dst),
- (STXVD2X $rS, xoaddr:$dst)>;
-def : Pat<(store v2i64:$rS, xoaddr:$dst), (STXVD2X $rS, xoaddr:$dst)>;
-def : Pat<(int_ppc_vsx_stxvw4x v4i32:$rS, xoaddr:$dst),
- (STXVW4X $rS, xoaddr:$dst)>;
-def : Pat<(PPCstxvd2x v2f64:$rS, xoaddr:$dst), (STXVD2X $rS, xoaddr:$dst)>;
+let Predicates = [HasVSX, HasOnlySwappingMemOps] in {
+ def : Pat<(v2f64 (load xoaddr:$src)), (LXVD2X xoaddr:$src)>;
+ def : Pat<(v2i64 (load xoaddr:$src)), (LXVD2X xoaddr:$src)>;
+ def : Pat<(v4i32 (load xoaddr:$src)), (LXVW4X xoaddr:$src)>;
+ def : Pat<(v2f64 (PPClxvd2x xoaddr:$src)), (LXVD2X xoaddr:$src)>;
+
+ // Stores.
+ def : Pat<(int_ppc_vsx_stxvd2x v2f64:$rS, xoaddr:$dst),
+ (STXVD2X $rS, xoaddr:$dst)>;
+ def : Pat<(store v2i64:$rS, xoaddr:$dst), (STXVD2X $rS, xoaddr:$dst)>;
+ def : Pat<(int_ppc_vsx_stxvw4x v4i32:$rS, xoaddr:$dst),
+ (STXVW4X $rS, xoaddr:$dst)>;
+ def : Pat<(PPCstxvd2x v2f64:$rS, xoaddr:$dst), (STXVD2X $rS, xoaddr:$dst)>;
+}
// Permutes.
def : Pat<(v2f64 (PPCxxswapd v2f64:$src)), (XXPERMDI $src, $src, 2)>;
@@ -2185,7 +2191,8 @@ let AddedComplexity = 400, Predicates = [HasP9Vector] in {
def LXVB16X : X_XT6_RA5_RB5<31, 876, "lxvb16x", vsrc, []>;
// Load Vector Indexed
- def LXVX : X_XT6_RA5_RB5<31, 268, "lxvx" , vsrc, []>;
+ def LXVX : X_XT6_RA5_RB5<31, 268, "lxvx" , vsrc,
+ [(set v2f64:$XT, (load xoaddr:$src))]>;
// Load Vector (Left-justified) with Length
def LXVL : X_XT6_RA5_RB5<31, 269, "lxvl" , vsrc, []>;
@@ -2221,7 +2228,8 @@ let AddedComplexity = 400, Predicates = [HasP9Vector] in {
def STXVB16X : X_XS6_RA5_RB5<31, 1004, "stxvb16x", vsrc, []>;
// Store Vector Indexed
- def STXVX : X_XS6_RA5_RB5<31, 396, "stxvx" , vsrc, []>;
+ def STXVX : X_XS6_RA5_RB5<31, 396, "stxvx" , vsrc,
+ [(store v2f64:$XT, xoaddr:$dst)]>;
// Store Vector (Left-justified) with Length
def STXVL : X_XS6_RA5_RB5<31, 397, "stxvl" , vsrc, []>;
@@ -2282,4 +2290,19 @@ let AddedComplexity = 400, Predicates = [HasP9Vector] in {
def : Pat<(v4f32 (insertelt v4f32:$A, f32:$B, 3)),
(v4f32 (XXINSERTW v4f32:$A, AlignValues.F32_TO_BE_WORD1, 12))>;
} // IsLittleEndian, HasP9Vector
+
+ def : Pat<(v2f64 (load xoaddr:$src)), (LXVX xoaddr:$src)>;
+ def : Pat<(v2i64 (load xoaddr:$src)), (LXVX xoaddr:$src)>;
+ def : Pat<(v4f32 (load xoaddr:$src)), (LXVX xoaddr:$src)>;
+ def : Pat<(v4i32 (load xoaddr:$src)), (LXVX xoaddr:$src)>;
+ def : Pat<(v4i32 (int_ppc_vsx_lxvw4x xoaddr:$src)), (LXVX xoaddr:$src)>;
+ def : Pat<(v2f64 (int_ppc_vsx_lxvd2x xoaddr:$src)), (LXVX xoaddr:$src)>;
+ def : Pat<(store v2f64:$rS, xoaddr:$dst), (STXVX $rS, xoaddr:$dst)>;
+ def : Pat<(store v2i64:$rS, xoaddr:$dst), (STXVX $rS, xoaddr:$dst)>;
+ def : Pat<(store v4f32:$rS, xoaddr:$dst), (STXVX $rS, xoaddr:$dst)>;
+ def : Pat<(store v4i32:$rS, xoaddr:$dst), (STXVX $rS, xoaddr:$dst)>;
+ def : Pat<(int_ppc_vsx_stxvw4x v4i32:$rS, xoaddr:$dst),
+ (STXVX $rS, xoaddr:$dst)>;
+ def : Pat<(int_ppc_vsx_stxvd2x v2f64:$rS, xoaddr:$dst),
+ (STXVX $rS, xoaddr:$dst)>;
} // end HasP9Vector, AddedComplexity
diff --git a/llvm/lib/Target/PowerPC/PPCSubtarget.h b/llvm/lib/Target/PowerPC/PPCSubtarget.h
index f58c7c10c8b..d80a9ad8d34 100644
--- a/llvm/lib/Target/PowerPC/PPCSubtarget.h
+++ b/llvm/lib/Target/PowerPC/PPCSubtarget.h
@@ -277,6 +277,9 @@ public:
bool hasFloat128() const { return HasFloat128; }
bool isISA3_0() const { return IsISA3_0; }
bool useLongCalls() const { return UseLongCalls; }
+ bool needsSwapsForVSXMemOps() const {
+ return hasVSX() && isLittleEndian() && !hasP9Vector();
+ }
POPCNTDKind hasPOPCNTD() const { return HasPOPCNTD; }
OpenPOWER on IntegriCloud