 llvm/lib/Target/PowerPC/P9InstrResources.td           |  13
 llvm/lib/Target/PowerPC/PPCInstrInfo.cpp              | 107
 llvm/lib/Target/PowerPC/PPCInstrInfo.h                |   3
 llvm/lib/Target/PowerPC/PPCInstrVSX.td                |  79
 llvm/lib/Target/PowerPC/PPCVSXSwapRemoval.cpp         |   2
 llvm/test/CodeGen/PowerPC/VSX-XForm-Scalars.ll        |  45
 llvm/test/CodeGen/PowerPC/build-vector-tests.ll       |  72
 llvm/test/CodeGen/PowerPC/direct-move-profit.ll       |   4
 llvm/test/CodeGen/PowerPC/p9-xxinsertw-xxextractuw.ll |   4
 llvm/test/CodeGen/PowerPC/ppc64le-smallarg.ll         |   4
 llvm/test/CodeGen/PowerPC/pr25157-peephole.ll         |   2
 llvm/test/CodeGen/PowerPC/pr25157.ll                  |   2
 llvm/test/CodeGen/PowerPC/select-addrRegRegOnly.ll    |   2
 llvm/test/CodeGen/PowerPC/select_const.ll             |   2
 llvm/test/CodeGen/PowerPC/vsx_insert_extract_le.ll    |   8
 llvm/test/CodeGen/PowerPC/vsx_scalar_ld_st.ll         |  20
 16 files changed, 263 insertions(+), 106 deletions(-)
diff --git a/llvm/lib/Target/PowerPC/P9InstrResources.td b/llvm/lib/Target/PowerPC/P9InstrResources.td
index 510352d5a9b..dc6ed16e53c 100644
--- a/llvm/lib/Target/PowerPC/P9InstrResources.td
+++ b/llvm/lib/Target/PowerPC/P9InstrResources.td
@@ -711,7 +711,8 @@ def : InstRW<[P9_LS_5C, IP_AGEN_1C, DISP_1C, DISP_1C],
LXV,
LXVX,
LXSD,
- DFLOADf64
+ DFLOADf64,
+ XFLOADf64
)>;
// 4 Cycle load uses a single slice.
@@ -751,7 +752,10 @@ def : InstRW<[P9_LoadAndALUOp_7C, IP_AGEN_1C, IP_EXEC_1C,
LXSSPX,
LXSIWAX,
LXSSP,
- DFLOADf32
+ DFLOADf32,
+ XFLOADf32,
+ LIWAX,
+ LIWZX
)>;
// Cracked Load that requires the PM resource.
@@ -781,7 +785,10 @@ def : InstRW<[P9_LS_1C, IP_EXEC_1C, IP_AGEN_1C, DISP_1C, DISP_1C, DISP_1C],
STXSSPX,
STXSIWX,
DFSTOREf32,
- DFSTOREf64
+ DFSTOREf64,
+ XFSTOREf32,
+ XFSTOREf64,
+ STIWX
)>;
// Store operation that requires the whole superslice.
diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
index 70920294aea..99a52902a52 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
+++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
@@ -1977,29 +1977,13 @@ PPCInstrInfo::getSerializableBitmaskMachineOperandTargetFlags() const {
return makeArrayRef(TargetFlags);
}
-bool PPCInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
- auto &MBB = *MI.getParent();
- auto DL = MI.getDebugLoc();
- switch (MI.getOpcode()) {
- case TargetOpcode::LOAD_STACK_GUARD: {
- assert(Subtarget.isTargetLinux() &&
- "Only Linux target is expected to contain LOAD_STACK_GUARD");
- const int64_t Offset = Subtarget.isPPC64() ? -0x7010 : -0x7008;
- const unsigned Reg = Subtarget.isPPC64() ? PPC::X13 : PPC::R2;
- MI.setDesc(get(Subtarget.isPPC64() ? PPC::LD : PPC::LWZ));
- MachineInstrBuilder(*MI.getParent()->getParent(), MI)
- .addImm(Offset)
- .addReg(Reg);
- return true;
- }
- case PPC::DFLOADf32:
- case PPC::DFLOADf64:
- case PPC::DFSTOREf32:
- case PPC::DFSTOREf64: {
- assert(Subtarget.hasP9Vector() &&
- "Invalid D-Form Pseudo-ops on non-P9 target.");
- assert(MI.getOperand(2).isReg() && MI.getOperand(1).isImm() &&
- "D-form op must have register and immediate operands");
+// Expand VSX Memory Pseudo instruction to either a VSX or a FP instruction.
+// The VSX versions have the advantage of a full 64-register target whereas
+// the FP ones have the advantage of lower latency and higher throughput. So
+// what we are after is using the faster instructions in low register pressure
+// situations and using the larger register file in high register pressure
+// situations.
+bool PPCInstrInfo::expandVSXMemPseudo(MachineInstr &MI) const {
unsigned UpperOpcode, LowerOpcode;
switch (MI.getOpcode()) {
case PPC::DFLOADf32:
@@ -2018,7 +2002,38 @@ bool PPCInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
UpperOpcode = PPC::STXSD;
LowerOpcode = PPC::STFD;
break;
+ case PPC::XFLOADf32:
+ UpperOpcode = PPC::LXSSPX;
+ LowerOpcode = PPC::LFSX;
+ break;
+ case PPC::XFLOADf64:
+ UpperOpcode = PPC::LXSDX;
+ LowerOpcode = PPC::LFDX;
+ break;
+ case PPC::XFSTOREf32:
+ UpperOpcode = PPC::STXSSPX;
+ LowerOpcode = PPC::STFSX;
+ break;
+ case PPC::XFSTOREf64:
+ UpperOpcode = PPC::STXSDX;
+ LowerOpcode = PPC::STFDX;
+ break;
+ case PPC::LIWAX:
+ UpperOpcode = PPC::LXSIWAX;
+ LowerOpcode = PPC::LFIWAX;
+ break;
+ case PPC::LIWZX:
+ UpperOpcode = PPC::LXSIWZX;
+ LowerOpcode = PPC::LFIWZX;
+ break;
+ case PPC::STIWX:
+ UpperOpcode = PPC::STXSIWX;
+ LowerOpcode = PPC::STFIWX;
+ break;
+ default:
+ llvm_unreachable("Unknown Operation!");
}
+
unsigned TargetReg = MI.getOperand(0).getReg();
unsigned Opcode;
if ((TargetReg >= PPC::F0 && TargetReg <= PPC::F31) ||
@@ -2028,6 +2043,52 @@ bool PPCInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
Opcode = UpperOpcode;
MI.setDesc(get(Opcode));
return true;
+}
+
+bool PPCInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
+ auto &MBB = *MI.getParent();
+ auto DL = MI.getDebugLoc();
+
+ switch (MI.getOpcode()) {
+ case TargetOpcode::LOAD_STACK_GUARD: {
+ assert(Subtarget.isTargetLinux() &&
+ "Only Linux target is expected to contain LOAD_STACK_GUARD");
+ const int64_t Offset = Subtarget.isPPC64() ? -0x7010 : -0x7008;
+ const unsigned Reg = Subtarget.isPPC64() ? PPC::X13 : PPC::R2;
+ MI.setDesc(get(Subtarget.isPPC64() ? PPC::LD : PPC::LWZ));
+ MachineInstrBuilder(*MI.getParent()->getParent(), MI)
+ .addImm(Offset)
+ .addReg(Reg);
+ return true;
+ }
+ case PPC::DFLOADf32:
+ case PPC::DFLOADf64:
+ case PPC::DFSTOREf32:
+ case PPC::DFSTOREf64: {
+ assert(Subtarget.hasP9Vector() &&
+ "Invalid D-Form Pseudo-ops on Pre-P9 target.");
+ assert(MI.getOperand(2).isReg() && MI.getOperand(1).isImm() &&
+ "D-form op must have register and immediate operands");
+ return expandVSXMemPseudo(MI);
+ }
+ case PPC::XFLOADf32:
+ case PPC::XFSTOREf32:
+ case PPC::LIWAX:
+ case PPC::LIWZX:
+ case PPC::STIWX: {
+ assert(Subtarget.hasP8Vector() &&
+ "Invalid X-Form Pseudo-ops on Pre-P8 target.");
+ assert(MI.getOperand(2).isReg() && MI.getOperand(1).isReg() &&
+ "X-form op must have register and register operands");
+ return expandVSXMemPseudo(MI);
+ }
+ case PPC::XFLOADf64:
+ case PPC::XFSTOREf64: {
+ assert(Subtarget.hasVSX() &&
+ "Invalid X-Form Pseudo-ops on target that has no VSX.");
+ assert(MI.getOperand(2).isReg() && MI.getOperand(1).isReg() &&
+ "X-form op must have register and register operands");
+ return expandVSXMemPseudo(MI);
}
case PPC::SPILLTOVSR_LD: {
unsigned TargetReg = MI.getOperand(0).getReg();
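
A minimal sketch of the selection step that expandVSXMemPseudo performs once register allocation has fixed the destination register. It assumes the LLVM PPC register and opcode enums are in scope; only the F0-F31 check is taken from the hunk above, while the helper name, the treatment of every non-FPR register as an upper VSX register, and the usage lines are illustrative assumptions, not the exact upstream code.

// Sketch only; assumes the PPCGen enums (PPC::F0, PPC::F31, opcode values)
// from llvm/lib/Target/PowerPC are available.
static unsigned selectVSXMemOpcode(unsigned TargetReg, unsigned UpperOpcode,
                                   unsigned LowerOpcode) {
  // Registers in the classic FPR file take the lower-latency FP form; any
  // other register the pseudo can be allocated to is assumed here to be an
  // upper VSX register and keeps the X-form VSX encoding.
  bool IsClassicFPR = TargetReg >= PPC::F0 && TargetReg <= PPC::F31;
  return IsClassicFPR ? LowerOpcode : UpperOpcode;
}

// Hypothetical use for the XFLOADf32 pseudo:
//   unsigned Opc = selectVSXMemOpcode(Reg, PPC::LXSSPX, PPC::LFSX);
//   MI.setDesc(TII.get(Opc));
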
diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.h b/llvm/lib/Target/PowerPC/PPCInstrInfo.h
index 565392f76e4..f7a44b89233 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrInfo.h
+++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.h
@@ -282,6 +282,9 @@ public:
ArrayRef<std::pair<unsigned, const char *>>
getSerializableBitmaskMachineOperandTargetFlags() const override;
+ // Expand VSX Memory Pseudo instruction to either a VSX or a FP instruction.
+ bool expandVSXMemPseudo(MachineInstr &MI) const;
+
// Lower pseudo instructions after register allocation.
bool expandPostRAPseudo(MachineInstr &MI) const override;
diff --git a/llvm/lib/Target/PowerPC/PPCInstrVSX.td b/llvm/lib/Target/PowerPC/PPCInstrVSX.td
index 3261bc9bc53..9ddb12ea8c1 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrVSX.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrVSX.td
@@ -131,6 +131,12 @@ let Uses = [RM] in {
"lxsdx $XT, $src", IIC_LdStLFD,
[(set f64:$XT, (load xoaddr:$src))]>;
+ // Pseudo instruction XFLOADf64 will be expanded to LXSDX or LFDX later
+ let isPseudo = 1, CodeSize = 3 in
+ def XFLOADf64 : Pseudo<(outs vsfrc:$XT), (ins memrr:$src),
+ "#XFLOADf64",
+ [(set f64:$XT, (load xoaddr:$src))]>;
+
let Predicates = [HasVSX, HasOnlySwappingMemOps] in
def LXVD2X : XX1Form<31, 844,
(outs vsrc:$XT), (ins memrr:$src),
@@ -156,6 +162,12 @@ let Uses = [RM] in {
"stxsdx $XT, $dst", IIC_LdStSTFD,
[(store f64:$XT, xoaddr:$dst)]>;
+ // Pseudo instruction XFSTOREf64 will be expanded to STXSDX or STFDX later
+ let isPseudo = 1, CodeSize = 3 in
+ def XFSTOREf64 : Pseudo<(outs), (ins vsfrc:$XT, memrr:$dst),
+ "#XFSTOREf64",
+ [(store f64:$XT, xoaddr:$dst)]>;
+
let Predicates = [HasVSX, HasOnlySwappingMemOps] in {
// The behaviour of this instruction is endianness-specific so we provide no
// pattern to match it without considering endianness.
@@ -1215,32 +1227,59 @@ let AddedComplexity = 400 in { // Prefer VSX patterns over non-VSX patterns.
let mayLoad = 1, mayStore = 0 in {
let CodeSize = 3 in
def LXSSPX : XX1Form<31, 524, (outs vssrc:$XT), (ins memrr:$src),
- "lxsspx $XT, $src", IIC_LdStLFD,
- [(set f32:$XT, (load xoaddr:$src))]>;
+ "lxsspx $XT, $src", IIC_LdStLFD, []>;
def LXSIWAX : XX1Form<31, 76, (outs vsfrc:$XT), (ins memrr:$src),
- "lxsiwax $XT, $src", IIC_LdStLFD,
- [(set f64:$XT, (PPClfiwax xoaddr:$src))]>;
+ "lxsiwax $XT, $src", IIC_LdStLFD, []>;
def LXSIWZX : XX1Form<31, 12, (outs vsfrc:$XT), (ins memrr:$src),
- "lxsiwzx $XT, $src", IIC_LdStLFD,
- [(set f64:$XT, (PPClfiwzx xoaddr:$src))]>;
+ "lxsiwzx $XT, $src", IIC_LdStLFD, []>;
+
+  // Note that isPseudo = 1 is not part of class Pseudo<>; omitting it would
+  // leave these pseudos unexpanded in expandPostRAPseudos().
+ let isPseudo = 1 in {
+ // Pseudo instruction XFLOADf32 will be expanded to LXSSPX or LFSX later
+ let CodeSize = 3 in
+ def XFLOADf32 : Pseudo<(outs vssrc:$XT), (ins memrr:$src),
+ "#XFLOADf32",
+ [(set f32:$XT, (load xoaddr:$src))]>;
+ // Pseudo instruction LIWAX will be expanded to LXSIWAX or LFIWAX later
+ def LIWAX : Pseudo<(outs vsfrc:$XT), (ins memrr:$src),
+ "#LIWAX",
+ [(set f64:$XT, (PPClfiwax xoaddr:$src))]>;
+ // Pseudo instruction LIWZX will be expanded to LXSIWZX or LFIWZX later
+ def LIWZX : Pseudo<(outs vsfrc:$XT), (ins memrr:$src),
+ "#LIWZX",
+ [(set f64:$XT, (PPClfiwzx xoaddr:$src))]>;
+ }
} // mayLoad
// VSX scalar stores introduced in ISA 2.07
let mayStore = 1, mayLoad = 0 in {
let CodeSize = 3 in
def STXSSPX : XX1Form<31, 652, (outs), (ins vssrc:$XT, memrr:$dst),
- "stxsspx $XT, $dst", IIC_LdStSTFD,
- [(store f32:$XT, xoaddr:$dst)]>;
+ "stxsspx $XT, $dst", IIC_LdStSTFD, []>;
def STXSIWX : XX1Form<31, 140, (outs), (ins vsfrc:$XT, memrr:$dst),
- "stxsiwx $XT, $dst", IIC_LdStSTFD,
- [(PPCstfiwx f64:$XT, xoaddr:$dst)]>;
+ "stxsiwx $XT, $dst", IIC_LdStSTFD, []>;
+
+  // Note that isPseudo = 1 is not part of class Pseudo<>; omitting it would
+  // leave these pseudos unexpanded in expandPostRAPseudos().
+ let isPseudo = 1 in {
+ // Pseudo instruction XFSTOREf32 will be expanded to STXSSPX or STFSX later
+ let CodeSize = 3 in
+ def XFSTOREf32 : Pseudo<(outs), (ins vssrc:$XT, memrr:$dst),
+ "#XFSTOREf32",
+ [(store f32:$XT, xoaddr:$dst)]>;
+ // Pseudo instruction STIWX will be expanded to STXSIWX or STFIWX later
+ def STIWX : Pseudo<(outs), (ins vsfrc:$XT, memrr:$dst),
+ "#STIWX",
+ [(PPCstfiwx f64:$XT, xoaddr:$dst)]>;
+ }
} // mayStore
} // UseVSXReg = 1
def : Pat<(f64 (extloadf32 xoaddr:$src)),
- (COPY_TO_REGCLASS (LXSSPX xoaddr:$src), VSFRC)>;
+ (COPY_TO_REGCLASS (XFLOADf32 xoaddr:$src), VSFRC)>;
def : Pat<(f32 (fpround (extloadf32 xoaddr:$src))),
- (f32 (LXSSPX xoaddr:$src))>;
+ (f32 (XFLOADf32 xoaddr:$src))>;
def : Pat<(f64 (fpextend f32:$src)),
(COPY_TO_REGCLASS $src, VSFRC)>;
@@ -1414,7 +1453,7 @@ let AddedComplexity = 400 in { // Prefer VSX patterns over non-VSX patterns.
(f32 (XSCVUXDSP (COPY_TO_REGCLASS (XXPERMDI $S, $S, 2), VSFRC)))>;
}
def : Pat<(v4i32 (scalar_to_vector ScalarLoads.Li32)),
- (v4i32 (XXSPLTWs (LXSIWAX xoaddr:$src), 1))>;
+ (v4i32 (XXSPLTWs (LIWAX xoaddr:$src), 1))>;
} // AddedComplexity = 400
} // HasP8Vector
@@ -3047,10 +3086,10 @@ let AddedComplexity = 400 in {
(COPY_TO_REGCLASS (XSCVDPUXDS $A), VSRC), 0))>;
def : Pat<(v4i32 (scalar_to_vector FltToIntLoad.A)),
(v4i32 (XXSPLTW (COPY_TO_REGCLASS
- (XSCVDPSXWSs (LXSSPX xoaddr:$A)), VSRC), 1))>;
+ (XSCVDPSXWSs (XFLOADf32 xoaddr:$A)), VSRC), 1))>;
def : Pat<(v4i32 (scalar_to_vector FltToUIntLoad.A)),
(v4i32 (XXSPLTW (COPY_TO_REGCLASS
- (XSCVDPUXWSs (LXSSPX xoaddr:$A)), VSRC), 1))>;
+ (XSCVDPUXWSs (XFLOADf32 xoaddr:$A)), VSRC), 1))>;
def : Pat<(v4f32 (build_vector f32:$A, f32:$A, f32:$A, f32:$A)),
(v4f32 (XXSPLTW (v4f32 (XSCVDPSPN $A)), 0))>;
@@ -3068,19 +3107,19 @@ let AddedComplexity = 400 in {
}
let Predicates = [HasVSX, NoP9Vector] in {
- // Load-and-splat with fp-to-int conversion (using X-Form VSX loads).
+ // Load-and-splat with fp-to-int conversion (using X-Form VSX/FP loads).
def : Pat<(v4i32 (scalar_to_vector DblToIntLoad.A)),
(v4i32 (XXSPLTW (COPY_TO_REGCLASS
- (XSCVDPSXWS (LXSDX xoaddr:$A)), VSRC), 1))>;
+ (XSCVDPSXWS (XFLOADf64 xoaddr:$A)), VSRC), 1))>;
def : Pat<(v4i32 (scalar_to_vector DblToUIntLoad.A)),
(v4i32 (XXSPLTW (COPY_TO_REGCLASS
- (XSCVDPUXWS (LXSDX xoaddr:$A)), VSRC), 1))>;
+ (XSCVDPUXWS (XFLOADf64 xoaddr:$A)), VSRC), 1))>;
def : Pat<(v2i64 (scalar_to_vector FltToLongLoad.A)),
(v2i64 (XXPERMDIs (XSCVDPSXDS (COPY_TO_REGCLASS
- (LXSSPX xoaddr:$A), VSFRC)), 0))>;
+ (XFLOADf32 xoaddr:$A), VSFRC)), 0))>;
def : Pat<(v2i64 (scalar_to_vector FltToULongLoad.A)),
(v2i64 (XXPERMDIs (XSCVDPUXDS (COPY_TO_REGCLASS
- (LXSSPX xoaddr:$A), VSFRC)), 0))>;
+ (XFLOADf32 xoaddr:$A), VSFRC)), 0))>;
}
// Big endian, available on all targets with VSX
diff --git a/llvm/lib/Target/PowerPC/PPCVSXSwapRemoval.cpp b/llvm/lib/Target/PowerPC/PPCVSXSwapRemoval.cpp
index 7d34efd4af3..c51368d6d2a 100644
--- a/llvm/lib/Target/PowerPC/PPCVSXSwapRemoval.cpp
+++ b/llvm/lib/Target/PowerPC/PPCVSXSwapRemoval.cpp
@@ -353,6 +353,8 @@ bool PPCVSXSwapRemoval::gatherVectorInstructions() {
break;
case PPC::LXSDX:
case PPC::LXSSPX:
+ case PPC::XFLOADf64:
+ case PPC::XFLOADf32:
// A load of a floating-point value into the high-order half of
// a vector register is safe, provided that we introduce a swap
// following the load, which will be done by the SUBREG_TO_REG
diff --git a/llvm/test/CodeGen/PowerPC/VSX-XForm-Scalars.ll b/llvm/test/CodeGen/PowerPC/VSX-XForm-Scalars.ll
new file mode 100644
index 00000000000..e38c5beb80e
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/VSX-XForm-Scalars.ll
@@ -0,0 +1,45 @@
+; RUN: llc < %s -mcpu=pwr8 -mtriple=powerpc64le-unknown-unknown \
+; RUN: -verify-machineinstrs | FileCheck %s --check-prefix=CHECK-P8
+; RUN: llc < %s -mcpu=pwr9 -mtriple=powerpc64le-unknown-unknown \
+; RUN: -verify-machineinstrs | FileCheck %s --check-prefix=CHECK-P9
+
+@a = external local_unnamed_addr global <4 x i32>, align 16
+@pb = external local_unnamed_addr global float*, align 8
+
+define void @testExpandPostRAPseudo(i32* nocapture readonly %ptr) {
+; CHECK-P8-LABEL: testExpandPostRAPseudo:
+; CHECK-P8: lxsiwax 34, 0, 3
+; CHECK-P8-NEXT: xxspltw 34, 34, 1
+; CHECK-P8-NEXT: stvx 2, 0, 4
+; CHECK-P8: #APP
+; CHECK-P8-NEXT: #Clobber Registers
+; CHECK-P8-NEXT: #NO_APP
+; CHECK-P8-NEXT: lis 4, 1024
+; CHECK-P8-NEXT: lfiwax 0, 0, 3
+; CHECK-P8: stfsx 0, 3, 4
+; CHECK-P8-NEXT: blr
+
+; CHECK-P9-LABEL: testExpandPostRAPseudo:
+; CHECK-P9: lxvwsx 0, 0, 3
+; CHECK-P9: stxvx 0, 0, 4
+; CHECK-P9: #APP
+; CHECK-P9-NEXT: #Clobber Registers
+; CHECK-P9-NEXT: #NO_APP
+; CHECK-P9-NEXT: lis 4, 1024
+; CHECK-P9-NEXT: lfiwax 0, 0, 3
+; CHECK-P9: stfsx 0, 3, 4
+; CHECK-P9-NEXT: blr
+
+entry:
+ %0 = load i32, i32* %ptr, align 4
+ %splat.splatinsert = insertelement <4 x i32> undef, i32 %0, i32 0
+ %splat.splat = shufflevector <4 x i32> %splat.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer
+ store <4 x i32> %splat.splat, <4 x i32>* @a, align 16
+  tail call void asm sideeffect "#Clobber Registers", "~{f0},~{f3},~{f4},~{f5},~{f6},~{f7},~{f8},~{f9},~{f10},~{f11},~{f12},~{f13},~{f14},~{f15},~{f16},~{f17},~{f18},~{f19},~{f20},~{f21},~{f22},~{f23},~{f24},~{f25},~{f26},~{f27},~{f28},~{f29},~{f30},~{f31}"()
+ %1 = load i32, i32* %ptr, align 4
+ %conv = sitofp i32 %1 to float
+ %2 = load float*, float** @pb, align 8
+ %add.ptr = getelementptr inbounds float, float* %2, i64 16777216
+ store float %conv, float* %add.ptr, align 4
+ ret void
+}
diff --git a/llvm/test/CodeGen/PowerPC/build-vector-tests.ll b/llvm/test/CodeGen/PowerPC/build-vector-tests.ll
index d71460b8d9a..fd1f4589870 100644
--- a/llvm/test/CodeGen/PowerPC/build-vector-tests.ll
+++ b/llvm/test/CodeGen/PowerPC/build-vector-tests.ll
@@ -1485,10 +1485,10 @@ entry:
; P9BE: xvcvspsxws v2, [[REG1]]
; P9LE: [[REG1:[vs0-9]+]], 0, r3
; P9LE: xvcvspsxws v2, [[REG1]]
-; P8BE: lxsspx [[REG1:f[0-9]+]], 0, r3
+; P8BE: lfsx [[REG1:f[0-9]+]], 0, r3
; P8BE: xscvdpsxws f[[REG2:[0-9]+]], [[REG1]]
; P8BE: xxspltw v2, vs[[REG2]], 1
-; P8LE: lxsspx [[REG1:f[0-9]+]], 0, r3
+; P8LE: lfsx [[REG1:f[0-9]+]], 0, r3
; P8LE: xscvdpsxws f[[REG2:[vs0-9]+]], [[REG1]]
; P8LE: xxspltw v2, vs[[REG2]], 1
}
@@ -1880,11 +1880,11 @@ entry:
; P9LE: xscvdpsxws
; P9LE: xxspltw
; P9LE: blr
-; P8BE: lxsdx
+; P8BE: lfdx
; P8BE: xscvdpsxws
; P8BE: xxspltw
; P8BE: blr
-; P8LE: lxsdx
+; P8LE: lfdx
; P8LE: xscvdpsxws
; P8LE: xxspltw
; P8LE: blr
@@ -2645,10 +2645,10 @@ entry:
; P9BE: xvcvspuxws v2, [[REG1]]
; P9LE: [[REG1:[vs0-9]+]], 0, r3
; P9LE: xvcvspuxws v2, [[REG1]]
-; P8BE: lxsspx [[REG1:f[0-9]+]], 0, r3
+; P8BE: lfsx [[REG1:f[0-9]+]], 0, r3
; P8BE: xscvdpuxws f[[REG2:[0-9]+]], [[REG1]]
; P8BE: xxspltw v2, vs[[REG2]], 1
-; P8LE: lxsspx [[REG1:f[0-9]+]], 0, r3
+; P8LE: lfsx [[REG1:f[0-9]+]], 0, r3
; P8LE: xscvdpuxws f[[REG2:[vs0-9]+]], [[REG1]]
; P8LE: xxspltw v2, vs[[REG2]], 1
}
@@ -3040,11 +3040,11 @@ entry:
; P9LE: xscvdpuxws
; P9LE: xxspltw
; P9LE: blr
-; P8BE: lxsdx
+; P8BE: lfdx
; P8BE: xscvdpuxws
; P8BE: xxspltw
; P8BE: blr
-; P8LE: lxsdx
+; P8LE: lfdx
; P8LE: xscvdpuxws
; P8LE: xxspltw
; P8LE: blr
@@ -3508,13 +3508,13 @@ entry:
; P9LE: xxmrghd
; P9LE-NEXT: xvcvdpsxds v2
; P9LE-NEXT: blr
-; P8BE: lxsspx
-; P8BE: lxsspx
+; P8BE: lfsx
+; P8BE: lfsx
; P8BE: xxmrghd
; P8BE-NEXT: xvcvdpsxds v2
; P8BE-NEXT: blr
-; P8LE: lxsspx
-; P8LE: lxsspx
+; P8LE: lfsx
+; P8LE: lfsx
; P8LE: xxmrghd
; P8LE-NEXT: xvcvdpsxds v2
; P8LE-NEXT: blr
@@ -3546,13 +3546,13 @@ entry:
; P9LE: xxmrghd
; P9LE-NEXT: xvcvdpsxds v2
; P9LE-NEXT: blr
-; P8BE: lxsspx
-; P8BE: lxsspx
+; P8BE: lfsx
+; P8BE: lfsx
; P8BE: xxmrghd
; P8BE-NEXT: xvcvdpsxds v2
; P8BE-NEXT: blr
-; P8LE: lxsspx
-; P8LE: lxsspx
+; P8LE: lfsx
+; P8LE: lfsx
; P8LE: xxmrghd
; P8LE-NEXT: xvcvdpsxds v2
; P8LE-NEXT: blr
@@ -3591,13 +3591,13 @@ entry:
; P9LE-NEXT: blr
; P8BE: sldi
; P8BE: lfsux
-; P8BE: lxsspx
+; P8BE: lfsx
; P8BE: xxmrghd
; P8BE-NEXT: xvcvdpsxds v2
; P8BE-NEXT: blr
; P8LE: sldi
; P8LE: lfsux
-; P8LE: lxsspx
+; P8LE: lfsx
; P8LE: xxmrghd
; P8LE-NEXT: xvcvdpsxds v2
; P8LE-NEXT: blr
@@ -3636,13 +3636,13 @@ entry:
; P9LE-NEXT: blr
; P8BE: sldi
; P8BE: lfsux
-; P8BE: lxsspx
+; P8BE: lfsx
; P8BE: xxmrghd
; P8BE-NEXT: xvcvdpsxds v2
; P8BE-NEXT: blr
; P8LE: sldi
; P8LE: lfsux
-; P8LE: lxsspx
+; P8LE: lfsx
; P8LE: xxmrghd
; P8LE-NEXT: xvcvdpsxds v2
; P8LE-NEXT: blr
@@ -3693,11 +3693,11 @@ entry:
; P9LE-NEXT: xscvdpsxds
; P9LE-NEXT: xxspltd v2
; P9LE-NEXT: blr
-; P8BE: lxsspx
+; P8BE: lfsx
; P8BE-NEXT: xscvdpsxds
; P8BE-NEXT: xxspltd v2
; P8BE-NEXT: blr
-; P8LE: lxsspx
+; P8LE: lfsx
; P8LE-NEXT: xscvdpsxds
; P8LE-NEXT: xxspltd v2
; P8LE-NEXT: blr
@@ -4412,13 +4412,13 @@ entry:
; P9LE: xxmrghd
; P9LE-NEXT: xvcvdpuxds v2
; P9LE-NEXT: blr
-; P8BE: lxsspx
-; P8BE: lxsspx
+; P8BE: lfsx
+; P8BE: lfsx
; P8BE: xxmrghd
; P8BE-NEXT: xvcvdpuxds v2
; P8BE-NEXT: blr
-; P8LE: lxsspx
-; P8LE: lxsspx
+; P8LE: lfsx
+; P8LE: lfsx
; P8LE: xxmrghd
; P8LE-NEXT: xvcvdpuxds v2
; P8LE-NEXT: blr
@@ -4450,13 +4450,13 @@ entry:
; P9LE: xxmrghd
; P9LE-NEXT: xvcvdpuxds v2
; P9LE-NEXT: blr
-; P8BE: lxsspx
-; P8BE: lxsspx
+; P8BE: lfsx
+; P8BE: lfsx
; P8BE: xxmrghd
; P8BE-NEXT: xvcvdpuxds v2
; P8BE-NEXT: blr
-; P8LE: lxsspx
-; P8LE: lxsspx
+; P8LE: lfsx
+; P8LE: lfsx
; P8LE: xxmrghd
; P8LE-NEXT: xvcvdpuxds v2
; P8LE-NEXT: blr
@@ -4495,13 +4495,13 @@ entry:
; P9LE-NEXT: blr
; P8BE: sldi
; P8BE: lfsux
-; P8BE: lxsspx
+; P8BE: lfsx
; P8BE: xxmrghd
; P8BE-NEXT: xvcvdpuxds v2
; P8BE-NEXT: blr
; P8LE: sldi
; P8LE: lfsux
-; P8LE: lxsspx
+; P8LE: lfsx
; P8LE: xxmrghd
; P8LE-NEXT: xvcvdpuxds v2
; P8LE-NEXT: blr
@@ -4540,13 +4540,13 @@ entry:
; P9LE-NEXT: blr
; P8BE: sldi
; P8BE: lfsux
-; P8BE: lxsspx
+; P8BE: lfsx
; P8BE: xxmrghd
; P8BE-NEXT: xvcvdpuxds v2
; P8BE-NEXT: blr
; P8LE: sldi
; P8LE: lfsux
-; P8LE: lxsspx
+; P8LE: lfsx
; P8LE: xxmrghd
; P8LE-NEXT: xvcvdpuxds v2
; P8LE-NEXT: blr
@@ -4597,11 +4597,11 @@ entry:
; P9LE-NEXT: xscvdpuxds
; P9LE-NEXT: xxspltd v2
; P9LE-NEXT: blr
-; P8BE: lxsspx
+; P8BE: lfsx
; P8BE-NEXT: xscvdpuxds
; P8BE-NEXT: xxspltd v2
; P8BE-NEXT: blr
-; P8LE: lxsspx
+; P8LE: lfsx
; P8LE-NEXT: xscvdpuxds
; P8LE-NEXT: xxspltd v2
; P8LE-NEXT: blr
diff --git a/llvm/test/CodeGen/PowerPC/direct-move-profit.ll b/llvm/test/CodeGen/PowerPC/direct-move-profit.ll
index 423f0ff0e37..7205d11a105 100644
--- a/llvm/test/CodeGen/PowerPC/direct-move-profit.ll
+++ b/llvm/test/CodeGen/PowerPC/direct-move-profit.ll
@@ -17,7 +17,7 @@ entry:
; CHECK-NOT: mtvsrwa
; CHECK-NOT: mtfprwa
-; CHECK: lxsiwax [[REG:[0-9]+]], {{.*}}
+; CHECK: lfiwax [[REG:[0-9]+]], {{.*}}
; CHECK-NOT: mtvsrwa
; CHECK-NOT: mtfprwa
; CHECK: xscvsxdsp {{.*}}, [[REG]]
@@ -40,7 +40,7 @@ entry:
; CHECK-NOT: mtvsrwa
; CHECK-NOT: mtfprwa
-; CHECK: lxsiwax [[REG:[0-9]+]], {{.*}}
+; CHECK: lfiwax [[REG:[0-9]+]], {{.*}}
; CHECK-NOT: mtvsrwa
; CHECK-NOT: mtfprwa
; CHECK: xscvsxdsp {{.*}}, [[REG]]
diff --git a/llvm/test/CodeGen/PowerPC/p9-xxinsertw-xxextractuw.ll b/llvm/test/CodeGen/PowerPC/p9-xxinsertw-xxextractuw.ll
index 783833d6b02..0e50b3a68ce 100644
--- a/llvm/test/CodeGen/PowerPC/p9-xxinsertw-xxextractuw.ll
+++ b/llvm/test/CodeGen/PowerPC/p9-xxinsertw-xxextractuw.ll
@@ -1034,10 +1034,10 @@ entry:
define <4 x float> @insertVarF(<4 x float> %a, float %f, i32 %el) {
entry:
; CHECK-LABEL: insertVarF
-; CHECK: stxsspx 1,
+; CHECK: stfsx 1,
; CHECK: lxv
; CHECK-BE-LABEL: insertVarF
-; CHECK-BE: stxsspx 1,
+; CHECK-BE: stfsx 1,
; CHECK-BE: lxv
%vecins = insertelement <4 x float> %a, float %f, i32 %el
ret <4 x float> %vecins
diff --git a/llvm/test/CodeGen/PowerPC/ppc64le-smallarg.ll b/llvm/test/CodeGen/PowerPC/ppc64le-smallarg.ll
index 3a425406d04..01430671824 100644
--- a/llvm/test/CodeGen/PowerPC/ppc64le-smallarg.ll
+++ b/llvm/test/CodeGen/PowerPC/ppc64le-smallarg.ll
@@ -43,7 +43,7 @@ entry:
}
; CHECK: @callee2
; CHECK: addi [[TOCREG:[0-9]+]], 1, 136
-; CHECK: lxsspx {{[0-9]+}}, {{[0-9]+}}, [[TOCREG]]
+; CHECK: lfsx {{[0-9]+}}, {{[0-9]+}}, [[TOCREG]]
; CHECK: blr
define void @caller2() {
@@ -54,7 +54,7 @@ entry:
}
; CHECK: @caller2
; CHECK: addi [[TOCOFF:[0-9]+]], {{[0-9]+}}, 136
-; CHECK: stxsspx {{[0-9]+}}, 0, [[TOCOFF]]
+; CHECK: stfsx {{[0-9]+}}, 0, [[TOCOFF]]
; CHECK: bl test2
declare float @test2(float, float, float, float, float, float, float, float, float, float, float, float, float, float)
diff --git a/llvm/test/CodeGen/PowerPC/pr25157-peephole.ll b/llvm/test/CodeGen/PowerPC/pr25157-peephole.ll
index aacd64e401f..02301ea4028 100644
--- a/llvm/test/CodeGen/PowerPC/pr25157-peephole.ll
+++ b/llvm/test/CodeGen/PowerPC/pr25157-peephole.ll
@@ -57,7 +57,7 @@ L.LB38_2452:
}
; CHECK-LABEL: @aercalc_
-; CHECK: lxsspx
+; CHECK: lfsx
; CHECK: xxspltd
; CHECK: stxvd2x
; CHECK-NOT: xxswapd
diff --git a/llvm/test/CodeGen/PowerPC/pr25157.ll b/llvm/test/CodeGen/PowerPC/pr25157.ll
index ee9a0034f2c..27f50b0f8e6 100644
--- a/llvm/test/CodeGen/PowerPC/pr25157.ll
+++ b/llvm/test/CodeGen/PowerPC/pr25157.ll
@@ -57,6 +57,6 @@ L.LB38_2452:
}
; CHECK-LABEL: @aercalc_
-; CHECK: lxsspx
+; CHECK: lfsx
; CHECK-P9-LABEL: @aercalc_
; CHECK-P9: lfs
diff --git a/llvm/test/CodeGen/PowerPC/select-addrRegRegOnly.ll b/llvm/test/CodeGen/PowerPC/select-addrRegRegOnly.ll
index f880d1faf9d..6be31eaea74 100644
--- a/llvm/test/CodeGen/PowerPC/select-addrRegRegOnly.ll
+++ b/llvm/test/CodeGen/PowerPC/select-addrRegRegOnly.ll
@@ -6,7 +6,7 @@ define float @testSingleAccess(i32* nocapture readonly %arr) local_unnamed_addr
; CHECK-LABEL: testSingleAccess:
; CHECK: # BB#0: # %entry
; CHECK-NEXT: addi 3, 3, 8
-; CHECK-NEXT: lxsiwax 0, 0, 3
+; CHECK-NEXT: lfiwax 0, 0, 3
; CHECK-NEXT: xscvsxdsp 1, 0
; CHECK-NEXT: blr
entry:
diff --git a/llvm/test/CodeGen/PowerPC/select_const.ll b/llvm/test/CodeGen/PowerPC/select_const.ll
index 29548123be8..fd864805abd 100644
--- a/llvm/test/CodeGen/PowerPC/select_const.ll
+++ b/llvm/test/CodeGen/PowerPC/select_const.ll
@@ -780,7 +780,7 @@ define double @sel_constants_frem_constant(i1 %cond) {
; ALL-NEXT: .LBB38_2:
; ALL-NEXT: addis 3, 2, .LCPI38_1@toc@ha
; ALL-NEXT: addi 3, 3, .LCPI38_1@toc@l
-; ALL-NEXT: lxsspx 1, 0, 3
+; ALL-NEXT: lfsx 1, 0, 3
; ALL-NEXT: blr
%sel = select i1 %cond, double -4.0, double 23.3
%bo = frem double %sel, 5.1
diff --git a/llvm/test/CodeGen/PowerPC/vsx_insert_extract_le.ll b/llvm/test/CodeGen/PowerPC/vsx_insert_extract_le.ll
index 98fe3a813cb..0a4db39977a 100644
--- a/llvm/test/CodeGen/PowerPC/vsx_insert_extract_le.ll
+++ b/llvm/test/CodeGen/PowerPC/vsx_insert_extract_le.ll
@@ -17,8 +17,8 @@ define <2 x double> @testi0(<2 x double>* %p1, double* %p2) {
; CHECK-LABEL: testi0
; CHECK: lxvd2x 0, 0, 3
; CHECK: lxsdx 1, 0, 4
-; CHECK: xxswapd 0, 0
-; CHECK: xxspltd 1, 1, 0
+; CHECK-DAG: xxspltd 1, 1, 0
+; CHECK-DAG: xxswapd 0, 0
; CHECK: xxpermdi 34, 0, 1, 1
; CHECK-P9-LABEL: testi0
@@ -37,8 +37,8 @@ define <2 x double> @testi1(<2 x double>* %p1, double* %p2) {
; CHECK-LABEL: testi1
; CHECK: lxvd2x 0, 0, 3
; CHECK: lxsdx 1, 0, 4
-; CHECK: xxswapd 0, 0
-; CHECK: xxspltd 1, 1, 0
+; CHECK-DAG: xxspltd 1, 1, 0
+; CHECK-DAG: xxswapd 0, 0
; CHECK: xxmrgld 34, 1, 0
; CHECK-P9-LABEL: testi1
diff --git a/llvm/test/CodeGen/PowerPC/vsx_scalar_ld_st.ll b/llvm/test/CodeGen/PowerPC/vsx_scalar_ld_st.ll
index 7da2ea27c18..d8ea0dc992f 100644
--- a/llvm/test/CodeGen/PowerPC/vsx_scalar_ld_st.ll
+++ b/llvm/test/CodeGen/PowerPC/vsx_scalar_ld_st.ll
@@ -20,7 +20,7 @@ entry:
ret void
; CHECK-LABEL: @dblToInt
; CHECK: xscvdpsxws [[REGCONV1:[0-9]+]],
-; CHECK: stxsiwx [[REGCONV1]],
+; CHECK: stfiwx [[REGCONV1]],
}
; Function Attrs: nounwind
@@ -33,7 +33,7 @@ entry:
ret void
; CHECK-LABEL: @fltToInt
; CHECK: xscvdpsxws [[REGCONV2:[0-9]+]],
-; CHECK: stxsiwx [[REGCONV2]],
+; CHECK: stfiwx [[REGCONV2]],
}
; Function Attrs: nounwind
@@ -45,7 +45,7 @@ entry:
store volatile double %conv, double* %dd, align 8
ret void
; CHECK-LABEL: @intToDbl
-; CHECK: lxsiwax [[REGLD1:[0-9]+]],
+; CHECK: lfiwax [[REGLD1:[0-9]+]],
; CHECK: xscvsxddp {{[0-9]+}}, [[REGLD1]]
}
@@ -58,7 +58,7 @@ entry:
store volatile float %conv, float* %ff, align 4
ret void
; CHECK-LABEL: @intToFlt
-; CHECK: lxsiwax [[REGLD2:[0-9]+]],
+; CHECK: lfiwax [[REGLD2:[0-9]+]],
; CHECK: xscvsxdsp {{[0-9]}}, [[REGLD2]]
}
@@ -72,7 +72,7 @@ entry:
ret void
; CHECK-LABEL: @dblToUInt
; CHECK: xscvdpuxws [[REGCONV3:[0-9]+]],
-; CHECK: stxsiwx [[REGCONV3]],
+; CHECK: stfiwx [[REGCONV3]],
}
; Function Attrs: nounwind
@@ -85,7 +85,7 @@ entry:
ret void
; CHECK-LABEL: @fltToUInt
; CHECK: xscvdpuxws [[REGCONV4:[0-9]+]],
-; CHECK: stxsiwx [[REGCONV4]],
+; CHECK: stfiwx [[REGCONV4]],
}
; Function Attrs: nounwind
@@ -97,7 +97,7 @@ entry:
store volatile double %conv, double* %dd, align 8
ret void
; CHECK-LABEL: @uIntToDbl
-; CHECK: lxsiwzx [[REGLD3:[0-9]+]],
+; CHECK: lfiwzx [[REGLD3:[0-9]+]],
; CHECK: xscvuxddp {{[0-9]+}}, [[REGLD3]]
}
@@ -110,7 +110,7 @@ entry:
store volatile float %conv, float* %ff, align 4
ret void
; CHECK-LABEL: @uIntToFlt
-; CHECK: lxsiwzx [[REGLD4:[0-9]+]],
+; CHECK: lfiwzx [[REGLD4:[0-9]+]],
; CHECK: xscvuxdsp {{[0-9]+}}, [[REGLD4]]
}
@@ -124,7 +124,7 @@ entry:
ret void
; CHECK-LABEL: @dblToFloat
; CHECK: lxsdx [[REGLD5:[0-9]+]],
-; CHECK: stxsspx [[REGLD5]],
+; CHECK: stfsx [[REGLD5]],
; CHECK-P9-LABEL: @dblToFloat
; CHECK-P9: lfd [[REGLD5:[0-9]+]],
; CHECK-P9: stfs [[REGLD5]],
@@ -139,7 +139,7 @@ entry:
store volatile double %conv, double* %dd, align 8
ret void
; CHECK-LABEL: @floatToDbl
-; CHECK: lxsspx [[REGLD5:[0-9]+]],
+; CHECK: lfsx [[REGLD5:[0-9]+]],
; CHECK: stxsdx [[REGLD5]],
; CHECK-P9-LABEL: @floatToDbl
; CHECK-P9: lfs [[REGLD5:[0-9]+]],