summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--llvm/lib/Target/PowerPC/PPC.h1
-rw-r--r--llvm/lib/Target/PowerPC/PPCMIPeephole.cpp230
-rw-r--r--llvm/lib/Target/PowerPC/PPCTargetMachine.cpp10
-rw-r--r--llvm/test/CodeGen/PowerPC/p8-scalar_vector_conversions.ll2
-rw-r--r--llvm/test/CodeGen/PowerPC/pr25157-peephole.ll61
-rw-r--r--llvm/test/CodeGen/PowerPC/swaps-le-5.ll4
-rw-r--r--llvm/test/CodeGen/PowerPC/swaps-le-6.ll6
-rw-r--r--llvm/test/CodeGen/PowerPC/vsx.ll3
-rw-r--r--llvm/test/CodeGen/PowerPC/vsx_insert_extract_le.ll6
-rw-r--r--llvm/test/CodeGen/PowerPC/vsx_shuffle_le.ll20
-rw-r--r--llvm/test/Transforms/PlaceSafepoints/finite-loops.ll7
11 files changed, 322 insertions, 28 deletions
diff --git a/llvm/lib/Target/PowerPC/PPC.h b/llvm/lib/Target/PowerPC/PPC.h
index ae8d8b4f5df..e157fd37c6e 100644
--- a/llvm/lib/Target/PowerPC/PPC.h
+++ b/llvm/lib/Target/PowerPC/PPC.h
@@ -41,6 +41,7 @@ namespace llvm {
FunctionPass *createPPCVSXCopyPass();
FunctionPass *createPPCVSXFMAMutatePass();
FunctionPass *createPPCVSXSwapRemovalPass();
+ FunctionPass *createPPCMIPeepholePass();
FunctionPass *createPPCBranchSelectionPass();
FunctionPass *createPPCISelDag(PPCTargetMachine &TM);
FunctionPass *createPPCTLSDynamicCallPass();
diff --git a/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp b/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp
new file mode 100644
index 00000000000..fe339d70d7d
--- /dev/null
+++ b/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp
@@ -0,0 +1,230 @@
+//===-------------- PPCMIPeephole.cpp - MI Peephole Cleanups -------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===---------------------------------------------------------------------===//
+//
+// This pass performs peephole optimizations to clean up ugly code
+// sequences at the MachineInstruction layer. It runs at the end of
+// the SSA phases, following VSX swap removal. A pass of dead code
+// elimination follows this one for quick clean-up of any dead
+// instructions introduced here. Although we could do this as callbacks
+// from the generic peephole pass, this would have a couple of bad
+// effects: it might remove optimization opportunities for VSX swap
+// removal, and it would miss cleanups made possible following VSX
+// swap removal.
+//
+//===---------------------------------------------------------------------===//
+
+#include "PPCInstrInfo.h"
+#include "PPC.h"
+#include "PPCInstrBuilder.h"
+#include "PPCTargetMachine.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Support/Debug.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "ppc-mi-peepholes"
+
+namespace llvm {
+ void initializePPCMIPeepholePass(PassRegistry&);
+}
+
+namespace {
+
+struct PPCMIPeephole : public MachineFunctionPass {
+
+ static char ID;
+ const PPCInstrInfo *TII;
+ MachineFunction *MF;
+ MachineRegisterInfo *MRI;
+
+ PPCMIPeephole() : MachineFunctionPass(ID) {
+ initializePPCMIPeepholePass(*PassRegistry::getPassRegistry());
+ }
+
+private:
+ // Initialize class variables.
+ void initialize(MachineFunction &MFParm);
+
+ // Perform peepholes.
+ bool simplifyCode(void);
+
+ // Find the "true" register represented by SrcReg (following chains
+ // of copies and subreg_to_reg operations).
+ unsigned lookThruCopyLike(unsigned SrcReg);
+
+public:
+ // Main entry point for this pass.
+ bool runOnMachineFunction(MachineFunction &MF) override {
+ initialize(MF);
+ return simplifyCode();
+ }
+};
+
+// Initialize class variables.
+void PPCMIPeephole::initialize(MachineFunction &MFParm) {
+ MF = &MFParm;
+ MRI = &MF->getRegInfo();
+ TII = MF->getSubtarget<PPCSubtarget>().getInstrInfo();
+ DEBUG(dbgs() << "*** PowerPC MI peephole pass ***\n\n");
+ DEBUG(MF->dump());
+}
+
+// Perform peephole optimizations.
+bool PPCMIPeephole::simplifyCode(void) {
+ bool Simplified = false;
+ MachineInstr* ToErase = nullptr;
+
+ for (MachineBasicBlock &MBB : *MF) {
+ for (MachineInstr &MI : MBB) {
+
+ // If the previous instruction was marked for elimination,
+ // remove it now.
+ if (ToErase) {
+ ToErase->eraseFromParent();
+ ToErase = nullptr;
+ }
+
+ // Ignore debug instructions.
+ if (MI.isDebugValue())
+ continue;
+
+ // Per-opcode peepholes.
+ switch (MI.getOpcode()) {
+
+ default:
+ break;
+
+ case PPC::XXPERMDI: {
+ // Perform simplifications of 2x64 vector swaps and splats.
+ // A swap is identified by an immediate value of 2, and a splat
+ // is identified by an immediate value of 0 or 3.
+ int Immed = MI.getOperand(3).getImm();
+
+ if (Immed != 1) {
+
+ // For each of these simplifications, we need the two source
+ // regs to match. Unfortunately, MachineCSE ignores COPY and
+ // SUBREG_TO_REG, so for example we can see
+ // XXPERMDI t, SUBREG_TO_REG(s), SUBREG_TO_REG(s), immed.
+ // We have to look through chains of COPY and SUBREG_TO_REG
+ // to find the real source values for comparison.
+ unsigned TrueReg1 = lookThruCopyLike(MI.getOperand(1).getReg());
+ unsigned TrueReg2 = lookThruCopyLike(MI.getOperand(2).getReg());
+
+ if (TrueReg1 == TrueReg2
+ && TargetRegisterInfo::isVirtualRegister(TrueReg1)) {
+ MachineInstr *DefMI = MRI->getVRegDef(TrueReg1);
+
+ // If this is a splat or a swap fed by another splat, we
+ // can replace it with a copy.
+ if (DefMI && DefMI->getOpcode() == PPC::XXPERMDI) {
+ unsigned FeedImmed = DefMI->getOperand(3).getImm();
+ unsigned FeedReg1
+ = lookThruCopyLike(DefMI->getOperand(1).getReg());
+ unsigned FeedReg2
+ = lookThruCopyLike(DefMI->getOperand(2).getReg());
+
+ if ((FeedImmed == 0 || FeedImmed == 3) && FeedReg1 == FeedReg2) {
+ DEBUG(dbgs()
+ << "Optimizing splat/swap or splat/splat "
+ "to splat/copy: ");
+ DEBUG(MI.dump());
+ BuildMI(MBB, &MI, MI.getDebugLoc(),
+ TII->get(PPC::COPY), MI.getOperand(0).getReg())
+ .addOperand(MI.getOperand(1));
+ ToErase = &MI;
+ Simplified = true;
+ }
+
+ // If this is a splat fed by a swap, we can simplify modify
+ // the splat to splat the other value from the swap's input
+ // parameter.
+ else if ((Immed == 0 || Immed == 3)
+ && FeedImmed == 2 && FeedReg1 == FeedReg2) {
+ DEBUG(dbgs() << "Optimizing swap/splat => splat: ");
+ DEBUG(MI.dump());
+ MI.getOperand(1).setReg(DefMI->getOperand(1).getReg());
+ MI.getOperand(2).setReg(DefMI->getOperand(2).getReg());
+ MI.getOperand(3).setImm(3 - Immed);
+ Simplified = true;
+ }
+
+ // If this is a swap fed by a swap, we can replace it
+ // with a copy from the first swap's input.
+ else if (Immed == 2 && FeedImmed == 2 && FeedReg1 == FeedReg2) {
+ DEBUG(dbgs() << "Optimizing swap/swap => copy: ");
+ DEBUG(MI.dump());
+ BuildMI(MBB, &MI, MI.getDebugLoc(),
+ TII->get(PPC::COPY), MI.getOperand(0).getReg())
+ .addOperand(DefMI->getOperand(1));
+ ToErase = &MI;
+ Simplified = true;
+ }
+ }
+ }
+ }
+ break;
+ }
+ }
+ }
+
+ // If the last instruction was marked for elimination,
+ // remove it now.
+ if (ToErase) {
+ ToErase->eraseFromParent();
+ ToErase = nullptr;
+ }
+ }
+
+ return Simplified;
+}
+
+// This is used to find the "true" source register for an
+// XXPERMDI instruction, since MachineCSE does not handle the
+// "copy-like" operations (Copy and SubregToReg). Returns
+// the original SrcReg unless it is the target of a copy-like
+// operation, in which case we chain backwards through all
+// such operations to the ultimate source register. If a
+// physical register is encountered, we stop the search.
+unsigned PPCMIPeephole::lookThruCopyLike(unsigned SrcReg) {
+
+ while (true) {
+
+ MachineInstr *MI = MRI->getVRegDef(SrcReg);
+ if (!MI->isCopyLike())
+ return SrcReg;
+
+ unsigned CopySrcReg;
+ if (MI->isCopy())
+ CopySrcReg = MI->getOperand(1).getReg();
+ else {
+ assert(MI->isSubregToReg() && "bad opcode for lookThruCopyLike");
+ CopySrcReg = MI->getOperand(2).getReg();
+ }
+
+ if (!TargetRegisterInfo::isVirtualRegister(CopySrcReg))
+ return CopySrcReg;
+
+ SrcReg = CopySrcReg;
+ }
+}
+
+} // end default namespace
+
+INITIALIZE_PASS_BEGIN(PPCMIPeephole, DEBUG_TYPE,
+ "PowerPC MI Peephole Optimization", false, false)
+INITIALIZE_PASS_END(PPCMIPeephole, DEBUG_TYPE,
+ "PowerPC MI Peephole Optimization", false, false)
+
+char PPCMIPeephole::ID = 0;
+FunctionPass*
+llvm::createPPCMIPeepholePass() { return new PPCMIPeephole(); }
+
diff --git a/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp b/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp
index df687b2cade..24a9ef0ef07 100644
--- a/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp
+++ b/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp
@@ -42,6 +42,10 @@ static cl::
opt<bool> DisableVSXSwapRemoval("disable-ppc-vsx-swap-removal", cl::Hidden,
cl::desc("Disable VSX Swap Removal for PPC"));
+static cl::
+opt<bool> DisableMIPeephole("disable-ppc-peephole", cl::Hidden,
+ cl::desc("Disable machine peepholes for PPC"));
+
static cl::opt<bool>
EnableGEPOpt("ppc-gep-opt", cl::Hidden,
cl::desc("Enable optimizations on complex GEPs"),
@@ -348,6 +352,12 @@ void PPCPassConfig::addMachineSSAOptimization() {
if (TM->getTargetTriple().getArch() == Triple::ppc64le &&
!DisableVSXSwapRemoval)
addPass(createPPCVSXSwapRemovalPass());
+ // Target-specific peephole cleanups performed after instruction
+ // selection.
+ if (!DisableMIPeephole) {
+ addPass(createPPCMIPeepholePass());
+ addPass(&DeadMachineInstructionElimID);
+ }
}
void PPCPassConfig::addPreRegAlloc() {
diff --git a/llvm/test/CodeGen/PowerPC/p8-scalar_vector_conversions.ll b/llvm/test/CodeGen/PowerPC/p8-scalar_vector_conversions.ll
index 35d501e40cb..7e8991647ae 100644
--- a/llvm/test/CodeGen/PowerPC/p8-scalar_vector_conversions.ll
+++ b/llvm/test/CodeGen/PowerPC/p8-scalar_vector_conversions.ll
@@ -63,7 +63,7 @@ entry:
ret <2 x i64> %splat.splat
; CHECK: mtvsrd {{[0-9]+}}, 3
; CHECK-LE: mtvsrd [[REG1:[0-9]+]], 3
-; CHECK-LE: xxswapd {{[0-9]+}}, [[REG1]]
+; CHECK-LE: xxspltd [[REG1]], [[REG1]], 0
}
; Function Attrs: nounwind
diff --git a/llvm/test/CodeGen/PowerPC/pr25157-peephole.ll b/llvm/test/CodeGen/PowerPC/pr25157-peephole.ll
new file mode 100644
index 00000000000..c5bd49b492c
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/pr25157-peephole.ll
@@ -0,0 +1,61 @@
+; RUN: llc -mcpu=pwr8 -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck %s
+
+; Verify peephole simplification of splats and swaps. Bugpoint-reduced
+; test from Eric Schweitz.
+
+%struct.BSS38.51.4488.9911.14348.16813.20264.24701.28152.31603.35054.39491.44914.45407.46393.46886.47872.49351.49844.50830.51323.52309.53295.53788.54281.55267.55760.59211.61625 = type <{ [28 x i8] }>
+%struct_main1_2_.491.4928.10351.14788.17253.20704.25141.28592.32043.35494.39931.45354.45847.46833.47326.48312.49791.50284.51270.51763.52749.53735.54228.54721.55707.56200.59651.61626 = type <{ [64 x i8] }>
+
+@.BSS38 = external global %struct.BSS38.51.4488.9911.14348.16813.20264.24701.28152.31603.35054.39491.44914.45407.46393.46886.47872.49351.49844.50830.51323.52309.53295.53788.54281.55267.55760.59211.61625, align 32
+@_main1_2_ = external global %struct_main1_2_.491.4928.10351.14788.17253.20704.25141.28592.32043.35494.39931.45354.45847.46833.47326.48312.49791.50284.51270.51763.52749.53735.54228.54721.55707.56200.59651.61626, section ".comm", align 16
+
+define void @aercalc_() {
+L.entry:
+ br i1 undef, label %L.LB38_2426, label %L.LB38_2911
+
+L.LB38_2911:
+ br i1 undef, label %L.LB38_2140, label %L.LB38_2640
+
+L.LB38_2640:
+ unreachable
+
+L.LB38_2426:
+ br i1 undef, label %L.LB38_2438, label %L.LB38_2920
+
+L.LB38_2920:
+ br i1 undef, label %L.LB38_2438, label %L.LB38_2921
+
+L.LB38_2921:
+ br label %L.LB38_2140
+
+L.LB38_2140:
+ ret void
+
+L.LB38_2438:
+ br i1 undef, label %L.LB38_2451, label %L.LB38_2935
+
+L.LB38_2935:
+ br i1 undef, label %L.LB38_2451, label %L.LB38_2936
+
+L.LB38_2936:
+ unreachable
+
+L.LB38_2451:
+ br i1 undef, label %L.LB38_2452, label %L.LB38_2937
+
+L.LB38_2937:
+ unreachable
+
+L.LB38_2452:
+ %0 = load float, float* bitcast (i8* getelementptr inbounds (%struct.BSS38.51.4488.9911.14348.16813.20264.24701.28152.31603.35054.39491.44914.45407.46393.46886.47872.49351.49844.50830.51323.52309.53295.53788.54281.55267.55760.59211.61625, %struct.BSS38.51.4488.9911.14348.16813.20264.24701.28152.31603.35054.39491.44914.45407.46393.46886.47872.49351.49844.50830.51323.52309.53295.53788.54281.55267.55760.59211.61625* @.BSS38, i64 0, i32 0, i64 16) to float*), align 16
+ %1 = fpext float %0 to double
+ %2 = insertelement <2 x double> undef, double %1, i32 1
+ store <2 x double> %2, <2 x double>* bitcast (i8* getelementptr inbounds (%struct_main1_2_.491.4928.10351.14788.17253.20704.25141.28592.32043.35494.39931.45354.45847.46833.47326.48312.49791.50284.51270.51763.52749.53735.54228.54721.55707.56200.59651.61626, %struct_main1_2_.491.4928.10351.14788.17253.20704.25141.28592.32043.35494.39931.45354.45847.46833.47326.48312.49791.50284.51270.51763.52749.53735.54228.54721.55707.56200.59651.61626* @_main1_2_, i64 0, i32 0, i64 32) to <2 x double>*), align 16
+ unreachable
+}
+
+; CHECK-LABEL: @aercalc_
+; CHECK: lxsspx
+; CHECK: xxspltd
+; CHECK: stxvd2x
+; CHECK-NOT: xxswapd
diff --git a/llvm/test/CodeGen/PowerPC/swaps-le-5.ll b/llvm/test/CodeGen/PowerPC/swaps-le-5.ll
index 5cd739a0efa..3e13bd16c23 100644
--- a/llvm/test/CodeGen/PowerPC/swaps-le-5.ll
+++ b/llvm/test/CodeGen/PowerPC/swaps-le-5.ll
@@ -15,11 +15,11 @@ entry:
}
; CHECK-LABEL: @bar0
-; CHECK-DAG: xxswapd {{[0-9]+}}, 1
; CHECK-DAG: lxvd2x [[REG1:[0-9]+]]
; CHECK-DAG: xxspltd [[REG2:[0-9]+]]
; CHECK: xxpermdi [[REG3:[0-9]+]], [[REG2]], [[REG1]], 1
; CHECK: stxvd2x [[REG3]]
+; CHECK-NOT: xxswapd
define void @bar1(double %y) {
entry:
@@ -30,11 +30,11 @@ entry:
}
; CHECK-LABEL: @bar1
-; CHECK-DAG: xxswapd {{[0-9]+}}, 1
; CHECK-DAG: lxvd2x [[REG1:[0-9]+]]
; CHECK-DAG: xxspltd [[REG2:[0-9]+]]
; CHECK: xxmrghd [[REG3:[0-9]+]], [[REG1]], [[REG2]]
; CHECK: stxvd2x [[REG3]]
+; CHECK-NOT: xxswapd
define void @baz0() {
entry:
diff --git a/llvm/test/CodeGen/PowerPC/swaps-le-6.ll b/llvm/test/CodeGen/PowerPC/swaps-le-6.ll
index 365aeee2d8f..df88322e4fd 100644
--- a/llvm/test/CodeGen/PowerPC/swaps-le-6.ll
+++ b/llvm/test/CodeGen/PowerPC/swaps-le-6.ll
@@ -20,8 +20,7 @@ entry:
; CHECK-LABEL: @bar0
; CHECK-DAG: lxvd2x [[REG1:[0-9]+]]
; CHECK-DAG: lxsdx [[REG2:[0-9]+]]
-; CHECK: xxswapd [[REG3:[0-9]+]], [[REG2]]
-; CHECK: xxspltd [[REG4:[0-9]+]], [[REG3]], 1
+; CHECK: xxspltd [[REG4:[0-9]+]], [[REG2]], 0
; CHECK: xxpermdi [[REG5:[0-9]+]], [[REG4]], [[REG1]], 1
; CHECK: stxvd2x [[REG5]]
@@ -37,8 +36,7 @@ entry:
; CHECK-LABEL: @bar1
; CHECK-DAG: lxvd2x [[REG1:[0-9]+]]
; CHECK-DAG: lxsdx [[REG2:[0-9]+]]
-; CHECK: xxswapd [[REG3:[0-9]+]], [[REG2]]
-; CHECK: xxspltd [[REG4:[0-9]+]], [[REG3]], 1
+; CHECK: xxspltd [[REG4:[0-9]+]], [[REG2]], 0
; CHECK: xxmrghd [[REG5:[0-9]+]], [[REG1]], [[REG4]]
; CHECK: stxvd2x [[REG5]]
diff --git a/llvm/test/CodeGen/PowerPC/vsx.ll b/llvm/test/CodeGen/PowerPC/vsx.ll
index b4b1d248d1a..b2eefb66676 100644
--- a/llvm/test/CodeGen/PowerPC/vsx.ll
+++ b/llvm/test/CodeGen/PowerPC/vsx.ll
@@ -1228,9 +1228,8 @@ define <2 x i32> @test80(i32 %v) {
; CHECK-LE-LABEL: @test80
; CHECK-LE-DAG: mtvsrd [[R1:[0-9]+]], 3
; CHECK-LE-DAG: addi [[R2:[0-9]+]], {{[0-9]+}}, .LCPI
-; CHECK-LE-DAG: xxswapd [[V1:[0-9]+]], [[R1]]
; CHECK-LE-DAG: lxvd2x [[V2:[0-9]+]], 0, [[R2]]
-; CHECK-LE-DAG: xxspltd 34, [[V1]]
+; CHECK-LE-DAG: xxspltd 34, [[R1]]
; CHECK-LE-DAG: xxswapd 35, [[V2]]
; CHECK-LE: vaddudm 2, 2, 3
; CHECK-LE: blr
diff --git a/llvm/test/CodeGen/PowerPC/vsx_insert_extract_le.ll b/llvm/test/CodeGen/PowerPC/vsx_insert_extract_le.ll
index 6c89b1092bd..97e1548f965 100644
--- a/llvm/test/CodeGen/PowerPC/vsx_insert_extract_le.ll
+++ b/llvm/test/CodeGen/PowerPC/vsx_insert_extract_le.ll
@@ -33,12 +33,8 @@ define double @teste0(<2 x double>* %p1) {
%r = extractelement <2 x double> %v, i32 0
ret double %r
-; FIXME: Swap optimization will collapse this into lxvd2x 1, 0, 3.
-
; CHECK-LABEL: teste0
-; CHECK: lxvd2x 0, 0, 3
-; CHECK: xxswapd 0, 0
-; CHECK: xxswapd 1, 0
+; CHECK: lxvd2x 1, 0, 3
}
define double @teste1(<2 x double>* %p1) {
diff --git a/llvm/test/CodeGen/PowerPC/vsx_shuffle_le.ll b/llvm/test/CodeGen/PowerPC/vsx_shuffle_le.ll
index dcfa0e78886..4f767c7ca78 100644
--- a/llvm/test/CodeGen/PowerPC/vsx_shuffle_le.ll
+++ b/llvm/test/CodeGen/PowerPC/vsx_shuffle_le.ll
@@ -8,8 +8,7 @@ define <2 x double> @test00(<2 x double>* %p1, <2 x double>* %p2) {
; CHECK-LABEL: test00
; CHECK: lxvd2x 0, 0, 3
-; CHECK: xxswapd 0, 0
-; CHECK: xxspltd 34, 0, 1
+; CHECK: xxspltd 34, 0, 0
}
define <2 x double> @test01(<2 x double>* %p1, <2 x double>* %p2) {
@@ -58,9 +57,7 @@ define <2 x double> @test10(<2 x double>* %p1, <2 x double>* %p2) {
ret <2 x double> %v3
; CHECK-LABEL: @test10
-; CHECK: lxvd2x 0, 0, 3
-; CHECK: xxswapd 0, 0
-; CHECK: xxswapd 34, 0
+; CHECK: lxvd2x 34, 0, 3
}
define <2 x double> @test11(<2 x double>* %p1, <2 x double>* %p2) {
@@ -71,8 +68,7 @@ define <2 x double> @test11(<2 x double>* %p1, <2 x double>* %p2) {
; CHECK-LABEL: @test11
; CHECK: lxvd2x 0, 0, 3
-; CHECK: xxswapd 0, 0
-; CHECK: xxspltd 34, 0, 0
+; CHECK: xxspltd 34, 0, 1
}
define <2 x double> @test12(<2 x double>* %p1, <2 x double>* %p2) {
@@ -139,8 +135,7 @@ define <2 x double> @test22(<2 x double>* %p1, <2 x double>* %p2) {
; CHECK-LABEL: @test22
; CHECK: lxvd2x 0, 0, 4
-; CHECK: xxswapd 0, 0
-; CHECK: xxspltd 34, 0, 1
+; CHECK: xxspltd 34, 0, 0
}
define <2 x double> @test23(<2 x double>* %p1, <2 x double>* %p2) {
@@ -189,9 +184,7 @@ define <2 x double> @test32(<2 x double>* %p1, <2 x double>* %p2) {
ret <2 x double> %v3
; CHECK-LABEL: @test32
-; CHECK: lxvd2x 0, 0, 4
-; CHECK: xxswapd 0, 0
-; CHECK: xxswapd 34, 0
+; CHECK: lxvd2x 34, 0, 4
}
define <2 x double> @test33(<2 x double>* %p1, <2 x double>* %p2) {
@@ -202,6 +195,5 @@ define <2 x double> @test33(<2 x double>* %p1, <2 x double>* %p2) {
; CHECK-LABEL: @test33
; CHECK: lxvd2x 0, 0, 4
-; CHECK: xxswapd 0, 0
-; CHECK: xxspltd 34, 0, 0
+; CHECK: xxspltd 34, 0, 1
}
diff --git a/llvm/test/Transforms/PlaceSafepoints/finite-loops.ll b/llvm/test/Transforms/PlaceSafepoints/finite-loops.ll
index 9121e92896c..b98073d6a6e 100644
--- a/llvm/test/Transforms/PlaceSafepoints/finite-loops.ll
+++ b/llvm/test/Transforms/PlaceSafepoints/finite-loops.ll
@@ -11,6 +11,7 @@ define void @test1(i32) gc "statepoint-example" {
; CHECK: statepoint
; CHECK-LABEL: loop
; CHECK-NOT: statepoint
+; CHECK-LABEL: exit
entry:
br label %loop
@@ -32,6 +33,7 @@ define void @test2(i32) gc "statepoint-example" {
; CHECK: statepoint
; CHECK-LABEL: loop
; CHECK-NOT: statepoint
+; CHECK-LABEL: exit
entry:
br label %loop
@@ -56,6 +58,7 @@ define void @test3(i8 %upper) gc "statepoint-example" {
; CHECK: statepoint
; CHECK-LABEL: loop
; CHECK-NOT: statepoint
+; CHECK-LABEL: exit
entry:
br label %loop
@@ -77,12 +80,14 @@ define void @test4(i64 %upper) gc "statepoint-example" {
; CHECK: statepoint
; CHECK-LABEL: loop
; CHECK: statepoint
+; CHECK-LABEL: exit
; COUNTED-64-LABEL: test4
; COUNTED-64-LABEL: entry
; COUNTED-64: statepoint
; COUNTED-64-LABEL: loop
; COUNTED-64-NOT: statepoint
+; COUNTED-64-LABEL: exit
entry:
br label %loop
@@ -105,12 +110,14 @@ define void @test5(i64 %upper) gc "statepoint-example" {
; CHECK: statepoint
; CHECK-LABEL: loop
; CHECK: statepoint
+; CHECK-LABEL: exit
; COUNTED-64-LABEL: test5
; COUNTED-64-LABEL: entry
; COUNTED-64: statepoint
; COUNTED-64-LABEL: loop
; COUNTED-64: statepoint
+; COUNTED-64-LABEL: exit
entry:
br label %loop
OpenPOWER on IntegriCloud