summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
-rw-r--r-- llvm/lib/Target/PowerPC/PPCMIPeephole.cpp 31
-rw-r--r-- llvm/test/CodeGen/PowerPC/mi-peephole-splat.ll 44
-rw-r--r-- llvm/test/CodeGen/PowerPC/vector-constrained-fp-intrinsics.ll 116
3 files changed, 136 insertions, 55 deletions
diff --git a/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp b/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp
index 7eeff007b78..f42b4bcfc3c 100644
--- a/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp
+++ b/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp
@@ -382,13 +382,23 @@ bool PPCMIPeephole::simplifyCode(void) {
// If this is a splat or a swap fed by another splat, we
// can replace it with a copy.
if (DefOpc == PPC::XXPERMDI) {
- unsigned FeedImmed = DefMI->getOperand(3).getImm();
- unsigned FeedReg1 =
- TRI->lookThruCopyLike(DefMI->getOperand(1).getReg(), MRI);
- unsigned FeedReg2 =
- TRI->lookThruCopyLike(DefMI->getOperand(2).getReg(), MRI);
+ unsigned DefReg1 = DefMI->getOperand(1).getReg();
+ unsigned DefReg2 = DefMI->getOperand(2).getReg();
+ unsigned DefImmed = DefMI->getOperand(3).getImm();
+
+ // If the two inputs are not the same register, check to see if
+ // they originate from the same virtual register after only
+ // copy-like instructions.
+ if (DefReg1 != DefReg2) {
+ unsigned FeedReg1 = TRI->lookThruCopyLike(DefReg1, MRI);
+ unsigned FeedReg2 = TRI->lookThruCopyLike(DefReg2, MRI);
+
+ if (FeedReg1 != FeedReg2 ||
+ Register::isPhysicalRegister(FeedReg1))
+ break;
+ }
- if ((FeedImmed == 0 || FeedImmed == 3) && FeedReg1 == FeedReg2) {
+ if (DefImmed == 0 || DefImmed == 3) {
LLVM_DEBUG(dbgs() << "Optimizing splat/swap or splat/splat "
"to splat/copy: ");
LLVM_DEBUG(MI.dump());
@@ -402,19 +412,18 @@ bool PPCMIPeephole::simplifyCode(void) {
// If this is a splat fed by a swap, we can simply modify
// the splat to splat the other value from the swap's input
// parameter.
- else if ((Immed == 0 || Immed == 3)
- && FeedImmed == 2 && FeedReg1 == FeedReg2) {
+ else if ((Immed == 0 || Immed == 3) && DefImmed == 2) {
LLVM_DEBUG(dbgs() << "Optimizing swap/splat => splat: ");
LLVM_DEBUG(MI.dump());
- MI.getOperand(1).setReg(DefMI->getOperand(1).getReg());
- MI.getOperand(2).setReg(DefMI->getOperand(2).getReg());
+ MI.getOperand(1).setReg(DefReg1);
+ MI.getOperand(2).setReg(DefReg2);
MI.getOperand(3).setImm(3 - Immed);
Simplified = true;
}
// If this is a swap fed by a swap, we can replace it
// with a copy from the first swap's input.
- else if (Immed == 2 && FeedImmed == 2 && FeedReg1 == FeedReg2) {
+ else if (Immed == 2 && DefImmed == 2) {
LLVM_DEBUG(dbgs() << "Optimizing swap/swap => copy: ");
LLVM_DEBUG(MI.dump());
BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::COPY),
diff --git a/llvm/test/CodeGen/PowerPC/mi-peephole-splat.ll b/llvm/test/CodeGen/PowerPC/mi-peephole-splat.ll
new file mode 100644
index 00000000000..1347026e3e1
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/mi-peephole-splat.ll
@@ -0,0 +1,44 @@
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-linux-gnu < %s \
+; RUN: | FileCheck --check-prefix=CHECK-LE %s
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64-linux-gnu -mattr=+vsx < %s \
+; RUN: | FileCheck --check-prefix=CHECK-BE %s
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-linux-gnu -mcpu=pwr9 < %s \
+; RUN: | FileCheck --check-prefix=CHECK-P9LE %s
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64-linux-gnu -mcpu=pwr9 < %s \
+; RUN: | FileCheck --check-prefix=CHECK-P9BE %s
+
+define double @splat_swap(<2 x double> %x, <2 x double> %y) nounwind {
+ %added = fadd <2 x double> %x, %y
+ %call = tail call <2 x double> @llvm.rint.v2f64(<2 x double> %added) nounwind readnone
+ %res1 = extractelement <2 x double> %call, i32 0
+ %res2 = extractelement <2 x double> %call, i32 1
+ %ret = fsub double %res1, %res2
+ ret double %ret
+
+; CHECK-LE-LABEL: splat_swap:
+; CHECK-LE: xxmrghd [[XREG1:[0-9]+]], [[XREG1]], [[XREG2:[0-9]+]]
+; CHECK-LE-NEXT: xxswapd [[XREG2]], [[XREG1]]
+; CHECK-LE-NEXT: xssubdp [[XREG2]], [[XREG2]], [[XREG1]]
+; CHECK-LE-NEXT: addi [[REG1:[0-9]+]], [[REG1]], {{[0-9]+}}
+;
+; CHECK-BE-LABEL: splat_swap:
+; CHECK-BE: xxmrghd [[XREG1:[0-9]+]], [[XREG1]], [[XREG2:[0-9]+]]
+; CHECK-BE-NEXT: xxswapd [[XREG2]], [[XREG1]]
+; CHECK-BE-NEXT: xssubdp [[XREG2]], [[XREG1]], [[XREG2]]
+; CHECK-BE-NEXT: addi [[REG1:[0-9]+]], [[REG1]], {{[0-9]+}}
+;
+; CHECK-P9LE-LABEL: splat_swap:
+; CHECK-P9LE-DAG: xxmrghd [[XREG1:[0-9]+]], [[XREG1]], [[XREG2:[0-9]+]]
+; CHECK-P9LE: xxswapd [[XREG2]], [[XREG1]]
+; CHECK-P9LE-NEXT: xssubdp [[XREG2]], [[XREG2]], [[XREG1]]
+; CHECK-P9LE-NEXT: addi [[REG1:[0-9]+]], [[REG1]], {{[0-9]+}}
+;
+; CHECK-P9BE-LABEL: splat_swap:
+; CHECK-P9BE-DAG: xxmrghd [[XREG1:[0-9]+]], [[XREG1]], [[XREG2:[0-9]+]]
+; CHECK-P9BE: xxswapd [[XREG2]], [[XREG1]]
+; CHECK-P9BE-NEXT: xssubdp [[XREG2]], [[XREG1]], [[XREG2]]
+; CHECK-P9BE-NEXT: addi [[REG1:[0-9]+]], [[REG1]], {{[0-9]+}}
+}
+
+declare <2 x double> @llvm.rint.v2f64(<2 x double>)
+
diff --git a/llvm/test/CodeGen/PowerPC/vector-constrained-fp-intrinsics.ll b/llvm/test/CodeGen/PowerPC/vector-constrained-fp-intrinsics.ll
index 0b4defcd88a..0b802d52837 100644
--- a/llvm/test/CodeGen/PowerPC/vector-constrained-fp-intrinsics.ll
+++ b/llvm/test/CodeGen/PowerPC/vector-constrained-fp-intrinsics.ll
@@ -468,12 +468,13 @@ define <3 x double> @constrained_vector_frem_v3f64() #0 {
; PC64LE-NEXT: lfs 1, .LCPI8_3@toc@l(3)
; PC64LE-NEXT: bl fmod
; PC64LE-NEXT: nop
+; PC64LE-NEXT: xxswapd 0, 63
; PC64LE-NEXT: li 3, 64
-; PC64LE-NEXT: fmr 3, 1
-; PC64LE-NEXT: xxlor 1, 63, 63
-; PC64LE-NEXT: lfd 31, 88(1) # 8-byte Folded Reload
; PC64LE-NEXT: xxlor 2, 63, 63
+; PC64LE-NEXT: lfd 31, 88(1) # 8-byte Folded Reload
; PC64LE-NEXT: lxvd2x 63, 1, 3 # 16-byte Folded Reload
+; PC64LE-NEXT: fmr 3, 1
+; PC64LE-NEXT: fmr 1, 0
; PC64LE-NEXT: addi 1, 1, 96
; PC64LE-NEXT: ld 0, 16(1)
; PC64LE-NEXT: mtlr 0
@@ -509,10 +510,11 @@ define <3 x double> @constrained_vector_frem_v3f64() #0 {
; PC64LE9-NEXT: bl fmod
; PC64LE9-NEXT: nop
; PC64LE9-NEXT: fmr 3, 1
-; PC64LE9-NEXT: xscpsgndp 1, 63, 63
+; PC64LE9-NEXT: xxswapd 1, 63
; PC64LE9-NEXT: xscpsgndp 2, 63, 63
; PC64LE9-NEXT: lxv 63, 48(1) # 16-byte Folded Reload
; PC64LE9-NEXT: lfd 31, 72(1) # 8-byte Folded Reload
+; PC64LE9-NEXT: # kill: def $f1 killed $f1 killed $vsl1
; PC64LE9-NEXT: addi 1, 1, 80
; PC64LE9-NEXT: ld 0, 16(1)
; PC64LE9-NEXT: mtlr 0
@@ -1715,12 +1717,13 @@ define <3 x double> @constrained_vector_pow_v3f64() #0 {
; PC64LE-NEXT: lfd 1, .LCPI33_3@toc@l(3)
; PC64LE-NEXT: bl pow
; PC64LE-NEXT: nop
+; PC64LE-NEXT: xxswapd 0, 63
; PC64LE-NEXT: li 3, 64
-; PC64LE-NEXT: fmr 3, 1
-; PC64LE-NEXT: xxlor 1, 63, 63
-; PC64LE-NEXT: lfd 31, 88(1) # 8-byte Folded Reload
; PC64LE-NEXT: xxlor 2, 63, 63
+; PC64LE-NEXT: lfd 31, 88(1) # 8-byte Folded Reload
; PC64LE-NEXT: lxvd2x 63, 1, 3 # 16-byte Folded Reload
+; PC64LE-NEXT: fmr 3, 1
+; PC64LE-NEXT: fmr 1, 0
; PC64LE-NEXT: addi 1, 1, 96
; PC64LE-NEXT: ld 0, 16(1)
; PC64LE-NEXT: mtlr 0
@@ -1756,10 +1759,11 @@ define <3 x double> @constrained_vector_pow_v3f64() #0 {
; PC64LE9-NEXT: bl pow
; PC64LE9-NEXT: nop
; PC64LE9-NEXT: fmr 3, 1
-; PC64LE9-NEXT: xscpsgndp 1, 63, 63
+; PC64LE9-NEXT: xxswapd 1, 63
; PC64LE9-NEXT: xscpsgndp 2, 63, 63
; PC64LE9-NEXT: lxv 63, 48(1) # 16-byte Folded Reload
; PC64LE9-NEXT: lfd 31, 72(1) # 8-byte Folded Reload
+; PC64LE9-NEXT: # kill: def $f1 killed $f1 killed $vsl1
; PC64LE9-NEXT: addi 1, 1, 80
; PC64LE9-NEXT: ld 0, 16(1)
; PC64LE9-NEXT: mtlr 0
@@ -2113,11 +2117,12 @@ define <3 x double> @constrained_vector_powi_v3f64() #0 {
; PC64LE-NEXT: lfd 1, .LCPI38_2@toc@l(3)
; PC64LE-NEXT: bl __powidf2
; PC64LE-NEXT: nop
+; PC64LE-NEXT: xxswapd 0, 63
; PC64LE-NEXT: li 3, 64
-; PC64LE-NEXT: fmr 3, 1
-; PC64LE-NEXT: xxlor 1, 63, 63
; PC64LE-NEXT: xxlor 2, 63, 63
; PC64LE-NEXT: lxvd2x 63, 1, 3 # 16-byte Folded Reload
+; PC64LE-NEXT: fmr 3, 1
+; PC64LE-NEXT: fmr 1, 0
; PC64LE-NEXT: addi 1, 1, 80
; PC64LE-NEXT: ld 0, 16(1)
; PC64LE-NEXT: mtlr 0
@@ -2150,9 +2155,10 @@ define <3 x double> @constrained_vector_powi_v3f64() #0 {
; PC64LE9-NEXT: bl __powidf2
; PC64LE9-NEXT: nop
; PC64LE9-NEXT: fmr 3, 1
-; PC64LE9-NEXT: xscpsgndp 1, 63, 63
+; PC64LE9-NEXT: xxswapd 1, 63
; PC64LE9-NEXT: xscpsgndp 2, 63, 63
; PC64LE9-NEXT: lxv 63, 48(1) # 16-byte Folded Reload
+; PC64LE9-NEXT: # kill: def $f1 killed $f1 killed $vsl1
; PC64LE9-NEXT: addi 1, 1, 64
; PC64LE9-NEXT: ld 0, 16(1)
; PC64LE9-NEXT: mtlr 0
@@ -2477,11 +2483,12 @@ define <3 x double> @constrained_vector_sin_v3f64() #0 {
; PC64LE-NEXT: lfd 1, .LCPI43_2@toc@l(3)
; PC64LE-NEXT: bl sin
; PC64LE-NEXT: nop
+; PC64LE-NEXT: xxswapd 0, 63
; PC64LE-NEXT: li 3, 64
-; PC64LE-NEXT: fmr 3, 1
-; PC64LE-NEXT: xxlor 1, 63, 63
; PC64LE-NEXT: xxlor 2, 63, 63
; PC64LE-NEXT: lxvd2x 63, 1, 3 # 16-byte Folded Reload
+; PC64LE-NEXT: fmr 3, 1
+; PC64LE-NEXT: fmr 1, 0
; PC64LE-NEXT: addi 1, 1, 80
; PC64LE-NEXT: ld 0, 16(1)
; PC64LE-NEXT: mtlr 0
@@ -2511,9 +2518,10 @@ define <3 x double> @constrained_vector_sin_v3f64() #0 {
; PC64LE9-NEXT: bl sin
; PC64LE9-NEXT: nop
; PC64LE9-NEXT: fmr 3, 1
-; PC64LE9-NEXT: xscpsgndp 1, 63, 63
+; PC64LE9-NEXT: xxswapd 1, 63
; PC64LE9-NEXT: xscpsgndp 2, 63, 63
; PC64LE9-NEXT: lxv 63, 48(1) # 16-byte Folded Reload
+; PC64LE9-NEXT: # kill: def $f1 killed $f1 killed $vsl1
; PC64LE9-NEXT: addi 1, 1, 64
; PC64LE9-NEXT: ld 0, 16(1)
; PC64LE9-NEXT: mtlr 0
@@ -2828,11 +2836,12 @@ define <3 x double> @constrained_vector_cos_v3f64() #0 {
; PC64LE-NEXT: lfd 1, .LCPI48_2@toc@l(3)
; PC64LE-NEXT: bl cos
; PC64LE-NEXT: nop
+; PC64LE-NEXT: xxswapd 0, 63
; PC64LE-NEXT: li 3, 64
-; PC64LE-NEXT: fmr 3, 1
-; PC64LE-NEXT: xxlor 1, 63, 63
; PC64LE-NEXT: xxlor 2, 63, 63
; PC64LE-NEXT: lxvd2x 63, 1, 3 # 16-byte Folded Reload
+; PC64LE-NEXT: fmr 3, 1
+; PC64LE-NEXT: fmr 1, 0
; PC64LE-NEXT: addi 1, 1, 80
; PC64LE-NEXT: ld 0, 16(1)
; PC64LE-NEXT: mtlr 0
@@ -2862,9 +2871,10 @@ define <3 x double> @constrained_vector_cos_v3f64() #0 {
; PC64LE9-NEXT: bl cos
; PC64LE9-NEXT: nop
; PC64LE9-NEXT: fmr 3, 1
-; PC64LE9-NEXT: xscpsgndp 1, 63, 63
+; PC64LE9-NEXT: xxswapd 1, 63
; PC64LE9-NEXT: xscpsgndp 2, 63, 63
; PC64LE9-NEXT: lxv 63, 48(1) # 16-byte Folded Reload
+; PC64LE9-NEXT: # kill: def $f1 killed $f1 killed $vsl1
; PC64LE9-NEXT: addi 1, 1, 64
; PC64LE9-NEXT: ld 0, 16(1)
; PC64LE9-NEXT: mtlr 0
@@ -3179,11 +3189,12 @@ define <3 x double> @constrained_vector_exp_v3f64() #0 {
; PC64LE-NEXT: lfd 1, .LCPI53_2@toc@l(3)
; PC64LE-NEXT: bl exp
; PC64LE-NEXT: nop
+; PC64LE-NEXT: xxswapd 0, 63
; PC64LE-NEXT: li 3, 64
-; PC64LE-NEXT: fmr 3, 1
-; PC64LE-NEXT: xxlor 1, 63, 63
; PC64LE-NEXT: xxlor 2, 63, 63
; PC64LE-NEXT: lxvd2x 63, 1, 3 # 16-byte Folded Reload
+; PC64LE-NEXT: fmr 3, 1
+; PC64LE-NEXT: fmr 1, 0
; PC64LE-NEXT: addi 1, 1, 80
; PC64LE-NEXT: ld 0, 16(1)
; PC64LE-NEXT: mtlr 0
@@ -3213,9 +3224,10 @@ define <3 x double> @constrained_vector_exp_v3f64() #0 {
; PC64LE9-NEXT: bl exp
; PC64LE9-NEXT: nop
; PC64LE9-NEXT: fmr 3, 1
-; PC64LE9-NEXT: xscpsgndp 1, 63, 63
+; PC64LE9-NEXT: xxswapd 1, 63
; PC64LE9-NEXT: xscpsgndp 2, 63, 63
; PC64LE9-NEXT: lxv 63, 48(1) # 16-byte Folded Reload
+; PC64LE9-NEXT: # kill: def $f1 killed $f1 killed $vsl1
; PC64LE9-NEXT: addi 1, 1, 64
; PC64LE9-NEXT: ld 0, 16(1)
; PC64LE9-NEXT: mtlr 0
@@ -3530,11 +3542,12 @@ define <3 x double> @constrained_vector_exp2_v3f64() #0 {
; PC64LE-NEXT: lfd 1, .LCPI58_2@toc@l(3)
; PC64LE-NEXT: bl exp2
; PC64LE-NEXT: nop
+; PC64LE-NEXT: xxswapd 0, 63
; PC64LE-NEXT: li 3, 64
-; PC64LE-NEXT: fmr 3, 1
-; PC64LE-NEXT: xxlor 1, 63, 63
; PC64LE-NEXT: xxlor 2, 63, 63
; PC64LE-NEXT: lxvd2x 63, 1, 3 # 16-byte Folded Reload
+; PC64LE-NEXT: fmr 3, 1
+; PC64LE-NEXT: fmr 1, 0
; PC64LE-NEXT: addi 1, 1, 80
; PC64LE-NEXT: ld 0, 16(1)
; PC64LE-NEXT: mtlr 0
@@ -3564,9 +3577,10 @@ define <3 x double> @constrained_vector_exp2_v3f64() #0 {
; PC64LE9-NEXT: bl exp2
; PC64LE9-NEXT: nop
; PC64LE9-NEXT: fmr 3, 1
-; PC64LE9-NEXT: xscpsgndp 1, 63, 63
+; PC64LE9-NEXT: xxswapd 1, 63
; PC64LE9-NEXT: xscpsgndp 2, 63, 63
; PC64LE9-NEXT: lxv 63, 48(1) # 16-byte Folded Reload
+; PC64LE9-NEXT: # kill: def $f1 killed $f1 killed $vsl1
; PC64LE9-NEXT: addi 1, 1, 64
; PC64LE9-NEXT: ld 0, 16(1)
; PC64LE9-NEXT: mtlr 0
@@ -3881,11 +3895,12 @@ define <3 x double> @constrained_vector_log_v3f64() #0 {
; PC64LE-NEXT: lfd 1, .LCPI63_2@toc@l(3)
; PC64LE-NEXT: bl log
; PC64LE-NEXT: nop
+; PC64LE-NEXT: xxswapd 0, 63
; PC64LE-NEXT: li 3, 64
-; PC64LE-NEXT: fmr 3, 1
-; PC64LE-NEXT: xxlor 1, 63, 63
; PC64LE-NEXT: xxlor 2, 63, 63
; PC64LE-NEXT: lxvd2x 63, 1, 3 # 16-byte Folded Reload
+; PC64LE-NEXT: fmr 3, 1
+; PC64LE-NEXT: fmr 1, 0
; PC64LE-NEXT: addi 1, 1, 80
; PC64LE-NEXT: ld 0, 16(1)
; PC64LE-NEXT: mtlr 0
@@ -3915,9 +3930,10 @@ define <3 x double> @constrained_vector_log_v3f64() #0 {
; PC64LE9-NEXT: bl log
; PC64LE9-NEXT: nop
; PC64LE9-NEXT: fmr 3, 1
-; PC64LE9-NEXT: xscpsgndp 1, 63, 63
+; PC64LE9-NEXT: xxswapd 1, 63
; PC64LE9-NEXT: xscpsgndp 2, 63, 63
; PC64LE9-NEXT: lxv 63, 48(1) # 16-byte Folded Reload
+; PC64LE9-NEXT: # kill: def $f1 killed $f1 killed $vsl1
; PC64LE9-NEXT: addi 1, 1, 64
; PC64LE9-NEXT: ld 0, 16(1)
; PC64LE9-NEXT: mtlr 0
@@ -4232,11 +4248,12 @@ define <3 x double> @constrained_vector_log10_v3f64() #0 {
; PC64LE-NEXT: lfd 1, .LCPI68_2@toc@l(3)
; PC64LE-NEXT: bl log10
; PC64LE-NEXT: nop
+; PC64LE-NEXT: xxswapd 0, 63
; PC64LE-NEXT: li 3, 64
-; PC64LE-NEXT: fmr 3, 1
-; PC64LE-NEXT: xxlor 1, 63, 63
; PC64LE-NEXT: xxlor 2, 63, 63
; PC64LE-NEXT: lxvd2x 63, 1, 3 # 16-byte Folded Reload
+; PC64LE-NEXT: fmr 3, 1
+; PC64LE-NEXT: fmr 1, 0
; PC64LE-NEXT: addi 1, 1, 80
; PC64LE-NEXT: ld 0, 16(1)
; PC64LE-NEXT: mtlr 0
@@ -4266,9 +4283,10 @@ define <3 x double> @constrained_vector_log10_v3f64() #0 {
; PC64LE9-NEXT: bl log10
; PC64LE9-NEXT: nop
; PC64LE9-NEXT: fmr 3, 1
-; PC64LE9-NEXT: xscpsgndp 1, 63, 63
+; PC64LE9-NEXT: xxswapd 1, 63
; PC64LE9-NEXT: xscpsgndp 2, 63, 63
; PC64LE9-NEXT: lxv 63, 48(1) # 16-byte Folded Reload
+; PC64LE9-NEXT: # kill: def $f1 killed $f1 killed $vsl1
; PC64LE9-NEXT: addi 1, 1, 64
; PC64LE9-NEXT: ld 0, 16(1)
; PC64LE9-NEXT: mtlr 0
@@ -4583,11 +4601,12 @@ define <3 x double> @constrained_vector_log2_v3f64() #0 {
; PC64LE-NEXT: lfd 1, .LCPI73_2@toc@l(3)
; PC64LE-NEXT: bl log2
; PC64LE-NEXT: nop
+; PC64LE-NEXT: xxswapd 0, 63
; PC64LE-NEXT: li 3, 64
-; PC64LE-NEXT: fmr 3, 1
-; PC64LE-NEXT: xxlor 1, 63, 63
; PC64LE-NEXT: xxlor 2, 63, 63
; PC64LE-NEXT: lxvd2x 63, 1, 3 # 16-byte Folded Reload
+; PC64LE-NEXT: fmr 3, 1
+; PC64LE-NEXT: fmr 1, 0
; PC64LE-NEXT: addi 1, 1, 80
; PC64LE-NEXT: ld 0, 16(1)
; PC64LE-NEXT: mtlr 0
@@ -4617,9 +4636,10 @@ define <3 x double> @constrained_vector_log2_v3f64() #0 {
; PC64LE9-NEXT: bl log2
; PC64LE9-NEXT: nop
; PC64LE9-NEXT: fmr 3, 1
-; PC64LE9-NEXT: xscpsgndp 1, 63, 63
+; PC64LE9-NEXT: xxswapd 1, 63
; PC64LE9-NEXT: xscpsgndp 2, 63, 63
; PC64LE9-NEXT: lxv 63, 48(1) # 16-byte Folded Reload
+; PC64LE9-NEXT: # kill: def $f1 killed $f1 killed $vsl1
; PC64LE9-NEXT: addi 1, 1, 64
; PC64LE9-NEXT: ld 0, 16(1)
; PC64LE9-NEXT: mtlr 0
@@ -4934,11 +4954,12 @@ define <3 x double> @constrained_vector_rint_v3f64() #0 {
; PC64LE-NEXT: lfd 1, .LCPI78_2@toc@l(3)
; PC64LE-NEXT: bl rint
; PC64LE-NEXT: nop
+; PC64LE-NEXT: xxswapd 0, 63
; PC64LE-NEXT: li 3, 64
-; PC64LE-NEXT: fmr 3, 1
-; PC64LE-NEXT: xxlor 1, 63, 63
; PC64LE-NEXT: xxlor 2, 63, 63
; PC64LE-NEXT: lxvd2x 63, 1, 3 # 16-byte Folded Reload
+; PC64LE-NEXT: fmr 3, 1
+; PC64LE-NEXT: fmr 1, 0
; PC64LE-NEXT: addi 1, 1, 80
; PC64LE-NEXT: ld 0, 16(1)
; PC64LE-NEXT: mtlr 0
@@ -4968,9 +4989,10 @@ define <3 x double> @constrained_vector_rint_v3f64() #0 {
; PC64LE9-NEXT: bl rint
; PC64LE9-NEXT: nop
; PC64LE9-NEXT: fmr 3, 1
-; PC64LE9-NEXT: xscpsgndp 1, 63, 63
+; PC64LE9-NEXT: xxswapd 1, 63
; PC64LE9-NEXT: xscpsgndp 2, 63, 63
; PC64LE9-NEXT: lxv 63, 48(1) # 16-byte Folded Reload
+; PC64LE9-NEXT: # kill: def $f1 killed $f1 killed $vsl1
; PC64LE9-NEXT: addi 1, 1, 64
; PC64LE9-NEXT: ld 0, 16(1)
; PC64LE9-NEXT: mtlr 0
@@ -5285,11 +5307,12 @@ define <3 x double> @constrained_vector_nearby_v3f64() #0 {
; PC64LE-NEXT: lfd 1, .LCPI83_2@toc@l(3)
; PC64LE-NEXT: bl nearbyint
; PC64LE-NEXT: nop
+; PC64LE-NEXT: xxswapd 0, 63
; PC64LE-NEXT: li 3, 64
-; PC64LE-NEXT: fmr 3, 1
-; PC64LE-NEXT: xxlor 1, 63, 63
; PC64LE-NEXT: xxlor 2, 63, 63
; PC64LE-NEXT: lxvd2x 63, 1, 3 # 16-byte Folded Reload
+; PC64LE-NEXT: fmr 3, 1
+; PC64LE-NEXT: fmr 1, 0
; PC64LE-NEXT: addi 1, 1, 80
; PC64LE-NEXT: ld 0, 16(1)
; PC64LE-NEXT: mtlr 0
@@ -5319,9 +5342,10 @@ define <3 x double> @constrained_vector_nearby_v3f64() #0 {
; PC64LE9-NEXT: bl nearbyint
; PC64LE9-NEXT: nop
; PC64LE9-NEXT: fmr 3, 1
-; PC64LE9-NEXT: xscpsgndp 1, 63, 63
+; PC64LE9-NEXT: xxswapd 1, 63
; PC64LE9-NEXT: xscpsgndp 2, 63, 63
; PC64LE9-NEXT: lxv 63, 48(1) # 16-byte Folded Reload
+; PC64LE9-NEXT: # kill: def $f1 killed $f1 killed $vsl1
; PC64LE9-NEXT: addi 1, 1, 64
; PC64LE9-NEXT: ld 0, 16(1)
; PC64LE9-NEXT: mtlr 0
@@ -5672,11 +5696,12 @@ define <3 x double> @constrained_vector_max_v3f64() #0 {
; PC64LE-NEXT: lfs 1, .LCPI88_4@toc@l(3)
; PC64LE-NEXT: bl fmax
; PC64LE-NEXT: nop
+; PC64LE-NEXT: xxswapd 0, 63
; PC64LE-NEXT: li 3, 64
-; PC64LE-NEXT: fmr 3, 1
-; PC64LE-NEXT: xxlor 1, 63, 63
; PC64LE-NEXT: xxlor 2, 63, 63
; PC64LE-NEXT: lxvd2x 63, 1, 3 # 16-byte Folded Reload
+; PC64LE-NEXT: fmr 3, 1
+; PC64LE-NEXT: fmr 1, 0
; PC64LE-NEXT: addi 1, 1, 80
; PC64LE-NEXT: ld 0, 16(1)
; PC64LE-NEXT: mtlr 0
@@ -5712,9 +5737,10 @@ define <3 x double> @constrained_vector_max_v3f64() #0 {
; PC64LE9-NEXT: bl fmax
; PC64LE9-NEXT: nop
; PC64LE9-NEXT: fmr 3, 1
-; PC64LE9-NEXT: xscpsgndp 1, 63, 63
+; PC64LE9-NEXT: xxswapd 1, 63
; PC64LE9-NEXT: xscpsgndp 2, 63, 63
; PC64LE9-NEXT: lxv 63, 48(1) # 16-byte Folded Reload
+; PC64LE9-NEXT: # kill: def $f1 killed $f1 killed $vsl1
; PC64LE9-NEXT: addi 1, 1, 64
; PC64LE9-NEXT: ld 0, 16(1)
; PC64LE9-NEXT: mtlr 0
@@ -6084,11 +6110,12 @@ define <3 x double> @constrained_vector_min_v3f64() #0 {
; PC64LE-NEXT: lfs 1, .LCPI93_4@toc@l(3)
; PC64LE-NEXT: bl fmin
; PC64LE-NEXT: nop
+; PC64LE-NEXT: xxswapd 0, 63
; PC64LE-NEXT: li 3, 64
-; PC64LE-NEXT: fmr 3, 1
-; PC64LE-NEXT: xxlor 1, 63, 63
; PC64LE-NEXT: xxlor 2, 63, 63
; PC64LE-NEXT: lxvd2x 63, 1, 3 # 16-byte Folded Reload
+; PC64LE-NEXT: fmr 3, 1
+; PC64LE-NEXT: fmr 1, 0
; PC64LE-NEXT: addi 1, 1, 80
; PC64LE-NEXT: ld 0, 16(1)
; PC64LE-NEXT: mtlr 0
@@ -6124,9 +6151,10 @@ define <3 x double> @constrained_vector_min_v3f64() #0 {
; PC64LE9-NEXT: bl fmin
; PC64LE9-NEXT: nop
; PC64LE9-NEXT: fmr 3, 1
-; PC64LE9-NEXT: xscpsgndp 1, 63, 63
+; PC64LE9-NEXT: xxswapd 1, 63
; PC64LE9-NEXT: xscpsgndp 2, 63, 63
; PC64LE9-NEXT: lxv 63, 48(1) # 16-byte Folded Reload
+; PC64LE9-NEXT: # kill: def $f1 killed $f1 killed $vsl1
; PC64LE9-NEXT: addi 1, 1, 64
; PC64LE9-NEXT: ld 0, 16(1)
; PC64LE9-NEXT: mtlr 0
OpenPOWER on IntegriCloud