summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorNemanja Ivanovic <nemanja.i.ibm@gmail.com>2016-07-18 15:30:00 +0000
committerNemanja Ivanovic <nemanja.i.ibm@gmail.com>2016-07-18 15:30:00 +0000
commitd3c284f645f896a0957ec040ac735fe44c53725e (patch)
tree382f06adf132566f6b7e3f1aea634f72a0399454
parenta645433c5fac7910c199e8bd381942f2b6f0c0d5 (diff)
downloadbcm5719-llvm-d3c284f645f896a0957ec040ac735fe44c53725e.tar.gz
bcm5719-llvm-d3c284f645f896a0957ec040ac735fe44c53725e.zip
[PowerPC] Remove redundant direct moves when extracting integers and converting to FP
This patch corresponds to review: https://reviews.llvm.org/D21354. We use direct moves for extracting integer elements from vectors, and we also use direct moves when converting integers to FP. When these operations are chained, we get a direct move out of a VSR followed by a direct move back into a VSR. These moves are redundant: all we need to do is line up the element and convert. llvm-svn: 275796
-rw-r--r--llvm/lib/Target/PowerPC/PPCInstrVSX.td43
-rw-r--r--llvm/test/CodeGen/PowerPC/remove-redundant-moves.ll107
2 files changed, 150 insertions, 0 deletions
diff --git a/llvm/lib/Target/PowerPC/PPCInstrVSX.td b/llvm/lib/Target/PowerPC/PPCInstrVSX.td
index 40b7ae38530..a02ace00a76 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrVSX.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrVSX.td
@@ -1029,6 +1029,28 @@ def : Pat<(int_ppc_vsx_xvrsqrtesp v4f32:$A),
def : Pat<(int_ppc_vsx_xvrsqrtedp v2f64:$A),
(XVRSQRTEDP $A)>;
+// Little-endian: match "extract an i64 element of a v2i64, move it to a VSR,
+// convert to f64" and select a single VSX scalar convert on the vector
+// register, so the element never leaves the VSR.
+let Predicates = [IsLittleEndian] in {
+// Element 0, signed: XXPERMDI $S, $S, 2 lines the element up before converting.
+def : Pat<(f64 (PPCfcfid (PPCmtvsra (i64 (vector_extract v2i64:$S, 0))))),
+ (f64 (XSCVSXDDP (COPY_TO_REGCLASS (XXPERMDI $S, $S, 2), VSFRC)))>;
+// Element 1, signed: no permute; only register-class copies feed the convert.
+def : Pat<(f64 (PPCfcfid (PPCmtvsra (i64 (vector_extract v2i64:$S, 1))))),
+ (f64 (XSCVSXDDP (COPY_TO_REGCLASS (f64 (COPY_TO_REGCLASS $S, VSRC)), VSFRC)))>;
+// Element 0, unsigned: same shape as the signed case, using XSCVUXDDP.
+def : Pat<(f64 (PPCfcfidu (PPCmtvsra (i64 (vector_extract v2i64:$S, 0))))),
+ (f64 (XSCVUXDDP (COPY_TO_REGCLASS (XXPERMDI $S, $S, 2), VSFRC)))>;
+// Element 1, unsigned: no permute needed.
+def : Pat<(f64 (PPCfcfidu (PPCmtvsra (i64 (vector_extract v2i64:$S, 1))))),
+ (f64 (XSCVUXDDP (COPY_TO_REGCLASS (f64 (COPY_TO_REGCLASS $S, VSRC)), VSFRC)))>;
+} // IsLittleEndian
+
+// Big-endian: same folding as the little-endian patterns, but the element
+// that needs the XXPERMDI is index 1 rather than index 0.
+let Predicates = [IsBigEndian] in {
+// Element 0, signed: converted directly after a register-class copy.
+def : Pat<(f64 (PPCfcfid (PPCmtvsra (i64 (vector_extract v2i64:$S, 0))))),
+ (f64 (XSCVSXDDP (COPY_TO_REGCLASS $S, VSFRC)))>;
+// Element 1, signed: XXPERMDI $S, $S, 2 lines the element up before converting.
+def : Pat<(f64 (PPCfcfid (PPCmtvsra (i64 (vector_extract v2i64:$S, 1))))),
+ (f64 (XSCVSXDDP (COPY_TO_REGCLASS (XXPERMDI $S, $S, 2), VSFRC)))>;
+// Element 0, unsigned: converted directly after a register-class copy.
+def : Pat<(f64 (PPCfcfidu (PPCmtvsra (i64 (vector_extract v2i64:$S, 0))))),
+ (f64 (XSCVUXDDP (COPY_TO_REGCLASS $S, VSFRC)))>;
+// Element 1, unsigned: permute first, then convert.
+def : Pat<(f64 (PPCfcfidu (PPCmtvsra (i64 (vector_extract v2i64:$S, 1))))),
+ (f64 (XSCVUXDDP (COPY_TO_REGCLASS (XXPERMDI $S, $S, 2), VSFRC)))>;
+} // IsBigEndian
+
} // AddedComplexity
} // HasVSX
@@ -1235,6 +1257,27 @@ let AddedComplexity = 400 in { // Prefer VSX patterns over non-VSX patterns.
def XSCVSPDPN : XX2Form<60, 331, (outs vssrc:$XT), (ins vsrc:$XB),
"xscvspdpn $XT, $XB", IIC_VecFP, []>;
+ // Little-endian, single-precision results: fold "extract i64 element, move
+ // to VSR, convert to f32" into one XSCVSXDSP / XSCVUXDSP on the vector
+ // register.
+ let Predicates = [IsLittleEndian] in {
+ // Element 0, signed: XXPERMDI $S, $S, 2 lines the element up first.
+ def : Pat<(f32 (PPCfcfids (PPCmtvsra (i64 (vector_extract v2i64:$S, 0))))),
+ (f32 (XSCVSXDSP (COPY_TO_REGCLASS (XXPERMDI $S, $S, 2), VSFRC)))>;
+ // Element 1, signed: no permute; only register-class copies.
+ def : Pat<(f32 (PPCfcfids (PPCmtvsra (i64 (vector_extract v2i64:$S, 1))))),
+ (f32 (XSCVSXDSP (COPY_TO_REGCLASS (f64 (COPY_TO_REGCLASS $S, VSRC)), VSFRC)))>;
+ // Element 0, unsigned: permute first, then XSCVUXDSP.
+ def : Pat<(f32 (PPCfcfidus (PPCmtvsra (i64 (vector_extract v2i64:$S, 0))))),
+ (f32 (XSCVUXDSP (COPY_TO_REGCLASS (XXPERMDI $S, $S, 2), VSFRC)))>;
+ // Element 1, unsigned: no permute needed.
+ def : Pat<(f32 (PPCfcfidus (PPCmtvsra (i64 (vector_extract v2i64:$S, 1))))),
+ (f32 (XSCVUXDSP (COPY_TO_REGCLASS (f64 (COPY_TO_REGCLASS $S, VSRC)), VSFRC)))>;
+ } // IsLittleEndian
+
+ // Big-endian, single-precision results: same folding as the little-endian
+ // f32 patterns, with the XXPERMDI applied to element 1 instead of element 0.
+ let Predicates = [IsBigEndian] in {
+ // Element 0, signed: converted directly after a register-class copy.
+ def : Pat<(f32 (PPCfcfids (PPCmtvsra (i64 (vector_extract v2i64:$S, 0))))),
+ (f32 (XSCVSXDSP (COPY_TO_REGCLASS $S, VSFRC)))>;
+ // Element 1, signed: XXPERMDI $S, $S, 2 lines the element up first.
+ def : Pat<(f32 (PPCfcfids (PPCmtvsra (i64 (vector_extract v2i64:$S, 1))))),
+ (f32 (XSCVSXDSP (COPY_TO_REGCLASS (XXPERMDI $S, $S, 2), VSFRC)))>;
+ // Element 0, unsigned: converted directly after a register-class copy.
+ def : Pat<(f32 (PPCfcfidus (PPCmtvsra (i64 (vector_extract v2i64:$S, 0))))),
+ (f32 (XSCVUXDSP (COPY_TO_REGCLASS $S, VSFRC)))>;
+ // Element 1, unsigned: permute first, then convert.
+ def : Pat<(f32 (PPCfcfidus (PPCmtvsra (i64 (vector_extract v2i64:$S, 1))))),
+ (f32 (XSCVUXDSP (COPY_TO_REGCLASS (XXPERMDI $S, $S, 2), VSFRC)))>;
+ } // IsBigEndian
} // AddedComplexity = 400
} // HasP8Vector
diff --git a/llvm/test/CodeGen/PowerPC/remove-redundant-moves.ll b/llvm/test/CodeGen/PowerPC/remove-redundant-moves.ll
new file mode 100644
index 00000000000..6b845cbf380
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/remove-redundant-moves.ll
@@ -0,0 +1,107 @@
+; RUN: llc -mcpu=pwr8 -mtriple=powerpc64le-unknown-linux-gnu \
+; RUN: -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mcpu=pwr8 -mtriple=powerpc64-unknown-linux-gnu \
+; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefix=CHECK-BE
+; Signed i64 element 0 to double. The little-endian run expects a doubleword
+; swap feeding xscvsxddp; the big-endian run expects an xxlor of register 34
+; with itself feeding xscvsxddp.
+define double @test1(<2 x i64> %a) {
+entry:
+; CHECK-LABEL: test1
+; CHECK: xxswapd [[SW:[0-9]+]], 34
+; CHECK: xscvsxddp 1, [[SW]]
+; CHECK-BE-LABEL: test1
+; CHECK-BE: xxlor [[CP:[0-9]+]], 34, 34
+; CHECK-BE: xscvsxddp 1, [[CP]]
+ %0 = extractelement <2 x i64> %a, i32 0
+ %1 = sitofp i64 %0 to double
+ ret double %1
+}
+
+; Signed i64 element 1 to double. Expectations mirror the element-0 case: the
+; little-endian run expects the xxlor form and the big-endian run expects the
+; doubleword swap before xscvsxddp.
+define double @test2(<2 x i64> %a) {
+entry:
+; CHECK-LABEL: test2
+; CHECK: xxlor [[CP:[0-9]+]], 34, 34
+; CHECK: xscvsxddp 1, [[CP]]
+; CHECK-BE-LABEL: test2
+; CHECK-BE: xxswapd [[SW:[0-9]+]], 34
+; CHECK-BE: xscvsxddp 1, [[SW]]
+ %0 = extractelement <2 x i64> %a, i32 1
+ %1 = sitofp i64 %0 to double
+ ret double %1
+}
+
+; Signed i64 element 0 to float. The little-endian run expects a doubleword
+; swap feeding xscvsxdsp; the big-endian run expects the xxlor form.
+define float @test1f(<2 x i64> %a) {
+entry:
+; CHECK-LABEL: test1f
+; CHECK: xxswapd [[SW:[0-9]+]], 34
+; CHECK: xscvsxdsp 1, [[SW]]
+; CHECK-BE-LABEL: test1f
+; CHECK-BE: xxlor [[CP:[0-9]+]], 34, 34
+; CHECK-BE: xscvsxdsp 1, [[CP]]
+ %0 = extractelement <2 x i64> %a, i32 0
+ %1 = sitofp i64 %0 to float
+ ret float %1
+}
+
+; Signed i64 element 1 to float. The little-endian run expects the xxlor form
+; feeding xscvsxdsp; the big-endian run expects the doubleword swap.
+define float @test2f(<2 x i64> %a) {
+entry:
+; CHECK-LABEL: test2f
+; CHECK: xxlor [[CP:[0-9]+]], 34, 34
+; CHECK: xscvsxdsp 1, [[CP]]
+; CHECK-BE-LABEL: test2f
+; CHECK-BE: xxswapd [[SW:[0-9]+]], 34
+; CHECK-BE: xscvsxdsp 1, [[SW]]
+ %0 = extractelement <2 x i64> %a, i32 1
+ %1 = sitofp i64 %0 to float
+ ret float %1
+}
+
+; Unsigned i64 element 0 to double. The little-endian run expects a doubleword
+; swap feeding xscvuxddp; the big-endian run expects the xxlor form.
+define double @test1u(<2 x i64> %a) {
+entry:
+; CHECK-LABEL: test1u
+; CHECK: xxswapd [[SW:[0-9]+]], 34
+; CHECK: xscvuxddp 1, [[SW]]
+; CHECK-BE-LABEL: test1u
+; CHECK-BE: xxlor [[CP:[0-9]+]], 34, 34
+; CHECK-BE: xscvuxddp 1, [[CP]]
+ %0 = extractelement <2 x i64> %a, i32 0
+ %1 = uitofp i64 %0 to double
+ ret double %1
+}
+
+; Unsigned i64 element 1 to double. The little-endian run expects the xxlor
+; form feeding xscvuxddp; the big-endian run expects the doubleword swap.
+define double @test2u(<2 x i64> %a) {
+entry:
+; CHECK-LABEL: test2u
+; CHECK: xxlor [[CP:[0-9]+]], 34, 34
+; CHECK: xscvuxddp 1, [[CP]]
+; CHECK-BE-LABEL: test2u
+; CHECK-BE: xxswapd [[SW:[0-9]+]], 34
+; CHECK-BE: xscvuxddp 1, [[SW]]
+ %0 = extractelement <2 x i64> %a, i32 1
+ %1 = uitofp i64 %0 to double
+ ret double %1
+}
+
+; Unsigned i64 element 0 to float. The little-endian run expects a doubleword
+; swap feeding xscvuxdsp; the big-endian run expects the xxlor form.
+define float @test1fu(<2 x i64> %a) {
+entry:
+; CHECK-LABEL: test1fu
+; CHECK: xxswapd [[SW:[0-9]+]], 34
+; CHECK: xscvuxdsp 1, [[SW]]
+; CHECK-BE-LABEL: test1fu
+; CHECK-BE: xxlor [[CP:[0-9]+]], 34, 34
+; CHECK-BE: xscvuxdsp 1, [[CP]]
+ %0 = extractelement <2 x i64> %a, i32 0
+ %1 = uitofp i64 %0 to float
+ ret float %1
+}
+
+; Unsigned i64 element 1 to float. The little-endian run expects the xxlor
+; form feeding xscvuxdsp; the big-endian run expects the doubleword swap.
+define float @test2fu(<2 x i64> %a) {
+entry:
+; CHECK-LABEL: test2fu
+; CHECK: xxlor [[CP:[0-9]+]], 34, 34
+; CHECK: xscvuxdsp 1, [[CP]]
+; CHECK-BE-LABEL: test2fu
+; CHECK-BE: xxswapd [[SW:[0-9]+]], 34
+; CHECK-BE: xscvuxdsp 1, [[SW]]
+ %0 = extractelement <2 x i64> %a, i32 1
+ %1 = uitofp i64 %0 to float
+ ret float %1
+}
OpenPOWER on IntegriCloud