summary | refs | log | tree | commit | diff | stats
path: root/llvm
diff options
context:
space:
mode:
Diffstat (limited to 'llvm')
-rw-r--r-- llvm/lib/Target/PowerPC/PPCInstrVSX.td 54
-rw-r--r-- llvm/test/CodeGen/PowerPC/p9-xxinsertw-xxextractuw.ll 63
-rw-r--r-- llvm/test/CodeGen/PowerPC/remove-redundant-moves.ll 128
3 files changed, 245 insertions, 0 deletions
diff --git a/llvm/lib/Target/PowerPC/PPCInstrVSX.td b/llvm/lib/Target/PowerPC/PPCInstrVSX.td
index 942e8b392b8..21e4a9d4bf0 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrVSX.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrVSX.td
@@ -2550,6 +2550,44 @@ let AddedComplexity = 400, Predicates = [HasP9Vector] in {
UseVSXReg;
} // mayStore
+ let Predicates = [IsLittleEndian] in { // Little-endian: convert extracted i32 element N of $A to FP using XXSPLTW word 3-N.
+ def: Pat<(f32 (PPCfcfids (PPCmtvsra (i32 (extractelt v4i32:$A, 0))))), // f32 cases: XXSPLTW, then XVCVSXWSP, then XSCVSPDPN.
+ (f32 (XSCVSPDPN (XVCVSXWSP (XXSPLTW $A, 3))))>;
+ def: Pat<(f32 (PPCfcfids (PPCmtvsra (i32 (extractelt v4i32:$A, 1))))),
+ (f32 (XSCVSPDPN (XVCVSXWSP (XXSPLTW $A, 2))))>;
+ def: Pat<(f32 (PPCfcfids (PPCmtvsra (i32 (extractelt v4i32:$A, 2))))),
+ (f32 (XSCVSPDPN (XVCVSXWSP (XXSPLTW $A, 1))))>;
+ def: Pat<(f32 (PPCfcfids (PPCmtvsra (i32 (extractelt v4i32:$A, 3))))),
+ (f32 (XSCVSPDPN (XVCVSXWSP (XXSPLTW $A, 0))))>;
+ def: Pat<(f64 (PPCfcfid (PPCmtvsra (i32 (extractelt v4i32:$A, 0))))), // f64 cases: XXSPLTW, then XVCVSXWDP, result copied to VSFRC.
+ (f64 (COPY_TO_REGCLASS (XVCVSXWDP (XXSPLTW $A, 3)), VSFRC))>;
+ def: Pat<(f64 (PPCfcfid (PPCmtvsra (i32 (extractelt v4i32:$A, 1))))),
+ (f64 (COPY_TO_REGCLASS (XVCVSXWDP (XXSPLTW $A, 2)), VSFRC))>;
+ def: Pat<(f64 (PPCfcfid (PPCmtvsra (i32 (extractelt v4i32:$A, 2))))),
+ (f64 (COPY_TO_REGCLASS (XVCVSXWDP (XXSPLTW $A, 1)), VSFRC))>;
+ def: Pat<(f64 (PPCfcfid (PPCmtvsra (i32 (extractelt v4i32:$A, 3))))),
+ (f64 (COPY_TO_REGCLASS (XVCVSXWDP (XXSPLTW $A, 0)), VSFRC))>;
+ } // End [IsLittleEndian]. NOTE(review): the ISA 3.0 block that follows lists [IsLittleEndian, HasP9Vector]; this one lists no subtarget feature - confirm that is intended.
+
+ let Predicates = [IsBigEndian] in { // Big-endian: convert extracted i32 element N of $A to FP using XXSPLTW word N.
+ def: Pat<(f32 (PPCfcfids (PPCmtvsra (i32 (extractelt v4i32:$A, 0))))), // f32 cases: XXSPLTW, then XVCVSXWSP, then XSCVSPDPN.
+ (f32 (XSCVSPDPN (XVCVSXWSP (XXSPLTW $A, 0))))>;
+ def: Pat<(f32 (PPCfcfids (PPCmtvsra (i32 (extractelt v4i32:$A, 1))))),
+ (f32 (XSCVSPDPN (XVCVSXWSP (XXSPLTW $A, 1))))>;
+ def: Pat<(f32 (PPCfcfids (PPCmtvsra (i32 (extractelt v4i32:$A, 2))))),
+ (f32 (XSCVSPDPN (XVCVSXWSP (XXSPLTW $A, 2))))>;
+ def: Pat<(f32 (PPCfcfids (PPCmtvsra (i32 (extractelt v4i32:$A, 3))))),
+ (f32 (XSCVSPDPN (XVCVSXWSP (XXSPLTW $A, 3))))>;
+ def: Pat<(f64 (PPCfcfid (PPCmtvsra (i32 (extractelt v4i32:$A, 0))))), // f64 cases: XXSPLTW, then XVCVSXWDP, result copied to VSFRC.
+ (f64 (COPY_TO_REGCLASS (XVCVSXWDP (XXSPLTW $A, 0)), VSFRC))>;
+ def: Pat<(f64 (PPCfcfid (PPCmtvsra (i32 (extractelt v4i32:$A, 1))))),
+ (f64 (COPY_TO_REGCLASS (XVCVSXWDP (XXSPLTW $A, 1)), VSFRC))>;
+ def: Pat<(f64 (PPCfcfid (PPCmtvsra (i32 (extractelt v4i32:$A, 2))))),
+ (f64 (COPY_TO_REGCLASS (XVCVSXWDP (XXSPLTW $A, 2)), VSFRC))>;
+ def: Pat<(f64 (PPCfcfid (PPCmtvsra (i32 (extractelt v4i32:$A, 3))))),
+ (f64 (COPY_TO_REGCLASS (XVCVSXWDP (XXSPLTW $A, 3)), VSFRC))>;
+ } // End [IsBigEndian]. NOTE(review): the ISA 3.0 block that follows pairs its endianness predicate with HasP9Vector; this one lists no subtarget feature - confirm that is intended.
+
// Patterns for which instructions from ISA 3.0 are a better match
let Predicates = [IsLittleEndian, HasP9Vector] in {
def : Pat<(f32 (PPCfcfidus (PPCmtvsrz (i32 (extractelt v4i32:$A, 0))))),
@@ -2560,6 +2598,14 @@ let AddedComplexity = 400, Predicates = [HasP9Vector] in {
(f32 (XSCVUXDSP (XXEXTRACTUW $A, 4)))>;
def : Pat<(f32 (PPCfcfidus (PPCmtvsrz (i32 (extractelt v4i32:$A, 3))))),
(f32 (XSCVUXDSP (XXEXTRACTUW $A, 0)))>;
+ def : Pat<(f64 (PPCfcfidu (PPCmtvsrz (i32 (extractelt v4i32:$A, 0))))),
+ (f64 (XSCVUXDDP (XXEXTRACTUW $A, 12)))>;
+ def : Pat<(f64 (PPCfcfidu (PPCmtvsrz (i32 (extractelt v4i32:$A, 1))))),
+ (f64 (XSCVUXDDP (XXEXTRACTUW $A, 8)))>;
+ def : Pat<(f64 (PPCfcfidu (PPCmtvsrz (i32 (extractelt v4i32:$A, 2))))),
+ (f64 (XSCVUXDDP (XXEXTRACTUW $A, 4)))>;
+ def : Pat<(f64 (PPCfcfidu (PPCmtvsrz (i32 (extractelt v4i32:$A, 3))))),
+ (f64 (XSCVUXDDP (XXEXTRACTUW $A, 0)))>;
def : Pat<(v4i32 (insertelt v4i32:$A, i32:$B, 0)),
(v4i32 (XXINSERTW v4i32:$A, AlignValues.I32_TO_BE_WORD1, 12))>;
def : Pat<(v4i32 (insertelt v4i32:$A, i32:$B, 1)),
@@ -2587,6 +2633,14 @@ let AddedComplexity = 400, Predicates = [HasP9Vector] in {
(f32 (XSCVUXDSP (XXEXTRACTUW $A, 8)))>;
def : Pat<(f32 (PPCfcfidus (PPCmtvsrz (i32 (extractelt v4i32:$A, 3))))),
(f32 (XSCVUXDSP (XXEXTRACTUW $A, 12)))>;
+ def : Pat<(f64 (PPCfcfidu (PPCmtvsrz (i32 (extractelt v4i32:$A, 0))))),
+ (f64 (XSCVUXDDP (XXEXTRACTUW $A, 0)))>;
+ def : Pat<(f64 (PPCfcfidu (PPCmtvsrz (i32 (extractelt v4i32:$A, 1))))),
+ (f64 (XSCVUXDDP (XXEXTRACTUW $A, 4)))>;
+ def : Pat<(f64 (PPCfcfidu (PPCmtvsrz (i32 (extractelt v4i32:$A, 2))))),
+ (f64 (XSCVUXDDP (XXEXTRACTUW $A, 8)))>;
+ def : Pat<(f64 (PPCfcfidu (PPCmtvsrz (i32 (extractelt v4i32:$A, 3))))),
+ (f64 (XSCVUXDDP (XXEXTRACTUW $A, 12)))>;
def : Pat<(v4i32 (insertelt v4i32:$A, i32:$B, 0)),
(v4i32 (XXINSERTW v4i32:$A, AlignValues.I32_TO_BE_WORD1, 0))>;
def : Pat<(v4i32 (insertelt v4i32:$A, i32:$B, 1)),
diff --git a/llvm/test/CodeGen/PowerPC/p9-xxinsertw-xxextractuw.ll b/llvm/test/CodeGen/PowerPC/p9-xxinsertw-xxextractuw.ll
index fe34bcb8563..783833d6b02 100644
--- a/llvm/test/CodeGen/PowerPC/p9-xxinsertw-xxextractuw.ll
+++ b/llvm/test/CodeGen/PowerPC/p9-xxinsertw-xxextractuw.ll
@@ -439,6 +439,69 @@ entry:
ret float %conv
}
+; Verify we generate optimal code for unsigned vector int elem extract followed
+; by conversion to double
+
+define double @conv2dlbTestui0(<4 x i32> %a) { ; Element 0, uitofp to double: xxextractuw immediate is 12 under the default prefix, 0 under the BE prefix.
+entry:
+; CHECK-LABEL: conv2dlbTestui0
+; CHECK: xxextractuw [[SW:[0-9]+]], 34, 12
+; CHECK: xscvuxddp 1, [[SW]]
+; CHECK-BE-LABEL: conv2dlbTestui0
+; CHECK-BE: xxextractuw [[CP:[0-9]+]], 34, 0
+; CHECK-BE: xscvuxddp 1, [[CP]]
+ %0 = extractelement <4 x i32> %a, i32 0
+ %1 = uitofp i32 %0 to double
+ ret double %1
+}
+
+define double @conv2dlbTestui1(<4 x i32> %a) { ; Element 1, uitofp to double: xxextractuw immediate is 8 under the default prefix, 4 under the BE prefix.
+entry:
+; CHECK-LABEL: conv2dlbTestui1
+; CHECK: xxextractuw [[SW:[0-9]+]], 34, 8
+; CHECK: xscvuxddp 1, [[SW]]
+; CHECK-BE-LABEL: conv2dlbTestui1
+; CHECK-BE: xxextractuw [[CP:[0-9]+]], 34, 4
+; CHECK-BE: xscvuxddp 1, [[CP]]
+ %0 = extractelement <4 x i32> %a, i32 1
+ %1 = uitofp i32 %0 to double
+ ret double %1
+}
+
+define double @conv2dlbTestui2(<4 x i32> %a) { ; Element 2, uitofp to double: xxextractuw immediate is 4 under the default prefix, 8 under the BE prefix.
+entry:
+; CHECK-LABEL: conv2dlbTestui2
+; CHECK: xxextractuw [[SW:[0-9]+]], 34, 4
+; CHECK: xscvuxddp 1, [[SW]]
+; CHECK-BE-LABEL: conv2dlbTestui2
+; CHECK-BE: xxextractuw [[CP:[0-9]+]], 34, 8
+; CHECK-BE: xscvuxddp 1, [[CP]]
+ %0 = extractelement <4 x i32> %a, i32 2
+ %1 = uitofp i32 %0 to double
+ ret double %1
+}
+
+define double @conv2dlbTestui3(<4 x i32> %a) { ; Element 3, uitofp to double: xxextractuw immediate is 0 under the default prefix, 12 under the BE prefix.
+entry:
+; CHECK-LABEL: conv2dlbTestui3
+; CHECK: xxextractuw [[SW:[0-9]+]], 34, 0
+; CHECK: xscvuxddp 1, [[SW]]
+; CHECK-BE-LABEL: conv2dlbTestui3
+; CHECK-BE: xxextractuw [[CP:[0-9]+]], 34, 12
+; CHECK-BE: xscvuxddp 1, [[CP]]
+ %0 = extractelement <4 x i32> %a, i32 3
+ %1 = uitofp i32 %0 to double
+ ret double %1
+}
+
+; Verify we don't crash when the extracted element index is a variable.
+define double @conv2dlbTestuiVar(<4 x i32> %a, i32 zeroext %elem) { ; Index comes from %elem; this function carries no output-matching lines.
+entry:
+ %vecext = extractelement <4 x i32> %a, i32 %elem
+ %conv = uitofp i32 %vecext to double
+ ret double %conv
+}
+
define <4 x float> @_Z10testInsEltILj0EDv4_ffET0_S1_T1_(<4 x float> %a, float %b) {
entry:
; CHECK-LABEL: _Z10testInsEltILj0EDv4_ffET0_S1_T1_
diff --git a/llvm/test/CodeGen/PowerPC/remove-redundant-moves.ll b/llvm/test/CodeGen/PowerPC/remove-redundant-moves.ll
index 6b845cbf380..40e39c64dea 100644
--- a/llvm/test/CodeGen/PowerPC/remove-redundant-moves.ll
+++ b/llvm/test/CodeGen/PowerPC/remove-redundant-moves.ll
@@ -105,3 +105,131 @@ entry:
%1 = uitofp i64 %0 to float
ret float %1
}
+
+define float @conv2fltTesti0(<4 x i32> %a) { ; Element 0, sitofp to float: xxspltw word is 3 under the default prefix, 0 under the BE prefix.
+entry:
+; CHECK-LABEL: conv2fltTesti0
+; CHECK: xxspltw [[SW:[0-9]+]], 34, 3
+; CHECK: xvcvsxwsp [[SW]], [[SW]]
+; CHECK: xscvspdpn 1, [[SW]]
+; CHECK-BE-LABEL: conv2fltTesti0
+; CHECK-BE: xxspltw [[CP:[0-9]+]], 34, 0
+; CHECK-BE: xvcvsxwsp [[CP]], [[CP]]
+; CHECK-BE: xscvspdpn 1, [[CP]]
+ %vecext = extractelement <4 x i32> %a, i32 0
+ %conv = sitofp i32 %vecext to float
+ ret float %conv
+}
+
+define float @conv2fltTesti1(<4 x i32> %a) { ; Element 1, sitofp to float: xxspltw word is 2 under the default prefix, 1 under the BE prefix.
+entry:
+; CHECK-LABEL: conv2fltTesti1
+; CHECK: xxspltw [[SW:[0-9]+]], 34, 2
+; CHECK: xvcvsxwsp [[SW]], [[SW]]
+; CHECK: xscvspdpn 1, [[SW]]
+; CHECK-BE-LABEL: conv2fltTesti1
+; CHECK-BE: xxspltw [[CP:[0-9]+]], 34, 1
+; CHECK-BE: xvcvsxwsp [[CP]], [[CP]]
+; CHECK-BE: xscvspdpn 1, [[CP]]
+ %vecext = extractelement <4 x i32> %a, i32 1
+ %conv = sitofp i32 %vecext to float
+ ret float %conv
+}
+
+define float @conv2fltTesti2(<4 x i32> %a) { ; Element 2, sitofp to float: xxspltw word is 1 under the default prefix, 2 under the BE prefix.
+entry:
+; CHECK-LABEL: conv2fltTesti2
+; CHECK: xxspltw [[SW:[0-9]+]], 34, 1
+; CHECK: xvcvsxwsp [[SW]], [[SW]]
+; CHECK: xscvspdpn 1, [[SW]]
+; CHECK-BE-LABEL: conv2fltTesti2
+; CHECK-BE: xxspltw [[CP:[0-9]+]], 34, 2
+; CHECK-BE: xvcvsxwsp [[CP]], [[CP]]
+; CHECK-BE: xscvspdpn 1, [[CP]]
+ %vecext = extractelement <4 x i32> %a, i32 2
+ %conv = sitofp i32 %vecext to float
+ ret float %conv
+}
+
+define float @conv2fltTesti3(<4 x i32> %a) { ; Element 3, sitofp to float: xxspltw word is 0 under the default prefix, 3 under the BE prefix.
+entry:
+; CHECK-LABEL: conv2fltTesti3
+; CHECK: xxspltw [[SW:[0-9]+]], 34, 0
+; CHECK: xvcvsxwsp [[SW]], [[SW]]
+; CHECK: xscvspdpn 1, [[SW]]
+; CHECK-BE-LABEL: conv2fltTesti3
+; CHECK-BE: xxspltw [[CP:[0-9]+]], 34, 3
+; CHECK-BE: xvcvsxwsp [[CP]], [[CP]]
+; CHECK-BE: xscvspdpn 1, [[CP]]
+ %vecext = extractelement <4 x i32> %a, i32 3
+ %conv = sitofp i32 %vecext to float
+ ret float %conv
+}
+
+; Verify we don't crash when the extracted element index is a variable.
+define float @conv2fltTestiVar(<4 x i32> %a, i32 zeroext %elem) { ; Index comes from %elem; this function carries no output-matching lines.
+entry:
+ %vecext = extractelement <4 x i32> %a, i32 %elem
+ %conv = sitofp i32 %vecext to float
+ ret float %conv
+}
+
+define double @conv2dblTesti0(<4 x i32> %a) { ; Element 0, sitofp to double: xxspltw word is 3 under the default prefix, 0 under the BE prefix.
+entry:
+; CHECK-LABEL: conv2dblTesti0
+; CHECK: xxspltw [[SW:[0-9]+]], 34, 3
+; CHECK: xvcvsxwdp 1, [[SW]]
+; CHECK-BE-LABEL: conv2dblTesti0
+; CHECK-BE: xxspltw [[CP:[0-9]+]], 34, 0
+; CHECK-BE: xvcvsxwdp 1, [[CP]]
+ %vecext = extractelement <4 x i32> %a, i32 0
+ %conv = sitofp i32 %vecext to double
+ ret double %conv
+}
+
+define double @conv2dblTesti1(<4 x i32> %a) { ; Element 1, sitofp to double: xxspltw word is 2 under the default prefix, 1 under the BE prefix.
+entry:
+; CHECK-LABEL: conv2dblTesti1
+; CHECK: xxspltw [[SW:[0-9]+]], 34, 2
+; CHECK: xvcvsxwdp 1, [[SW]]
+; CHECK-BE-LABEL: conv2dblTesti1
+; CHECK-BE: xxspltw [[CP:[0-9]+]], 34, 1
+; CHECK-BE: xvcvsxwdp 1, [[CP]]
+ %vecext = extractelement <4 x i32> %a, i32 1
+ %conv = sitofp i32 %vecext to double
+ ret double %conv
+}
+
+define double @conv2dblTesti2(<4 x i32> %a) { ; Element 2, sitofp to double: xxspltw word is 1 under the default prefix, 2 under the BE prefix.
+entry:
+; CHECK-LABEL: conv2dblTesti2
+; CHECK: xxspltw [[SW:[0-9]+]], 34, 1
+; CHECK: xvcvsxwdp 1, [[SW]]
+; CHECK-BE-LABEL: conv2dblTesti2
+; CHECK-BE: xxspltw [[CP:[0-9]+]], 34, 2
+; CHECK-BE: xvcvsxwdp 1, [[CP]]
+ %vecext = extractelement <4 x i32> %a, i32 2
+ %conv = sitofp i32 %vecext to double
+ ret double %conv
+}
+
+define double @conv2dblTesti3(<4 x i32> %a) { ; Element 3, sitofp to double: xxspltw word is 0 under the default prefix, 3 under the BE prefix.
+entry:
+; CHECK-LABEL: conv2dblTesti3
+; CHECK: xxspltw [[SW:[0-9]+]], 34, 0
+; CHECK: xvcvsxwdp 1, [[SW]]
+; CHECK-BE-LABEL: conv2dblTesti3
+; CHECK-BE: xxspltw [[CP:[0-9]+]], 34, 3
+; CHECK-BE: xvcvsxwdp 1, [[CP]]
+ %vecext = extractelement <4 x i32> %a, i32 3
+ %conv = sitofp i32 %vecext to double
+ ret double %conv
+}
+
+; Verify we don't crash when the extracted element index is a variable.
+define double @conv2dblTestiVar(<4 x i32> %a, i32 zeroext %elem) { ; Index comes from %elem; this function carries no output-matching lines.
+entry:
+ %vecext = extractelement <4 x i32> %a, i32 %elem
+ %conv = sitofp i32 %vecext to double
+ ret double %conv
+}
OpenPOWER on IntegriCloud