summaryrefslogtreecommitdiffstats
path: root/llvm/lib/Target
diff options
context:
space:
mode:
authorBill Schmidt <wschmidt@linux.vnet.ibm.com>2014-12-09 16:43:32 +0000
committerBill Schmidt <wschmidt@linux.vnet.ibm.com>2014-12-09 16:43:32 +0000
commit10f6eb91a085a56de43e4f7eb685c8bff3a8ac6a (patch)
treebf0d75013bb1f626970ae6005b1cb7a17b286755 /llvm/lib/Target
parentcbc5703aeb2164ceddd4006cf965ab648618926d (diff)
downloadbcm5719-llvm-10f6eb91a085a56de43e4f7eb685c8bff3a8ac6a.tar.gz
bcm5719-llvm-10f6eb91a085a56de43e4f7eb685c8bff3a8ac6a.zip
[PowerPC 2/4] Little-endian adjustments for VSX insert/extract operations
For little endian, we need to make some straightforward adjustments in the code expansions for scalar_to_vector and vector_extract of v2f64. First, scalar_to_vector must place the scalar into vector element zero. However, our implementation of SUBREG_TO_REG will place it into big-element vector element zero (high-order bits), and for little endian we need it in the low-order bits. The LE implementation splats the high-order doubleword into the low-order doubleword. Second, the meaning of (vector_extract x, 0) and (vector_extract x, 1) must be reversed for similar reasons. A new test is added that tests code generation for insertelement and extractelement for both element 0 and element 1. It is disabled in this patch but enabled in patch 4/4, for reasons stated in the test. llvm-svn: 223788
Diffstat (limited to 'llvm/lib/Target')
-rw-r--r--llvm/lib/Target/PowerPC/PPCInstrVSX.td14
1 files changed, 14 insertions, 0 deletions
diff --git a/llvm/lib/Target/PowerPC/PPCInstrVSX.td b/llvm/lib/Target/PowerPC/PPCInstrVSX.td
index d8003c9d8c8..b21b251443e 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrVSX.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrVSX.td
@@ -793,6 +793,8 @@ def : InstAlias<"xxswapd $XT, $XB",
(XXPERMDI vsrc:$XT, vsrc:$XB, vsrc:$XB, 2)>;
let AddedComplexity = 400 in { // Prefer VSX patterns over non-VSX patterns.
+
+let Predicates = [IsBigEndian] in {
def : Pat<(v2f64 (scalar_to_vector f64:$A)),
(v2f64 (SUBREG_TO_REG (i64 1), $A, sub_64))>;
@@ -800,6 +802,18 @@ def : Pat<(f64 (vector_extract v2f64:$S, 0)),
(f64 (EXTRACT_SUBREG $S, sub_64))>;
def : Pat<(f64 (vector_extract v2f64:$S, 1)),
(f64 (EXTRACT_SUBREG (XXPERMDI $S, $S, 2), sub_64))>;
+}
+
+let Predicates = [IsLittleEndian] in {
+def : Pat<(v2f64 (scalar_to_vector f64:$A)),
+ (v2f64 (XXPERMDI (SUBREG_TO_REG (i64 1), $A, sub_64),
+ (SUBREG_TO_REG (i64 1), $A, sub_64), 0))>;
+
+def : Pat<(f64 (vector_extract v2f64:$S, 0)),
+ (f64 (EXTRACT_SUBREG (XXPERMDI $S, $S, 2), sub_64))>;
+def : Pat<(f64 (vector_extract v2f64:$S, 1)),
+ (f64 (EXTRACT_SUBREG $S, sub_64))>;
+}
// Additional fnmsub patterns: -a*c + b == -(a*c - b)
def : Pat<(fma (fneg f64:$A), f64:$C, f64:$B),
OpenPOWER on IntegriCloud