summaryrefslogtreecommitdiffstats
path: root/llvm/lib
diff options
context:
space:
mode:
authorAndrea Di Biagio <Andrea_DiBiagio@sn.scee.net>2014-06-21 01:31:15 +0000
committerAndrea Di Biagio <Andrea_DiBiagio@sn.scee.net>2014-06-21 01:31:15 +0000
commite5015d8abad0e4980c3a11cd4d68f65907504ffd (patch)
treeb1f459cd59339dd48e76d6379430835b68c4d5a5 /llvm/lib
parent252cbe25cb7e02df422cc9f4ac0fdf235c6b2cd8 (diff)
downloadbcm5719-llvm-e5015d8abad0e4980c3a11cd4d68f65907504ffd.tar.gz
bcm5719-llvm-e5015d8abad0e4980c3a11cd4d68f65907504ffd.zip
[X86] Add ISel patterns to select SSE3/AVX ADDSUB instructions.
This patch adds ISel patterns to select SSE3/AVX ADDSUB instructions from a sequence of "vadd + vsub + blend".

Example:
///
typedef float float4 __attribute__((ext_vector_type(4)));

float4 foo(float4 A, float4 B) {
  float4 X = A - B;
  float4 Y = A + B;
  return (float4){X[0], Y[1], X[2], Y[3]};
}
///

Before this patch, (with flag -mcpu=corei7) llc produced the following assembly sequence:
  movaps %xmm0, %xmm2
  addps %xmm1, %xmm2
  subps %xmm1, %xmm0
  blendps $10, %xmm2, %xmm0

With this patch, we now get a single
  addsubps %xmm1, %xmm0

llvm-svn: 211427
Diffstat (limited to 'llvm/lib')
-rw-r--r--llvm/lib/Target/X86/X86InstrSSE.td46
1 files changed, 46 insertions, 0 deletions
diff --git a/llvm/lib/Target/X86/X86InstrSSE.td b/llvm/lib/Target/X86/X86InstrSSE.td
index 11c3f11f2cd..a33d2cc8321 100644
--- a/llvm/lib/Target/X86/X86InstrSSE.td
+++ b/llvm/lib/Target/X86/X86InstrSSE.td
@@ -5355,6 +5355,52 @@ let Constraints = "$src1 = $dst", Predicates = [UseSSE3] in {
f128mem, SSE_ALU_F64P>, PD;
}
+// Patterns used to select 'addsub' instructions.
+let Predicates = [HasAVX] in {
+ // Constant 170 corresponds to the binary mask '10101010'.
+ // When used as a blend mask, it allows selecting eight elements from two
+ // input vectors as follows:
+ // - Even-numbered values in the destination are copied from
+ // the corresponding elements in the first input vector;
+ // - Odd-numbered values in the destination are copied from
+ // the corresponding elements in the second input vector.
+
+ def : Pat<(v8f32 (X86Blendi (v8f32 (fsub VR256:$lhs, VR256:$rhs)),
+ (v8f32 (fadd VR256:$lhs, VR256:$rhs)), (i32 170))),
+ (VADDSUBPSYrr VR256:$lhs, VR256:$rhs)>;
+
+ // Constant 10 corresponds to the binary mask '1010'.
+ // In the two patterns below, it is used as a shuffle/blend mask to select:
+ // - the 1st and 3rd elements from the first input vector (the 'fsub' node);
+ // - the 2nd and 4th elements from the second input vector (the 'fadd' node).
+
+ def : Pat<(v4f64 (X86Shufp (v4f64 (fsub VR256:$lhs, VR256:$rhs)),
+ (v4f64 (fadd VR256:$lhs, VR256:$rhs)), (i8 10))),
+ (VADDSUBPDYrr VR256:$lhs, VR256:$rhs)>;
+ def : Pat<(v4f32 (X86Blendi (v4f32 (fsub VR128:$lhs, VR128:$rhs)),
+ (v4f32 (fadd VR128:$lhs, VR128:$rhs)), (i32 10))),
+ (VADDSUBPSrr VR128:$lhs, VR128:$rhs)>;
+
+ def : Pat<(v2f64 (X86Movsd (v2f64 (fadd VR128:$lhs, VR128:$rhs)),
+ (v2f64 (fsub VR128:$lhs, VR128:$rhs)))),
+ (VADDSUBPDrr VR128:$lhs, VR128:$rhs)>;
+}
+
+let Predicates = [UseSSE3] in {
+ // Constant 10 corresponds to the binary mask '1010'.
+ // In the pattern below, it is used as a blend mask to select:
+ // - the 1st and 3rd elements from the first input vector (the fsub node);
+ // - the 2nd and 4th elements from the second input vector (the fadd node).
+
+ def : Pat<(v4f32 (X86Blendi (v4f32 (fsub VR128:$lhs, VR128:$rhs)),
+ (v4f32 (fadd VR128:$lhs, VR128:$rhs)), (i32 10))),
+ (ADDSUBPSrr VR128:$lhs, VR128:$rhs)>;
+
+ def : Pat<(v2f64 (X86Movsd (v2f64 (fadd VR128:$lhs, VR128:$rhs)),
+ (v2f64 (fsub VR128:$lhs, VR128:$rhs)))),
+ (ADDSUBPDrr VR128:$lhs, VR128:$rhs)>;
+}
+
//===---------------------------------------------------------------------===//
// SSE3 Instructions
//===---------------------------------------------------------------------===//
OpenPOWER on IntegriCloud