diff options
author | Andrea Di Biagio <Andrea_DiBiagio@sn.scee.net> | 2014-06-21 01:31:15 +0000 |
---|---|---|
committer | Andrea Di Biagio <Andrea_DiBiagio@sn.scee.net> | 2014-06-21 01:31:15 +0000 |
commit | e5015d8abad0e4980c3a11cd4d68f65907504ffd (patch) | |
tree | b1f459cd59339dd48e76d6379430835b68c4d5a5 /llvm/lib | |
parent | 252cbe25cb7e02df422cc9f4ac0fdf235c6b2cd8 (diff) | |
download | bcm5719-llvm-e5015d8abad0e4980c3a11cd4d68f65907504ffd.tar.gz bcm5719-llvm-e5015d8abad0e4980c3a11cd4d68f65907504ffd.zip |
[X86] Add ISel patterns to select SSE3/AVX ADDSUB instructions.
This patch adds ISel patterns to select SSE3/AVX ADDSUB instructions
from a sequence of "vadd + vsub + blend".
Example:
///
typedef float float4 __attribute__((ext_vector_type(4)));
float4 foo(float4 A, float4 B) {
float4 X = A - B;
float4 Y = A + B;
return (float4){X[0], Y[1], X[2], Y[3]};
}
///
Before this patch, (with flag -mcpu=corei7) llc produced the following
assembly sequence:
movaps %xmm0, %xmm2
addps %xmm1, %xmm2
subps %xmm1, %xmm0
blendps $10, %xmm2, %xmm0
With this patch, we now get a single
addsubps %xmm1, %xmm0
llvm-svn: 211427
Diffstat (limited to 'llvm/lib')
-rw-r--r-- | llvm/lib/Target/X86/X86InstrSSE.td | 46 |
1 files changed, 46 insertions, 0 deletions
diff --git a/llvm/lib/Target/X86/X86InstrSSE.td b/llvm/lib/Target/X86/X86InstrSSE.td index 11c3f11f2cd..a33d2cc8321 100644 --- a/llvm/lib/Target/X86/X86InstrSSE.td +++ b/llvm/lib/Target/X86/X86InstrSSE.td @@ -5355,6 +5355,52 @@ let Constraints = "$src1 = $dst", Predicates = [UseSSE3] in { f128mem, SSE_ALU_F64P>, PD; } +// Patterns used to select 'addsub' instructions. +let Predicates = [HasAVX] in { + // Constant 170 corresponds to the binary mask '10101010'. + // When used as a blend mask, it allows selecting eight elements from two + // input vectors as follow: + // - Even-numbered values in the destination are copied from + // the corresponding elements in the first input vector; + // - Odd-numbered values in the destination are copied from + // the corresponding elements in the second input vector. + + def : Pat<(v8f32 (X86Blendi (v8f32 (fsub VR256:$lhs, VR256:$rhs)), + (v8f32 (fadd VR256:$lhs, VR256:$rhs)), (i32 170))), + (VADDSUBPSYrr VR256:$lhs, VR256:$rhs)>; + + // Constant 10 corresponds to the binary mask '1010'. + // In the two pattens below, constant 10 is used as a blend mask to select + // - the 1st and 3rd element from the first input vector (the 'fsub' node); + // - the 2nd and 4th element from the second input vector (the 'fadd' node). + + def : Pat<(v4f64 (X86Shufp (v4f64 (fsub VR256:$lhs, VR256:$rhs)), + (v4f64 (fadd VR256:$lhs, VR256:$rhs)), (i8 10))), + (VADDSUBPDYrr VR256:$lhs, VR256:$rhs)>; + def : Pat<(v4f32 (X86Blendi (v4f32 (fsub VR128:$lhs, VR128:$rhs)), + (v4f32 (fadd VR128:$lhs, VR128:$rhs)), (i32 10))), + (VADDSUBPSrr VR128:$lhs, VR128:$rhs)>; + + def : Pat<(v2f64 (X86Movsd (v2f64 (fadd VR128:$lhs, VR128:$rhs)), + (v2f64 (fsub VR128:$lhs, VR128:$rhs)))), + (VADDSUBPDrr VR128:$lhs, VR128:$rhs)>; +} + +let Predicates = [UseSSE3] in { + // Constant 10 corresponds to the binary mask '1010'. + // In the pattern below, it is used as a blend mask to select: + // - the 1st and 3rd element from the first input vector (the fsub node); + // - the 2nd and 4th element from the second input vector (the fadd node). + + def : Pat<(v4f32 (X86Blendi (v4f32 (fsub VR128:$lhs, VR128:$rhs)), + (v4f32 (fadd VR128:$lhs, VR128:$rhs)), (i32 10))), + (ADDSUBPSrr VR128:$lhs, VR128:$rhs)>; + + def : Pat<(v2f64 (X86Movsd (v2f64 (fadd VR128:$lhs, VR128:$rhs)), + (v2f64 (fsub VR128:$lhs, VR128:$rhs)))), + (ADDSUBPDrr VR128:$lhs, VR128:$rhs)>; +} + //===---------------------------------------------------------------------===// // SSE3 Instructions //===---------------------------------------------------------------------===// |