diff options
| author | Andrea Di Biagio <Andrea_DiBiagio@sn.scee.net> | 2014-06-25 17:41:58 +0000 |
|---|---|---|
| committer | Andrea Di Biagio <Andrea_DiBiagio@sn.scee.net> | 2014-06-25 17:41:58 +0000 |
| commit | 07cdffc324b8fd336b04e8a7f83d7c3ce1ba7815 (patch) | |
| tree | d717e6b4dc4e7d246c638db18be77822343093d6 /llvm/lib/Target | |
| parent | a826147eef05d1d4fa0f3efdd1eeb4c71ae13b9d (diff) | |
| download | bcm5719-llvm-07cdffc324b8fd336b04e8a7f83d7c3ce1ba7815.tar.gz bcm5719-llvm-07cdffc324b8fd336b04e8a7f83d7c3ce1ba7815.zip | |
[X86] Always prefer to lower a VECTOR_SHUFFLE into a BLENDI instead of SHUFP (or VPERM2X128).
This patch teaches method 'LowerVECTOR_SHUFFLE' to give higher precedence to
the check for 'isBlendMask'; the idea is that, when possible, we should firstly
check if a shuffle performs a blend, and in case, try to lower it into a BLENDI
instead of selecting a SHUFP or (worse) a VPERM2X128.
In general:
- AVX VBLENDPS/D always have better latency and throughput than VPERM2F128;
- BLENDPS/D instructions tend to always have better 'reciprocal throughput'
than the equivalent SHUFPS/D;
- Both BLENDPS/D and SHUFPS/D are often decoded into the same number of
m-ops; however, a m-op obtained from a BLENDPS/D can be scheduled to more
than one execution port.
This patch:
- Moves the check for 'isBlendMask' immediately before the check for
'isSHUFPMask' within method 'LowerVECTOR_SHUFFLE';
- Updates existing tests for sse/avx shuffle/blend instructions to verify
that we select (v)blendps/d when possible (instead of (v)shufps/d or
vperm2f128).
llvm-svn: 211720
Diffstat (limited to 'llvm/lib/Target')
| -rw-r--r-- | llvm/lib/Target/X86/X86ISelLowering.cpp | 10 | ||||
| -rw-r--r-- | llvm/lib/Target/X86/X86InstrSSE.td | 4 |
2 files changed, 7 insertions, 7 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 989e6f1ee81..cde413f4551 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -8337,6 +8337,11 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const { getShufflePSHUFLWImmediate(SVOp), DAG); + unsigned MaskValue; + if (isBlendMask(M, VT, Subtarget->hasSSE41(), Subtarget->hasInt256(), + &MaskValue)) + return LowerVECTOR_SHUFFLEtoBlend(SVOp, MaskValue, Subtarget, DAG); + if (isSHUFPMask(M, VT)) return getTargetShuffleNode(X86ISD::SHUFP, dl, VT, V1, V2, getShuffleSHUFImmediate(SVOp), DAG); @@ -8374,11 +8379,6 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const { return getTargetShuffleNode(X86ISD::VPERM2X128, dl, VT, V1, V2, getShuffleVPERM2X128Immediate(SVOp), DAG); - unsigned MaskValue; - if (isBlendMask(M, VT, Subtarget->hasSSE41(), Subtarget->hasInt256(), - &MaskValue)) - return LowerVECTOR_SHUFFLEtoBlend(SVOp, MaskValue, Subtarget, DAG); - if (Subtarget->hasSSE41() && isINSERTPSMask(M, VT)) return getINSERTPS(SVOp, dl, DAG); diff --git a/llvm/lib/Target/X86/X86InstrSSE.td b/llvm/lib/Target/X86/X86InstrSSE.td index 988a0598975..e6ca519b7d5 100644 --- a/llvm/lib/Target/X86/X86InstrSSE.td +++ b/llvm/lib/Target/X86/X86InstrSSE.td @@ -5374,8 +5374,8 @@ let Predicates = [HasAVX] in { // - the 1st and 3rd element from the first input vector (the 'fsub' node); // - the 2nd and 4th element from the second input vector (the 'fadd' node). - def : Pat<(v4f64 (X86Shufp (v4f64 (fsub VR256:$lhs, VR256:$rhs)), - (v4f64 (fadd VR256:$lhs, VR256:$rhs)), (i8 10))), + def : Pat<(v4f64 (X86Blendi (v4f64 (fsub VR256:$lhs, VR256:$rhs)), + (v4f64 (fadd VR256:$lhs, VR256:$rhs)), (i32 10))), (VADDSUBPDYrr VR256:$lhs, VR256:$rhs)>; def : Pat<(v4f64 (X86Blendi (v4f64 (fsub VR256:$lhs, VR256:$rhs)), (v4f64 (fadd VR256:$lhs, VR256:$rhs)), (i32 10))), |

