diff options
| author | Simon Pilgrim <llvm-dev@redking.me.uk> | 2018-11-20 11:46:37 +0000 |
|---|---|---|
| committer | Simon Pilgrim <llvm-dev@redking.me.uk> | 2018-11-20 11:46:37 +0000 |
| commit | a6fb85ffa7bec1df628739db991b3b21186bedc6 (patch) | |
| tree | cb138b2a9513112ce14041d72ef842a02fd161eb /llvm/lib | |
| parent | 7198506ba85ca4204f349c8c9b2807adaa434dd2 (diff) | |
| download | bcm5719-llvm-a6fb85ffa7bec1df628739db991b3b21186bedc6.tar.gz bcm5719-llvm-a6fb85ffa7bec1df628739db991b3b21186bedc6.zip | |
[X86][SSE] Lower immediately to PACKUS instead of VECTOR_SHUFFLE.
As discussed on rL347240, this avoids some regressions on D54679 and also helps some combines to kick in a bit earlier.
llvm-svn: 347300
Diffstat (limited to 'llvm/lib')
| -rw-r--r-- | llvm/lib/Target/X86/X86ISelLowering.cpp | 16 |
1 file changed, 4 insertions, 12 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 1d8702d38e0..07db9d7f13f 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -23413,7 +23413,7 @@ static SDValue LowerMUL(SDValue Op, const X86Subtarget &Subtarget, MVT ExVT = MVT::getVectorVT(MVT::i16, NumElts / 2); - // Extract the lo parts to any extend to i16 + // Extract the lo parts to any extend to i16. // We're going to mask off the low byte of each result element of the // pmullw, so it doesn't matter what's in the high byte of each 16-bit // element. @@ -23423,7 +23423,7 @@ static SDValue LowerMUL(SDValue Op, const X86Subtarget &Subtarget, ALo = DAG.getBitcast(ExVT, ALo); BLo = DAG.getBitcast(ExVT, BLo); - // Extract the hi parts to any extend to i16 + // Extract the hi parts to any extend to i16. // We're going to mask off the low byte of each result element of the // pmullw, so it doesn't matter what's in the high byte of each 16-bit // element. @@ -23432,20 +23432,12 @@ static SDValue LowerMUL(SDValue Op, const X86Subtarget &Subtarget, AHi = DAG.getBitcast(ExVT, AHi); BHi = DAG.getBitcast(ExVT, BHi); - // Multiply, mask the lower 8bits of the lo/hi results and pack + // Multiply, mask the lower 8bits of the lo/hi results and pack. SDValue RLo = DAG.getNode(ISD::MUL, dl, ExVT, ALo, BLo); SDValue RHi = DAG.getNode(ISD::MUL, dl, ExVT, AHi, BHi); RLo = DAG.getNode(ISD::AND, dl, ExVT, RLo, DAG.getConstant(255, dl, ExVT)); RHi = DAG.getNode(ISD::AND, dl, ExVT, RHi, DAG.getConstant(255, dl, ExVT)); - RLo = DAG.getBitcast(VT, RLo); - RHi = DAG.getBitcast(VT, RHi); - - // For each 128-bit lane, we need to take the 8 even elements from RLo then - // the 8 even elements from RHi. 
- SmallVector<int, 64> PackMask; - createPackShuffleMask(VT, PackMask, /*Unary*/false); - - return DAG.getVectorShuffle(VT, dl, RLo, RHi, PackMask); + return DAG.getNode(X86ISD::PACKUS, dl, VT, RLo, RHi); } // Lower v4i32 mul as 2x shuffle, 2x pmuludq, 2x shuffle. |

