author | Simon Pilgrim <llvm-dev@redking.me.uk> | 2016-12-16 14:30:04 +0000 |
---|---|---|
committer | Simon Pilgrim <llvm-dev@redking.me.uk> | 2016-12-16 14:30:04 +0000 |
commit | 9519bd9232858edcfc294cd93a52516afd07144e (patch) | |
tree | 3e580fbbf9c3a513973f6d6d2989abcb31c3dc15 /llvm/lib/Target/X86/X86ISelLowering.cpp | |
parent | 224416a9e4a79cb1eaf5d9b3ca662ecbd3cba09f (diff) | |
[X86][AVX512] use a single shufps for 512-bit vectors when it can save instructions
This is the 512-bit counterpart to the 128-bit transform checked in here:
https://reviews.llvm.org/rL289837
This patch is based on the draft by @sroland (Roland Scheidegger) that is attached to PR27885:
https://llvm.org/bugs/show_bug.cgi?id=27885
llvm-svn: 289946
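For context, here is a hypothetical standalone example (not taken from the patch or its tests) of the kind of shuffle this change targets: a 16 x i32 shuffle whose mask repeats the same 4-element pattern in every 128-bit lane, with each half of that pattern drawn from a single source. The function name and the exact mask are illustrative assumptions.

```cpp
// Hypothetical example, not from the patch. Compile with something like:
//   clang++ -O2 -mavx512f -S shuffle_example.cpp
// Indices 0-15 select from a, 16-31 from b; every 128-bit lane repeats the
// same per-lane pattern {0, 3, B+3, B+0}, and in each lane the low two
// results come from a while the high two come from b.
typedef int v16si __attribute__((vector_size(64)));

v16si repeated_lane_shuffle(v16si a, v16si b) {
  return __builtin_shufflevector(a, b,
                                 0,  3,  19, 16,
                                 4,  7,  23, 20,
                                 8,  11, 27, 24,
                                 12, 15, 31, 28);
}
```

The intent of the patch is that an integer shuffle of this shape is lowered through a bitcast to v16f32 and a single vshufps, whose control lives in an immediate, rather than a variable permv-style shuffle that typically needs its index vector loaded from a constant pool.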
Diffstat (limited to 'llvm/lib/Target/X86/X86ISelLowering.cpp')
-rw-r--r-- | llvm/lib/Target/X86/X86ISelLowering.cpp | 14 |
1 file changed, 13 insertions(+), 1 deletion(-)
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 1de40c12562..53d56b950aa 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -12733,7 +12733,9 @@ static SDValue lowerV16I32VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
   // efficient instructions that mirror the shuffles across the four 128-bit
   // lanes.
   SmallVector<int, 4> RepeatedMask;
-  if (is128BitLaneRepeatedShuffleMask(MVT::v16i32, Mask, RepeatedMask)) {
+  bool Is128BitLaneRepeatedShuffle =
+      is128BitLaneRepeatedShuffleMask(MVT::v16i32, Mask, RepeatedMask);
+  if (Is128BitLaneRepeatedShuffle) {
     assert(RepeatedMask.size() == 4 && "Unexpected repeated mask size!");
     if (V2.isUndef())
       return DAG.getNode(X86ISD::PSHUFD, DL, MVT::v16i32, V1,
@@ -12761,6 +12763,16 @@ static SDValue lowerV16I32VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
            DL, MVT::v16i32, V1, V2, Mask, Subtarget, DAG))
       return Rotate;
 
+  // Assume that a single SHUFPS is faster than using a permv shuffle.
+  // If some CPU is harmed by the domain switch, we can fix it in a later pass.
+  if (Is128BitLaneRepeatedShuffle && isSingleSHUFPSMask(RepeatedMask)) {
+    SDValue CastV1 = DAG.getBitcast(MVT::v16f32, V1);
+    SDValue CastV2 = DAG.getBitcast(MVT::v16f32, V2);
+    SDValue ShufPS =
+        DAG.getVectorShuffle(MVT::v16f32, DL, CastV1, CastV2, Mask);
+    return DAG.getBitcast(MVT::v16i32, ShufPS);
+  }
+
   return lowerVectorShuffleWithPERMV(DL, MVT::v16i32, Mask, V1, V2, DAG);
 }
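The new guard combines two mask properties. The following is a simplified standalone sketch, not the LLVM implementation (the real is128BitLaneRepeatedShuffleMask and isSingleSHUFPSMask helpers live in X86ISelLowering.cpp and operate on DAG types): every 128-bit lane must repeat the same 4-element pattern, and each half of that pattern must take all of its defined elements from a single source operand, which is what one SHUFPS immediate can encode.

```cpp
// Simplified sketch of the two mask tests, using plain std types instead of
// LLVM's ArrayRef/SmallVector. Mask values are assumed to be in [-1, 31]:
// 0-15 select from source A, 16-31 from source B, -1 means "don't care".
#include <array>
#include <optional>

// Returns the repeated 4-element per-lane pattern (0-3 = A's lane element,
// 4-7 = B's lane element) if every 128-bit lane uses the same pattern.
std::optional<std::array<int, 4>>
repeatedLaneMask(const std::array<int, 16> &Mask) {
  std::array<int, 4> Rep{-1, -1, -1, -1};
  for (int i = 0; i < 16; ++i) {
    if (Mask[i] < 0)
      continue;                       // undef element matches anything
    int Lane = i / 4;
    int Src = Mask[i] < 16 ? 0 : 1;   // 0 = A, 1 = B
    int Idx = Mask[i] - 16 * Src;     // index within that source
    if (Idx / 4 != Lane)
      return std::nullopt;            // element crosses a 128-bit lane
    int Local = (Idx % 4) + 4 * Src;  // 0-3 for A, 4-7 for B
    if (Rep[i % 4] < 0)
      Rep[i % 4] = Local;
    else if (Rep[i % 4] != Local)
      return std::nullopt;            // lanes disagree on the pattern
  }
  return Rep;
}

// A repeated pattern fits one SHUFPS if its low two elements come from a
// single source and its high two elements come from a single source
// (undefs act as wildcards); the lowering can commute operands as needed.
bool fitsSingleShufps(const std::array<int, 4> &Rep) {
  if (Rep[0] >= 0 && Rep[1] >= 0 && (Rep[0] < 4) != (Rep[1] < 4))
    return false;
  if (Rep[2] >= 0 && Rep[3] >= 0 && (Rep[2] < 4) != (Rep[3] < 4))
    return false;
  return true;
}
```

For the example mask shown earlier, repeatedLaneMask returns {0, 3, 7, 4} and fitsSingleShufps reports true, so, assuming none of the earlier special cases in lowerV16I32VectorShuffle (PSHUFD, unpack, rotate) fire first, the patched lowering would take the new bitcast-to-v16f32 SHUFPS path instead of falling through to lowerVectorShuffleWithPERMV.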