diff options
Diffstat (limited to 'llvm/lib/Target/X86/X86ISelLowering.cpp')
-rw-r--r-- | llvm/lib/Target/X86/X86ISelLowering.cpp | 14 |
1 files changed, 13 insertions, 1 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 1de40c12562..53d56b950aa 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -12733,7 +12733,9 @@ static SDValue lowerV16I32VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask, // efficient instructions that mirror the shuffles across the four 128-bit // lanes. SmallVector<int, 4> RepeatedMask; - if (is128BitLaneRepeatedShuffleMask(MVT::v16i32, Mask, RepeatedMask)) { + bool Is128BitLaneRepeatedShuffle = + is128BitLaneRepeatedShuffleMask(MVT::v16i32, Mask, RepeatedMask); + if (Is128BitLaneRepeatedShuffle) { assert(RepeatedMask.size() == 4 && "Unexpected repeated mask size!"); if (V2.isUndef()) return DAG.getNode(X86ISD::PSHUFD, DL, MVT::v16i32, V1, @@ -12761,6 +12763,16 @@ static SDValue lowerV16I32VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask, DL, MVT::v16i32, V1, V2, Mask, Subtarget, DAG)) return Rotate; + // Assume that a single SHUFPS is faster than using a permv shuffle. + // If some CPU is harmed by the domain switch, we can fix it in a later pass. + if (Is128BitLaneRepeatedShuffle && isSingleSHUFPSMask(RepeatedMask)) { + SDValue CastV1 = DAG.getBitcast(MVT::v16f32, V1); + SDValue CastV2 = DAG.getBitcast(MVT::v16f32, V2); + SDValue ShufPS = + DAG.getVectorShuffle(MVT::v16f32, DL, CastV1, CastV2, Mask); + return DAG.getBitcast(MVT::v16i32, ShufPS); + } + return lowerVectorShuffleWithPERMV(DL, MVT::v16i32, Mask, V1, V2, DAG); } |