Diffstat (limited to 'llvm')
-rw-r--r--   llvm/lib/Target/X86/X86ISelLowering.cpp                      21
-rw-r--r--   llvm/test/CodeGen/X86/vector-shuffle-combining-avx2.ll       18
-rw-r--r--   llvm/test/CodeGen/X86/vector-shuffle-combining-avx512bw.ll   38
3 files changed, 75 insertions, 2 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index c025b44c3b1..65d246d7d24 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -24857,9 +24857,26 @@ static bool matchPermuteVectorShuffle(MVT SrcVT, ArrayRef<int> Mask,
   if (SrcVT.is256BitVector() && !Subtarget.hasAVX2())
     FloatDomain = true;
 
-  // TODO - support LaneCrossing for AVX2 PERMQ/PERMPD
-  if (is128BitLaneCrossingShuffleMask(MaskEltVT, Mask))
+  // Check for lane crossing permutes.
+  if (is128BitLaneCrossingShuffleMask(MaskEltVT, Mask)) {
+    // PERMPD/PERMQ permutes within a 256-bit vector (AVX2+).
+    if (Subtarget.hasAVX2() && SrcVT.is256BitVector() && Mask.size() == 4) {
+      Shuffle = X86ISD::VPERMI;
+      ShuffleVT = (FloatDomain ? MVT::v4f64 : MVT::v4i64);
+      PermuteImm = getV4X86ShuffleImm(Mask);
+      return true;
+    }
+    if (Subtarget.hasAVX512() && SrcVT.is512BitVector() && Mask.size() == 8) {
+      SmallVector<int, 4> RepeatedMask;
+      if (is256BitLaneRepeatedShuffleMask(MVT::v8f64, Mask, RepeatedMask)) {
+        Shuffle = X86ISD::VPERMI;
+        ShuffleVT = (FloatDomain ? MVT::v8f64 : MVT::v8i64);
+        PermuteImm = getV4X86ShuffleImm(RepeatedMask);
+        return true;
+      }
+    }
     return false;
+  }
 
   // VPERMILPD can permute with a non-repeating shuffle.
   if (FloatDomain && MaskScalarSizeInBits == 64) {
diff --git a/llvm/test/CodeGen/X86/vector-shuffle-combining-avx2.ll b/llvm/test/CodeGen/X86/vector-shuffle-combining-avx2.ll
index 9c56d9c5e1d..82fbcf45fb5 100644
--- a/llvm/test/CodeGen/X86/vector-shuffle-combining-avx2.ll
+++ b/llvm/test/CodeGen/X86/vector-shuffle-combining-avx2.ll
@@ -184,6 +184,24 @@ define <4 x double> @combine_permd_as_vpbroadcastsd256(<2 x double> %a) {
   ret <4 x double> %4
 }
 
+define <8 x i32> @combine_permd_as_permq(<8 x i32> %a) {
+; CHECK-LABEL: combine_permd_as_permq:
+; CHECK:       # BB#0:
+; CHECK-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,2,1]
+; CHECK-NEXT:    retq
+  %1 = call <8 x i32> @llvm.x86.avx2.permd(<8 x i32> %a, <8 x i32> <i32 0, i32 1, i32 4, i32 5, i32 4, i32 5, i32 2, i32 3>)
+  ret <8 x i32> %1
+}
+
+define <8 x float> @combine_permps_as_permpd(<8 x float> %a) {
+; CHECK-LABEL: combine_permps_as_permpd:
+; CHECK:       # BB#0:
+; CHECK-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[3,2,0,1]
+; CHECK-NEXT:    retq
+  %1 = call <8 x float> @llvm.x86.avx2.permps(<8 x float> %a, <8 x i32> <i32 6, i32 7, i32 4, i32 5, i32 0, i32 1, i32 2, i32 3>)
+  ret <8 x float> %1
+}
+
 define <32 x i8> @combine_pshufb_as_pslldq(<32 x i8> %a0) {
 ; CHECK-LABEL: combine_pshufb_as_pslldq:
 ; CHECK:       # BB#0:
diff --git a/llvm/test/CodeGen/X86/vector-shuffle-combining-avx512bw.ll b/llvm/test/CodeGen/X86/vector-shuffle-combining-avx512bw.ll
index 27e0cc007e7..59f81851730 100644
--- a/llvm/test/CodeGen/X86/vector-shuffle-combining-avx512bw.ll
+++ b/llvm/test/CodeGen/X86/vector-shuffle-combining-avx512bw.ll
@@ -399,6 +399,44 @@ define <8 x i64> @combine_permvar_as_vpbroadcastq512(<8 x i64> %x0) {
   ret <8 x i64> %1
 }
 
+define <8 x i64> @combine_permvar_8i64_as_permq(<8 x i64> %x0, <8 x i64> %x1) {
+; CHECK-LABEL: combine_permvar_8i64_as_permq:
+; CHECK:       # BB#0:
+; CHECK-NEXT:    vpermq {{.*#+}} zmm0 = zmm0[3,2,1,0,7,6,5,4]
+; CHECK-NEXT:    retq
+  %1 = call <8 x i64> @llvm.x86.avx512.mask.permvar.di.512(<8 x i64> %x0, <8 x i64> <i64 3, i64 2, i64 1, i64 undef, i64 undef, i64 6, i64 5, i64 4>, <8 x i64> %x1, i8 -1)
+  ret <8 x i64> %1
+}
+define <8 x i64> @combine_permvar_8i64_as_permq_mask(<8 x i64> %x0, <8 x i64> %x1, i8 %m) {
+; CHECK-LABEL: combine_permvar_8i64_as_permq_mask:
+; CHECK:       # BB#0:
+; CHECK-NEXT:    kmovw %edi, %k1
+; CHECK-NEXT:    vpermq {{.*#+}} zmm1 {%k1} = zmm0[3,2,1,0,7,6,5,4]
+; CHECK-NEXT:    vmovaps %zmm1, %zmm0
+; CHECK-NEXT:    retq
+  %1 = call <8 x i64> @llvm.x86.avx512.mask.permvar.di.512(<8 x i64> %x0, <8 x i64> <i64 3, i64 2, i64 1, i64 undef, i64 undef, i64 6, i64 5, i64 4>, <8 x i64> %x1, i8 %m)
+  ret <8 x i64> %1
+}
+
+define <8 x double> @combine_permvar_8f64_as_permpd(<8 x double> %x0, <8 x double> %x1) {
+; CHECK-LABEL: combine_permvar_8f64_as_permpd:
+; CHECK:       # BB#0:
+; CHECK-NEXT:    vpermpd {{.*#+}} zmm0 = zmm0[3,2,1,0,7,6,5,4]
+; CHECK-NEXT:    retq
+  %1 = call <8 x double> @llvm.x86.avx512.mask.permvar.df.512(<8 x double> %x0, <8 x i64> <i64 3, i64 2, i64 1, i64 undef, i64 undef, i64 6, i64 5, i64 4>, <8 x double> %x1, i8 -1)
+  ret <8 x double> %1
+}
+define <8 x double> @combine_permvar_8f64_as_permpd_mask(<8 x double> %x0, <8 x double> %x1, i8 %m) {
+; CHECK-LABEL: combine_permvar_8f64_as_permpd_mask:
+; CHECK:       # BB#0:
+; CHECK-NEXT:    kmovw %edi, %k1
+; CHECK-NEXT:    vpermpd {{.*#+}} zmm1 {%k1} = zmm0[3,2,1,0,7,6,5,4]
+; CHECK-NEXT:    vmovaps %zmm1, %zmm0
+; CHECK-NEXT:    retq
+  %1 = call <8 x double> @llvm.x86.avx512.mask.permvar.df.512(<8 x double> %x0, <8 x i64> <i64 3, i64 2, i64 1, i64 undef, i64 undef, i64 6, i64 5, i64 4>, <8 x double> %x1, i8 %m)
+  ret <8 x double> %1
+}
+
 define <64 x i8> @combine_pshufb_as_pslldq(<64 x i8> %a0) {
 ; CHECK-LABEL: combine_pshufb_as_pslldq:
 ; CHECK:       # BB#0:
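
A note on the immediate encoding the patch relies on: getV4X86ShuffleImm packs a 4-element shuffle mask into the 8-bit immediate of VPERMQ/VPERMPD, two bits of source index per destination element. The standalone helper below is an illustrative sketch of that packing rule, not LLVM's actual implementation; only the two-bits-per-element layout is taken from the patch.

#include <array>
#include <cassert>
#include <cstdint>
#include <cstdio>

// Pack four in-range element indices into an imm8, two bits per
// destination element, mirroring what getV4X86ShuffleImm produces
// for the PermuteImm operand of X86ISD::VPERMI.
static uint8_t packV4ShuffleImm(const std::array<int, 4> &Mask) {
  uint8_t Imm = 0;
  for (int I = 0; I != 4; ++I) {
    assert(Mask[I] >= 0 && Mask[I] < 4 && "expected an in-range 4-element mask");
    Imm |= static_cast<uint8_t>(Mask[I]) << (I * 2);
  }
  return Imm;
}

int main() {
  // Mask from combine_permd_as_permq above: ymm0[0,2,2,1]
  // -> 0 | (2 << 2) | (2 << 4) | (1 << 6) = 0x68.
  std::printf("0x%02x\n", packV4ShuffleImm({0, 2, 2, 1}));
  // 256-bit-repeated mask from the AVX-512 tests: [3,2,1,0] -> 0x1b,
  // replayed by VPERMQ/VPERMPD in each 256-bit half of the zmm register.
  std::printf("0x%02x\n", packV4ShuffleImm({3, 2, 1, 0}));
  return 0;
}

This is also why the 512-bit path goes through is256BitLaneRepeatedShuffleMask first: the 8-element mask (undefs included) must reduce to one 4-element mask repeated across both 256-bit lanes before it can be packed, which is how zmm0[3,2,1,0,7,6,5,4] comes out of a single imm8 of 0x1b.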