author     Simon Pilgrim <llvm-dev@redking.me.uk>    2016-07-02 18:14:31 +0000
committer  Simon Pilgrim <llvm-dev@redking.me.uk>    2016-07-02 18:14:31 +0000
commit     cde7c54baafb635bc468325c4992c3c9a7380964 (patch)
tree       3f30d1f466e9c44e7610d25a8b454807323507db
parent     77dda7c2e0a28eb3016a9f8621bac9ed93c84bb0 (diff)
[X86][AVX512] Add support for 512-bit PSHUFB lowering
llvm-svn: 274444
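
With this change, a 512-bit byte shuffle whose indices stay within their own 128-bit lane can be lowered to a single VPSHUFB on a zmm register when the subtarget has AVX512BW (BWI), instead of being split into two 256-bit halves. A minimal IR sketch of the kind of shuffle this enables (function name invented for illustration; the selector may still choose another sequence if a cheaper pattern matches first):

; Reverse the bytes within each 128-bit lane of a <64 x i8> vector. Every
; index stays inside its own 16-byte lane, the constraint PSHUFB imposes,
; so this is a candidate for a single in-lane vpshufb under +avx512bw.
define <64 x i8> @shuffle_v64i8_reverse_lanes(<64 x i8> %a) nounwind {
  %s = shufflevector <64 x i8> %a, <64 x i8> undef, <64 x i32> <
      i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9,  i32 8,
      i32 7,  i32 6,  i32 5,  i32 4,  i32 3,  i32 2,  i32 1,  i32 0,
      i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24,
      i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16,
      i32 47, i32 46, i32 45, i32 44, i32 43, i32 42, i32 41, i32 40,
      i32 39, i32 38, i32 37, i32 36, i32 35, i32 34, i32 33, i32 32,
      i32 63, i32 62, i32 61, i32 60, i32 59, i32 58, i32 57, i32 56,
      i32 55, i32 54, i32 53, i32 52, i32 51, i32 50, i32 49, i32 48>
  ret <64 x i8> %s
}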
-rw-r--r--  llvm/lib/Target/X86/X86ISelLowering.cpp     9
-rw-r--r--  llvm/test/CodeGen/X86/vector-bitreverse.ll  18
2 files changed, 10 insertions, 17 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index b06736fbdd3..02f92080431 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -7256,11 +7256,12 @@ static SDValue lowerVectorShuffleWithPSHUFB(const SDLoc &DL, MVT VT,
   const int NumEltBytes = VT.getScalarSizeInBits() / 8;
 
   assert((Subtarget.hasSSSE3() && VT.is128BitVector()) ||
-         (Subtarget.hasAVX2() && VT.is256BitVector()));
+         (Subtarget.hasAVX2() && VT.is256BitVector()) ||
+         (Subtarget.hasBWI() && VT.is512BitVector()));
 
   SmallBitVector Zeroable = computeZeroableShuffleElements(Mask, V1, V2);
 
-  SmallVector<SDValue, 32> PSHUFBMask(NumBytes);
+  SmallVector<SDValue, 64> PSHUFBMask(NumBytes);
 
   // Sign bit set in i8 mask means zero element.
   SDValue ZeroMask = DAG.getConstant(0x80, DL, MVT::i8);
@@ -11909,6 +11910,10 @@ static SDValue lowerV64I8VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
           DL, MVT::v64i8, V1, V2, Mask, Subtarget, DAG))
     return Rotate;
 
+  if (SDValue PSHUFB = lowerVectorShuffleWithPSHUFB(DL, MVT::v64i8, Mask, V1,
+                                                    V2, Subtarget, DAG))
+    return PSHUFB;
+
   // FIXME: Implement direct support for this type!
   return splitAndLowerVectorShuffle(DL, MVT::v64i8, V1, V2, Mask, DAG);
 }
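
The first hunk widens the assert to accept 512-bit vectors under BWI and grows the inline mask storage to 64 entries, one SDValue per byte of the widest supported vector. For element types wider than i8, each shuffle index expands into NumEltBytes consecutive byte indices, and elements proven zeroable are encoded as the 0x80 control byte that VPSHUFB defines to zero the destination byte. A hypothetical sketch of the zeroable path (function name invented; selection may prefer a different sequence if a cheaper one matches):

; Swap adjacent bytes in the low half of each 128-bit lane and zero the
; high half. Indices >= 64 select from the zeroinitializer operand, so
; computeZeroableShuffleElements marks those bytes zeroable and the
; lowering can emit 0x80 for them in the vpshufb control vector.
define <64 x i8> @shuffle_v64i8_swap_lo_zero_hi(<64 x i8> %a) nounwind {
  %s = shufflevector <64 x i8> %a, <64 x i8> zeroinitializer, <64 x i32> <
      i32 1,  i32 0,  i32 3,  i32 2,  i32 5,  i32 4,  i32 7,  i32 6,
      i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64,
      i32 17, i32 16, i32 19, i32 18, i32 21, i32 20, i32 23, i32 22,
      i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64,
      i32 33, i32 32, i32 35, i32 34, i32 37, i32 36, i32 39, i32 38,
      i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64,
      i32 49, i32 48, i32 51, i32 50, i32 53, i32 52, i32 55, i32 54,
      i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64>
  ret <64 x i8> %s
}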
diff --git a/llvm/test/CodeGen/X86/vector-bitreverse.ll b/llvm/test/CodeGen/X86/vector-bitreverse.ll
index 8f81596837e..2c8d01326bc 100644
--- a/llvm/test/CodeGen/X86/vector-bitreverse.ll
+++ b/llvm/test/CodeGen/X86/vector-bitreverse.ll
@@ -2744,11 +2744,7 @@ define <32 x i16> @test_bitreverse_v32i16(<32 x i16> %a) nounwind {
 ;
 ; AVX512BW-LABEL: test_bitreverse_v32i16:
 ; AVX512BW:       # BB#0:
-; AVX512BW-NEXT:    vextracti64x4 $1, %zmm0, %ymm1
-; AVX512BW-NEXT:    vmovdqa {{.*#+}} ymm2 = [1,0,3,2,5,4,7,6,9,8,11,10,13,12,15,14,1,0,3,2,5,4,7,6,9,8,11,10,13,12,15,14]
-; AVX512BW-NEXT:    vpshufb %ymm2, %ymm1, %ymm1
-; AVX512BW-NEXT:    vpshufb %ymm2, %ymm0, %ymm0
-; AVX512BW-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
+; AVX512BW-NEXT:    vpshufb {{.*#+}} zmm0 = zmm0[1,0,3,2,5,4,7,6,9,8,11,10,13,12,15,14,17,16,19,18,21,20,23,22,25,24,27,26,29,28,31,30,33,32,35,34,37,36,39,38,41,40,43,42,45,44,47,46,49,48,51,50,53,52,55,54,57,56,59,58,61,60,63,62]
 ; AVX512BW-NEXT:    vmovdqa64 {{.*#+}} zmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
 ; AVX512BW-NEXT:    vpandq %zmm1, %zmm0, %zmm2
 ; AVX512BW-NEXT:    vmovdqu8 {{.*#+}} zmm3 = [0,128,64,192,32,160,96,224,16,144,80,208,48,176,112,240,0,128,64,192,32,160,96,224,16,144,80,208,48,176,112,240,0,128,64,192,32,160,96,224,16,144,80,208,48,176,112,240,0,128,64,192,32,160,96,224,16,144,80,208,48,176,112,240]
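
These check lines follow the update_llc_test_checks.py format; a sketch of the kind of RUN line that drives the AVX512BW prefix (flags assumed rather than copied from the test file):

; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw | FileCheck %s --check-prefix=AVX512BW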
@@ -3175,11 +3171,7 @@ define <16 x i32> @test_bitreverse_v16i32(<16 x i32> %a) nounwind {
 ;
 ; AVX512BW-LABEL: test_bitreverse_v16i32:
 ; AVX512BW:       # BB#0:
-; AVX512BW-NEXT:    vextracti64x4 $1, %zmm0, %ymm1
-; AVX512BW-NEXT:    vmovdqa {{.*#+}} ymm2 = [3,2,1,0,7,6,5,4,11,10,9,8,15,14,13,12,3,2,1,0,7,6,5,4,11,10,9,8,15,14,13,12]
-; AVX512BW-NEXT:    vpshufb %ymm2, %ymm1, %ymm1
-; AVX512BW-NEXT:    vpshufb %ymm2, %ymm0, %ymm0
-; AVX512BW-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
+; AVX512BW-NEXT:    vpshufb {{.*#+}} zmm0 = zmm0[3,2,1,0,7,6,5,4,11,10,9,8,15,14,13,12,19,18,17,16,23,22,21,20,27,26,25,24,31,30,29,28,35,34,33,32,39,38,37,36,43,42,41,40,47,46,45,44,51,50,49,48,55,54,53,52,59,58,57,56,63,62,61,60]
 ; AVX512BW-NEXT:    vmovdqa64 {{.*#+}} zmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
 ; AVX512BW-NEXT:    vpandq %zmm1, %zmm0, %zmm2
 ; AVX512BW-NEXT:    vmovdqu8 {{.*#+}} zmm3 = [0,128,64,192,32,160,96,224,16,144,80,208,48,176,112,240,0,128,64,192,32,160,96,224,16,144,80,208,48,176,112,240,0,128,64,192,32,160,96,224,16,144,80,208,48,176,112,240,0,128,64,192,32,160,96,224,16,144,80,208,48,176,112,240]
@@ -3710,11 +3702,7 @@ define <8 x i64> @test_bitreverse_v8i64(<8 x i64> %a) nounwind {
 ;
 ; AVX512BW-LABEL: test_bitreverse_v8i64:
 ; AVX512BW:       # BB#0:
-; AVX512BW-NEXT:    vextracti64x4 $1, %zmm0, %ymm1
-; AVX512BW-NEXT:    vmovdqa {{.*#+}} ymm2 = [7,6,5,4,3,2,1,0,15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0,15,14,13,12,11,10,9,8]
-; AVX512BW-NEXT:    vpshufb %ymm2, %ymm1, %ymm1
-; AVX512BW-NEXT:    vpshufb %ymm2, %ymm0, %ymm0
-; AVX512BW-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
+; AVX512BW-NEXT:    vpshufb {{.*#+}} zmm0 = zmm0[7,6,5,4,3,2,1,0,15,14,13,12,11,10,9,8,23,22,21,20,19,18,17,16,31,30,29,28,27,26,25,24,39,38,37,36,35,34,33,32,47,46,45,44,43,42,41,40,55,54,53,52,51,50,49,48,63,62,61,60,59,58,57,56]
 ; AVX512BW-NEXT:    vmovdqa64 {{.*#+}} zmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
 ; AVX512BW-NEXT:    vpandq %zmm1, %zmm0, %zmm2
 ; AVX512BW-NEXT:    vmovdqu8 {{.*#+}} zmm3 = [0,128,64,192,32,160,96,224,16,144,80,208,48,176,112,240,0,128,64,192,32,160,96,224,16,144,80,208,48,176,112,240,0,128,64,192,32,160,96,224,16,144,80,208,48,176,112,240,0,128,64,192,32,160,96,224,16,144,80,208,48,176,112,240]