-rw-r--r--  llvm/lib/Target/X86/X86ISelLowering.cpp        |  7 +++++++
-rw-r--r--  llvm/test/CodeGen/X86/avx-vbroadcast.ll        | 14 ++++----------
-rw-r--r--  llvm/test/CodeGen/X86/vector-shuffle-256-v4.ll |  3 +--
3 files changed, 12 insertions, 12 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index ebd7fa619f7..8862a194831 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -8510,6 +8510,13 @@ static SDValue lowerVectorShuffleAsBroadcast(const SDLoc &DL, MVT VT,
   SDValue V = V1;
   for (;;) {
     switch (V.getOpcode()) {
+    case ISD::BITCAST: {
+      SDValue VSrc = V.getOperand(0);
+      if (NumElts != VSrc.getSimpleValueType().getVectorNumElements())
+        break;
+      V = VSrc;
+      continue;
+    }
     case ISD::CONCAT_VECTORS: {
       int OperandSize = Mask.size() / V.getNumOperands();
       V = V.getOperand(BroadcastIdx / OperandSize);
diff --git a/llvm/test/CodeGen/X86/avx-vbroadcast.ll b/llvm/test/CodeGen/X86/avx-vbroadcast.ll
index b7030035444..b312be9aa6b 100644
--- a/llvm/test/CodeGen/X86/avx-vbroadcast.ll
+++ b/llvm/test/CodeGen/X86/avx-vbroadcast.ll
@@ -173,14 +173,12 @@ define <8 x i32> @load_splat_8i32_4i32_33333333(<4 x i32>* %ptr) nounwind uwtabl
 ; X32-LABEL: load_splat_8i32_4i32_33333333:
 ; X32:       ## BB#0: ## %entry
 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT:    vpermilps {{.*#+}} xmm0 = mem[3,3,3,3]
-; X32-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
+; X32-NEXT:    vbroadcastss 12(%eax), %ymm0
 ; X32-NEXT:    retl
 ;
 ; X64-LABEL: load_splat_8i32_4i32_33333333:
 ; X64:       ## BB#0: ## %entry
-; X64-NEXT:    vpermilps {{.*#+}} xmm0 = mem[3,3,3,3]
-; X64-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
+; X64-NEXT:    vbroadcastss 12(%rdi), %ymm0
 ; X64-NEXT:    retq
 entry:
   %ld = load <4 x i32>, <4 x i32>* %ptr
@@ -277,16 +275,12 @@ define <4 x i64> @load_splat_4i64_2i64_1111(<2 x i64>* %ptr) nounwind uwtable re
 ; X32-LABEL: load_splat_4i64_2i64_1111:
 ; X32:       ## BB#0: ## %entry
 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT:    vmovaps (%eax), %xmm0
-; X32-NEXT:    vmovhlps {{.*#+}} xmm0 = xmm0[1,1]
-; X32-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
+; X32-NEXT:    vbroadcastsd 8(%eax), %ymm0
 ; X32-NEXT:    retl
 ;
 ; X64-LABEL: load_splat_4i64_2i64_1111:
 ; X64:       ## BB#0: ## %entry
-; X64-NEXT:    vmovaps (%rdi), %xmm0
-; X64-NEXT:    vmovhlps {{.*#+}} xmm0 = xmm0[1,1]
-; X64-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
+; X64-NEXT:    vbroadcastsd 8(%rdi), %ymm0
 ; X64-NEXT:    retq
 entry:
   %ld = load <2 x i64>, <2 x i64>* %ptr
diff --git a/llvm/test/CodeGen/X86/vector-shuffle-256-v4.ll b/llvm/test/CodeGen/X86/vector-shuffle-256-v4.ll
index 8c50695b456..610462346fb 100644
--- a/llvm/test/CodeGen/X86/vector-shuffle-256-v4.ll
+++ b/llvm/test/CodeGen/X86/vector-shuffle-256-v4.ll
@@ -1320,8 +1320,7 @@ define <4 x double> @splat_v4f64(<2 x double> %r) {
 define <4 x i64> @splat_mem_v4i64_from_v2i64(<2 x i64>* %ptr) {
 ; AVX1-LABEL: splat_mem_v4i64_from_v2i64:
 ; AVX1:       # BB#0:
-; AVX1-NEXT:    vmovddup {{.*#+}} xmm0 = mem[0,0]
-; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
+; AVX1-NEXT:    vbroadcastsd (%rdi), %ymm0
 ; AVX1-NEXT:    retq
 ;
 ; AVX2-LABEL: splat_mem_v4i64_from_v2i64:
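
For context: lowerVectorShuffleAsBroadcast walks up the chain of the shuffle's source operand, looking for a scalar or load it can turn into an X86ISD::VBROADCAST. Previously the walk did not look through ISD::BITCAST nodes, so when legalization or DAG combining inserted a cast between a 128-bit load and a 256-bit splat shuffle, the splat lowered to the shuffle-plus-vinsertf128 sequences the old CHECK lines show. The new BITCAST case peeks through the cast, but only when it preserves the vector element count (the NumElts guard); a count-changing cast could leave BroadcastIdx naming a different lane, so in that situation the loop stops and the generic lowering runs.

A minimal sketch of the kind of splat this change improves, assuming an AVX target. The function name is illustrative; the body mirrors the updated load_splat_8i32_4i32_33333333 test above:

; Splat lane 3 of a 128-bit load across all eight lanes of a 256-bit vector.
define <8 x i32> @splat_lane3(<4 x i32>* %ptr) {
  %ld = load <4 x i32>, <4 x i32>* %ptr
  %splat = shufflevector <4 x i32> %ld, <4 x i32> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
  ret <8 x i32> %splat
}

Per the new test expectations, this now compiles to a single load-and-splat of the 4-byte element at offset 12 (vbroadcastss 12(%rdi), %ymm0 on x86-64) rather than a vpermilps plus vinsertf128 pair.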