summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--llvm/lib/Target/X86/X86ISelLowering.cpp7
-rw-r--r--llvm/test/CodeGen/X86/avx-vbroadcast.ll14
-rw-r--r--llvm/test/CodeGen/X86/vector-shuffle-256-v4.ll3
3 files changed, 12 insertions, 12 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index ebd7fa619f7..8862a194831 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -8510,6 +8510,13 @@ static SDValue lowerVectorShuffleAsBroadcast(const SDLoc &DL, MVT VT,
SDValue V = V1;
for (;;) {
switch (V.getOpcode()) {
+ case ISD::BITCAST: {
+ SDValue VSrc = V.getOperand(0);
+ if (NumElts != VSrc.getSimpleValueType().getVectorNumElements())
+ break;
+ V = VSrc;
+ continue;
+ }
case ISD::CONCAT_VECTORS: {
int OperandSize = Mask.size() / V.getNumOperands();
V = V.getOperand(BroadcastIdx / OperandSize);
diff --git a/llvm/test/CodeGen/X86/avx-vbroadcast.ll b/llvm/test/CodeGen/X86/avx-vbroadcast.ll
index b7030035444..b312be9aa6b 100644
--- a/llvm/test/CodeGen/X86/avx-vbroadcast.ll
+++ b/llvm/test/CodeGen/X86/avx-vbroadcast.ll
@@ -173,14 +173,12 @@ define <8 x i32> @load_splat_8i32_4i32_33333333(<4 x i32>* %ptr) nounwind uwtabl
; X32-LABEL: load_splat_8i32_4i32_33333333:
; X32: ## BB#0: ## %entry
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT: vpermilps {{.*#+}} xmm0 = mem[3,3,3,3]
-; X32-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
+; X32-NEXT: vbroadcastss 12(%eax), %ymm0
; X32-NEXT: retl
;
; X64-LABEL: load_splat_8i32_4i32_33333333:
; X64: ## BB#0: ## %entry
-; X64-NEXT: vpermilps {{.*#+}} xmm0 = mem[3,3,3,3]
-; X64-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
+; X64-NEXT: vbroadcastss 12(%rdi), %ymm0
; X64-NEXT: retq
entry:
%ld = load <4 x i32>, <4 x i32>* %ptr
@@ -277,16 +275,12 @@ define <4 x i64> @load_splat_4i64_2i64_1111(<2 x i64>* %ptr) nounwind uwtable re
; X32-LABEL: load_splat_4i64_2i64_1111:
; X32: ## BB#0: ## %entry
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT: vmovaps (%eax), %xmm0
-; X32-NEXT: vmovhlps {{.*#+}} xmm0 = xmm0[1,1]
-; X32-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
+; X32-NEXT: vbroadcastsd 8(%eax), %ymm0
; X32-NEXT: retl
;
; X64-LABEL: load_splat_4i64_2i64_1111:
; X64: ## BB#0: ## %entry
-; X64-NEXT: vmovaps (%rdi), %xmm0
-; X64-NEXT: vmovhlps {{.*#+}} xmm0 = xmm0[1,1]
-; X64-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
+; X64-NEXT: vbroadcastsd 8(%rdi), %ymm0
; X64-NEXT: retq
entry:
%ld = load <2 x i64>, <2 x i64>* %ptr
diff --git a/llvm/test/CodeGen/X86/vector-shuffle-256-v4.ll b/llvm/test/CodeGen/X86/vector-shuffle-256-v4.ll
index 8c50695b456..610462346fb 100644
--- a/llvm/test/CodeGen/X86/vector-shuffle-256-v4.ll
+++ b/llvm/test/CodeGen/X86/vector-shuffle-256-v4.ll
@@ -1320,8 +1320,7 @@ define <4 x double> @splat_v4f64(<2 x double> %r) {
define <4 x i64> @splat_mem_v4i64_from_v2i64(<2 x i64>* %ptr) {
; AVX1-LABEL: splat_mem_v4i64_from_v2i64:
; AVX1: # BB#0:
-; AVX1-NEXT: vmovddup {{.*#+}} xmm0 = mem[0,0]
-; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
+; AVX1-NEXT: vbroadcastsd (%rdi), %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: splat_mem_v4i64_from_v2i64:
OpenPOWER on IntegriCloud