summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Simon Pilgrim <llvm-dev@redking.me.uk> 2016-08-24 12:42:31 +0000
committer: Simon Pilgrim <llvm-dev@redking.me.uk> 2016-08-24 12:42:31 +0000
commit: 7a50c8c2ba58beaa70879d22c628ebd213fcf4ee (patch)
tree: 225fd7c993376e510b7527341f8fd87097b7a95f
parent: 3c8cd3df5e87e7549202af7f1b404ec0d106ecb1 (diff)
download: bcm5719-llvm-7a50c8c2ba58beaa70879d22c628ebd213fcf4ee.tar.gz
bcm5719-llvm-7a50c8c2ba58beaa70879d22c628ebd213fcf4ee.zip
[X86][AVX2] Ensure on 32-bit targets that we broadcast f64 types not i64 (PR29101)
llvm-svn: 279622
-rw-r--r-- llvm/lib/Target/X86/X86ISelLowering.cpp 7
-rw-r--r-- llvm/test/CodeGen/X86/avx2-vbroadcast.ll 55
2 files changed, 53 insertions, 9 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 105b02dea71..293d5a4ac14 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -8733,6 +8733,13 @@ static SDValue lowerVectorShuffleAsBroadcast(const SDLoc &DL, MVT VT,
V = DAG.getBitcast(SrcVT, V);
}
+ // 32-bit targets need to load i64 as a f64 and then bitcast the result.
+ if (!Subtarget.is64Bit() && SrcVT == MVT::i64) {
+ V = DAG.getBitcast(MVT::f64, V);
+ unsigned NumBroadcastElts = BroadcastVT.getVectorNumElements();
+ BroadcastVT = MVT::getVectorVT(MVT::f64, NumBroadcastElts);
+ }
+
return DAG.getBitcast(VT, DAG.getNode(Opcode, DL, BroadcastVT, V));
}
diff --git a/llvm/test/CodeGen/X86/avx2-vbroadcast.ll b/llvm/test/CodeGen/X86/avx2-vbroadcast.ll
index 2ecf2fa5a6e..0b6d2fe3e10 100644
--- a/llvm/test/CodeGen/X86/avx2-vbroadcast.ll
+++ b/llvm/test/CodeGen/X86/avx2-vbroadcast.ll
@@ -232,6 +232,43 @@ entry:
ret <4 x i64> %q3
}
+define <8 x i16> @broadcast_mem_v4i16_v8i16(<4 x i16>* %ptr) {
+; X32-LABEL: broadcast_mem_v4i16_v8i16:
+; X32: ## BB#0:
+; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
+; X32-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0]
+; X32-NEXT: retl
+;
+; X64-LABEL: broadcast_mem_v4i16_v8i16:
+; X64: ## BB#0:
+; X64-NEXT: vpbroadcastq (%rdi), %xmm0
+; X64-NEXT: retq
+ %load = load <4 x i16>, <4 x i16>* %ptr
+ %shuf = shufflevector <4 x i16> %load, <4 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
+ ret <8 x i16> %shuf
+}
+
+define <16 x i16> @broadcast_mem_v4i16_v16i16(<4 x i16>* %ptr) {
+; X32-LABEL: broadcast_mem_v4i16_v16i16:
+; X32: ## BB#0:
+; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-NEXT: vpmovzxwd {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
+; X32-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
+; X32-NEXT: vpbroadcastq %xmm0, %ymm0
+; X32-NEXT: retl
+;
+; X64-LABEL: broadcast_mem_v4i16_v16i16:
+; X64: ## BB#0:
+; X64-NEXT: vpmovzxwd {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
+; X64-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
+; X64-NEXT: vpbroadcastq %xmm0, %ymm0
+; X64-NEXT: retq
+ %load = load <4 x i16>, <4 x i16>* %ptr
+ %shuf = shufflevector <4 x i16> %load, <4 x i16> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
+ ret <16 x i16> %shuf
+}
+
; FIXME: Pointer adjusted broadcasts
define <16 x i8> @load_splat_16i8_16i8_1111111111111111(<16 x i8>* %ptr) nounwind uwtable readnone ssp {
@@ -563,7 +600,7 @@ entry:
define <8 x i32> @V111(<8 x i32> %in) nounwind uwtable readnone ssp {
; X32-LABEL: V111:
; X32: ## BB#0: ## %entry
-; X32-NEXT: vpbroadcastd LCPI27_0, %ymm1
+; X32-NEXT: vpbroadcastd LCPI29_0, %ymm1
; X32-NEXT: vpaddd %ymm1, %ymm0, %ymm0
; X32-NEXT: retl
;
@@ -580,7 +617,7 @@ entry:
define <8 x float> @V113(<8 x float> %in) nounwind uwtable readnone ssp {
; X32-LABEL: V113:
; X32: ## BB#0: ## %entry
-; X32-NEXT: vbroadcastss LCPI28_0, %ymm1
+; X32-NEXT: vbroadcastss LCPI30_0, %ymm1
; X32-NEXT: vaddps %ymm1, %ymm0, %ymm0
; X32-NEXT: retl
;
@@ -597,7 +634,7 @@ entry:
define <4 x float> @_e2(float* %ptr) nounwind uwtable readnone ssp {
; X32-LABEL: _e2:
; X32: ## BB#0:
-; X32-NEXT: vbroadcastss LCPI29_0, %xmm0
+; X32-NEXT: vbroadcastss LCPI31_0, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: _e2:
@@ -637,25 +674,25 @@ define void @crash() nounwind alwaysinline {
; X32: ## BB#0: ## %WGLoopsEntry
; X32-NEXT: xorl %eax, %eax
; X32-NEXT: testb %al, %al
-; X32-NEXT: je LBB31_1
+; X32-NEXT: je LBB33_1
; X32-NEXT: ## BB#2: ## %ret
; X32-NEXT: retl
; X32-NEXT: .p2align 4, 0x90
-; X32-NEXT: LBB31_1: ## %footer349VF
+; X32-NEXT: LBB33_1: ## %footer349VF
; X32-NEXT: ## =>This Inner Loop Header: Depth=1
-; X32-NEXT: jmp LBB31_1
+; X32-NEXT: jmp LBB33_1
;
; X64-LABEL: crash:
; X64: ## BB#0: ## %WGLoopsEntry
; X64-NEXT: xorl %eax, %eax
; X64-NEXT: testb %al, %al
-; X64-NEXT: je LBB31_1
+; X64-NEXT: je LBB33_1
; X64-NEXT: ## BB#2: ## %ret
; X64-NEXT: retq
; X64-NEXT: .p2align 4, 0x90
-; X64-NEXT: LBB31_1: ## %footer349VF
+; X64-NEXT: LBB33_1: ## %footer349VF
; X64-NEXT: ## =>This Inner Loop Header: Depth=1
-; X64-NEXT: jmp LBB31_1
+; X64-NEXT: jmp LBB33_1
WGLoopsEntry:
br i1 undef, label %ret, label %footer329VF
OpenPOWER on IntegriCloud