-rw-r--r--   llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp |  6
-rw-r--r--   llvm/test/CodeGen/X86/avx512-bugfix-26264.ll          | 18
-rw-r--r--   llvm/test/CodeGen/X86/avx512-masked-memop-64-32.ll    | 29
-rw-r--r--   llvm/test/CodeGen/X86/masked_memop.ll                 |  2
4 files changed, 25 insertions(+), 30 deletions(-)
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index 737bb1f9c9f..1aa8df29af3 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -4059,10 +4059,8 @@ void SelectionDAGBuilder::visitMaskedLoad(const CallInst &I, bool IsExpanding) {
SDValue Load = DAG.getMaskedLoad(VT, sdl, InChain, Ptr, Mask, Src0, VT, MMO,
ISD::NON_EXTLOAD, IsExpanding);
- if (AddToChain) {
- SDValue OutChain = Load.getValue(1);
- DAG.setRoot(OutChain);
- }
+ if (AddToChain)
+ PendingLoads.push_back(Load.getValue(1));
setValue(&I, Load);
}
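Note on the functional change above: previously each masked load's output chain was installed as the new DAG root, which forced every later chained node (including other independent masked loads) to be ordered after it. Queuing the chain in PendingLoads instead lets independent masked loads share a common input chain, so the scheduler is free to reorder them; the updated CHECK lines in the tests below reflect that freedom. A minimal reduced example of IR that benefits (not taken from this commit; the function name and vector types are illustrative only):

; Two masked loads from unrelated pointers: with the old lowering the second
; load was chained after the first via setRoot; with PendingLoads both use the
; same input chain and can be emitted in either order.
define <8 x i32> @two_independent_masked_loads(<8 x i32>* %p, <8 x i32>* %q, <8 x i1> %m) {
  %a = call <8 x i32> @llvm.masked.load.v8i32.p0v8i32(<8 x i32>* %p, i32 4, <8 x i1> %m, <8 x i32> zeroinitializer)
  %b = call <8 x i32> @llvm.masked.load.v8i32.p0v8i32(<8 x i32>* %q, i32 4, <8 x i1> %m, <8 x i32> zeroinitializer)
  %sum = add <8 x i32> %a, %b
  ret <8 x i32> %sum
}
declare <8 x i32> @llvm.masked.load.v8i32.p0v8i32(<8 x i32>*, i32, <8 x i1>, <8 x i32>)

The chains collected in PendingLoads are later merged into a single TokenFactor when SelectionDAGBuilder next needs the root, so ordering with respect to stores and other side effects is still preserved.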
diff --git a/llvm/test/CodeGen/X86/avx512-bugfix-26264.ll b/llvm/test/CodeGen/X86/avx512-bugfix-26264.ll
index 4d54fb71523..e9d0161dd94 100644
--- a/llvm/test/CodeGen/X86/avx512-bugfix-26264.ll
+++ b/llvm/test/CodeGen/X86/avx512-bugfix-26264.ll
@@ -7,13 +7,12 @@ define <32 x double> @test_load_32f64(<32 x double>* %ptrs, <32 x i1> %mask, <32
; AVX512BW-NEXT: vpsllw $7, %ymm0, %ymm0
; AVX512BW-NEXT: vpmovb2m %zmm0, %k1
; AVX512BW-NEXT: vblendmpd (%rdi), %zmm1, %zmm0 {%k1}
-; AVX512BW-NEXT: kshiftrd $16, %k1, %k2
-; AVX512BW-NEXT: vblendmpd 128(%rdi), %zmm3, %zmm5 {%k2}
+; AVX512BW-NEXT: kshiftrw $8, %k1, %k2
+; AVX512BW-NEXT: vblendmpd 64(%rdi), %zmm2, %zmm1 {%k2}
+; AVX512BW-NEXT: kshiftrd $16, %k1, %k1
+; AVX512BW-NEXT: vblendmpd 128(%rdi), %zmm3, %zmm2 {%k1}
; AVX512BW-NEXT: kshiftrw $8, %k1, %k1
-; AVX512BW-NEXT: vblendmpd 64(%rdi), %zmm2, %zmm1 {%k1}
-; AVX512BW-NEXT: kshiftrw $8, %k2, %k1
; AVX512BW-NEXT: vblendmpd 192(%rdi), %zmm4, %zmm3 {%k1}
-; AVX512BW-NEXT: vmovapd %zmm5, %zmm2
; AVX512BW-NEXT: retq
%res = call <32 x double> @llvm.masked.load.v32f64.p0v32f64(<32 x double>* %ptrs, i32 4, <32 x i1> %mask, <32 x double> %src0)
ret <32 x double> %res
@@ -25,13 +24,12 @@ define <32 x i64> @test_load_32i64(<32 x i64>* %ptrs, <32 x i1> %mask, <32 x i64
; AVX512BW-NEXT: vpsllw $7, %ymm0, %ymm0
; AVX512BW-NEXT: vpmovb2m %zmm0, %k1
; AVX512BW-NEXT: vpblendmq (%rdi), %zmm1, %zmm0 {%k1}
-; AVX512BW-NEXT: kshiftrd $16, %k1, %k2
-; AVX512BW-NEXT: vpblendmq 128(%rdi), %zmm3, %zmm5 {%k2}
+; AVX512BW-NEXT: kshiftrw $8, %k1, %k2
+; AVX512BW-NEXT: vpblendmq 64(%rdi), %zmm2, %zmm1 {%k2}
+; AVX512BW-NEXT: kshiftrd $16, %k1, %k1
+; AVX512BW-NEXT: vpblendmq 128(%rdi), %zmm3, %zmm2 {%k1}
; AVX512BW-NEXT: kshiftrw $8, %k1, %k1
-; AVX512BW-NEXT: vpblendmq 64(%rdi), %zmm2, %zmm1 {%k1}
-; AVX512BW-NEXT: kshiftrw $8, %k2, %k1
; AVX512BW-NEXT: vpblendmq 192(%rdi), %zmm4, %zmm3 {%k1}
-; AVX512BW-NEXT: vmovdqa64 %zmm5, %zmm2
; AVX512BW-NEXT: retq
%res = call <32 x i64> @llvm.masked.load.v32i64.p0v32i64(<32 x i64>* %ptrs, i32 4, <32 x i1> %mask, <32 x i64> %src0)
ret <32 x i64> %res
diff --git a/llvm/test/CodeGen/X86/avx512-masked-memop-64-32.ll b/llvm/test/CodeGen/X86/avx512-masked-memop-64-32.ll
index 275884c6de0..f199cb097aa 100644
--- a/llvm/test/CodeGen/X86/avx512-masked-memop-64-32.ll
+++ b/llvm/test/CodeGen/X86/avx512-masked-memop-64-32.ll
@@ -94,10 +94,10 @@ declare <16 x i32*> @llvm.masked.load.v16p0i32.p0v16p0i32(<16 x i32*>*, i32, <16
define <16 x i32*> @test23(<16 x i32*> %trigger, <16 x i32*>* %addr) {
; AVX512-LABEL: test23:
; AVX512: ## %bb.0:
-; AVX512-NEXT: vptestnmq %zmm0, %zmm0, %k1
-; AVX512-NEXT: vptestnmq %zmm1, %zmm1, %k2
-; AVX512-NEXT: vmovdqu64 64(%rdi), %zmm1 {%k2} {z}
-; AVX512-NEXT: vmovdqu64 (%rdi), %zmm0 {%k1} {z}
+; AVX512-NEXT: vptestnmq %zmm1, %zmm1, %k1
+; AVX512-NEXT: vptestnmq %zmm0, %zmm0, %k2
+; AVX512-NEXT: vmovdqu64 (%rdi), %zmm0 {%k2} {z}
+; AVX512-NEXT: vmovdqu64 64(%rdi), %zmm1 {%k1} {z}
; AVX512-NEXT: retq
%mask = icmp eq <16 x i32*> %trigger, zeroinitializer
%res = call <16 x i32*> @llvm.masked.load.v16p0i32.p0v16p0i32(<16 x i32*>* %addr, i32 4, <16 x i1>%mask, <16 x i32*>zeroinitializer)
@@ -234,19 +234,19 @@ declare <16 x double> @llvm.masked.load.v16f64.p0v16f64(<16 x double>* %ptrs, i3
define <32 x double> @test_load_32f64(<32 x double>* %ptrs, <32 x i1> %mask, <32 x double> %src0) {
; AVX512F-LABEL: test_load_32f64:
; AVX512F: ## %bb.0:
-; AVX512F-NEXT: vpmovsxbd %xmm0, %zmm5
+; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm5
+; AVX512F-NEXT: vpmovsxbd %xmm5, %zmm5
; AVX512F-NEXT: vpslld $31, %zmm5, %zmm5
; AVX512F-NEXT: vptestmd %zmm5, %zmm5, %k1
-; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm0
; AVX512F-NEXT: vpmovsxbd %xmm0, %zmm0
; AVX512F-NEXT: vpslld $31, %zmm0, %zmm0
; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k2
-; AVX512F-NEXT: vblendmpd 128(%rdi), %zmm3, %zmm5 {%k2}
-; AVX512F-NEXT: vblendmpd (%rdi), %zmm1, %zmm0 {%k1}
+; AVX512F-NEXT: vblendmpd (%rdi), %zmm1, %zmm0 {%k2}
+; AVX512F-NEXT: vblendmpd 128(%rdi), %zmm3, %zmm5 {%k1}
; AVX512F-NEXT: kshiftrw $8, %k2, %k2
-; AVX512F-NEXT: vblendmpd 192(%rdi), %zmm4, %zmm3 {%k2}
+; AVX512F-NEXT: vblendmpd 64(%rdi), %zmm2, %zmm1 {%k2}
; AVX512F-NEXT: kshiftrw $8, %k1, %k1
-; AVX512F-NEXT: vblendmpd 64(%rdi), %zmm2, %zmm1 {%k1}
+; AVX512F-NEXT: vblendmpd 192(%rdi), %zmm4, %zmm3 {%k1}
; AVX512F-NEXT: vmovapd %zmm5, %zmm2
; AVX512F-NEXT: retq
;
@@ -255,13 +255,12 @@ define <32 x double> @test_load_32f64(<32 x double>* %ptrs, <32 x i1> %mask, <32
; SKX-NEXT: vpsllw $7, %ymm0, %ymm0
; SKX-NEXT: vpmovb2m %ymm0, %k1
; SKX-NEXT: vblendmpd (%rdi), %zmm1, %zmm0 {%k1}
-; SKX-NEXT: kshiftrd $16, %k1, %k2
-; SKX-NEXT: vblendmpd 128(%rdi), %zmm3, %zmm5 {%k2}
+; SKX-NEXT: kshiftrw $8, %k1, %k2
+; SKX-NEXT: vblendmpd 64(%rdi), %zmm2, %zmm1 {%k2}
+; SKX-NEXT: kshiftrd $16, %k1, %k1
+; SKX-NEXT: vblendmpd 128(%rdi), %zmm3, %zmm2 {%k1}
; SKX-NEXT: kshiftrw $8, %k1, %k1
-; SKX-NEXT: vblendmpd 64(%rdi), %zmm2, %zmm1 {%k1}
-; SKX-NEXT: kshiftrw $8, %k2, %k1
; SKX-NEXT: vblendmpd 192(%rdi), %zmm4, %zmm3 {%k1}
-; SKX-NEXT: vmovapd %zmm5, %zmm2
; SKX-NEXT: retq
%res = call <32 x double> @llvm.masked.load.v32f64.p0v32f64(<32 x double>* %ptrs, i32 4, <32 x i1> %mask, <32 x double> %src0)
ret <32 x double> %res
diff --git a/llvm/test/CodeGen/X86/masked_memop.ll b/llvm/test/CodeGen/X86/masked_memop.ll
index aa6ae096445..812d9f50fe3 100644
--- a/llvm/test/CodeGen/X86/masked_memop.ll
+++ b/llvm/test/CodeGen/X86/masked_memop.ll
@@ -976,8 +976,8 @@ define <4 x i64> @mload_constmask_v4i64(<4 x i64>* %addr, <4 x i64> %dst) {
define <8 x double> @mload_constmask_v8f64(<8 x double>* %addr, <8 x double> %dst) {
; AVX-LABEL: mload_constmask_v8f64:
; AVX: ## %bb.0:
-; AVX-NEXT: vblendps {{.*#+}} ymm1 = ymm1[0,1,2,3,4,5],mem[6,7]
; AVX-NEXT: vblendps {{.*#+}} ymm0 = mem[0,1,2,3,4,5],ymm0[6,7]
+; AVX-NEXT: vblendps {{.*#+}} ymm1 = ymm1[0,1,2,3,4,5],mem[6,7]
; AVX-NEXT: retq
;
; AVX512F-LABEL: mload_constmask_v8f64: