summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Artur Pilipenko <apilipenko@azulsystems.com> 2017-10-05 16:28:21 +0000
committer: Artur Pilipenko <apilipenko@azulsystems.com> 2017-10-05 16:28:21 +0000
commit: 7b15254c8fe0f5992fff9037468bca3cc61a4445 (patch)
tree: 47c94bb61ccc0a6548924a84fd7051c53625fd5d
parent: aa0835a7abce9a7cbbf706539ef4712fa05c5a37 (diff)
download: bcm5719-llvm-7b15254c8fe0f5992fff9037468bca3cc61a4445.tar.gz
download: bcm5719-llvm-7b15254c8fe0f5992fff9037468bca3cc61a4445.zip
[X86] Fix chains update when lowering BUILD_VECTOR to a vector load
The code that lowers a BUILD_VECTOR of consecutive loads into a single vector load doesn't update chains properly. As a result, the vector load can be reordered with a store to the same location. The current code in EltsFromConsecutiveLoads only updates the chain following the first load. The fix is to update the chains following all the loads comprising the vector.
This is a fix for PR10114.
Reviewed By: niravd
Differential Revision: https://reviews.llvm.org/D38547
llvm-svn: 314988
-rw-r--r-- llvm/lib/Target/X86/X86ISelLowering.cpp 13
-rw-r--r-- llvm/test/CodeGen/X86/merge-consecutive-loads-128.ll 262
2 files changed, 256 insertions, 19 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 9aae58b5555..b0e0b439257 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -6540,14 +6540,20 @@ static SDValue EltsFromConsecutiveLoads(EVT VT, ArrayRef<SDValue> Elts,
}
}
- auto CreateLoad = [&DAG, &DL](EVT VT, LoadSDNode *LDBase) {
+ SmallVector<LoadSDNode *, 8> Loads;
+ for (int i = FirstLoadedElt; i <= LastLoadedElt; ++i)
+ if (LoadMask[i])
+ Loads.push_back(cast<LoadSDNode>(peekThroughBitcasts(Elts[i])));
+
+ auto CreateLoad = [&DAG, &DL, &Loads](EVT VT, LoadSDNode *LDBase) {
auto MMOFlags = LDBase->getMemOperand()->getFlags();
assert(!(MMOFlags & MachineMemOperand::MOVolatile) &&
"Cannot merge volatile loads.");
SDValue NewLd =
DAG.getLoad(VT, DL, LDBase->getChain(), LDBase->getBasePtr(),
LDBase->getPointerInfo(), LDBase->getAlignment(), MMOFlags);
- DAG.makeEquivalentMemoryOrdering(LDBase, NewLd);
+ for (auto *LD : Loads)
+ DAG.makeEquivalentMemoryOrdering(LD, NewLd);
return NewLd;
};
@@ -6612,7 +6618,8 @@ static SDValue EltsFromConsecutiveLoads(EVT VT, ArrayRef<SDValue> Elts,
LDBase->getAlignment(),
false/*isVolatile*/, true/*ReadMem*/,
false/*WriteMem*/);
- DAG.makeEquivalentMemoryOrdering(LDBase, ResNode);
+ for (auto *LD : Loads)
+ DAG.makeEquivalentMemoryOrdering(LD, ResNode);
return DAG.getBitcast(VT, ResNode);
}
}
diff --git a/llvm/test/CodeGen/X86/merge-consecutive-loads-128.ll b/llvm/test/CodeGen/X86/merge-consecutive-loads-128.ll
index 75587b240b8..516a4800c2a 100644
--- a/llvm/test/CodeGen/X86/merge-consecutive-loads-128.ll
+++ b/llvm/test/CodeGen/X86/merge-consecutive-loads-128.ll
@@ -409,6 +409,124 @@ define <4 x i32> @merge_4i32_i32_23u5(i32* %ptr) nounwind uwtable noinline ssp {
ret <4 x i32> %res3
}
+define <4 x i32> @merge_4i32_i32_23u5_inc2(i32* %ptr) nounwind uwtable noinline ssp {
+; SSE-LABEL: merge_4i32_i32_23u5_inc2:
+; SSE: # BB#0:
+; SSE-NEXT: movups 8(%rdi), %xmm0
+; SSE-NEXT: incl 8(%rdi)
+; SSE-NEXT: retq
+;
+; AVX-LABEL: merge_4i32_i32_23u5_inc2:
+; AVX: # BB#0:
+; AVX-NEXT: vmovups 8(%rdi), %xmm0
+; AVX-NEXT: incl 8(%rdi)
+; AVX-NEXT: retq
+;
+; X32-SSE1-LABEL: merge_4i32_i32_23u5_inc2:
+; X32-SSE1: # BB#0:
+; X32-SSE1-NEXT: pushl %edi
+; X32-SSE1-NEXT: .Lcfi6:
+; X32-SSE1-NEXT: .cfi_def_cfa_offset 8
+; X32-SSE1-NEXT: pushl %esi
+; X32-SSE1-NEXT: .Lcfi7:
+; X32-SSE1-NEXT: .cfi_def_cfa_offset 12
+; X32-SSE1-NEXT: .Lcfi8:
+; X32-SSE1-NEXT: .cfi_offset %esi, -12
+; X32-SSE1-NEXT: .Lcfi9:
+; X32-SSE1-NEXT: .cfi_offset %edi, -8
+; X32-SSE1-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-SSE1-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X32-SSE1-NEXT: movl 8(%ecx), %edx
+; X32-SSE1-NEXT: movl 12(%ecx), %esi
+; X32-SSE1-NEXT: leal 1(%edx), %edi
+; X32-SSE1-NEXT: movl %edi, 8(%ecx)
+; X32-SSE1-NEXT: movl 20(%ecx), %ecx
+; X32-SSE1-NEXT: movl %esi, 4(%eax)
+; X32-SSE1-NEXT: movl %edx, (%eax)
+; X32-SSE1-NEXT: movl %ecx, 12(%eax)
+; X32-SSE1-NEXT: popl %esi
+; X32-SSE1-NEXT: popl %edi
+; X32-SSE1-NEXT: retl $4
+;
+; X32-SSE41-LABEL: merge_4i32_i32_23u5_inc2:
+; X32-SSE41: # BB#0:
+; X32-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-SSE41-NEXT: movups 8(%eax), %xmm0
+; X32-SSE41-NEXT: incl 8(%eax)
+; X32-SSE41-NEXT: retl
+ %ptr0 = getelementptr inbounds i32, i32* %ptr, i64 2
+ %ptr1 = getelementptr inbounds i32, i32* %ptr, i64 3
+ %ptr3 = getelementptr inbounds i32, i32* %ptr, i64 5
+ %val0 = load i32, i32* %ptr0
+ %inc = add i32 %val0, 1
+ store i32 %inc, i32* %ptr0
+ %val1 = load i32, i32* %ptr1
+ %val3 = load i32, i32* %ptr3
+ %res0 = insertelement <4 x i32> undef, i32 %val0, i32 0
+ %res1 = insertelement <4 x i32> %res0, i32 %val1, i32 1
+ %res3 = insertelement <4 x i32> %res1, i32 %val3, i32 3
+ ret <4 x i32> %res3
+}
+
+define <4 x i32> @merge_4i32_i32_23u5_inc3(i32* %ptr) nounwind uwtable noinline ssp {
+; SSE-LABEL: merge_4i32_i32_23u5_inc3:
+; SSE: # BB#0:
+; SSE-NEXT: movups 8(%rdi), %xmm0
+; SSE-NEXT: incl 12(%rdi)
+; SSE-NEXT: retq
+;
+; AVX-LABEL: merge_4i32_i32_23u5_inc3:
+; AVX: # BB#0:
+; AVX-NEXT: vmovups 8(%rdi), %xmm0
+; AVX-NEXT: incl 12(%rdi)
+; AVX-NEXT: retq
+;
+; X32-SSE1-LABEL: merge_4i32_i32_23u5_inc3:
+; X32-SSE1: # BB#0:
+; X32-SSE1-NEXT: pushl %edi
+; X32-SSE1-NEXT: .Lcfi10:
+; X32-SSE1-NEXT: .cfi_def_cfa_offset 8
+; X32-SSE1-NEXT: pushl %esi
+; X32-SSE1-NEXT: .Lcfi11:
+; X32-SSE1-NEXT: .cfi_def_cfa_offset 12
+; X32-SSE1-NEXT: .Lcfi12:
+; X32-SSE1-NEXT: .cfi_offset %esi, -12
+; X32-SSE1-NEXT: .Lcfi13:
+; X32-SSE1-NEXT: .cfi_offset %edi, -8
+; X32-SSE1-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-SSE1-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X32-SSE1-NEXT: movl 8(%ecx), %edx
+; X32-SSE1-NEXT: movl 12(%ecx), %esi
+; X32-SSE1-NEXT: leal 1(%esi), %edi
+; X32-SSE1-NEXT: movl %edi, 12(%ecx)
+; X32-SSE1-NEXT: movl 20(%ecx), %ecx
+; X32-SSE1-NEXT: movl %esi, 4(%eax)
+; X32-SSE1-NEXT: movl %edx, (%eax)
+; X32-SSE1-NEXT: movl %ecx, 12(%eax)
+; X32-SSE1-NEXT: popl %esi
+; X32-SSE1-NEXT: popl %edi
+; X32-SSE1-NEXT: retl $4
+;
+; X32-SSE41-LABEL: merge_4i32_i32_23u5_inc3:
+; X32-SSE41: # BB#0:
+; X32-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-SSE41-NEXT: movups 8(%eax), %xmm0
+; X32-SSE41-NEXT: incl 12(%eax)
+; X32-SSE41-NEXT: retl
+ %ptr0 = getelementptr inbounds i32, i32* %ptr, i64 2
+ %ptr1 = getelementptr inbounds i32, i32* %ptr, i64 3
+ %ptr3 = getelementptr inbounds i32, i32* %ptr, i64 5
+ %val0 = load i32, i32* %ptr0
+ %val1 = load i32, i32* %ptr1
+ %inc = add i32 %val1, 1
+ store i32 %inc, i32* %ptr1
+ %val3 = load i32, i32* %ptr3
+ %res0 = insertelement <4 x i32> undef, i32 %val0, i32 0
+ %res1 = insertelement <4 x i32> %res0, i32 %val1, i32 1
+ %res3 = insertelement <4 x i32> %res1, i32 %val3, i32 3
+ ret <4 x i32> %res3
+}
+
define <4 x i32> @merge_4i32_i32_3zuu(i32* %ptr) nounwind uwtable noinline ssp {
; SSE-LABEL: merge_4i32_i32_3zuu:
; SSE: # BB#0:
@@ -513,6 +631,118 @@ define <4 x i32> @merge_4i32_i32_45zz(i32* %ptr) nounwind uwtable noinline ssp {
ret <4 x i32> %res1
}
+define <4 x i32> @merge_4i32_i32_45zz_inc4(i32* %ptr) nounwind uwtable noinline ssp {
+; SSE-LABEL: merge_4i32_i32_45zz_inc4:
+; SSE: # BB#0:
+; SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
+; SSE-NEXT: incl 16(%rdi)
+; SSE-NEXT: retq
+;
+; AVX-LABEL: merge_4i32_i32_45zz_inc4:
+; AVX: # BB#0:
+; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
+; AVX-NEXT: incl 16(%rdi)
+; AVX-NEXT: retq
+;
+; X32-SSE1-LABEL: merge_4i32_i32_45zz_inc4:
+; X32-SSE1: # BB#0:
+; X32-SSE1-NEXT: pushl %edi
+; X32-SSE1-NEXT: .Lcfi14:
+; X32-SSE1-NEXT: .cfi_def_cfa_offset 8
+; X32-SSE1-NEXT: pushl %esi
+; X32-SSE1-NEXT: .Lcfi15:
+; X32-SSE1-NEXT: .cfi_def_cfa_offset 12
+; X32-SSE1-NEXT: .Lcfi16:
+; X32-SSE1-NEXT: .cfi_offset %esi, -12
+; X32-SSE1-NEXT: .Lcfi17:
+; X32-SSE1-NEXT: .cfi_offset %edi, -8
+; X32-SSE1-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-SSE1-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X32-SSE1-NEXT: movl 16(%ecx), %edx
+; X32-SSE1-NEXT: movl 20(%ecx), %esi
+; X32-SSE1-NEXT: leal 1(%edx), %edi
+; X32-SSE1-NEXT: movl %edi, 16(%ecx)
+; X32-SSE1-NEXT: movl %esi, 4(%eax)
+; X32-SSE1-NEXT: movl %edx, (%eax)
+; X32-SSE1-NEXT: movl $0, 12(%eax)
+; X32-SSE1-NEXT: movl $0, 8(%eax)
+; X32-SSE1-NEXT: popl %esi
+; X32-SSE1-NEXT: popl %edi
+; X32-SSE1-NEXT: retl $4
+;
+; X32-SSE41-LABEL: merge_4i32_i32_45zz_inc4:
+; X32-SSE41: # BB#0:
+; X32-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-SSE41-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
+; X32-SSE41-NEXT: incl 16(%eax)
+; X32-SSE41-NEXT: retl
+ %ptr0 = getelementptr inbounds i32, i32* %ptr, i64 4
+ %ptr1 = getelementptr inbounds i32, i32* %ptr, i64 5
+ %val0 = load i32, i32* %ptr0
+ %inc = add i32 %val0, 1
+ store i32 %inc, i32* %ptr0
+ %val1 = load i32, i32* %ptr1
+ %res0 = insertelement <4 x i32> zeroinitializer, i32 %val0, i32 0
+ %res1 = insertelement <4 x i32> %res0, i32 %val1, i32 1
+ ret <4 x i32> %res1
+}
+
+define <4 x i32> @merge_4i32_i32_45zz_inc5(i32* %ptr) nounwind uwtable noinline ssp {
+; SSE-LABEL: merge_4i32_i32_45zz_inc5:
+; SSE: # BB#0:
+; SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
+; SSE-NEXT: incl 20(%rdi)
+; SSE-NEXT: retq
+;
+; AVX-LABEL: merge_4i32_i32_45zz_inc5:
+; AVX: # BB#0:
+; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
+; AVX-NEXT: incl 20(%rdi)
+; AVX-NEXT: retq
+;
+; X32-SSE1-LABEL: merge_4i32_i32_45zz_inc5:
+; X32-SSE1: # BB#0:
+; X32-SSE1-NEXT: pushl %edi
+; X32-SSE1-NEXT: .Lcfi18:
+; X32-SSE1-NEXT: .cfi_def_cfa_offset 8
+; X32-SSE1-NEXT: pushl %esi
+; X32-SSE1-NEXT: .Lcfi19:
+; X32-SSE1-NEXT: .cfi_def_cfa_offset 12
+; X32-SSE1-NEXT: .Lcfi20:
+; X32-SSE1-NEXT: .cfi_offset %esi, -12
+; X32-SSE1-NEXT: .Lcfi21:
+; X32-SSE1-NEXT: .cfi_offset %edi, -8
+; X32-SSE1-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-SSE1-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X32-SSE1-NEXT: movl 16(%ecx), %edx
+; X32-SSE1-NEXT: movl 20(%ecx), %esi
+; X32-SSE1-NEXT: leal 1(%esi), %edi
+; X32-SSE1-NEXT: movl %edi, 20(%ecx)
+; X32-SSE1-NEXT: movl %esi, 4(%eax)
+; X32-SSE1-NEXT: movl %edx, (%eax)
+; X32-SSE1-NEXT: movl $0, 12(%eax)
+; X32-SSE1-NEXT: movl $0, 8(%eax)
+; X32-SSE1-NEXT: popl %esi
+; X32-SSE1-NEXT: popl %edi
+; X32-SSE1-NEXT: retl $4
+;
+; X32-SSE41-LABEL: merge_4i32_i32_45zz_inc5:
+; X32-SSE41: # BB#0:
+; X32-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-SSE41-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
+; X32-SSE41-NEXT: incl 20(%eax)
+; X32-SSE41-NEXT: retl
+ %ptr0 = getelementptr inbounds i32, i32* %ptr, i64 4
+ %ptr1 = getelementptr inbounds i32, i32* %ptr, i64 5
+ %val0 = load i32, i32* %ptr0
+ %val1 = load i32, i32* %ptr1
+ %inc = add i32 %val1, 1
+ store i32 %inc, i32* %ptr1
+ %res0 = insertelement <4 x i32> zeroinitializer, i32 %val0, i32 0
+ %res1 = insertelement <4 x i32> %res0, i32 %val1, i32 1
+ ret <4 x i32> %res1
+}
+
define <8 x i16> @merge_8i16_i16_23u567u9(i16* %ptr) nounwind uwtable noinline ssp {
; SSE-LABEL: merge_8i16_i16_23u567u9:
; SSE: # BB#0:
@@ -527,14 +757,14 @@ define <8 x i16> @merge_8i16_i16_23u567u9(i16* %ptr) nounwind uwtable noinline s
; X32-SSE1-LABEL: merge_8i16_i16_23u567u9:
; X32-SSE1: # BB#0:
; X32-SSE1-NEXT: pushl %edi
-; X32-SSE1-NEXT: .Lcfi6:
+; X32-SSE1-NEXT: .Lcfi22:
; X32-SSE1-NEXT: .cfi_def_cfa_offset 8
; X32-SSE1-NEXT: pushl %esi
-; X32-SSE1-NEXT: .Lcfi7:
+; X32-SSE1-NEXT: .Lcfi23:
; X32-SSE1-NEXT: .cfi_def_cfa_offset 12
-; X32-SSE1-NEXT: .Lcfi8:
+; X32-SSE1-NEXT: .Lcfi24:
; X32-SSE1-NEXT: .cfi_offset %esi, -12
-; X32-SSE1-NEXT: .Lcfi9:
+; X32-SSE1-NEXT: .Lcfi25:
; X32-SSE1-NEXT: .cfi_offset %edi, -8
; X32-SSE1-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-SSE1-NEXT: movl {{[0-9]+}}(%esp), %ecx
@@ -667,24 +897,24 @@ define <16 x i8> @merge_16i8_i8_01u3456789ABCDuF(i8* %ptr) nounwind uwtable noin
; X32-SSE1-LABEL: merge_16i8_i8_01u3456789ABCDuF:
; X32-SSE1: # BB#0:
; X32-SSE1-NEXT: pushl %ebp
-; X32-SSE1-NEXT: .Lcfi10:
+; X32-SSE1-NEXT: .Lcfi26:
; X32-SSE1-NEXT: .cfi_def_cfa_offset 8
; X32-SSE1-NEXT: pushl %ebx
-; X32-SSE1-NEXT: .Lcfi11:
+; X32-SSE1-NEXT: .Lcfi27:
; X32-SSE1-NEXT: .cfi_def_cfa_offset 12
; X32-SSE1-NEXT: pushl %edi
-; X32-SSE1-NEXT: .Lcfi12:
+; X32-SSE1-NEXT: .Lcfi28:
; X32-SSE1-NEXT: .cfi_def_cfa_offset 16
; X32-SSE1-NEXT: pushl %esi
-; X32-SSE1-NEXT: .Lcfi13:
+; X32-SSE1-NEXT: .Lcfi29:
; X32-SSE1-NEXT: .cfi_def_cfa_offset 20
-; X32-SSE1-NEXT: .Lcfi14:
+; X32-SSE1-NEXT: .Lcfi30:
; X32-SSE1-NEXT: .cfi_offset %esi, -20
-; X32-SSE1-NEXT: .Lcfi15:
+; X32-SSE1-NEXT: .Lcfi31:
; X32-SSE1-NEXT: .cfi_offset %edi, -16
-; X32-SSE1-NEXT: .Lcfi16:
+; X32-SSE1-NEXT: .Lcfi32:
; X32-SSE1-NEXT: .cfi_offset %ebx, -12
-; X32-SSE1-NEXT: .Lcfi17:
+; X32-SSE1-NEXT: .Lcfi33:
; X32-SSE1-NEXT: .cfi_offset %ebp, -8
; X32-SSE1-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-SSE1-NEXT: movl {{[0-9]+}}(%esp), %ecx
@@ -917,14 +1147,14 @@ define <2 x i64> @merge_2i64_i64_12_volatile(i64* %ptr) nounwind uwtable noinlin
; X32-SSE1-LABEL: merge_2i64_i64_12_volatile:
; X32-SSE1: # BB#0:
; X32-SSE1-NEXT: pushl %edi
-; X32-SSE1-NEXT: .Lcfi18:
+; X32-SSE1-NEXT: .Lcfi34:
; X32-SSE1-NEXT: .cfi_def_cfa_offset 8
; X32-SSE1-NEXT: pushl %esi
-; X32-SSE1-NEXT: .Lcfi19:
+; X32-SSE1-NEXT: .Lcfi35:
; X32-SSE1-NEXT: .cfi_def_cfa_offset 12
-; X32-SSE1-NEXT: .Lcfi20:
+; X32-SSE1-NEXT: .Lcfi36:
; X32-SSE1-NEXT: .cfi_offset %esi, -12
-; X32-SSE1-NEXT: .Lcfi21:
+; X32-SSE1-NEXT: .Lcfi37:
; X32-SSE1-NEXT: .cfi_offset %edi, -8
; X32-SSE1-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-SSE1-NEXT: movl {{[0-9]+}}(%esp), %ecx
OpenPOWER on IntegriCloud