4 files changed, 705 insertions, 0 deletions
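This patch adds DSE test coverage for the element-wise unordered-atomic memory intrinsics. For orientation, these are the intrinsic families involved, as declared in the tests below (per LangRef, the trailing i32 argument is the element size, and the length operand must be a multiple of it):

declare void @llvm.memset.element.unordered.atomic.p0i8.i64(i8* nocapture, i8, i64, i32) nounwind
declare void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32) nounwind
declare void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i64(i8* nocapture, i8* nocapture readonly, i64, i32) nounwind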
diff --git a/llvm/test/Transforms/DeadStoreElimination/OverwriteStoreBegin.ll b/llvm/test/Transforms/DeadStoreElimination/OverwriteStoreBegin.ll
index 69615f52bbc..38e0cef1716 100644
--- a/llvm/test/Transforms/DeadStoreElimination/OverwriteStoreBegin.ll
+++ b/llvm/test/Transforms/DeadStoreElimination/OverwriteStoreBegin.ll
@@ -21,6 +21,25 @@ entry:
   ret void
 }
 
+define void @write4to7_atomic(i32* nocapture %p) {
+; CHECK-LABEL: @write4to7_atomic(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[ARRAYIDX0:%.*]] = getelementptr inbounds i32, i32* [[P:%.*]], i64 1
+; CHECK-NEXT:    [[P3:%.*]] = bitcast i32* [[ARRAYIDX0]] to i8*
+; CHECK-NEXT:    call void @llvm.memset.element.unordered.atomic.p0i8.i64(i8* align 4 [[P3]], i8 0, i64 28, i32 4)
+; CHECK-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, i32* [[P]], i64 1
+; CHECK-NEXT:    store atomic i32 1, i32* [[ARRAYIDX1]] unordered, align 4
+; CHECK-NEXT:    ret void
+;
+entry:
+  %arrayidx0 = getelementptr inbounds i32, i32* %p, i64 1
+  %p3 = bitcast i32* %arrayidx0 to i8*
+  call void @llvm.memset.element.unordered.atomic.p0i8.i64(i8* align 4 %p3, i8 0, i64 28, i32 4)
+  %arrayidx1 = getelementptr inbounds i32, i32* %p, i64 1
+  store atomic i32 1, i32* %arrayidx1 unordered, align 4
+  ret void
+}
+
 define void @write0to3(i32* nocapture %p) {
 ; CHECK-LABEL: @write0to3(
 ; CHECK-NEXT:  entry:
@@ -37,6 +56,37 @@ entry:
   ret void
 }
 
+define void @write0to3_atomic(i32* nocapture %p) {
+; CHECK-LABEL: @write0to3_atomic(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[P3:%.*]] = bitcast i32* [[P:%.*]] to i8*
+; CHECK-NEXT:    call void @llvm.memset.element.unordered.atomic.p0i8.i64(i8* align 4 [[P3]], i8 0, i64 28, i32 4)
+; CHECK-NEXT:    store atomic i32 1, i32* [[P]] unordered, align 4
+; CHECK-NEXT:    ret void
+;
+entry:
+  %p3 = bitcast i32* %p to i8*
+  call void @llvm.memset.element.unordered.atomic.p0i8.i64(i8* align 4 %p3, i8 0, i64 28, i32 4)
+  store atomic i32 1, i32* %p unordered, align 4
+  ret void
+}
+
+; Atomicity of the store is weaker than that of the memset
+define void @write0to3_atomic_weaker(i32* nocapture %p) {
+; CHECK-LABEL: @write0to3_atomic_weaker(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[P3:%.*]] = bitcast i32* [[P:%.*]] to i8*
+; CHECK-NEXT:    call void @llvm.memset.element.unordered.atomic.p0i8.i64(i8* align 4 [[P3]], i8 0, i64 28, i32 4)
+; CHECK-NEXT:    store i32 1, i32* [[P]], align 4
+; CHECK-NEXT:    ret void
+;
+entry:
+  %p3 = bitcast i32* %p to i8*
+  call void @llvm.memset.element.unordered.atomic.p0i8.i64(i8* align 4 %p3, i8 0, i64 28, i32 4)
+  store i32 1, i32* %p, align 4
+  ret void
+}
+
 define void @write0to7(i32* nocapture %p) {
 ; CHECK-LABEL: @write0to7(
 ; CHECK-NEXT:  entry:
@@ -55,6 +105,25 @@ entry:
   ret void
 }
 
+; Changing the memset start and length is okay here because the
+; store is a multiple of the memset element size
+define void @write0to7_atomic(i32* nocapture %p) {
+; CHECK-LABEL: @write0to7_atomic(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[P3:%.*]] = bitcast i32* [[P:%.*]] to i8*
+; CHECK-NEXT:    call void @llvm.memset.element.unordered.atomic.p0i8.i64(i8* align 4 [[P3]], i8 0, i64 32, i32 4)
+; CHECK-NEXT:    [[P4:%.*]] = bitcast i32* [[P]] to i64*
+; CHECK-NEXT:    store atomic i64 1, i64* [[P4]] unordered, align 8
+; CHECK-NEXT:    ret void
+;
+entry:
+  %p3 = bitcast i32* %p to i8*
+  call void @llvm.memset.element.unordered.atomic.p0i8.i64(i8* align 4 %p3, i8 0, i64 32, i32 4)
+  %p4 = bitcast i32* %p to i64*
+  store atomic i64 1, i64* %p4 unordered, align 8
+  ret void
+}
+
 define void @write0to7_2(i32* nocapture %p) {
 ; CHECK-LABEL: @write0to7_2(
 ; CHECK-NEXT:  entry:
@@ -75,6 +144,25 @@ entry:
   ret void
 }
 
+define void @write0to7_2_atomic(i32* nocapture %p) {
+; CHECK-LABEL: @write0to7_2_atomic(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[ARRAYIDX0:%.*]] = getelementptr inbounds i32, i32* [[P:%.*]], i64 1
+; CHECK-NEXT:    [[P3:%.*]] = bitcast i32* [[ARRAYIDX0]] to i8*
+; CHECK-NEXT:    call void @llvm.memset.element.unordered.atomic.p0i8.i64(i8* align 4 [[P3]], i8 0, i64 28, i32 4)
+; CHECK-NEXT:    [[P4:%.*]] = bitcast i32* [[P]] to i64*
+; CHECK-NEXT:    store atomic i64 1, i64* [[P4]] unordered, align 8
+; CHECK-NEXT:    ret void
+;
+entry:
+  %arrayidx0 = getelementptr inbounds i32, i32* %p, i64 1
+  %p3 = bitcast i32* %arrayidx0 to i8*
+  call void @llvm.memset.element.unordered.atomic.p0i8.i64(i8* align 4 %p3, i8 0, i64 28, i32 4)
+  %p4 = bitcast i32* %p to i64*
+  store atomic i64 1, i64* %p4 unordered, align 8
+  ret void
+}
+
 ; We do not trim the beginning of the earlier write if the alignment of the
 ; start pointer is changed.
 define void @dontwrite0to3_align8(i32* nocapture %p) {
@@ -92,6 +180,21 @@ entry:
   ret void
 }
 
+define void @dontwrite0to3_align8_atomic(i32* nocapture %p) {
+; CHECK-LABEL: @dontwrite0to3_align8_atomic(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[P3:%.*]] = bitcast i32* [[P:%.*]] to i8*
+; CHECK-NEXT:    call void @llvm.memset.element.unordered.atomic.p0i8.i64(i8* align 8 [[P3]], i8 0, i64 32, i32 4)
+; CHECK-NEXT:    store atomic i32 1, i32* [[P]] unordered, align 4
+; CHECK-NEXT:    ret void
+;
+entry:
+  %p3 = bitcast i32* %p to i8*
+  call void @llvm.memset.element.unordered.atomic.p0i8.i64(i8* align 8 %p3, i8 0, i64 32, i32 4)
+  store atomic i32 1, i32* %p unordered, align 4
+  ret void
+}
+
 define void @dontwrite0to1(i32* nocapture %p) {
 ; CHECK-LABEL: @dontwrite0to1(
 ; CHECK-NEXT:  entry:
@@ -109,6 +212,23 @@ entry:
   ret void
 }
 
+define void @dontwrite0to1_atomic(i32* nocapture %p) {
+; CHECK-LABEL: @dontwrite0to1_atomic(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[P3:%.*]] = bitcast i32* [[P:%.*]] to i8*
+; CHECK-NEXT:    call void @llvm.memset.element.unordered.atomic.p0i8.i64(i8* align 4 [[P3]], i8 0, i64 32, i32 4)
+; CHECK-NEXT:    [[P4:%.*]] = bitcast i32* [[P]] to i16*
+; CHECK-NEXT:    store atomic i16 1, i16* [[P4]] unordered, align 4
+; CHECK-NEXT:    ret void
+;
+entry:
+  %p3 = bitcast i32* %p to i8*
+  call void @llvm.memset.element.unordered.atomic.p0i8.i64(i8* align 4 %p3, i8 0, i64 32, i32 4)
+  %p4 = bitcast i32* %p to i16*
+  store atomic i16 1, i16* %p4 unordered, align 4
+  ret void
+}
+
 define void @dontwrite2to9(i32* nocapture %p) {
 ; CHECK-LABEL: @dontwrite2to9(
 ; CHECK-NEXT:  entry:
@@ -132,6 +252,29 @@ entry:
   ret void
 }
 
+define void @dontwrite2to9_atomic(i32* nocapture %p) {
+; CHECK-LABEL: @dontwrite2to9_atomic(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[ARRAYIDX0:%.*]] = getelementptr inbounds i32, i32* [[P:%.*]], i64 1
+; CHECK-NEXT:    [[P3:%.*]] = bitcast i32* [[ARRAYIDX0]] to i8*
+; CHECK-NEXT:    call void @llvm.memset.element.unordered.atomic.p0i8.i64(i8* align 4 [[P3]], i8 0, i64 32, i32 4)
+; CHECK-NEXT:    [[P4:%.*]] = bitcast i32* [[P]] to i16*
+; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds i16, i16* [[P4]], i64 1
+; CHECK-NEXT:    [[P5:%.*]] = bitcast i16* [[ARRAYIDX2]] to i64*
+; CHECK-NEXT:    store atomic i64 1, i64* [[P5]] unordered, align 8
+; CHECK-NEXT:    ret void
+;
+entry:
+  %arrayidx0 = getelementptr inbounds i32, i32* %p, i64 1
+  %p3 = bitcast i32* %arrayidx0 to i8*
+  call void @llvm.memset.element.unordered.atomic.p0i8.i64(i8* align 4 %p3, i8 0, i64 32, i32 4)
+  %p4 = bitcast i32* %p to i16*
+  %arrayidx2 = getelementptr inbounds i16, i16* %p4, i64 1
+  %p5 = bitcast i16* %arrayidx2 to i64*
+  store atomic i64 1, i64* %p5 unordered, align 8
+  ret void
+}
+
 define void @write8To15AndThen0To7(i64* nocapture %P) {
 ; CHECK-LABEL: @write8To15AndThen0To7(
 ; CHECK-NEXT:  entry:
@@ -159,5 +302,84 @@ entry:
   ret void
 }
 
+define void @write8To15AndThen0To7_atomic(i64* nocapture %P) {
+; CHECK-LABEL: @write8To15AndThen0To7_atomic(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[BASE0:%.*]] = bitcast i64* [[P:%.*]] to i8*
+; CHECK-NEXT:    [[MYBASE0:%.*]] = getelementptr inbounds i8, i8* [[BASE0]], i64 0
+; CHECK-NEXT:    tail call void @llvm.memset.element.unordered.atomic.p0i8.i64(i8* align 8 [[MYBASE0]], i8 0, i64 32, i32 8)
+; CHECK-NEXT:    [[BASE64_0:%.*]] = getelementptr inbounds i64, i64* [[P]], i64 0
+; CHECK-NEXT:    [[BASE64_1:%.*]] = getelementptr inbounds i64, i64* [[P]], i64 1
+; CHECK-NEXT:    store atomic i64 1, i64* [[BASE64_1]] unordered, align 8
+; CHECK-NEXT:    store atomic i64 2, i64* [[BASE64_0]] unordered, align 8
+; CHECK-NEXT:    ret void
+;
+entry:
+
+  %base0 = bitcast i64* %P to i8*
+  %mybase0 = getelementptr inbounds i8, i8* %base0, i64 0
+  tail call void @llvm.memset.element.unordered.atomic.p0i8.i64(i8* align 8 %mybase0, i8 0, i64 32, i32 8)
+
+  %base64_0 = getelementptr inbounds i64, i64* %P, i64 0
+  %base64_1 = getelementptr inbounds i64, i64* %P, i64 1
+
+  store atomic i64 1, i64* %base64_1 unordered, align 8
+  store atomic i64 2, i64* %base64_0 unordered, align 8
+  ret void
+}
+
+define void @write8To15AndThen0To7_atomic_weaker(i64* nocapture %P) {
+; CHECK-LABEL: @write8To15AndThen0To7_atomic_weaker(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[BASE0:%.*]] = bitcast i64* [[P:%.*]] to i8*
+; CHECK-NEXT:    [[MYBASE0:%.*]] = getelementptr inbounds i8, i8* [[BASE0]], i64 0
+; CHECK-NEXT:    tail call void @llvm.memset.element.unordered.atomic.p0i8.i64(i8* align 8 [[MYBASE0]], i8 0, i64 32, i32 8)
+; CHECK-NEXT:    [[BASE64_0:%.*]] = getelementptr inbounds i64, i64* [[P]], i64 0
+; CHECK-NEXT:    [[BASE64_1:%.*]] = getelementptr inbounds i64, i64* [[P]], i64 1
+; CHECK-NEXT:    store atomic i64 1, i64* [[BASE64_1]] unordered, align 8
+; CHECK-NEXT:    store i64 2, i64* [[BASE64_0]], align 8
+; CHECK-NEXT:    ret void
+;
+entry:
+
+  %base0 = bitcast i64* %P to i8*
+  %mybase0 = getelementptr inbounds i8, i8* %base0, i64 0
+  tail call void @llvm.memset.element.unordered.atomic.p0i8.i64(i8* align 8 %mybase0, i8 0, i64 32, i32 8)
+
+  %base64_0 = getelementptr inbounds i64, i64* %P, i64 0
+  %base64_1 = getelementptr inbounds i64, i64* %P, i64 1
+
+  store atomic i64 1, i64* %base64_1 unordered, align 8
+  store i64 2, i64* %base64_0, align 8
+  ret void
+}
+
+define void @write8To15AndThen0To7_atomic_weaker_2(i64* nocapture %P) {
+; CHECK-LABEL: @write8To15AndThen0To7_atomic_weaker_2(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[BASE0:%.*]] = bitcast i64* [[P:%.*]] to i8*
+; CHECK-NEXT:    [[MYBASE0:%.*]] = getelementptr inbounds i8, i8* [[BASE0]], i64 0
+; CHECK-NEXT:    tail call void @llvm.memset.element.unordered.atomic.p0i8.i64(i8* align 8 [[MYBASE0]], i8 0, i64 32, i32 8)
+; CHECK-NEXT:    [[BASE64_0:%.*]] = getelementptr inbounds i64, i64* [[P]], i64 0
+; CHECK-NEXT:    [[BASE64_1:%.*]] = getelementptr inbounds i64, i64* [[P]], i64 1
+; CHECK-NEXT:    store i64 1, i64* [[BASE64_1]], align 8
+; CHECK-NEXT:    store atomic i64 2, i64* [[BASE64_0]] unordered, align 8
+; CHECK-NEXT:    ret void
+;
+entry:
+
+  %base0 = bitcast i64* %P to i8*
+  %mybase0 = getelementptr inbounds i8, i8* %base0, i64 0
+  tail call void @llvm.memset.element.unordered.atomic.p0i8.i64(i8* align 8 %mybase0, i8 0, i64 32, i32 8)
+
+  %base64_0 = getelementptr inbounds i64, i64* %P, i64 0
+  %base64_1 = getelementptr inbounds i64, i64* %P, i64 1
+
+  store i64 1, i64* %base64_1, align 8
+  store atomic i64 2, i64* %base64_0 unordered, align 8
+  ret void
+}
+
 declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i1) nounwind
+declare void @llvm.memset.element.unordered.atomic.p0i8.i64(i8* nocapture, i8, i64, i32) nounwind
diff --git a/llvm/test/Transforms/DeadStoreElimination/OverwriteStoreEnd.ll b/llvm/test/Transforms/DeadStoreElimination/OverwriteStoreEnd.ll
index 00282e0648a..ace06b46758 100644
--- a/llvm/test/Transforms/DeadStoreElimination/OverwriteStoreEnd.ll
+++ b/llvm/test/Transforms/DeadStoreElimination/OverwriteStoreEnd.ll
@@ -27,6 +27,45 @@ entry:
   ret void
 }
 
+define void @write24to28_atomic(i32* nocapture %p) nounwind uwtable ssp {
+; CHECK-LABEL: @write24to28_atomic(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[ARRAYIDX0:%.*]] = getelementptr inbounds i32, i32* [[P:%.*]], i64 1
+; CHECK-NEXT:    [[P3:%.*]] = bitcast i32* [[ARRAYIDX0]] to i8*
+; CHECK-NEXT:    call void @llvm.memset.element.unordered.atomic.p0i8.i64(i8* align 4 [[P3]], i8 0, i64 28, i32 4)
+; CHECK-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, i32* [[P]], i64 7
+; CHECK-NEXT:    store atomic i32 1, i32* [[ARRAYIDX1]] unordered, align 4
+; CHECK-NEXT:    ret void
+;
+entry:
+  %arrayidx0 = getelementptr inbounds i32, i32* %p, i64 1
+  %p3 = bitcast i32* %arrayidx0 to i8*
+  call void @llvm.memset.element.unordered.atomic.p0i8.i64(i8* align 4 %p3, i8 0, i64 28, i32 4)
+  %arrayidx1 = getelementptr inbounds i32, i32* %p, i64 7
+  store atomic i32 1, i32* %arrayidx1 unordered, align 4
+  ret void
+}
+
+; Atomicity of the store is weaker than that of the memset
+define void @write24to28_atomic_weaker(i32* nocapture %p) nounwind uwtable ssp {
+; CHECK-LABEL: @write24to28_atomic_weaker(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[ARRAYIDX0:%.*]] = getelementptr inbounds i32, i32* [[P:%.*]], i64 1
+; CHECK-NEXT:    [[P3:%.*]] = bitcast i32* [[ARRAYIDX0]] to i8*
+; CHECK-NEXT:    call void @llvm.memset.element.unordered.atomic.p0i8.i64(i8* align 4 [[P3]], i8 0, i64 28, i32 4)
+; CHECK-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, i32* [[P]], i64 7
+; CHECK-NEXT:    store i32 1, i32* [[ARRAYIDX1]], align 4
+; CHECK-NEXT:    ret void
+;
+entry:
+  %arrayidx0 = getelementptr inbounds i32, i32* %p, i64 1
+  %p3 = bitcast i32* %arrayidx0 to i8*
+  call void @llvm.memset.element.unordered.atomic.p0i8.i64(i8* align 4 %p3, i8 0, i64 28, i32 4)
+  %arrayidx1 = getelementptr inbounds i32, i32* %p, i64 7
+  store i32 1, i32* %arrayidx1, align 4
+  ret void
+}
+
 define void @write28to32(i32* nocapture %p) nounwind uwtable ssp {
 ; CHECK-LABEL: @write28to32(
 ; CHECK-NEXT:  entry:
@@ -44,6 +83,23 @@ entry:
   ret void
 }
 
+define void @write28to32_atomic(i32* nocapture %p) nounwind uwtable ssp {
+; CHECK-LABEL: @write28to32_atomic(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[P3:%.*]] = bitcast i32* [[P:%.*]] to i8*
+; CHECK-NEXT:    call void @llvm.memset.element.unordered.atomic.p0i8.i64(i8* align 4 [[P3]], i8 0, i64 32, i32 4)
+; CHECK-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, i32* [[P]], i64 7
+; CHECK-NEXT:    store atomic i32 1, i32* [[ARRAYIDX1]] unordered, align 4
+; CHECK-NEXT:    ret void
+;
+entry:
+  %p3 = bitcast i32* %p to i8*
+  call void @llvm.memset.element.unordered.atomic.p0i8.i64(i8* align 4 %p3, i8 0, i64 32, i32 4)
+  %arrayidx1 = getelementptr inbounds i32, i32* %p, i64 7
+  store atomic i32 1, i32* %arrayidx1 unordered, align 4
+  ret void
+}
+
 define void @dontwrite28to32memset(i32* nocapture %p) nounwind uwtable ssp {
 ; CHECK-LABEL: @dontwrite28to32memset(
 ; CHECK-NEXT:  entry:
@@ -61,6 +117,23 @@ entry:
   ret void
 }
 
+define void @dontwrite28to32memset_atomic(i32* nocapture %p) nounwind uwtable ssp {
+; CHECK-LABEL: @dontwrite28to32memset_atomic(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[P3:%.*]] = bitcast i32* [[P:%.*]] to i8*
+; CHECK-NEXT:    call void @llvm.memset.element.unordered.atomic.p0i8.i64(i8* align 16 [[P3]], i8 0, i64 32, i32 4)
+; CHECK-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, i32* [[P]], i64 7
+; CHECK-NEXT:    store atomic i32 1, i32* [[ARRAYIDX1]] unordered, align 4
+; CHECK-NEXT:    ret void
+;
+entry:
+  %p3 = bitcast i32* %p to i8*
+  call void @llvm.memset.element.unordered.atomic.p0i8.i64(i8* align 16 %p3, i8 0, i64 32, i32 4)
+  %arrayidx1 = getelementptr inbounds i32, i32* %p, i64 7
+  store atomic i32 1, i32* %arrayidx1 unordered, align 4
+  ret void
+}
+
 define void @write32to36(%struct.vec2plusi* nocapture %p) nounwind uwtable ssp {
 ; CHECK-LABEL: @write32to36(
 ; CHECK-NEXT:  entry:
@@ -78,6 +151,41 @@ entry:
   ret void
 }
 
+define void @write32to36_atomic(%struct.vec2plusi* nocapture %p) nounwind uwtable ssp {
+; CHECK-LABEL: @write32to36_atomic(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = bitcast %struct.vec2plusi* [[P:%.*]] to i8*
+; CHECK-NEXT:    tail call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 bitcast (%struct.vec2plusi* @glob2 to i8*), i64 36, i32 4)
+; CHECK-NEXT:    [[C:%.*]] = getelementptr inbounds [[STRUCT_VEC2PLUSI:%.*]], %struct.vec2plusi* [[P]], i64 0, i32 2
+; CHECK-NEXT:    store atomic i32 1, i32* [[C]] unordered, align 4
+; CHECK-NEXT:    ret void
+;
+entry:
+  %0 = bitcast %struct.vec2plusi* %p to i8*
+  tail call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 16 %0, i8* align 16 bitcast (%struct.vec2plusi* @glob2 to i8*), i64 36, i32 4)
+  %c = getelementptr inbounds %struct.vec2plusi, %struct.vec2plusi* %p, i64 0, i32 2
+  store atomic i32 1, i32* %c unordered, align 4
+  ret void
+}
+
+; Atomicity of the store is weaker than that of the memcpy
+define void @write32to36_atomic_weaker(%struct.vec2plusi* nocapture %p) nounwind uwtable ssp {
+; CHECK-LABEL: @write32to36_atomic_weaker(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = bitcast %struct.vec2plusi* [[P:%.*]] to i8*
+; CHECK-NEXT:    tail call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 bitcast (%struct.vec2plusi* @glob2 to i8*), i64 36, i32 4)
+; CHECK-NEXT:    [[C:%.*]] = getelementptr inbounds [[STRUCT_VEC2PLUSI:%.*]], %struct.vec2plusi* [[P]], i64 0, i32 2
+; CHECK-NEXT:    store i32 1, i32* [[C]], align 4
+; CHECK-NEXT:    ret void
+;
+entry:
+  %0 = bitcast %struct.vec2plusi* %p to i8*
+  tail call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 16 %0, i8* align 16 bitcast (%struct.vec2plusi* @glob2 to i8*), i64 36, i32 4)
+  %c = getelementptr inbounds %struct.vec2plusi, %struct.vec2plusi* %p, i64 0, i32 2
+  store i32 1, i32* %c, align 4
+  ret void
+}
+
 define void @write16to32(%struct.vec2* nocapture %p) nounwind uwtable ssp {
 ; CHECK-LABEL: @write16to32(
 ; CHECK-NEXT:  entry:
@@ -95,6 +203,23 @@ entry:
   ret void
 }
 
+define void @write16to32_atomic(%struct.vec2* nocapture %p) nounwind uwtable ssp {
+; CHECK-LABEL: @write16to32_atomic(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = bitcast %struct.vec2* [[P:%.*]] to i8*
+; CHECK-NEXT:    tail call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 bitcast (%struct.vec2* @glob1 to i8*), i64 32, i32 4)
+; CHECK-NEXT:    [[C:%.*]] = getelementptr inbounds [[STRUCT_VEC2:%.*]], %struct.vec2* [[P]], i64 0, i32 1
+; CHECK-NEXT:    store <4 x i32> <i32 1, i32 2, i32 3, i32 4>, <4 x i32>* [[C]], align 4
+; CHECK-NEXT:    ret void
+;
+entry:
+  %0 = bitcast %struct.vec2* %p to i8*
+  tail call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 16 %0, i8* align 16 bitcast (%struct.vec2* @glob1 to i8*), i64 32, i32 4)
+  %c = getelementptr inbounds %struct.vec2, %struct.vec2* %p, i64 0, i32 1
+  store <4 x i32> <i32 1, i32 2, i32 3, i32 4>, <4 x i32>* %c, align 4
+  ret void
+}
+
 define void @dontwrite28to32memcpy(%struct.vec2* nocapture %p) nounwind uwtable ssp {
 ; CHECK-LABEL: @dontwrite28to32memcpy(
 ; CHECK-NEXT:  entry:
@@ -112,8 +237,27 @@ entry:
   ret void
 }
 
+define void @dontwrite28to32memcpy_atomic(%struct.vec2* nocapture %p) nounwind uwtable ssp {
+; CHECK-LABEL: @dontwrite28to32memcpy_atomic(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = bitcast %struct.vec2* [[P:%.*]] to i8*
+; CHECK-NEXT:    tail call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 bitcast (%struct.vec2* @glob1 to i8*), i64 32, i32 4)
+; CHECK-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds [[STRUCT_VEC2:%.*]], %struct.vec2* [[P]], i64 0, i32 0, i64 7
+; CHECK-NEXT:    store atomic i32 1, i32* [[ARRAYIDX1]] unordered, align 4
+; CHECK-NEXT:    ret void
+;
+entry:
+  %0 = bitcast %struct.vec2* %p to i8*
+  tail call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 16 %0, i8* align 16 bitcast (%struct.vec2* @glob1 to i8*), i64 32, i32 4)
+  %arrayidx1 = getelementptr inbounds %struct.vec2, %struct.vec2* %p, i64 0, i32 0, i64 7
+  store atomic i32 1, i32* %arrayidx1 unordered, align 4
+  ret void
+}
+
 declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i1) nounwind
+declare void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32) nounwind
 
 declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i1) nounwind
+declare void @llvm.memset.element.unordered.atomic.p0i8.i64(i8* nocapture, i8, i64, i32) nounwind
 
 %struct.trapframe = type { i64, i64, i64 }
@@ -166,3 +310,81 @@ entry:
   store i64 3, i64* %base64_3
   ret void
 }
+
+define void @write16To23AndThen24To31_atomic(i64* nocapture %P, i64 %n64, i32 %n32, i16 %n16, i8 %n8) {
+; CHECK-LABEL: @write16To23AndThen24To31_atomic(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[BASE0:%.*]] = bitcast i64* [[P:%.*]] to i8*
+; CHECK-NEXT:    [[MYBASE0:%.*]] = getelementptr inbounds i8, i8* [[BASE0]], i64 0
+; CHECK-NEXT:    tail call void @llvm.memset.element.unordered.atomic.p0i8.i64(i8* align 8 [[MYBASE0]], i8 0, i64 32, i32 8)
+; CHECK-NEXT:    [[BASE64_2:%.*]] = getelementptr inbounds i64, i64* [[P]], i64 2
+; CHECK-NEXT:    [[BASE64_3:%.*]] = getelementptr inbounds i64, i64* [[P]], i64 3
+; CHECK-NEXT:    store atomic i64 3, i64* [[BASE64_2]] unordered, align 8
+; CHECK-NEXT:    store atomic i64 3, i64* [[BASE64_3]] unordered, align 8
+; CHECK-NEXT:    ret void
+;
+entry:
+
+  %base0 = bitcast i64* %P to i8*
+  %mybase0 = getelementptr inbounds i8, i8* %base0, i64 0
+  tail call void @llvm.memset.element.unordered.atomic.p0i8.i64(i8* align 8 %mybase0, i8 0, i64 32, i32 8)
+
+  %base64_2 = getelementptr inbounds i64, i64* %P, i64 2
+  %base64_3 = getelementptr inbounds i64, i64* %P, i64 3
+
+  store atomic i64 3, i64* %base64_2 unordered, align 8
+  store atomic i64 3, i64* %base64_3 unordered, align 8
+  ret void
+}
+
+define void @write16To23AndThen24To31_atomic_weaker1(i64* nocapture %P, i64 %n64, i32 %n32, i16 %n16, i8 %n8) {
+; CHECK-LABEL: @write16To23AndThen24To31_atomic_weaker1(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[BASE0:%.*]] = bitcast i64* [[P:%.*]] to i8*
+; CHECK-NEXT:    [[MYBASE0:%.*]] = getelementptr inbounds i8, i8* [[BASE0]], i64 0
+; CHECK-NEXT:    tail call void @llvm.memset.element.unordered.atomic.p0i8.i64(i8* align 8 [[MYBASE0]], i8 0, i64 32, i32 8)
+; CHECK-NEXT:    [[BASE64_2:%.*]] = getelementptr inbounds i64, i64* [[P]], i64 2
+; CHECK-NEXT:    [[BASE64_3:%.*]] = getelementptr inbounds i64, i64* [[P]], i64 3
+; CHECK-NEXT:    store i64 3, i64* [[BASE64_2]], align 8
+; CHECK-NEXT:    store atomic i64 3, i64* [[BASE64_3]] unordered, align 8
+; CHECK-NEXT:    ret void
+;
+entry:
+
+  %base0 = bitcast i64* %P to i8*
+  %mybase0 = getelementptr inbounds i8, i8* %base0, i64 0
+  tail call void @llvm.memset.element.unordered.atomic.p0i8.i64(i8* align 8 %mybase0, i8 0, i64 32, i32 8)
+
+  %base64_2 = getelementptr inbounds i64, i64* %P, i64 2
+  %base64_3 = getelementptr inbounds i64, i64* %P, i64 3
+
+  store i64 3, i64* %base64_2, align 8
+  store atomic i64 3, i64* %base64_3 unordered, align 8
+  ret void
+}
+
+define void @write16To23AndThen24To31_atomic_weaker2(i64* nocapture %P, i64 %n64, i32 %n32, i16 %n16, i8 %n8) {
+; CHECK-LABEL: @write16To23AndThen24To31_atomic_weaker2(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[BASE0:%.*]] = bitcast i64* [[P:%.*]] to i8*
+; CHECK-NEXT:    [[MYBASE0:%.*]] = getelementptr inbounds i8, i8* [[BASE0]], i64 0
+; CHECK-NEXT:    tail call void @llvm.memset.element.unordered.atomic.p0i8.i64(i8* align 8 [[MYBASE0]], i8 0, i64 32, i32 8)
+; CHECK-NEXT:    [[BASE64_2:%.*]] = getelementptr inbounds i64, i64* [[P]], i64 2
+; CHECK-NEXT:    [[BASE64_3:%.*]] = getelementptr inbounds i64, i64* [[P]], i64 3
+; CHECK-NEXT:    store atomic i64 3, i64* [[BASE64_2]] unordered, align 8
+; CHECK-NEXT:    store i64 3, i64* [[BASE64_3]], align 8
+; CHECK-NEXT:    ret void
+;
+entry:
+
+  %base0 = bitcast i64* %P to i8*
+  %mybase0 = getelementptr inbounds i8, i8* %base0, i64 0
+  tail call void @llvm.memset.element.unordered.atomic.p0i8.i64(i8* align 8 %mybase0, i8 0, i64 32, i32 8)
+
+  %base64_2 = getelementptr inbounds i64, i64* %P, i64 2
+  %base64_3 = getelementptr inbounds i64, i64* %P, i64 3
+
+  store atomic i64 3, i64* %base64_2 unordered, align 8
+  store i64 3, i64* %base64_3, align 8
+  ret void
+}
diff --git a/llvm/test/Transforms/DeadStoreElimination/memintrinsics.ll b/llvm/test/Transforms/DeadStoreElimination/memintrinsics.ll
index 5c6af102f64..00d85f4460d 100644
--- a/llvm/test/Transforms/DeadStoreElimination/memintrinsics.ll
+++ b/llvm/test/Transforms/DeadStoreElimination/memintrinsics.ll
@@ -46,3 +46,62 @@ define void @test3() {
 
   ret void
 }
+
+declare void @llvm.memcpy.element.unordered.atomic.p0i16.p0i16.i16(i16* nocapture, i16* nocapture, i16, i32) nounwind
+declare void @llvm.memmove.element.unordered.atomic.p0i16.p0i16.i16(i16* nocapture, i16* nocapture, i16, i32) nounwind
+declare void @llvm.memset.element.unordered.atomic.p0i16.i16(i16* nocapture, i8, i16, i32) nounwind
+
+
+define void @test4() {
+; CHECK-LABEL: @test4(
+; CHECK-NEXT:    [[A:%.*]] = alloca i16, i16 1024, align 2
+; CHECK-NEXT:    [[B:%.*]] = alloca i16, i16 1024, align 2
+; CHECK-NEXT:    store atomic i16 0, i16* [[B]] unordered, align 2
+; CHECK-NEXT:    call void @llvm.memcpy.element.unordered.atomic.p0i16.p0i16.i16(i16* align 2 [[A]], i16* align 2 [[B]], i16 1024, i32 2)
+; CHECK-NEXT:    ret void
+;
+  %A = alloca i16, i16 1024, align 2
+  %B = alloca i16, i16 1024, align 2
+
+  store atomic i16 0, i16* %A unordered, align 2 ;; Written to by memcpy
+  store atomic i16 0, i16* %B unordered, align 2 ;; Read by memcpy
+
+  call void @llvm.memcpy.element.unordered.atomic.p0i16.p0i16.i16(i16* align 2 %A, i16* align 2 %B, i16 1024, i32 2)
+
+  ret void
+}
+
+define void @test5() {
+; CHECK-LABEL: @test5(
+; CHECK-NEXT:    [[A:%.*]] = alloca i16, i16 1024, align 2
+; CHECK-NEXT:    [[B:%.*]] = alloca i16, i16 1024, align 2
+; CHECK-NEXT:    store atomic i16 0, i16* [[B]] unordered, align 2
+; CHECK-NEXT:    call void @llvm.memmove.element.unordered.atomic.p0i16.p0i16.i16(i16* align 2 [[A]], i16* align 2 [[B]], i16 1024, i32 2)
+; CHECK-NEXT:    ret void
+;
+  %A = alloca i16, i16 1024, align 2
+  %B = alloca i16, i16 1024, align 2
+
+  store atomic i16 0, i16* %A unordered, align 2 ;; Written to by memmove
+  store atomic i16 0, i16* %B unordered, align 2 ;; Read by memmove
+
+  call void @llvm.memmove.element.unordered.atomic.p0i16.p0i16.i16(i16* align 2 %A, i16* align 2 %B, i16 1024, i32 2)
+
+  ret void
+}
+
+define void @test6() {
+; CHECK-LABEL: @test6(
+; CHECK-NEXT:    [[A:%.*]] = alloca i16, i16 1024, align 2
+; CHECK-NEXT:    call void @llvm.memset.element.unordered.atomic.p0i16.i16(i16* align 2 [[A]], i8 0, i16 1024, i32 2)
+; CHECK-NEXT:    ret void
+;
+  %A = alloca i16, i16 1024, align 2
+  %B = alloca i16, i16 1024, align 2
+
+  store atomic i16 0, i16* %A unordered, align 2 ;; Written to by memset
+
+  call void @llvm.memset.element.unordered.atomic.p0i16.i16(i16* align 2 %A, i8 0, i16 1024, i32 2)
+
+  ret void
+}
diff --git a/llvm/test/Transforms/DeadStoreElimination/simple.ll b/llvm/test/Transforms/DeadStoreElimination/simple.ll
index c8421904fa4..585b7ca28a9 100644
--- a/llvm/test/Transforms/DeadStoreElimination/simple.ll
+++ b/llvm/test/Transforms/DeadStoreElimination/simple.ll
@@ -4,7 +4,9 @@
 target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128"
 
 declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i1) nounwind
+declare void @llvm.memset.element.unordered.atomic.p0i8.i64(i8* nocapture, i8, i64, i32) nounwind
 declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i1) nounwind
+declare void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32) nounwind
 declare void @llvm.init.trampoline(i8*, i8*, i8*)
 
 define void @test1(i32* %Q, i32* %P) {
@@ -86,6 +88,21 @@ define void @test6(i32 *%p, i8 *%q) {
   ret void
 }
 
+; Should delete store of 10 even though memset is a may-store to P (P and Q may
+; alias).
+define void @test6_atomic(i32* align 4 %p, i8* align 4 %q) {
+; CHECK-LABEL: @test6_atomic(
+; CHECK-NEXT:    store atomic i32 10, i32* [[P:%.*]] unordered, align 4
+; CHECK-NEXT:    call void @llvm.memset.element.unordered.atomic.p0i8.i64(i8* align 4 [[Q:%.*]], i8 42, i64 900, i32 4)
+; CHECK-NEXT:    store atomic i32 30, i32* [[P]] unordered, align 4
+; CHECK-NEXT:    ret void
+;
+  store atomic i32 10, i32* %p unordered, align 4 ;; dead.
+  call void @llvm.memset.element.unordered.atomic.p0i8.i64(i8* align 4 %q, i8 42, i64 900, i32 4)
+  store atomic i32 30, i32* %p unordered, align 4
+  ret void
+}
+
 ; Should delete store of 10 even though memcpy is a may-store to P (P and Q may
 ; alias).
 define void @test7(i32 *%p, i8 *%q, i8* noalias %r) {
@@ -100,6 +117,21 @@ define void @test7(i32 *%p, i8 *%q, i8* noalias %r) {
   ret void
 }
 
+; Should delete store of 10 even though memcpy is a may-store to P (P and Q may
+; alias).
+define void @test7_atomic(i32* align 4 %p, i8* align 4 %q, i8* noalias align 4 %r) {
+; CHECK-LABEL: @test7_atomic(
+; CHECK-NEXT:    store atomic i32 10, i32* [[P:%.*]] unordered, align 4
+; CHECK-NEXT:    call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 4 [[Q:%.*]], i8* align 4 [[R:%.*]], i64 900, i32 4)
+; CHECK-NEXT:    store atomic i32 30, i32* [[P]] unordered, align 4
+; CHECK-NEXT:    ret void
+;
+  store atomic i32 10, i32* %p unordered, align 4 ;; dead.
+  call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 4 %q, i8* align 4 %r, i64 900, i32 4)
+  store atomic i32 30, i32* %p unordered, align 4
+  ret void
+}
+
 ; Do not delete stores that are only partially killed.
 define i32 @test8() {
 ; CHECK-LABEL: @test8(
@@ -256,6 +288,42 @@ define void @test15(i8* %P, i8* %Q) nounwind ssp {
   ret void
 }
 
+;; Fully dead overwrite of memcpy.
+define void @test15_atomic(i8* %P, i8* %Q) nounwind ssp {
+; CHECK-LABEL: @test15_atomic(
+; CHECK-NEXT:    tail call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 [[P:%.*]], i8* align 1 [[Q:%.*]], i64 12, i32 1)
+; CHECK-NEXT:    tail call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 [[P]], i8* align 1 [[Q]], i64 12, i32 1)
+; CHECK-NEXT:    ret void
+;
+  tail call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 %P, i8* align 1 %Q, i64 12, i32 1)
+  tail call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 %P, i8* align 1 %Q, i64 12, i32 1)
+  ret void
+}
+
+; It would only be valid to remove the non-atomic memcpy
+define void @test15_atomic_weaker(i8* %P, i8* %Q) nounwind ssp {
+; CHECK-LABEL: @test15_atomic_weaker(
+; CHECK-NEXT:    tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 [[P:%.*]], i8* align 1 [[Q:%.*]], i64 12, i1 false)
+; CHECK-NEXT:    tail call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 [[P]], i8* align 1 [[Q]], i64 12, i32 1)
+; CHECK-NEXT:    ret void
+;
+  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 %P, i8* align 1 %Q, i64 12, i1 false)
+  tail call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 %P, i8* align 1 %Q, i64 12, i32 1)
+  ret void
+}
+
+; It would only be valid to remove the non-atomic memcpy
+define void @test15_atomic_weaker_2(i8* %P, i8* %Q) nounwind ssp {
+; CHECK-LABEL: @test15_atomic_weaker_2(
+; CHECK-NEXT:    tail call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 [[P:%.*]], i8* align 1 [[Q:%.*]], i64 12, i32 1)
+; CHECK-NEXT:    tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 [[P]], i8* align 1 [[Q]], i64 12, i1 false)
+; CHECK-NEXT:    ret void
+;
+  tail call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 %P, i8* align 1 %Q, i64 12, i32 1)
+  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 %P, i8* align 1 %Q, i64 12, i1 false)
+  ret void
+}
+
 ;; Full overwrite of smaller memcpy.
 define void @test16(i8* %P, i8* %Q) nounwind ssp {
 ; CHECK-LABEL: @test16(
@@ -267,6 +335,42 @@ define void @test16(i8* %P, i8* %Q) nounwind ssp {
   ret void
 }
 
+;; Full overwrite of smaller memcpy.
+define void @test16_atomic(i8* %P, i8* %Q) nounwind ssp {
+; CHECK-LABEL: @test16_atomic(
+; CHECK-NEXT:    tail call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 [[P:%.*]], i8* align 1 [[Q:%.*]], i64 8, i32 1)
+; CHECK-NEXT:    tail call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 [[P]], i8* align 1 [[Q]], i64 12, i32 1)
+; CHECK-NEXT:    ret void
+;
+  tail call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 %P, i8* align 1 %Q, i64 8, i32 1)
+  tail call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 %P, i8* align 1 %Q, i64 12, i32 1)
+  ret void
+}
+
+;; Full overwrite of a smaller memcpy, where the overwrite has stronger atomicity.
+define void @test16_atomic_weaker(i8* %P, i8* %Q) nounwind ssp {
+; CHECK-LABEL: @test16_atomic_weaker(
+; CHECK-NEXT:    tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 [[P:%.*]], i8* align 1 [[Q:%.*]], i64 8, i1 false)
+; CHECK-NEXT:    tail call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 [[P]], i8* align 1 [[Q]], i64 12, i32 1)
+; CHECK-NEXT:    ret void
+;
+  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 %P, i8* align 1 %Q, i64 8, i1 false)
+  tail call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 %P, i8* align 1 %Q, i64 12, i32 1)
+  ret void
+}
+
+;; Full overwrite of a smaller memcpy, where the overwrite has weaker atomicity.
+define void @test16_atomic_weaker_2(i8* %P, i8* %Q) nounwind ssp {
+; CHECK-LABEL: @test16_atomic_weaker_2(
+; CHECK-NEXT:    tail call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 [[P:%.*]], i8* align 1 [[Q:%.*]], i64 8, i32 1)
+; CHECK-NEXT:    tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 [[P]], i8* align 1 [[Q]], i64 12, i1 false)
+; CHECK-NEXT:    ret void
+;
+  tail call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 %P, i8* align 1 %Q, i64 8, i32 1)
+  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 %P, i8* align 1 %Q, i64 12, i1 false)
+  ret void
+}
+
 ;; Overwrite of memset by memcpy.
 define void @test17(i8* %P, i8* noalias %Q) nounwind ssp {
 ; CHECK-LABEL: @test17(
@@ -278,6 +382,44 @@ define void @test17(i8* %P, i8* noalias %Q) nounwind ssp {
   ret void
 }
 
+;; Overwrite of memset by memcpy.
+define void @test17_atomic(i8* %P, i8* noalias %Q) nounwind ssp {
+; CHECK-LABEL: @test17_atomic(
+; CHECK-NEXT:    tail call void @llvm.memset.element.unordered.atomic.p0i8.i64(i8* align 1 [[P:%.*]], i8 42, i64 8, i32 1)
+; CHECK-NEXT:    tail call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 [[P]], i8* align 1 [[Q:%.*]], i64 12, i32 1)
+; CHECK-NEXT:    ret void
+;
+  tail call void @llvm.memset.element.unordered.atomic.p0i8.i64(i8* align 1 %P, i8 42, i64 8, i32 1)
+  tail call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 %P, i8* align 1 %Q, i64 12, i32 1)
+  ret void
+}
+
+;; Overwrite of memset by memcpy. The overwrite has stronger atomicity. We can
+;; remove the memset.
+define void @test17_atomic_weaker(i8* %P, i8* noalias %Q) nounwind ssp {
+; CHECK-LABEL: @test17_atomic_weaker(
+; CHECK-NEXT:    tail call void @llvm.memset.p0i8.i64(i8* align 1 [[P:%.*]], i8 42, i64 8, i1 false)
+; CHECK-NEXT:    tail call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 [[P]], i8* align 1 [[Q:%.*]], i64 12, i32 1)
+; CHECK-NEXT:    ret void
+;
+  tail call void @llvm.memset.p0i8.i64(i8* align 1 %P, i8 42, i64 8, i1 false)
+  tail call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 %P, i8* align 1 %Q, i64 12, i32 1)
+  ret void
+}
+
+;; Overwrite of memset by memcpy. The overwrite has weaker atomicity. We can
+;; remove the memset.
+define void @test17_atomic_weaker_2(i8* %P, i8* noalias %Q) nounwind ssp {
+; CHECK-LABEL: @test17_atomic_weaker_2(
+; CHECK-NEXT:    tail call void @llvm.memset.element.unordered.atomic.p0i8.i64(i8* align 1 [[P:%.*]], i8 42, i64 8, i32 1)
+; CHECK-NEXT:    tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 [[P]], i8* align 1 [[Q:%.*]], i64 12, i1 false)
+; CHECK-NEXT:    ret void
+;
+  tail call void @llvm.memset.element.unordered.atomic.p0i8.i64(i8* align 1 %P, i8 42, i64 8, i32 1)
+  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 %P, i8* align 1 %Q, i64 12, i1 false)
+  ret void
+}
+
 ; Should not delete the volatile memset.
 define void @test17v(i8* %P, i8* %Q) nounwind ssp {
 ; CHECK-LABEL: @test17v(
@@ -308,6 +450,17 @@ define void @test18(i8* %P, i8* %Q, i8* %R) nounwind ssp {
   ret void
 }
 
+define void @test18_atomic(i8* %P, i8* %Q, i8* %R) nounwind ssp {
+; CHECK-LABEL: @test18_atomic(
+; CHECK-NEXT:    tail call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 [[P:%.*]], i8* align 1 [[Q:%.*]], i64 12, i32 1)
+; CHECK-NEXT:    tail call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 [[P]], i8* align 1 [[R:%.*]], i64 12, i32 1)
+; CHECK-NEXT:    ret void
+;
+  tail call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 %P, i8* align 1 %Q, i64 12, i32 1)
+  tail call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 %P, i8* align 1 %R, i64 12, i32 1)
+  ret void
+}
+
 ; The store here is not dead because the byval call reads it.
 declare void @test19f({i32}* byval align 4 %P)
 
@@ -666,6 +819,18 @@ define void @test36(i8* %P, i8* %Q) {
   ret void
 }
 
+define void @test36_atomic(i8* %P, i8* %Q) {
+; CHECK-LABEL: @test36_atomic(
+; CHECK-NEXT:    tail call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 [[P:%.*]], i8* align 1 [[Q:%.*]], i64 12, i32 1)
+; CHECK-NEXT:    tail call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 [[P]], i8* align 1 [[Q]], i64 12, i32 1)
+; CHECK-NEXT:    ret void
+;
+
+  tail call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 %P, i8* align 1 %Q, i64 12, i32 1)
+  tail call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 %P, i8* align 1 %Q, i64 12, i32 1)
+  ret void
+}
+
 define void @test37(i8* %P, i8* %Q, i8* %R) {
 ; CHECK-LABEL: @test37(
 ; CHECK-NEXT:    tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[P:%.*]], i8* [[Q:%.*]], i64 12, i1 false)
@@ -678,6 +843,18 @@ define void @test37(i8* %P, i8* %Q, i8* %R) {
   ret void
 }
 
+define void @test37_atomic(i8* %P, i8* %Q, i8* %R) {
+; CHECK-LABEL: @test37_atomic(
+; CHECK-NEXT:    tail call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 [[P:%.*]], i8* align 1 [[Q:%.*]], i64 12, i32 1)
+; CHECK-NEXT:    tail call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 [[P]], i8* align 1 [[R:%.*]], i64 12, i32 1)
+; CHECK-NEXT:    ret void
+;
+
+  tail call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 %P, i8* align 1 %Q, i64 12, i32 1)
+  tail call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 %P, i8* align 1 %R, i64 12, i32 1)
+  ret void
+}
+
 ; Same caveat about memcpy as in @test18 applies here.
 define void @test38(i8* %P, i8* %Q, i8* %R) {
 ; CHECK-LABEL: @test38(
@@ -691,6 +868,18 @@ define void @test38(i8* %P, i8* %Q, i8* %R) {
   ret void
 }
 
+define void @test38_atomic(i8* %P, i8* %Q, i8* %R) {
+; CHECK-LABEL: @test38_atomic(
+; CHECK-NEXT:    tail call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 [[P:%.*]], i8* align 1 [[Q:%.*]], i64 12, i32 1)
+; CHECK-NEXT:    tail call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 [[P]], i8* align 1 [[R:%.*]], i64 12, i32 1)
+; CHECK-NEXT:    ret void
+;
+
+  tail call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 %P, i8* align 1 %Q, i64 12, i32 1)
+  tail call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 %P, i8* align 1 %R, i64 12, i32 1)
+  ret void
+}
+
 define void @test39(i8* %P, i8* %Q, i8* %R) {
 ; CHECK-LABEL: @test39(
 ; CHECK-NEXT:    tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[P:%.*]], i8* [[Q:%.*]], i64 12, i1 false)
@@ -703,4 +892,17 @@ define void @test39(i8* %P, i8* %Q, i8* %R) {
   ret void
 }
 
+define void @test39_atomic(i8* %P, i8* %Q, i8* %R) {
+; CHECK-LABEL: @test39_atomic(
+; CHECK-NEXT:    tail call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 [[P:%.*]], i8* align 1 [[Q:%.*]], i64 12, i32 1)
+; CHECK-NEXT:    tail call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 [[P]], i8* align 1 [[R:%.*]], i64 8, i32 1)
+; CHECK-NEXT:    ret void
+;
+
+  tail call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 %P, i8* align 1 %Q, i64 12, i32 1)
+  tail call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 %P, i8* align 1 %R, i64 8, i32 1)
+  ret void
+}
+
 declare void @llvm.memmove.p0i8.p0i8.i64(i8* nocapture, i8* nocapture readonly, i64, i1)
+declare void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i64(i8* nocapture, i8* nocapture readonly, i64, i32)
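These files are lit/FileCheck regression tests; each file's RUN line (outside the hunks shown here) drives opt's DSE pass over the file and checks the output against the autogenerated CHECK lines (utils/update_test_checks.py format). A typical local invocation from an LLVM build tree, as a sketch assuming the standard layout:

./bin/llvm-lit -v ../llvm/test/Transforms/DeadStoreElimination/simple.ll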

