diff options
| author | Guillaume Chatelet <gchatelet@google.com> | 2019-07-09 09:53:36 +0000 |
|---|---|---|
| committer | Guillaume Chatelet <gchatelet@google.com> | 2019-07-09 09:53:36 +0000 |
| commit | 336f3e16011cfbe2cdc023aba6efdb7f55a8e343 (patch) | |
| tree | 783603b38e767ae769c609a6160f295a8c7d5d17 | |
| parent | 9bebc65d7965e12f83ea1ca4d8a0cdb2e079a45b (diff) | |
| download | bcm5719-llvm-336f3e16011cfbe2cdc023aba6efdb7f55a8e343.tar.gz bcm5719-llvm-336f3e16011cfbe2cdc023aba6efdb7f55a8e343.zip | |
Fixing @llvm.memcpy not honoring volatile.
This is explicitly not addressing target-specific code, or calls to memcpy.
Summary: https://bugs.llvm.org/show_bug.cgi?id=42254
Reviewers: courbet
Subscribers: hiraditya, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D63215
llvm-svn: 365449
| -rw-r--r-- | llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 43 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/volatile-memstores-nooverlapping-load-stores.ll | 83 |
2 files changed, 107 insertions, 19 deletions
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index 93a2fe9515d..5852e693fa9 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -5775,6 +5775,7 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo) { // Turn a memcpy of undef to nop. + // FIXME: We need to honor volatile even is Src is undef. if (Src.isUndef()) return Chain; @@ -5801,13 +5802,12 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl, bool isZeroConstant = CopyFromConstant && Slice.Array == nullptr; unsigned Limit = AlwaysInline ? ~0U : TLI.getMaxStoresPerMemcpy(OptSize); - if (!TLI.findOptimalMemOpLowering(MemOps, Limit, Size, - (DstAlignCanChange ? 0 : Align), - (isZeroConstant ? 0 : SrcAlign), - false, false, CopyFromConstant, true, - DstPtrInfo.getAddrSpace(), - SrcPtrInfo.getAddrSpace(), - MF.getFunction().getAttributes())) + if (!TLI.findOptimalMemOpLowering( + MemOps, Limit, Size, (DstAlignCanChange ? 0 : Align), + (isZeroConstant ? 0 : SrcAlign), /*IsMemset=*/false, + /*ZeroMemset=*/false, /*MemcpyStrSrc=*/CopyFromConstant, + /*AllowOverlap=*/!isVol, DstPtrInfo.getAddrSpace(), + SrcPtrInfo.getAddrSpace(), MF.getFunction().getAttributes())) return SDValue(); if (DstAlignCanChange) { @@ -5961,6 +5961,7 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo) { // Turn a memmove of undef to nop. + // FIXME: We need to honor volatile even is Src is undef. if (Src.isUndef()) return Chain; @@ -5981,13 +5982,15 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl, if (Align > SrcAlign) SrcAlign = Align; unsigned Limit = AlwaysInline ? ~0U : TLI.getMaxStoresPerMemmove(OptSize); - - if (!TLI.findOptimalMemOpLowering(MemOps, Limit, Size, - (DstAlignCanChange ? 0 : Align), SrcAlign, - false, false, false, false, - DstPtrInfo.getAddrSpace(), - SrcPtrInfo.getAddrSpace(), - MF.getFunction().getAttributes())) + // FIXME: `AllowOverlap` should really be `!isVol` but there is a bug in + // findOptimalMemOpLowering. Meanwhile, setting it to `false` produces the + // correct code. + bool AllowOverlap = false; + if (!TLI.findOptimalMemOpLowering( + MemOps, Limit, Size, (DstAlignCanChange ? 0 : Align), SrcAlign, + /*IsMemset=*/false, /*ZeroMemset=*/false, /*MemcpyStrSrc=*/false, + AllowOverlap, DstPtrInfo.getAddrSpace(), SrcPtrInfo.getAddrSpace(), + MF.getFunction().getAttributes())) return SDValue(); if (DstAlignCanChange) { @@ -6066,6 +6069,7 @@ static SDValue getMemsetStores(SelectionDAG &DAG, const SDLoc &dl, uint64_t Size, unsigned Align, bool isVol, MachinePointerInfo DstPtrInfo) { // Turn a memset of undef to nop. + // FIXME: We need to honor volatile even is Src is undef. if (Src.isUndef()) return Chain; @@ -6082,11 +6086,12 @@ static SDValue getMemsetStores(SelectionDAG &DAG, const SDLoc &dl, DstAlignCanChange = true; bool IsZeroVal = isa<ConstantSDNode>(Src) && cast<ConstantSDNode>(Src)->isNullValue(); - if (!TLI.findOptimalMemOpLowering(MemOps, TLI.getMaxStoresPerMemset(OptSize), - Size, (DstAlignCanChange ? 0 : Align), 0, - true, IsZeroVal, false, true, - DstPtrInfo.getAddrSpace(), ~0u, - MF.getFunction().getAttributes())) + if (!TLI.findOptimalMemOpLowering( + MemOps, TLI.getMaxStoresPerMemset(OptSize), Size, + (DstAlignCanChange ? 0 : Align), 0, /*IsMemset=*/true, + /*ZeroMemset=*/IsZeroVal, /*MemcpyStrSrc=*/false, + /*AllowOverlap=*/!isVol, DstPtrInfo.getAddrSpace(), ~0u, + MF.getFunction().getAttributes())) return SDValue(); if (DstAlignCanChange) { diff --git a/llvm/test/CodeGen/X86/volatile-memstores-nooverlapping-load-stores.ll b/llvm/test/CodeGen/X86/volatile-memstores-nooverlapping-load-stores.ll new file mode 100644 index 00000000000..aad332489fb --- /dev/null +++ b/llvm/test/CodeGen/X86/volatile-memstores-nooverlapping-load-stores.ll @@ -0,0 +1,83 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu | FileCheck %s + + +declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture writeonly, i8* nocapture readonly, i64, i1 immarg) #1 +define dso_local void @copy_7_bytes(i8* noalias nocapture, i8* noalias nocapture readonly) nounwind #0 { +; CHECK-LABEL: copy_7_bytes: +; CHECK: # %bb.0: +; CHECK-NEXT: movl (%rsi), %eax +; CHECK-NEXT: movl 3(%rsi), %ecx +; CHECK-NEXT: movl %ecx, 3(%rdi) +; CHECK-NEXT: movl %eax, (%rdi) +; CHECK-NEXT: retq + tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 %0, i8* align 1 %1, i64 7, i1 false) + ret void +} +define dso_local void @copy_7_bytes_volatile(i8* noalias nocapture, i8* noalias nocapture readonly) nounwind #0 { +; CHECK-LABEL: copy_7_bytes_volatile: +; CHECK: # %bb.0: +; CHECK-NEXT: movb 6(%rsi), %al +; CHECK-NEXT: movb %al, 6(%rdi) +; CHECK-NEXT: movzwl 4(%rsi), %eax +; CHECK-NEXT: movw %ax, 4(%rdi) +; CHECK-NEXT: movl (%rsi), %eax +; CHECK-NEXT: movl %eax, (%rdi) +; CHECK-NEXT: retq + tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 %0, i8* align 1 %1, i64 7, i1 true) + ret void +} + + +declare void @llvm.memmove.p0i8.p0i8.i64(i8* nocapture, i8* nocapture readonly, i64, i1 immarg) #1 +define dso_local void @move_7_bytes(i8* nocapture, i8* nocapture readonly) nounwind #0 { +; CHECK-LABEL: move_7_bytes: +; CHECK: # %bb.0: +; CHECK-NEXT: movl (%rsi), %eax +; CHECK-NEXT: movzwl 4(%rsi), %ecx +; CHECK-NEXT: movb 6(%rsi), %dl +; CHECK-NEXT: movb %dl, 6(%rdi) +; CHECK-NEXT: movw %cx, 4(%rdi) +; CHECK-NEXT: movl %eax, (%rdi) +; CHECK-NEXT: retq + tail call void @llvm.memmove.p0i8.p0i8.i64(i8* align 1 %0, i8* align 1 %1, i64 7, i1 false) + ret void +} +define dso_local void @move_7_bytes_volatile(i8* nocapture, i8* nocapture readonly) nounwind #0 { +; CHECK-LABEL: move_7_bytes_volatile: +; CHECK: # %bb.0: +; CHECK-NEXT: movl (%rsi), %eax +; CHECK-NEXT: movzwl 4(%rsi), %ecx +; CHECK-NEXT: movb 6(%rsi), %dl +; CHECK-NEXT: movb %dl, 6(%rdi) +; CHECK-NEXT: movw %cx, 4(%rdi) +; CHECK-NEXT: movl %eax, (%rdi) +; CHECK-NEXT: retq + tail call void @llvm.memmove.p0i8.p0i8.i64(i8* align 1 %0, i8* align 1 %1, i64 7, i1 true) + ret void +} + + +declare void @llvm.memset.p0i8.i64(i8* nocapture writeonly, i8 , i64, i1 immarg) #1 +define dso_local void @set_7_bytes(i8* noalias nocapture) nounwind #0 { +; CHECK-LABEL: set_7_bytes: +; CHECK: # %bb.0: +; CHECK-NEXT: movl $16843009, 3(%rdi) # imm = 0x1010101 +; CHECK-NEXT: movl $16843009, (%rdi) # imm = 0x1010101 +; CHECK-NEXT: retq + tail call void @llvm.memset.p0i8.i64(i8* align 1 %0, i8 1, i64 7, i1 false) + ret void +} +define dso_local void @set_7_bytes_volatile(i8* noalias nocapture) nounwind #0 { +; CHECK-LABEL: set_7_bytes_volatile: +; CHECK: # %bb.0: +; CHECK-NEXT: movb $1, 6(%rdi) +; CHECK-NEXT: movw $257, 4(%rdi) # imm = 0x101 +; CHECK-NEXT: movl $16843009, (%rdi) # imm = 0x1010101 +; CHECK-NEXT: retq + tail call void @llvm.memset.p0i8.i64(i8* align 1 %0, i8 1, i64 7, i1 true) + ret void +} + +attributes #0 = { noreturn nounwind uwtable "target-cpu"="x86-64" } +attributes #1 = { argmemonly nounwind } |

