author	Quentin Colombet <quentin.colombet@gmail.com>	2019-02-07 21:27:23 +0000
committer	Quentin Colombet <quentin.colombet@gmail.com>	2019-02-07 21:27:23 +0000
commit	96f54de8ff5d357fadcf474823f70288561923bf (patch)
tree	5f458880c0e8823862215080eafd91f82ce0447e
parent	82bf8e82c96b79fcafea5d0026feab24a270e9af (diff)
[InstCombine] Optimize `atomicrmw <op>, 0` into `load atomic` when possible
This commit teaches InstCombine how to replace an atomicrmw operation with a simple load atomic. For a given `atomicrmw <op>`, this is possible when:

1. The ordering of that operation is compatible with a load (i.e., anything that doesn't have release semantics).
2. `<op>` does not modify the value being stored.

Differential Revision: https://reviews.llvm.org/D57854

llvm-svn: 353471
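For example, the first test case in the new atomicrmw.ll below shows the rewrite for a monotonic `add` of zero:

    ; before
    %res = atomicrmw add i32* %addr, i32 0 monotonic
    ; after
    %res = load atomic i32, i32* %addr monotonic, align 4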
-rw-r--r--	llvm/lib/Transforms/InstCombine/CMakeLists.txt	1
-rw-r--r--	llvm/lib/Transforms/InstCombine/InstCombineAtomicRMW.cpp	48
-rw-r--r--	llvm/lib/Transforms/InstCombine/InstCombineInternal.h	1
-rw-r--r--	llvm/test/Transforms/InstCombine/atomicrmw.ll	84
4 files changed, 134 insertions, 0 deletions
diff --git a/llvm/lib/Transforms/InstCombine/CMakeLists.txt b/llvm/lib/Transforms/InstCombine/CMakeLists.txt
index 8a3a58e9ecc..bba399bba35 100644
--- a/llvm/lib/Transforms/InstCombine/CMakeLists.txt
+++ b/llvm/lib/Transforms/InstCombine/CMakeLists.txt
@@ -5,6 +5,7 @@ add_public_tablegen_target(InstCombineTableGen)
add_llvm_library(LLVMInstCombine
InstructionCombining.cpp
InstCombineAddSub.cpp
+ InstCombineAtomicRMW.cpp
InstCombineAndOrXor.cpp
InstCombineCalls.cpp
InstCombineCasts.cpp
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAtomicRMW.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAtomicRMW.cpp
new file mode 100644
index 00000000000..86bbfb15986
--- /dev/null
+++ b/llvm/lib/Transforms/InstCombine/InstCombineAtomicRMW.cpp
@@ -0,0 +1,48 @@
+//===- InstCombineAtomicRMW.cpp -------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the visit functions for atomic rmw instructions.
+//
+//===----------------------------------------------------------------------===//
+#include "InstCombineInternal.h"
+#include "llvm/IR/Instructions.h"
+
+using namespace llvm;
+
+Instruction *InstCombiner::visitAtomicRMWInst(AtomicRMWInst &RMWI) {
+ switch (RMWI.getOperation()) {
+ default:
+ break;
+ case AtomicRMWInst::Add:
+ case AtomicRMWInst::Sub:
+ case AtomicRMWInst::Or:
+ // Replace atomicrmw <op> addr, 0 => load atomic addr.
+
+ // Volatile RMWs perform a load and a store; we cannot replace
+ // this with just a load.
+ if (RMWI.isVolatile())
+ break;
+
+ auto *CI = dyn_cast<ConstantInt>(RMWI.getValOperand());
+ if (!CI || !CI->isZero())
+ break;
+ // Check if the required ordering is compatible with an
+ // atomic load.
+ AtomicOrdering Ordering = RMWI.getOrdering();
+ assert(Ordering != AtomicOrdering::NotAtomic &&
+ Ordering != AtomicOrdering::Unordered &&
+ "AtomicRMWs don't make sense with Unordered or NotAtomic");
+ if (Ordering != AtomicOrdering::Acquire &&
+ Ordering != AtomicOrdering::Monotonic)
+ break;
+ LoadInst *Load = new LoadInst(RMWI.getType(), RMWI.getPointerOperand());
+ Load->setAtomic(Ordering, RMWI.getSyncScopeID());
+ return Load;
+ }
+ return nullptr;
+}
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
index 11993a46323..951e0e72e9e 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
+++ b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
@@ -401,6 +401,7 @@ public:
Instruction *visitFree(CallInst &FI);
Instruction *visitLoadInst(LoadInst &LI);
Instruction *visitStoreInst(StoreInst &SI);
+ Instruction *visitAtomicRMWInst(AtomicRMWInst &SI);
Instruction *visitBranchInst(BranchInst &BI);
Instruction *visitFenceInst(FenceInst &FI);
Instruction *visitSwitchInst(SwitchInst &SI);
diff --git a/llvm/test/Transforms/InstCombine/atomicrmw.ll b/llvm/test/Transforms/InstCombine/atomicrmw.ll
new file mode 100644
index 00000000000..e9474f1685c
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/atomicrmw.ll
@@ -0,0 +1,84 @@
+; RUN: opt -instcombine -S -o - %s | FileCheck %s
+; Check that we can replace `atomicrmw <op> LHS, 0` with `load atomic LHS`.
+; This is possible when:
+; - <op> LHS, 0 == LHS
+; - the ordering of atomicrmw is compatible with a load (i.e., no release semantic)
+
+; CHECK-LABEL: atomic_add_zero
+; CHECK-NEXT: %res = load atomic i32, i32* %addr monotonic, align 4
+; CHECK-NEXT: ret i32 %res
+define i32 @atomic_add_zero(i32* %addr) {
+ %res = atomicrmw add i32* %addr, i32 0 monotonic
+ ret i32 %res
+}
+
+; Don't transform volatile atomicrmw. Doing so would eliminate a volatile
+; store.
+; CHECK-LABEL: atomic_sub_zero_volatile
+; CHECK-NEXT: %res = atomicrmw volatile sub i64* %addr, i64 0 acquire
+; CHECK-NEXT: ret i64 %res
+define i64 @atomic_sub_zero_volatile(i64* %addr) {
+ %res = atomicrmw volatile sub i64* %addr, i64 0 acquire
+ ret i64 %res
+}
+
+
+; Check that the transformation properly preserves the syncscope.
+; CHECK-LABEL: atomic_or_zero
+; CHECK-NEXT: %res = load atomic i16, i16* %addr syncscope("some_syncscope") acquire, align 2
+; CHECK-NEXT: ret i16 %res
+define i16 @atomic_or_zero(i16* %addr) {
+ %res = atomicrmw or i16* %addr, i16 0 syncscope("some_syncscope") acquire
+ ret i16 %res
+}
+
+; Don't transform seq_cst ordering.
+; By eliminating the store part of the atomicrmw, we would get rid of the
+; release semantic, which is incorrect.
+; CHECK-LABEL: atomic_or_zero_seq_cst
+; CHECK-NEXT: %res = atomicrmw or i16* %addr, i16 0 seq_cst
+; CHECK-NEXT: ret i16 %res
+define i16 @atomic_or_zero_seq_cst(i16* %addr) {
+ %res = atomicrmw or i16* %addr, i16 0 seq_cst
+ ret i16 %res
+}
+
+; Check that the transformation does not apply when the value is changed by
+; the atomic operation (non zero constant).
+; CHECK-LABEL: atomic_or_non_zero
+; CHECK-NEXT: %res = atomicrmw or i16* %addr, i16 2 monotonic
+; CHECK-NEXT: ret i16 %res
+define i16 @atomic_or_non_zero(i16* %addr) {
+ %res = atomicrmw or i16* %addr, i16 2 monotonic
+ ret i16 %res
+}
+
+; Check that the transformation does not apply when the value is changed by
+; the atomic operation (xor operation with zero).
+; CHECK-LABEL: atomic_xor_zero
+; CHECK-NEXT: %res = atomicrmw xor i16* %addr, i16 0 monotonic
+; CHECK-NEXT: ret i16 %res
+define i16 @atomic_xor_zero(i16* %addr) {
+ %res = atomicrmw xor i16* %addr, i16 0 monotonic
+ ret i16 %res
+}
+
+; Check that the transformation does not apply when the ordering is
+; incompatible with a load (release).
+; CHECK-LABEL: atomic_or_zero_release
+; CHECK-NEXT: %res = atomicrmw or i16* %addr, i16 0 release
+; CHECK-NEXT: ret i16 %res
+define i16 @atomic_or_zero_release(i16* %addr) {
+ %res = atomicrmw or i16* %addr, i16 0 release
+ ret i16 %res
+}
+
+; Check that the transformation does not apply when the ordering is
+; incompatible with a load (acquire, release).
+; CHECK-LABEL: atomic_or_zero_acq_rel
+; CHECK-NEXT: %res = atomicrmw or i16* %addr, i16 0 acq_rel
+; CHECK-NEXT: ret i16 %res
+define i16 @atomic_or_zero_acq_rel(i16* %addr) {
+ %res = atomicrmw or i16* %addr, i16 0 acq_rel
+ ret i16 %res
+}