3 files changed, 745 insertions, 0 deletions
diff --git a/llvm/test/CodeGen/WebAssembly/atomic-mem-consistency.ll b/llvm/test/CodeGen/WebAssembly/atomic-mem-consistency.ll
index 837569b4cde..356982c3ae9 100644
--- a/llvm/test/CodeGen/WebAssembly/atomic-mem-consistency.ll
+++ b/llvm/test/CodeGen/WebAssembly/atomic-mem-consistency.ll
@@ -139,3 +139,109 @@ define i32 @add_i32_seq_cst(i32* %p, i32 %v) {
   %old = atomicrmw add i32* %p, i32 %v seq_cst
   ret i32 %old
 }
+
+; Ternary RMW instruction: cmpxchg
+; The success and failure ordering arguments specify how this cmpxchg
+; synchronizes with other atomic operations. Both ordering parameters must be at
+; least monotonic, the ordering constraint on failure must be no stronger than
+; that on success, and the failure ordering cannot be either release or acq_rel.
+
+; CHECK-LABEL: cmpxchg_i32_monotonic_monotonic:
+; CHECK-NEXT: .param i32, i32, i32{{$}}
+; CHECK: i32.atomic.rmw.cmpxchg $push0=, 0($0), $1, $2{{$}}
+; CHECK-NEXT: return $pop0{{$}}
+define i32 @cmpxchg_i32_monotonic_monotonic(i32* %p, i32 %exp, i32 %new) {
+  %pair = cmpxchg i32* %p, i32 %exp, i32 %new monotonic monotonic
+  %old = extractvalue { i32, i1 } %pair, 0
+  ret i32 %old
+}
+
+; CHECK-LABEL: cmpxchg_i32_acquire_monotonic:
+; CHECK-NEXT: .param i32, i32, i32{{$}}
+; CHECK: i32.atomic.rmw.cmpxchg $push0=, 0($0), $1, $2{{$}}
+; CHECK-NEXT: return $pop0{{$}}
+define i32 @cmpxchg_i32_acquire_monotonic(i32* %p, i32 %exp, i32 %new) {
+  %pair = cmpxchg i32* %p, i32 %exp, i32 %new acquire monotonic
+  %old = extractvalue { i32, i1 } %pair, 0
+  ret i32 %old
+}
+
+; CHECK-LABEL: cmpxchg_i32_release_monotonic:
+; CHECK-NEXT: .param i32, i32, i32{{$}}
+; CHECK: i32.atomic.rmw.cmpxchg $push0=, 0($0), $1, $2{{$}}
+; CHECK-NEXT: return $pop0{{$}}
+define i32 @cmpxchg_i32_release_monotonic(i32* %p, i32 %exp, i32 %new) {
+  %pair = cmpxchg i32* %p, i32 %exp, i32 %new release monotonic
+  %old = extractvalue { i32, i1 } %pair, 0
+  ret i32 %old
+}
+
+; CHECK-LABEL: cmpxchg_i32_acq_rel_monotonic:
+; CHECK-NEXT: .param i32, i32, i32{{$}}
+; CHECK: i32.atomic.rmw.cmpxchg $push0=, 0($0), $1, $2{{$}}
+; CHECK-NEXT: return $pop0{{$}}
+define i32 @cmpxchg_i32_acq_rel_monotonic(i32* %p, i32 %exp, i32 %new) {
+  %pair = cmpxchg i32* %p, i32 %exp, i32 %new acq_rel monotonic
+  %old = extractvalue { i32, i1 } %pair, 0
+  ret i32 %old
+}
+
+; CHECK-LABEL: cmpxchg_i32_seq_cst_monotonic:
+; CHECK-NEXT: .param i32, i32, i32{{$}}
+; CHECK: i32.atomic.rmw.cmpxchg $push0=, 0($0), $1, $2{{$}}
+; CHECK-NEXT: return $pop0{{$}}
+define i32 @cmpxchg_i32_seq_cst_monotonic(i32* %p, i32 %exp, i32 %new) {
+  %pair = cmpxchg i32* %p, i32 %exp, i32 %new seq_cst monotonic
+  %old = extractvalue { i32, i1 } %pair, 0
+  ret i32 %old
+}
+
+; CHECK-LABEL: cmpxchg_i32_acquire_acquire:
+; CHECK-NEXT: .param i32, i32, i32{{$}}
+; CHECK: i32.atomic.rmw.cmpxchg $push0=, 0($0), $1, $2{{$}}
+; CHECK-NEXT: return $pop0{{$}}
+define i32 @cmpxchg_i32_acquire_acquire(i32* %p, i32 %exp, i32 %new) {
+  %pair = cmpxchg i32* %p, i32 %exp, i32 %new acquire acquire
+  %old = extractvalue { i32, i1 } %pair, 0
+  ret i32 %old
+}
+
+; CHECK-LABEL: cmpxchg_i32_release_acquire:
+; CHECK-NEXT: .param i32, i32, i32{{$}}
+; CHECK: i32.atomic.rmw.cmpxchg $push0=, 0($0), $1, $2{{$}}
+; CHECK-NEXT: return $pop0{{$}}
+define i32 @cmpxchg_i32_release_acquire(i32* %p, i32 %exp, i32 %new) {
+  %pair = cmpxchg i32* %p, i32 %exp, i32 %new release acquire
+  %old = extractvalue { i32, i1 } %pair, 0
+  ret i32 %old
+}
+
+; CHECK-LABEL: cmpxchg_i32_acq_rel_acquire:
+; CHECK-NEXT: .param i32, i32, i32{{$}}
+; CHECK: i32.atomic.rmw.cmpxchg $push0=, 0($0), $1, $2{{$}}
+; CHECK-NEXT: return $pop0{{$}}
+define i32 @cmpxchg_i32_acq_rel_acquire(i32* %p, i32 %exp, i32 %new) {
+  %pair = cmpxchg i32* %p, i32 %exp, i32 %new acq_rel acquire
+  %old = extractvalue { i32, i1 } %pair, 0
+  ret i32 %old
+}
+
+; CHECK-LABEL: cmpxchg_i32_seq_cst_acquire:
+; CHECK-NEXT: .param i32, i32, i32{{$}}
+; CHECK: i32.atomic.rmw.cmpxchg $push0=, 0($0), $1, $2{{$}}
+; CHECK-NEXT: return $pop0{{$}}
+define i32 @cmpxchg_i32_seq_cst_acquire(i32* %p, i32 %exp, i32 %new) {
+  %pair = cmpxchg i32* %p, i32 %exp, i32 %new seq_cst acquire
+  %old = extractvalue { i32, i1 } %pair, 0
+  ret i32 %old
+}
+
+; CHECK-LABEL: cmpxchg_i32_seq_cst_seq_cst:
+; CHECK-NEXT: .param i32, i32, i32{{$}}
+; CHECK: i32.atomic.rmw.cmpxchg $push0=, 0($0), $1, $2{{$}}
+; CHECK-NEXT: return $pop0{{$}}
+define i32 @cmpxchg_i32_seq_cst_seq_cst(i32* %p, i32 %exp, i32 %new) {
+  %pair = cmpxchg i32* %p, i32 %exp, i32 %new seq_cst seq_cst
+  %old = extractvalue { i32, i1 } %pair, 0
+  ret i32 %old
+}
diff --git a/llvm/test/CodeGen/WebAssembly/atomic-rmw.ll b/llvm/test/CodeGen/WebAssembly/atomic-rmw.ll
index 1a9b2e84aaa..3f138a2c002 100644
--- a/llvm/test/CodeGen/WebAssembly/atomic-rmw.ll
+++ b/llvm/test/CodeGen/WebAssembly/atomic-rmw.ll
@@ -64,6 +64,27 @@ define i32 @xchg_i32(i32* %p, i32 %v) {
   ret i32 %old
 }
 
+; CHECK-LABEL: cmpxchg_i32_loaded_value:
+; CHECK-NEXT: .param i32, i32, i32{{$}}
+; CHECK: i32.atomic.rmw.cmpxchg $push0=, 0($0), $1, $2{{$}}
+; CHECK-NEXT: return $pop0{{$}}
+define i32 @cmpxchg_i32_loaded_value(i32* %p, i32 %exp, i32 %new) {
+  %pair = cmpxchg i32* %p, i32 %exp, i32 %new seq_cst seq_cst
+  %old = extractvalue { i32, i1 } %pair, 0
+  ret i32 %old
+}
+
+; CHECK-LABEL: cmpxchg_i32_success:
+; CHECK-NEXT: .param i32, i32, i32{{$}}
+; CHECK: i32.atomic.rmw.cmpxchg $push0=, 0($0), $1, $2{{$}}
+; CHECK-NEXT: i32.eq $push1=, $pop0, $1{{$}}
+; CHECK-NEXT: return $pop1{{$}}
+define i1 @cmpxchg_i32_success(i32* %p, i32 %exp, i32 %new) {
+  %pair = cmpxchg i32* %p, i32 %exp, i32 %new seq_cst seq_cst
+  %succ = extractvalue { i32, i1 } %pair, 1
+  ret i1 %succ
+}
+
 ;===----------------------------------------------------------------------------
 ; Atomic read-modify-writes: 64-bit
 ;===----------------------------------------------------------------------------
@@ -122,6 +143,27 @@ define i64 @xchg_i64(i64* %p, i64 %v) {
   ret i64 %old
 }
 
+; CHECK-LABEL: cmpxchg_i64_loaded_value:
+; CHECK-NEXT: .param i32, i64, i64{{$}}
+; CHECK: i64.atomic.rmw.cmpxchg $push0=, 0($0), $1, $2{{$}}
+; CHECK-NEXT: return $pop0{{$}}
+define i64 @cmpxchg_i64_loaded_value(i64* %p, i64 %exp, i64 %new) {
+  %pair = cmpxchg i64* %p, i64 %exp, i64 %new seq_cst seq_cst
+  %old = extractvalue { i64, i1 } %pair, 0
+  ret i64 %old
+}
+
+; CHECK-LABEL: cmpxchg_i64_success:
+; CHECK-NEXT: .param i32, i64, i64{{$}}
+; CHECK: i64.atomic.rmw.cmpxchg $push0=, 0($0), $1, $2{{$}}
+; CHECK-NEXT: i64.eq $push1=, $pop0, $1{{$}}
+; CHECK-NEXT: return $pop1{{$}}
+define i1 @cmpxchg_i64_success(i64* %p, i64 %exp, i64 %new) {
+  %pair = cmpxchg i64* %p, i64 %exp, i64 %new seq_cst seq_cst
+  %succ = extractvalue { i64, i1 } %pair, 1
+  ret i1 %succ
+}
+
 ;===----------------------------------------------------------------------------
 ; Atomic truncating & sign-extending RMWs
 ;===----------------------------------------------------------------------------
@@ -510,6 +552,81 @@ define i64 @xchg_sext_i32_i64(i32* %p, i64 %v) {
   ret i64 %e
 }
 
+; cmpxchg
+
+; CHECK-LABEL: cmpxchg_sext_i8_i32:
+; CHECK-NEXT: .param i32, i32, i32{{$}}
+; CHECK: i32.atomic.rmw8_u.cmpxchg $push0=, 0($0), $1, $2{{$}}
+; CHECK-NEXT: i32.extend8_s $push1=, $pop0{{$}}
+; CHECK-NEXT: return $pop1{{$}}
+define i32 @cmpxchg_sext_i8_i32(i8* %p, i32 %exp, i32 %new) {
+  %exp_t = trunc i32 %exp to i8
+  %new_t = trunc i32 %new to i8
+  %pair = cmpxchg i8* %p, i8 %exp_t, i8 %new_t seq_cst seq_cst
+  %old = extractvalue { i8, i1 } %pair, 0
+  %e = sext i8 %old to i32
+  ret i32 %e
+}
+
+; CHECK-LABEL: cmpxchg_sext_i16_i32:
+; CHECK-NEXT: .param i32, i32, i32{{$}}
+; CHECK: i32.atomic.rmw16_u.cmpxchg $push0=, 0($0), $1, $2{{$}}
+; CHECK-NEXT: i32.extend16_s $push1=, $pop0{{$}}
+; CHECK-NEXT: return $pop1{{$}}
+define i32 @cmpxchg_sext_i16_i32(i16* %p, i32 %exp, i32 %new) {
+  %exp_t = trunc i32 %exp to i16
+  %new_t = trunc i32 %new to i16
+  %pair = cmpxchg i16* %p, i16 %exp_t, i16 %new_t seq_cst seq_cst
+  %old = extractvalue { i16, i1 } %pair, 0
+  %e = sext i16 %old to i32
+  ret i32 %e
+}
+
+; CHECK-LABEL: cmpxchg_sext_i8_i64:
+; CHECK-NEXT: .param i32, i64, i64{{$}}
+; CHECK: i64.atomic.rmw8_u.cmpxchg $push0=, 0($0), $1, $2{{$}}
+; CHECK-NEXT: i64.extend8_s $push1=, $pop0{{$}}
+; CHECK-NEXT: return $pop1{{$}}
+define i64 @cmpxchg_sext_i8_i64(i8* %p, i64 %exp, i64 %new) {
+  %exp_t = trunc i64 %exp to i8
+  %new_t = trunc i64 %new to i8
+  %pair = cmpxchg i8* %p, i8 %exp_t, i8 %new_t seq_cst seq_cst
+  %old = extractvalue { i8, i1 } %pair, 0
+  %e = sext i8 %old to i64
+  ret i64 %e
+}
+
+; CHECK-LABEL: cmpxchg_sext_i16_i64:
+; CHECK-NEXT: .param i32, i64, i64{{$}}
+; CHECK: i64.atomic.rmw16_u.cmpxchg $push0=, 0($0), $1, $2{{$}}
+; CHECK-NEXT: i64.extend16_s $push1=, $pop0{{$}}
+; CHECK-NEXT: return $pop1{{$}}
+define i64 @cmpxchg_sext_i16_i64(i16* %p, i64 %exp, i64 %new) {
+  %exp_t = trunc i64 %exp to i16
+  %new_t = trunc i64 %new to i16
+  %pair = cmpxchg i16* %p, i16 %exp_t, i16 %new_t seq_cst seq_cst
+  %old = extractvalue { i16, i1 } %pair, 0
+  %e = sext i16 %old to i64
+  ret i64 %e
+}
+
+; 32->64 sext rmw gets selected as i32.atomic.rmw.cmpxchg, i64_extend_s/i32
+; CHECK-LABEL: cmpxchg_sext_i32_i64:
+; CHECK-NEXT: .param i32, i64, i64{{$}}
+; CHECK: i32.wrap/i64 $push1=, $1{{$}}
+; CHECK-NEXT: i32.wrap/i64 $push0=, $2{{$}}
+; CHECK-NEXT: i32.atomic.rmw.cmpxchg $push2=, 0($0), $pop1, $pop0{{$}}
+; CHECK-NEXT: i64.extend_s/i32 $push3=, $pop2{{$}}
+; CHECK-NEXT: return $pop3{{$}}
+define i64 @cmpxchg_sext_i32_i64(i32* %p, i64 %exp, i64 %new) {
+  %exp_t = trunc i64 %exp to i32
+  %new_t = trunc i64 %new to i32
+  %pair = cmpxchg i32* %p, i32 %exp_t, i32 %new_t seq_cst seq_cst
+  %old = extractvalue { i32, i1 } %pair, 0
+  %e = sext i32 %old to i64
+  ret i64 %e
+}
+
 ;===----------------------------------------------------------------------------
 ; Atomic truncating & zero-extending RMWs
 ;===----------------------------------------------------------------------------
@@ -855,3 +972,70 @@ define i64 @xchg_zext_i32_i64(i32* %p, i64 %v) {
   %e = zext i32 %old to i64
   ret i64 %e
 }
+
+; cmpxchg
+
+; CHECK-LABEL: cmpxchg_zext_i8_i32:
+; CHECK-NEXT: .param i32, i32, i32{{$}}
+; CHECK: i32.atomic.rmw8_u.cmpxchg $push0=, 0($0), $1, $2{{$}}
+; CHECK-NEXT: return $pop0{{$}}
+define i32 @cmpxchg_zext_i8_i32(i8* %p, i32 %exp, i32 %new) {
+  %exp_t = trunc i32 %exp to i8
+  %new_t = trunc i32 %new to i8
+  %pair = cmpxchg i8* %p, i8 %exp_t, i8 %new_t seq_cst seq_cst
+  %old = extractvalue { i8, i1 } %pair, 0
+  %e = zext i8 %old to i32
+  ret i32 %e
+}
+
+; CHECK-LABEL: cmpxchg_zext_i16_i32:
+; CHECK-NEXT: .param i32, i32, i32{{$}}
+; CHECK: i32.atomic.rmw16_u.cmpxchg $push0=, 0($0), $1, $2{{$}}
+; CHECK-NEXT: return $pop0{{$}}
+define i32 @cmpxchg_zext_i16_i32(i16* %p, i32 %exp, i32 %new) {
+  %exp_t = trunc i32 %exp to i16
+  %new_t = trunc i32 %new to i16
+  %pair = cmpxchg i16* %p, i16 %exp_t, i16 %new_t seq_cst seq_cst
+  %old = extractvalue { i16, i1 } %pair, 0
+  %e = zext i16 %old to i32
+  ret i32 %e
+}
+
+; CHECK-LABEL: cmpxchg_zext_i8_i64:
+; CHECK-NEXT: .param i32, i64, i64{{$}}
+; CHECK: i64.atomic.rmw8_u.cmpxchg $push0=, 0($0), $1, $2{{$}}
+; CHECK-NEXT: return $pop0{{$}}
+define i64 @cmpxchg_zext_i8_i64(i8* %p, i64 %exp, i64 %new) {
+  %exp_t = trunc i64 %exp to i8
+  %new_t = trunc i64 %new to i8
+  %pair = cmpxchg i8* %p, i8 %exp_t, i8 %new_t seq_cst seq_cst
+  %old = extractvalue { i8, i1 } %pair, 0
+  %e = zext i8 %old to i64
+  ret i64 %e
+}
+
+; CHECK-LABEL: cmpxchg_zext_i16_i64:
+; CHECK-NEXT: .param i32, i64, i64{{$}}
+; CHECK: i64.atomic.rmw16_u.cmpxchg $push0=, 0($0), $1, $2{{$}}
+; CHECK-NEXT: return $pop0{{$}}
+define i64 @cmpxchg_zext_i16_i64(i16* %p, i64 %exp, i64 %new) {
+  %exp_t = trunc i64 %exp to i16
+  %new_t = trunc i64 %new to i16
+  %pair = cmpxchg i16* %p, i16 %exp_t, i16 %new_t seq_cst seq_cst
+  %old = extractvalue { i16, i1 } %pair, 0
+  %e = zext i16 %old to i64
+  ret i64 %e
+}
+
+; CHECK-LABEL: cmpxchg_zext_i32_i64:
+; CHECK-NEXT: .param i32, i64, i64{{$}}
+; CHECK: i64.atomic.rmw32_u.cmpxchg $push0=, 0($0), $1, $2{{$}}
+; CHECK-NEXT: return $pop0{{$}}
+define i64 @cmpxchg_zext_i32_i64(i32* %p, i64 %exp, i64 %new) {
+  %exp_t = trunc i64 %exp to i32
+  %new_t = trunc i64 %new to i32
+  %pair = cmpxchg i32* %p, i32 %exp_t, i32 %new_t seq_cst seq_cst
+  %old = extractvalue { i32, i1 } %pair, 0
+  %e = zext i32 %old to i64
+  ret i64 %e
+}
diff --git a/llvm/test/CodeGen/WebAssembly/offset-atomics.ll b/llvm/test/CodeGen/WebAssembly/offset-atomics.ll
index 5ead9548f5c..92617c1f4b1 100644
--- a/llvm/test/CodeGen/WebAssembly/offset-atomics.ll
+++ b/llvm/test/CodeGen/WebAssembly/offset-atomics.ll
@@ -1070,3 +1070,458 @@ define i8 @rmw_add_i8_i32_retvalue(i8 *%p, i32 %v) {
   %old = atomicrmw add i8* %p, i8 %t seq_cst
   ret i8 %old
 }
+
+;===----------------------------------------------------------------------------
+; Atomic ternary read-modify-writes: 32-bit
+;===----------------------------------------------------------------------------
+
+; Basic RMW.
+
+; CHECK-LABEL: cmpxchg_i32_no_offset:
+; CHECK-NEXT: .param i32, i32, i32{{$}}
+; CHECK: i32.atomic.rmw.cmpxchg $push0=, 0($0), $1, $2{{$}}
+; CHECK-NEXT: return $pop0{{$}}
+define i32 @cmpxchg_i32_no_offset(i32* %p, i32 %exp, i32 %new) {
+  %pair = cmpxchg i32* %p, i32 %exp, i32 %new seq_cst seq_cst
+  %old = extractvalue { i32, i1 } %pair, 0
+  ret i32 %old
+}
+
+; With an nuw add, we can fold an offset.
+
+; CHECK-LABEL: cmpxchg_i32_with_folded_offset:
+; CHECK: i32.atomic.rmw.cmpxchg $push0=, 24($0), $1, $2{{$}}
+define i32 @cmpxchg_i32_with_folded_offset(i32* %p, i32 %exp, i32 %new) {
+  %q = ptrtoint i32* %p to i32
+  %r = add nuw i32 %q, 24
+  %s = inttoptr i32 %r to i32*
+  %pair = cmpxchg i32* %s, i32 %exp, i32 %new seq_cst seq_cst
+  %old = extractvalue { i32, i1 } %pair, 0
+  ret i32 %old
+}
+
+; With an inbounds gep, we can fold an offset.
+
+; CHECK-LABEL: cmpxchg_i32_with_folded_gep_offset:
+; CHECK: i32.atomic.rmw.cmpxchg $push0=, 24($0), $1, $2{{$}}
+define i32 @cmpxchg_i32_with_folded_gep_offset(i32* %p, i32 %exp, i32 %new) {
+  %s = getelementptr inbounds i32, i32* %p, i32 6
+  %pair = cmpxchg i32* %s, i32 %exp, i32 %new seq_cst seq_cst
+  %old = extractvalue { i32, i1 } %pair, 0
+  ret i32 %old
+}
+
+; We can't fold a negative offset though, even with an inbounds gep.
+
+; CHECK-LABEL: cmpxchg_i32_with_unfolded_gep_negative_offset:
+; CHECK: i32.const $push0=, -24{{$}}
+; CHECK: i32.add $push1=, $0, $pop0{{$}}
+; CHECK: i32.atomic.rmw.cmpxchg $push2=, 0($pop1), $1, $2{{$}}
+define i32 @cmpxchg_i32_with_unfolded_gep_negative_offset(i32* %p, i32 %exp, i32 %new) {
+  %s = getelementptr inbounds i32, i32* %p, i32 -6
+  %pair = cmpxchg i32* %s, i32 %exp, i32 %new seq_cst seq_cst
+  %old = extractvalue { i32, i1 } %pair, 0
+  ret i32 %old
+}
+
+; Without nuw, and even with nsw, we can't fold an offset.
+
+; CHECK-LABEL: cmpxchg_i32_with_unfolded_offset:
+; CHECK: i32.const $push0=, 24{{$}}
+; CHECK: i32.add $push1=, $0, $pop0{{$}}
+; CHECK: i32.atomic.rmw.cmpxchg $push2=, 0($pop1), $1, $2{{$}}
+define i32 @cmpxchg_i32_with_unfolded_offset(i32* %p, i32 %exp, i32 %new) {
+  %q = ptrtoint i32* %p to i32
+  %r = add nsw i32 %q, 24
+  %s = inttoptr i32 %r to i32*
+  %pair = cmpxchg i32* %s, i32 %exp, i32 %new seq_cst seq_cst
+  %old = extractvalue { i32, i1 } %pair, 0
+  ret i32 %old
+}
+
+; Without inbounds, we can't fold a gep offset.
+
+; CHECK-LABEL: cmpxchg_i32_with_unfolded_gep_offset:
+; CHECK: i32.const $push0=, 24{{$}}
+; CHECK: i32.add $push1=, $0, $pop0{{$}}
+; CHECK: i32.atomic.rmw.cmpxchg $push2=, 0($pop1), $1, $2{{$}}
+define i32 @cmpxchg_i32_with_unfolded_gep_offset(i32* %p, i32 %exp, i32 %new) {
+  %s = getelementptr i32, i32* %p, i32 6
+  %pair = cmpxchg i32* %s, i32 %exp, i32 %new seq_cst seq_cst
+  %old = extractvalue { i32, i1 } %pair, 0
+  ret i32 %old
+}
+
+; When loading from a fixed address, materialize a zero.
+
+; CHECK-LABEL: cmpxchg_i32_from_numeric_address
+; CHECK: i32.const $push0=, 0{{$}}
+; CHECK: i32.atomic.rmw.cmpxchg $push1=, 42($pop0), $0, $1{{$}}
+define i32 @cmpxchg_i32_from_numeric_address(i32 %exp, i32 %new) {
+  %s = inttoptr i32 42 to i32*
+  %pair = cmpxchg i32* %s, i32 %exp, i32 %new seq_cst seq_cst
+  %old = extractvalue { i32, i1 } %pair, 0
+  ret i32 %old
+}
+
+; CHECK-LABEL: cmpxchg_i32_from_global_address
+; CHECK: i32.const $push0=, 0{{$}}
+; CHECK: i32.atomic.rmw.cmpxchg $push1=, gv($pop0), $0, $1{{$}}
+define i32 @cmpxchg_i32_from_global_address(i32 %exp, i32 %new) {
+  %pair = cmpxchg i32* @gv, i32 %exp, i32 %new seq_cst seq_cst
+  %old = extractvalue { i32, i1 } %pair, 0
+  ret i32 %old
+}
+
+;===----------------------------------------------------------------------------
+; Atomic ternary read-modify-writes: 64-bit
+;===----------------------------------------------------------------------------
+
+; Basic RMW.
+
+; CHECK-LABEL: cmpxchg_i64_no_offset:
+; CHECK-NEXT: .param i32, i64, i64{{$}}
+; CHECK: i64.atomic.rmw.cmpxchg $push0=, 0($0), $1, $2{{$}}
+; CHECK-NEXT: return $pop0{{$}}
+define i64 @cmpxchg_i64_no_offset(i64* %p, i64 %exp, i64 %new) {
+  %pair = cmpxchg i64* %p, i64 %exp, i64 %new seq_cst seq_cst
+  %old = extractvalue { i64, i1 } %pair, 0
+  ret i64 %old
+}
+
+; With an nuw add, we can fold an offset.
+
+; CHECK-LABEL: cmpxchg_i64_with_folded_offset:
+; CHECK: i64.atomic.rmw.cmpxchg $push0=, 24($0), $1, $2{{$}}
+define i64 @cmpxchg_i64_with_folded_offset(i64* %p, i64 %exp, i64 %new) {
+  %q = ptrtoint i64* %p to i32
+  %r = add nuw i32 %q, 24
+  %s = inttoptr i32 %r to i64*
+  %pair = cmpxchg i64* %s, i64 %exp, i64 %new seq_cst seq_cst
+  %old = extractvalue { i64, i1 } %pair, 0
+  ret i64 %old
+}
+
+; With an inbounds gep, we can fold an offset.
+
+; CHECK-LABEL: cmpxchg_i64_with_folded_gep_offset:
+; CHECK: i64.atomic.rmw.cmpxchg $push0=, 24($0), $1, $2{{$}}
+define i64 @cmpxchg_i64_with_folded_gep_offset(i64* %p, i64 %exp, i64 %new) {
+  %s = getelementptr inbounds i64, i64* %p, i32 3
+  %pair = cmpxchg i64* %s, i64 %exp, i64 %new seq_cst seq_cst
+  %old = extractvalue { i64, i1 } %pair, 0
+  ret i64 %old
+}
+
+; We can't fold a negative offset though, even with an inbounds gep.
+
+; CHECK-LABEL: cmpxchg_i64_with_unfolded_gep_negative_offset:
+; CHECK: i32.const $push0=, -24{{$}}
+; CHECK: i32.add $push1=, $0, $pop0{{$}}
+; CHECK: i64.atomic.rmw.cmpxchg $push2=, 0($pop1), $1, $2{{$}}
+define i64 @cmpxchg_i64_with_unfolded_gep_negative_offset(i64* %p, i64 %exp, i64 %new) {
+  %s = getelementptr inbounds i64, i64* %p, i32 -3
+  %pair = cmpxchg i64* %s, i64 %exp, i64 %new seq_cst seq_cst
+  %old = extractvalue { i64, i1 } %pair, 0
+  ret i64 %old
+}
+
+; Without nuw, and even with nsw, we can't fold an offset.
+
+; CHECK-LABEL: cmpxchg_i64_with_unfolded_offset:
+; CHECK: i32.const $push0=, 24{{$}}
+; CHECK: i32.add $push1=, $0, $pop0{{$}}
+; CHECK: i64.atomic.rmw.cmpxchg $push2=, 0($pop1), $1, $2{{$}}
+define i64 @cmpxchg_i64_with_unfolded_offset(i64* %p, i64 %exp, i64 %new) {
+  %q = ptrtoint i64* %p to i32
+  %r = add nsw i32 %q, 24
+  %s = inttoptr i32 %r to i64*
+  %pair = cmpxchg i64* %s, i64 %exp, i64 %new seq_cst seq_cst
+  %old = extractvalue { i64, i1 } %pair, 0
+  ret i64 %old
+}
+
+; Without inbounds, we can't fold a gep offset.
+
+; CHECK-LABEL: cmpxchg_i64_with_unfolded_gep_offset:
+; CHECK: i32.const $push0=, 24{{$}}
+; CHECK: i32.add $push1=, $0, $pop0{{$}}
+; CHECK: i64.atomic.rmw.cmpxchg $push2=, 0($pop1), $1, $2{{$}}
+define i64 @cmpxchg_i64_with_unfolded_gep_offset(i64* %p, i64 %exp, i64 %new) {
+  %s = getelementptr i64, i64* %p, i32 3
+  %pair = cmpxchg i64* %s, i64 %exp, i64 %new seq_cst seq_cst
+  %old = extractvalue { i64, i1 } %pair, 0
+  ret i64 %old
+}
+
+;===----------------------------------------------------------------------------
+; Atomic truncating & sign-extending ternary RMWs
+;===----------------------------------------------------------------------------
+
+; Fold an offset into a sign-extending rmw.
+
+; CHECK-LABEL: cmpxchg_i8_i32_s_with_folded_offset:
+; CHECK: i32.atomic.rmw8_u.cmpxchg $push0=, 24($0), $1, $2{{$}}
+; CHECK-NEXT: i32.extend8_s $push1=, $pop0
+define i32 @cmpxchg_i8_i32_s_with_folded_offset(i8* %p, i32 %exp, i32 %new) {
+  %q = ptrtoint i8* %p to i32
+  %r = add nuw i32 %q, 24
+  %s = inttoptr i32 %r to i8*
+  %exp_t = trunc i32 %exp to i8
+  %new_t = trunc i32 %new to i8
+  %pair = cmpxchg i8* %s, i8 %exp_t, i8 %new_t seq_cst seq_cst
+  %old = extractvalue { i8, i1 } %pair, 0
+  %u = sext i8 %old to i32
+  ret i32 %u
+}
+
+; 32->64 sext rmw gets selected as i32.atomic.rmw.cmpxchg, i64_extend_s/i32
+; CHECK-LABEL: cmpxchg_i32_i64_s_with_folded_offset:
+; CHECK: i32.wrap/i64 $push1=, $1
+; CHECK-NEXT: i32.wrap/i64 $push0=, $2
+; CHECK-NEXT: i32.atomic.rmw.cmpxchg $push2=, 24($0), $pop1, $pop0{{$}}
+; CHECK-NEXT: i64.extend_s/i32 $push3=, $pop2{{$}}
+define i64 @cmpxchg_i32_i64_s_with_folded_offset(i32* %p, i64 %exp, i64 %new) {
+  %q = ptrtoint i32* %p to i32
+  %r = add nuw i32 %q, 24
+  %s = inttoptr i32 %r to i32*
+  %exp_t = trunc i64 %exp to i32
+  %new_t = trunc i64 %new to i32
+  %pair = cmpxchg i32* %s, i32 %exp_t, i32 %new_t seq_cst seq_cst
+  %old = extractvalue { i32, i1 } %pair, 0
+  %u = sext i32 %old to i64
+  ret i64 %u
+}
+
+; Fold a gep offset into a sign-extending rmw.
+
+; CHECK-LABEL: cmpxchg_i8_i32_s_with_folded_gep_offset:
+; CHECK: i32.atomic.rmw8_u.cmpxchg $push0=, 24($0), $1, $2{{$}}
+; CHECK-NEXT: i32.extend8_s $push1=, $pop0
+define i32 @cmpxchg_i8_i32_s_with_folded_gep_offset(i8* %p, i32 %exp, i32 %new) {
+  %s = getelementptr inbounds i8, i8* %p, i32 24
+  %exp_t = trunc i32 %exp to i8
+  %new_t = trunc i32 %new to i8
+  %pair = cmpxchg i8* %s, i8 %exp_t, i8 %new_t seq_cst seq_cst
+  %old = extractvalue { i8, i1 } %pair, 0
+  %u = sext i8 %old to i32
+  ret i32 %u
+}
+
+; CHECK-LABEL: cmpxchg_i16_i32_s_with_folded_gep_offset:
+; CHECK: i32.atomic.rmw16_u.cmpxchg $push0=, 48($0), $1, $2{{$}}
+; CHECK-NEXT: i32.extend16_s $push1=, $pop0
+define i32 @cmpxchg_i16_i32_s_with_folded_gep_offset(i16* %p, i32 %exp, i32 %new) {
+  %s = getelementptr inbounds i16, i16* %p, i32 24
+  %exp_t = trunc i32 %exp to i16
+  %new_t = trunc i32 %new to i16
+  %pair = cmpxchg i16* %s, i16 %exp_t, i16 %new_t seq_cst seq_cst
+  %old = extractvalue { i16, i1 } %pair, 0
+  %u = sext i16 %old to i32
+  ret i32 %u
+}
+
+; CHECK-LABEL: cmpxchg_i16_i64_s_with_folded_gep_offset:
+; CHECK: i64.atomic.rmw16_u.cmpxchg $push0=, 48($0), $1, $2{{$}}
+; CHECK-NEXT: i64.extend16_s $push1=, $pop0
+define i64 @cmpxchg_i16_i64_s_with_folded_gep_offset(i16* %p, i64 %exp, i64 %new) {
+  %s = getelementptr inbounds i16, i16* %p, i32 24
+  %exp_t = trunc i64 %exp to i16
+  %new_t = trunc i64 %new to i16
+  %pair = cmpxchg i16* %s, i16 %exp_t, i16 %new_t seq_cst seq_cst
+  %old = extractvalue { i16, i1 } %pair, 0
+  %u = sext i16 %old to i64
+  ret i64 %u
+}
+
+; 'add' in this code becomes 'or' after DAG optimization. Treat an 'or' node as
+; an 'add' if the or'ed bits are known to be zero.
+
+; CHECK-LABEL: cmpxchg_i8_i32_s_with_folded_or_offset:
+; CHECK: i32.atomic.rmw8_u.cmpxchg $push[[R1:[0-9]+]]=, 2($pop{{[0-9]+}}), $1, $2{{$}}
+; CHECK-NEXT: i32.extend8_s $push{{[0-9]+}}=, $pop[[R1]]{{$}}
+define i32 @cmpxchg_i8_i32_s_with_folded_or_offset(i32 %x, i32 %exp, i32 %new) {
+  %and = and i32 %x, -4
+  %t0 = inttoptr i32 %and to i8*
+  %arrayidx = getelementptr inbounds i8, i8* %t0, i32 2
+  %exp_t = trunc i32 %exp to i8
+  %new_t = trunc i32 %new to i8
+  %pair = cmpxchg i8* %arrayidx, i8 %exp_t, i8 %new_t seq_cst seq_cst
+  %old = extractvalue { i8, i1 } %pair, 0
+  %conv = sext i8 %old to i32
+  ret i32 %conv
+}
+
+; CHECK-LABEL: cmpxchg_i8_i64_s_with_folded_or_offset:
+; CHECK: i64.atomic.rmw8_u.cmpxchg $push[[R1:[0-9]+]]=, 2($pop{{[0-9]+}}), $1, $2{{$}}
+; CHECK-NEXT: i64.extend8_s $push{{[0-9]+}}=, $pop[[R1]]{{$}}
+define i64 @cmpxchg_i8_i64_s_with_folded_or_offset(i32 %x, i64 %exp, i64 %new) {
+  %and = and i32 %x, -4
+  %t0 = inttoptr i32 %and to i8*
+  %arrayidx = getelementptr inbounds i8, i8* %t0, i32 2
+  %exp_t = trunc i64 %exp to i8
+  %new_t = trunc i64 %new to i8
+  %pair = cmpxchg i8* %arrayidx, i8 %exp_t, i8 %new_t seq_cst seq_cst
+  %old = extractvalue { i8, i1 } %pair, 0
+  %conv = sext i8 %old to i64
+  ret i64 %conv
+}
+
+; When loading from a fixed address, materialize a zero.
+
+; CHECK-LABEL: cmpxchg_i16_i32_s_from_numeric_address
+; CHECK: i32.const $push0=, 0{{$}}
+; CHECK: i32.atomic.rmw16_u.cmpxchg $push1=, 42($pop0), $0, $1{{$}}
+; CHECK-NEXT: i32.extend16_s $push2=, $pop1
+define i32 @cmpxchg_i16_i32_s_from_numeric_address(i32 %exp, i32 %new) {
+  %s = inttoptr i32 42 to i16*
+  %exp_t = trunc i32 %exp to i16
+  %new_t = trunc i32 %new to i16
+  %pair = cmpxchg i16* %s, i16 %exp_t, i16 %new_t seq_cst seq_cst
+  %old = extractvalue { i16, i1 } %pair, 0
+  %u = sext i16 %old to i32
+  ret i32 %u
+}
+
+; CHECK-LABEL: cmpxchg_i8_i32_s_from_global_address
+; CHECK: i32.const $push0=, 0{{$}}
+; CHECK: i32.atomic.rmw8_u.cmpxchg $push1=, gv8($pop0), $0, $1{{$}}
+; CHECK-NEXT: i32.extend8_s $push2=, $pop1{{$}}
+define i32 @cmpxchg_i8_i32_s_from_global_address(i32 %exp, i32 %new) {
+  %exp_t = trunc i32 %exp to i8
+  %new_t = trunc i32 %new to i8
+  %pair = cmpxchg i8* @gv8, i8 %exp_t, i8 %new_t seq_cst seq_cst
+  %old = extractvalue { i8, i1 } %pair, 0
+  %u = sext i8 %old to i32
+  ret i32 %u
+}
+
+;===----------------------------------------------------------------------------
+; Atomic truncating & zero-extending ternary RMWs
+;===----------------------------------------------------------------------------
+
+; Fold an offset into a sign-extending rmw.
+
+; CHECK-LABEL: cmpxchg_i8_i32_z_with_folded_offset:
+; CHECK: i32.atomic.rmw8_u.cmpxchg $push0=, 24($0), $1, $2{{$}}
+define i32 @cmpxchg_i8_i32_z_with_folded_offset(i8* %p, i32 %exp, i32 %new) {
+  %q = ptrtoint i8* %p to i32
+  %r = add nuw i32 %q, 24
+  %s = inttoptr i32 %r to i8*
+  %exp_t = trunc i32 %exp to i8
+  %new_t = trunc i32 %new to i8
+  %pair = cmpxchg i8* %s, i8 %exp_t, i8 %new_t seq_cst seq_cst
+  %old = extractvalue { i8, i1 } %pair, 0
+  %u = zext i8 %old to i32
+  ret i32 %u
+}
+
+; CHECK-LABEL: cmpxchg_i32_i64_z_with_folded_offset:
+; CHECK: i64.atomic.rmw32_u.cmpxchg $push0=, 24($0), $1, $2{{$}}
+define i64 @cmpxchg_i32_i64_z_with_folded_offset(i32* %p, i64 %exp, i64 %new) {
+  %q = ptrtoint i32* %p to i32
+  %r = add nuw i32 %q, 24
+  %s = inttoptr i32 %r to i32*
+  %exp_t = trunc i64 %exp to i32
+  %new_t = trunc i64 %new to i32
+  %pair = cmpxchg i32* %s, i32 %exp_t, i32 %new_t seq_cst seq_cst
+  %old = extractvalue { i32, i1 } %pair, 0
+  %u = zext i32 %old to i64
+  ret i64 %u
+}
+
+; Fold a gep offset into a sign-extending rmw.
+
+; CHECK-LABEL: cmpxchg_i8_i32_z_with_folded_gep_offset:
+; CHECK: i32.atomic.rmw8_u.cmpxchg $push0=, 24($0), $1, $2{{$}}
+define i32 @cmpxchg_i8_i32_z_with_folded_gep_offset(i8* %p, i32 %exp, i32 %new) {
+  %s = getelementptr inbounds i8, i8* %p, i32 24
+  %exp_t = trunc i32 %exp to i8
+  %new_t = trunc i32 %new to i8
+  %pair = cmpxchg i8* %s, i8 %exp_t, i8 %new_t seq_cst seq_cst
+  %old = extractvalue { i8, i1 } %pair, 0
+  %u = zext i8 %old to i32
+  ret i32 %u
+}
+
+; CHECK-LABEL: cmpxchg_i16_i32_z_with_folded_gep_offset:
+; CHECK: i32.atomic.rmw16_u.cmpxchg $push0=, 48($0), $1, $2{{$}}
+define i32 @cmpxchg_i16_i32_z_with_folded_gep_offset(i16* %p, i32 %exp, i32 %new) {
+  %s = getelementptr inbounds i16, i16* %p, i32 24
+  %exp_t = trunc i32 %exp to i16
+  %new_t = trunc i32 %new to i16
+  %pair = cmpxchg i16* %s, i16 %exp_t, i16 %new_t seq_cst seq_cst
+  %old = extractvalue { i16, i1 } %pair, 0
+  %u = zext i16 %old to i32
+  ret i32 %u
+}
+
+; CHECK-LABEL: cmpxchg_i16_i64_z_with_folded_gep_offset:
+; CHECK: i64.atomic.rmw16_u.cmpxchg $push0=, 48($0), $1, $2{{$}}
+define i64 @cmpxchg_i16_i64_z_with_folded_gep_offset(i16* %p, i64 %exp, i64 %new) {
+  %s = getelementptr inbounds i16, i16* %p, i32 24
+  %exp_t = trunc i64 %exp to i16
+  %new_t = trunc i64 %new to i16
+  %pair = cmpxchg i16* %s, i16 %exp_t, i16 %new_t seq_cst seq_cst
+  %old = extractvalue { i16, i1 } %pair, 0
+  %u = zext i16 %old to i64
+  ret i64 %u
+}
+
+; 'add' in this code becomes 'or' after DAG optimization. Treat an 'or' node as
+; an 'add' if the or'ed bits are known to be zero.
+
+; CHECK-LABEL: cmpxchg_i8_i32_z_with_folded_or_offset:
+; CHECK: i32.atomic.rmw8_u.cmpxchg $push[[R1:[0-9]+]]=, 2($pop{{[0-9]+}}), $1, $2{{$}}
+define i32 @cmpxchg_i8_i32_z_with_folded_or_offset(i32 %x, i32 %exp, i32 %new) {
+  %and = and i32 %x, -4
+  %t0 = inttoptr i32 %and to i8*
+  %arrayidx = getelementptr inbounds i8, i8* %t0, i32 2
+  %exp_t = trunc i32 %exp to i8
+  %new_t = trunc i32 %new to i8
+  %pair = cmpxchg i8* %arrayidx, i8 %exp_t, i8 %new_t seq_cst seq_cst
+  %old = extractvalue { i8, i1 } %pair, 0
+  %conv = zext i8 %old to i32
+  ret i32 %conv
+}
+
+; CHECK-LABEL: cmpxchg_i8_i64_z_with_folded_or_offset:
+; CHECK: i64.atomic.rmw8_u.cmpxchg $push[[R1:[0-9]+]]=, 2($pop{{[0-9]+}}), $1, $2{{$}}
+define i64 @cmpxchg_i8_i64_z_with_folded_or_offset(i32 %x, i64 %exp, i64 %new) {
+  %and = and i32 %x, -4
+  %t0 = inttoptr i32 %and to i8*
+  %arrayidx = getelementptr inbounds i8, i8* %t0, i32 2
+  %exp_t = trunc i64 %exp to i8
+  %new_t = trunc i64 %new to i8
+  %pair = cmpxchg i8* %arrayidx, i8 %exp_t, i8 %new_t seq_cst seq_cst
+  %old = extractvalue { i8, i1 } %pair, 0
+  %conv = zext i8 %old to i64
+  ret i64 %conv
+}
+
+; When loading from a fixed address, materialize a zero.
+
+; CHECK-LABEL: cmpxchg_i16_i32_z_from_numeric_address
+; CHECK: i32.const $push0=, 0{{$}}
+; CHECK: i32.atomic.rmw16_u.cmpxchg $push1=, 42($pop0), $0, $1{{$}}
+define i32 @cmpxchg_i16_i32_z_from_numeric_address(i32 %exp, i32 %new) {
+  %s = inttoptr i32 42 to i16*
+  %exp_t = trunc i32 %exp to i16
+  %new_t = trunc i32 %new to i16
+  %pair = cmpxchg i16* %s, i16 %exp_t, i16 %new_t seq_cst seq_cst
+  %old = extractvalue { i16, i1 } %pair, 0
+  %u = zext i16 %old to i32
+  ret i32 %u
+}
+
+; CHECK-LABEL: cmpxchg_i8_i32_z_from_global_address
+; CHECK: i32.const $push0=, 0{{$}}
+; CHECK: i32.atomic.rmw8_u.cmpxchg $push1=, gv8($pop0), $0, $1{{$}}
+define i32 @cmpxchg_i8_i32_z_from_global_address(i32 %exp, i32 %new) {
+  %exp_t = trunc i32 %exp to i8
+  %new_t = trunc i32 %new to i8
+  %pair = cmpxchg i8* @gv8, i8 %exp_t, i8 %new_t seq_cst seq_cst
+  %old = extractvalue { i8, i1 } %pair, 0
+  %u = zext i8 %old to i32
+  ret i32 %u
+}