From 885dc592973ceaef288958e838a9aa923646a3d1 Mon Sep 17 00:00:00 2001 From: Derek Schuff Date: Thu, 5 Oct 2017 21:18:42 +0000 Subject: [WebAssembly] Add the rest of the atomic loads Add extending loads and constant offset patterns A bit more refactoring of the tablegen to make the patterns fairly nice and uniform between the regular and atomic loads. Differential Revision: https://reviews.llvm.org/D38523 llvm-svn: 315022 --- .../MCTargetDesc/WebAssemblyMCTargetDesc.h | 6 + .../Target/WebAssembly/WebAssemblyInstrAtomics.td | 177 ++++++++++++++++++++- .../Target/WebAssembly/WebAssemblyInstrMemory.td | 65 ++------ .../WebAssembly/WebAssemblySetP2AlignOperands.cpp | 6 + 4 files changed, 198 insertions(+), 56 deletions(-) (limited to 'llvm/lib/Target/WebAssembly') diff --git a/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h b/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h index 6a1bd8d0ddb..6efa7080cfe 100644 --- a/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h +++ b/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h @@ -111,6 +111,8 @@ inline unsigned GetDefaultP2Align(unsigned Opcode) { case WebAssembly::LOAD8_U_I32: case WebAssembly::LOAD8_S_I64: case WebAssembly::LOAD8_U_I64: + case WebAssembly::ATOMIC_LOAD8_U_I32: + case WebAssembly::ATOMIC_LOAD8_U_I64: case WebAssembly::STORE8_I32: case WebAssembly::STORE8_I64: return 0; @@ -118,6 +120,8 @@ inline unsigned GetDefaultP2Align(unsigned Opcode) { case WebAssembly::LOAD16_U_I32: case WebAssembly::LOAD16_S_I64: case WebAssembly::LOAD16_U_I64: + case WebAssembly::ATOMIC_LOAD16_U_I32: + case WebAssembly::ATOMIC_LOAD16_U_I64: case WebAssembly::STORE16_I32: case WebAssembly::STORE16_I64: return 1; @@ -129,11 +133,13 @@ inline unsigned GetDefaultP2Align(unsigned Opcode) { case WebAssembly::LOAD32_U_I64: case WebAssembly::STORE32_I64: case WebAssembly::ATOMIC_LOAD_I32: + case WebAssembly::ATOMIC_LOAD32_U_I64: return 2; case WebAssembly::LOAD_I64: case 
WebAssembly::LOAD_F64: case WebAssembly::STORE_I64: case WebAssembly::STORE_F64: + case WebAssembly::ATOMIC_LOAD_I64: return 3; default: llvm_unreachable("Only loads and stores have p2align values"); diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyInstrAtomics.td b/llvm/lib/Target/WebAssembly/WebAssemblyInstrAtomics.td index 355802f760b..a49172df158 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyInstrAtomics.td +++ b/llvm/lib/Target/WebAssembly/WebAssemblyInstrAtomics.td @@ -17,19 +17,180 @@ //===----------------------------------------------------------------------===// let Defs = [ARGUMENTS] in { -// TODO: add the rest of the atomic loads -def ATOMIC_LOAD_I32 : CLoadI32<"i32.atomic.load", 0xfe10>; -def ATOMIC_LOAD_I64 : CLoadI64<"i64.atomic.load", 0xfe11>; +def ATOMIC_LOAD_I32 : WebAssemblyLoad; +def ATOMIC_LOAD_I64 : WebAssemblyLoad; } // Defs = [ARGUMENTS] // Select loads with no constant offset. let Predicates = [HasAtomics] in { -class ALoadPatNoOffset : - Pat<(ty (node I32:$addr)), (inst 0, 0, $addr)>; -def : ALoadPatNoOffset; -def : ALoadPatNoOffset; +def : LoadPatNoOffset; +def : LoadPatNoOffset; -} +// Select loads with a constant offset. + +// Pattern with address + immediate offset +def : LoadPatImmOff; +def : LoadPatImmOff; +def : LoadPatImmOff; +def : LoadPatImmOff; + +def : LoadPatGlobalAddr; +def : LoadPatGlobalAddr; + +def : LoadPatExternalSym; +def : LoadPatExternalSym; + + +// Select loads with just a constant offset. +def : LoadPatOffsetOnly; +def : LoadPatOffsetOnly; + +def : LoadPatGlobalAddrOffOnly; +def : LoadPatGlobalAddrOffOnly; + +def : LoadPatExternSymOffOnly; +def : LoadPatExternSymOffOnly; + +} // Predicates = [HasAtomics] + +// Extending loads. Note that there are only zero-extending atomic loads, no +// sign-extending loads. 
+let Defs = [ARGUMENTS] in { +def ATOMIC_LOAD8_U_I32 : WebAssemblyLoad; +def ATOMIC_LOAD16_U_I32 : WebAssemblyLoad; +def ATOMIC_LOAD8_U_I64 : WebAssemblyLoad; +def ATOMIC_LOAD16_U_I64 : WebAssemblyLoad; +def ATOMIC_LOAD32_U_I64 : WebAssemblyLoad; +} // Defs = [ARGUMENTS] + +// Fragments for extending loads. These are different from regular loads because +// the SDNodes are derived from AtomicSDNode rather than LoadSDNode and +// therefore don't have the extension type field. So instead of matching that, +// we match the patterns that the type legalizer expands them to. + +// We directly match zext patterns and select the zext atomic loads. +// i32 (zext (i8 (atomic_load_8))) gets legalized to +// i32 (and (i32 (atomic_load_8)), 255) +// These can be selected to a single zero-extending atomic load instruction. +def zext_aload_8 : PatFrag<(ops node:$addr), + (and (i32 (atomic_load_8 node:$addr)), 255)>; +def zext_aload_16 : PatFrag<(ops node:$addr), + (and (i32 (atomic_load_16 node:$addr)), 65535)>; +// Unlike regular loads, extension to i64 is handled differently than i32. +// i64 (zext (i8 (atomic_load_8))) gets legalized to +// i64 (and (i64 (anyext (i32 (atomic_load_8)))), 255) +def zext_aload_8_64 : + PatFrag<(ops node:$addr), + (and (i64 (anyext (i32 (atomic_load_8 node:$addr)))), 255)>; +def zext_aload_16_64 : + PatFrag<(ops node:$addr), + (and (i64 (anyext (i32 (atomic_load_16 node:$addr)))), 65535)>; +def zext_aload_32_64 : + PatFrag<(ops node:$addr), + (zext (i32 (atomic_load node:$addr)))>; + +// We don't have single sext atomic load instructions. So for sext loads, we +// match bare subword loads (for 32-bit results) and anyext loads (for 64-bit +// results) and select a zext load; the next instruction will be sext_inreg +// which is selected by itself. 
+def anyext_aload_8_64 : + PatFrag<(ops node:$addr), (anyext (i32 (atomic_load_8 node:$addr)))>; +def anyext_aload_16_64 : + PatFrag<(ops node:$addr), (anyext (i32 (atomic_load_16 node:$addr)))>; + +let Predicates = [HasAtomics] in { +// Select zero-extending loads with no constant offset. +def : LoadPatNoOffset; +def : LoadPatNoOffset; +def : LoadPatNoOffset; +def : LoadPatNoOffset; +def : LoadPatNoOffset; + +// Select sign-extending loads with no constant offset +def : LoadPatNoOffset; +def : LoadPatNoOffset; +def : LoadPatNoOffset; +def : LoadPatNoOffset; +// 32->64 sext load gets selected as i32.atomic.load, i64.extend_s/i64 + + +// Zero-extending loads with constant offset +def : LoadPatImmOff; +def : LoadPatImmOff; +def : LoadPatImmOff; +def : LoadPatImmOff; +def : LoadPatImmOff; +def : LoadPatImmOff; +def : LoadPatImmOff; +def : LoadPatImmOff; +def : LoadPatImmOff; +def : LoadPatImmOff; + +// Sign-extending loads with constant offset +def : LoadPatImmOff; +def : LoadPatImmOff; +def : LoadPatImmOff; +def : LoadPatImmOff; +def : LoadPatImmOff; +def : LoadPatImmOff; +def : LoadPatImmOff; +def : LoadPatImmOff; +// No 32->64 patterns, just use i32.atomic.load and i64.extend_s/i64 + +def : LoadPatGlobalAddr; +def : LoadPatGlobalAddr; +def : LoadPatGlobalAddr; +def : LoadPatGlobalAddr; +def : LoadPatGlobalAddr; +def : LoadPatGlobalAddr; +def : LoadPatGlobalAddr; +def : LoadPatGlobalAddr; +def : LoadPatGlobalAddr; + +def : LoadPatExternalSym; +def : LoadPatExternalSym; +def : LoadPatExternalSym; +def : LoadPatExternalSym; +def : LoadPatExternalSym; +def : LoadPatExternalSym; +def : LoadPatExternalSym; +def : LoadPatExternalSym; +def : LoadPatExternalSym; + + +// Extending loads with just a constant offset +def : LoadPatOffsetOnly; +def : LoadPatOffsetOnly; +def : LoadPatOffsetOnly; +def : LoadPatOffsetOnly; +def : LoadPatOffsetOnly; +def : LoadPatOffsetOnly; +def : LoadPatOffsetOnly; +def : LoadPatOffsetOnly; +def : LoadPatOffsetOnly; + +def : 
LoadPatGlobalAddrOffOnly; +def : LoadPatGlobalAddrOffOnly; +def : LoadPatGlobalAddrOffOnly; +def : LoadPatGlobalAddrOffOnly; +def : LoadPatGlobalAddrOffOnly; +def : LoadPatGlobalAddrOffOnly; +def : LoadPatGlobalAddrOffOnly; +def : LoadPatGlobalAddrOffOnly; +def : LoadPatGlobalAddrOffOnly; + +def : LoadPatExternSymOffOnly; +def : LoadPatExternSymOffOnly; +def : LoadPatExternSymOffOnly; +def : LoadPatExternSymOffOnly; +def : LoadPatExternSymOffOnly; +def : LoadPatExternSymOffOnly; +def : LoadPatExternSymOffOnly; +def : LoadPatExternSymOffOnly; +def : LoadPatExternSymOffOnly; + + +} // Predicates = [HasAtomics] //===----------------------------------------------------------------------===// // Atomic stores diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyInstrMemory.td b/llvm/lib/Target/WebAssembly/WebAssemblyInstrMemory.td index 1897027b57f..9d58895ca5a 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyInstrMemory.td +++ b/llvm/lib/Target/WebAssembly/WebAssemblyInstrMemory.td @@ -55,28 +55,19 @@ def regPlusGA : PatFrag<(ops node:$addr, node:$off), let Defs = [ARGUMENTS] in { -// Classes to define both atomic and non-atomic integer loads -class CLoadI32 : - I<(outs I32:$dst), - (ins P2Align:$p2align, offset32_op:$off, I32:$addr), - [], !strconcat(Name, "\t$dst, ${off}(${addr})${p2align}"), Opcode>; - -class CLoadI64 : - I<(outs I64:$dst), +// Defines atomic and non-atomic loads, regular and extending. +class WebAssemblyLoad : + I<(outs rc:$dst), (ins P2Align:$p2align, offset32_op:$off, I32:$addr), [], !strconcat(Name, "\t$dst, ${off}(${addr})${p2align}"), Opcode>; // Basic load. // FIXME: When we can break syntax compatibility, reorder the fields in the // asmstrings to match the binary encoding. 
-def LOAD_I32 : CLoadI32<"i32.load", 0x28>; -def LOAD_I64 : CLoadI64<"i64.load", 0x29>; -def LOAD_F32 : I<(outs F32:$dst), - (ins P2Align:$p2align, offset32_op:$off, I32:$addr), - [], "f32.load\t$dst, ${off}(${addr})${p2align}", 0x2a>; -def LOAD_F64 : I<(outs F64:$dst), - (ins P2Align:$p2align, offset32_op:$off, I32:$addr), - [], "f64.load\t$dst, ${off}(${addr})${p2align}", 0x2b>; +def LOAD_I32 : WebAssemblyLoad; +def LOAD_I64 : WebAssemblyLoad; +def LOAD_F32 : WebAssemblyLoad; +def LOAD_F64 : WebAssemblyLoad; } // Defs = [ARGUMENTS] @@ -153,36 +144,16 @@ def : LoadPatExternSymOffOnly; let Defs = [ARGUMENTS] in { // Extending load. -def LOAD8_S_I32 : I<(outs I32:$dst), - (ins P2Align:$p2align, offset32_op:$off, I32:$addr), - [], "i32.load8_s\t$dst, ${off}(${addr})${p2align}", 0x2c>; -def LOAD8_U_I32 : I<(outs I32:$dst), - (ins P2Align:$p2align, offset32_op:$off, I32:$addr), - [], "i32.load8_u\t$dst, ${off}(${addr})${p2align}", 0x2d>; -def LOAD16_S_I32 : I<(outs I32:$dst), - (ins P2Align:$p2align, offset32_op:$off, I32:$addr), - [], "i32.load16_s\t$dst, ${off}(${addr})${p2align}", 0x2e>; -def LOAD16_U_I32 : I<(outs I32:$dst), - (ins P2Align:$p2align, offset32_op:$off, I32:$addr), - [], "i32.load16_u\t$dst, ${off}(${addr})${p2align}", 0x2f>; -def LOAD8_S_I64 : I<(outs I64:$dst), - (ins P2Align:$p2align, offset32_op:$off, I32:$addr), - [], "i64.load8_s\t$dst, ${off}(${addr})${p2align}", 0x30>; -def LOAD8_U_I64 : I<(outs I64:$dst), - (ins P2Align:$p2align, offset32_op:$off, I32:$addr), - [], "i64.load8_u\t$dst, ${off}(${addr})${p2align}", 0x31>; -def LOAD16_S_I64 : I<(outs I64:$dst), - (ins P2Align:$p2align, offset32_op:$off, I32:$addr), - [], "i64.load16_s\t$dst, ${off}(${addr})${p2align}", 0x32>; -def LOAD16_U_I64 : I<(outs I64:$dst), - (ins P2Align:$p2align, offset32_op:$off, I32:$addr), - [], "i64.load16_u\t$dst, ${off}(${addr})${p2align}", 0x33>; -def LOAD32_S_I64 : I<(outs I64:$dst), - (ins P2Align:$p2align, offset32_op:$off, I32:$addr), - [], 
"i64.load32_s\t$dst, ${off}(${addr})${p2align}", 0x34>; -def LOAD32_U_I64 : I<(outs I64:$dst), - (ins P2Align:$p2align, offset32_op:$off, I32:$addr), - [], "i64.load32_u\t$dst, ${off}(${addr})${p2align}", 0x35>; +def LOAD8_S_I32 : WebAssemblyLoad; +def LOAD8_U_I32 : WebAssemblyLoad; +def LOAD16_S_I32 : WebAssemblyLoad; +def LOAD16_U_I32 : WebAssemblyLoad; +def LOAD8_S_I64 : WebAssemblyLoad; +def LOAD8_U_I64 : WebAssemblyLoad; +def LOAD16_S_I64 : WebAssemblyLoad; +def LOAD16_U_I64 : WebAssemblyLoad; +def LOAD32_S_I64 : WebAssemblyLoad; +def LOAD32_U_I64 : WebAssemblyLoad; } // Defs = [ARGUMENTS] @@ -290,7 +261,6 @@ def : LoadPatNoOffset; def : LoadPatNoOffset; def : LoadPatNoOffset; - // Select "don't care" extending loads with a constant offset. def : LoadPatImmOff; def : LoadPatImmOff; @@ -313,7 +283,6 @@ def : LoadPatExternalSym; def : LoadPatExternalSym; def : LoadPatExternalSym; - // Select "don't care" extending loads with just a constant offset. def : LoadPatOffsetOnly; def : LoadPatOffsetOnly; diff --git a/llvm/lib/Target/WebAssembly/WebAssemblySetP2AlignOperands.cpp b/llvm/lib/Target/WebAssembly/WebAssemblySetP2AlignOperands.cpp index a418f65e0ee..c4b9e915b41 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblySetP2AlignOperands.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblySetP2AlignOperands.cpp @@ -97,6 +97,12 @@ bool WebAssemblySetP2AlignOperands::runOnMachineFunction(MachineFunction &MF) { case WebAssembly::LOAD32_S_I64: case WebAssembly::LOAD32_U_I64: case WebAssembly::ATOMIC_LOAD_I32: + case WebAssembly::ATOMIC_LOAD8_U_I32: + case WebAssembly::ATOMIC_LOAD16_U_I32: + case WebAssembly::ATOMIC_LOAD_I64: + case WebAssembly::ATOMIC_LOAD8_U_I64: + case WebAssembly::ATOMIC_LOAD16_U_I64: + case WebAssembly::ATOMIC_LOAD32_U_I64: RewriteP2Align(MI, WebAssembly::LoadP2AlignOperandNo); break; case WebAssembly::STORE_I32: -- cgit v1.2.3