diff options
author | Thomas Lively <tlively@google.com> | 2018-10-03 23:02:23 +0000 |
---|---|---|
committer | Thomas Lively <tlively@google.com> | 2018-10-03 23:02:23 +0000 |
commit | 5d461c96bdbcc82f4fd6eeef88633f6f6c06c650 (patch) | |
tree | c9e2fba348fdbb96afb75afcc71414cf35d64e8e | |
parent | f7868ec25bc54a7b07f7498f44320f71242d1fb3 (diff) | |
download | bcm5719-llvm-5d461c96bdbcc82f4fd6eeef88633f6f6c06c650.tar.gz bcm5719-llvm-5d461c96bdbcc82f4fd6eeef88633f6f6c06c650.zip |
[WebAssembly] Bitselect intrinsic and instruction
Summary: Depends on D52755.
Reviewers: aheejin, dschuff
Subscribers: sbc100, jgravelle-google, sunfish, llvm-commits
Differential Revision: https://reviews.llvm.org/D52805
llvm-svn: 343739
-rw-r--r-- | llvm/include/llvm/IR/IntrinsicsWebAssembly.td | 4 | ||||
-rw-r--r-- | llvm/lib/Target/WebAssembly/WebAssemblyISD.def | 1 | ||||
-rw-r--r-- | llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp | 6 | ||||
-rw-r--r-- | llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td | 24 | ||||
-rw-r--r-- | llvm/test/CodeGen/WebAssembly/simd-arith.ll | 94 | ||||
-rw-r--r-- | llvm/test/CodeGen/WebAssembly/simd-intrinsics.ll | 84 | ||||
-rw-r--r-- | llvm/test/MC/WebAssembly/simd-encodings.s | 3 |
7 files changed, 209 insertions, 7 deletions
diff --git a/llvm/include/llvm/IR/IntrinsicsWebAssembly.td b/llvm/include/llvm/IR/IntrinsicsWebAssembly.td index efaee7cc930..2189d9ef27f 100644 --- a/llvm/include/llvm/IR/IntrinsicsWebAssembly.td +++ b/llvm/include/llvm/IR/IntrinsicsWebAssembly.td @@ -91,6 +91,10 @@ def int_wasm_atomic_notify: // SIMD intrinsics //===----------------------------------------------------------------------===// +def int_wasm_bitselect : + Intrinsic<[llvm_anyvector_ty], + [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>], + [IntrNoMem, IntrSpeculatable]>; def int_wasm_anytrue : Intrinsic<[llvm_i32_ty], [llvm_anyvector_ty], diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyISD.def b/llvm/lib/Target/WebAssembly/WebAssemblyISD.def index 590f8dab759..9e1d198b079 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyISD.def +++ b/llvm/lib/Target/WebAssembly/WebAssemblyISD.def @@ -24,5 +24,6 @@ HANDLE_NODETYPE(BR_TABLE) HANDLE_NODETYPE(SHUFFLE) HANDLE_NODETYPE(ANYTRUE) HANDLE_NODETYPE(ALLTRUE) +HANDLE_NODETYPE(BITSELECT) // add memory opcodes starting at ISD::FIRST_TARGET_MEMORY_OPCODE here... diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp index a8992d89ca3..d5dcbf1d699 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp @@ -965,6 +965,11 @@ WebAssemblyTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, switch (IntNo) { default: return {}; // Don't custom lower most intrinsics. + + case Intrinsic::wasm_bitselect: + return DAG.getNode(WebAssemblyISD::BITSELECT, DL, Op.getValueType(), + Op.getOperand(1), Op.getOperand(2), Op.getOperand(3)); + case Intrinsic::wasm_anytrue: case Intrinsic::wasm_alltrue: { unsigned OpCode = IntNo == Intrinsic::wasm_anytrue @@ -972,6 +977,7 @@ WebAssemblyTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, : WebAssemblyISD::ALLTRUE; return DAG.getNode(OpCode, DL, Op.getValueType(), Op.getOperand(1)); } + case Intrinsic::wasm_lsda: // TODO For now, just return 0 not to crash return DAG.getConstant(0, DL, Op.getValueType()); diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td b/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td index 991a5a5773a..bf5582a11f0 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td +++ b/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td @@ -20,8 +20,12 @@ def LaneIdx#SIZE : ImmLeaf<i32, "return 0 <= Imm && Imm < "#SIZE#";">; // Custom nodes for custom operations def wasm_shuffle_t : SDTypeProfile<1, 18, []>; +def wasm_bitselect_t : SDTypeProfile<1, 3, + [SDTCisVec<0>, SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, SDTCisSameAs<0, 3>] +>; def wasm_reduce_t : SDTypeProfile<1, 1, [SDTCisVT<0, i32>, SDTCisVec<1>]>; def wasm_shuffle : SDNode<"WebAssemblyISD::SHUFFLE", wasm_shuffle_t>; +def wasm_bitselect : SDNode<"WebAssemblyISD::BITSELECT", wasm_bitselect_t>; def wasm_anytrue : SDNode<"WebAssemblyISD::ANYTRUE", wasm_reduce_t>; def wasm_alltrue : SDNode<"WebAssemblyISD::ALLTRUE", wasm_reduce_t>; @@ -193,6 +197,16 @@ multiclass SIMDNot<ValueType vec_t, PatFrag splat_pat, ValueType lane_t> { )], "v128.not\t$dst, $vec", "v128.not", 63>; } +multiclass Bitselect<ValueType vec_t> { + defm BITSELECT_#vec_t : + SIMD_I<(outs V128:$dst), (ins V128:$v1, V128:$v2, V128:$c), (outs), (ins), + [(set (vec_t V128:$dst), + (vec_t (wasm_bitselect + (vec_t V128:$c), (vec_t V128:$v1), (vec_t V128:$v2) + )) + )], + "v128.bitselect\t$dst, $v1, $v2, $c", "v128.bitselect", 64>; +} multiclass SIMDReduceVec<ValueType vec_t, string vec, string name, SDNode op, bits<32> simdop> { defm _#vec_t : SIMD_I<(outs I32:$dst), (ins V128:$vec), (outs), (ins), @@ -380,6 +394,9 @@ defm "" : SIMDNot<v8i16, splat8, i32>; defm "" : SIMDNot<v4i32, splat4, i32>; defm "" : SIMDNot<v2i64, splat2, i64>; +foreach vec_t = [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64] in +defm "" : Bitselect<vec_t>; + defm ANYTRUE : SIMDReduce<"any_true", wasm_anytrue, 65>; defm ALLTRUE : SIMDReduce<"all_true", wasm_alltrue, 69>; @@ -443,6 +460,13 @@ def : StorePatExternSymOffOnly<vec_t, store, !cast<NI>("STORE_"#vec_t)>; } +// Bitselect is equivalent to (c & v1) | (~c & v2) +foreach vec_t = [v16i8, v8i16, v4i32, v2i64] in + def : Pat<(vec_t (or (and (vec_t V128:$c), (vec_t V128:$v1)), + (and (vnot V128:$c), (vec_t V128:$v2)))), + (!cast<Instruction>("BITSELECT_"#vec_t) + V128:$v1, V128:$v2, V128:$c)>; + // Lower float comparisons that don't care about NaN to standard // WebAssembly float comparisons. These instructions are generated in // the target-independent expansion of unordered comparisons and diff --git a/llvm/test/CodeGen/WebAssembly/simd-arith.ll b/llvm/test/CodeGen/WebAssembly/simd-arith.ll index f20dc82a21b..317b87fd692 100644 --- a/llvm/test/CodeGen/WebAssembly/simd-arith.ll +++ b/llvm/test/CodeGen/WebAssembly/simd-arith.ll @@ -1,9 +1,9 @@ -; RUN: llc < %s -asm-verbose=false -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -wasm-enable-unimplemented-simd -mattr=+simd128 | FileCheck %s --check-prefixes CHECK,SIMD128 -; RUN: llc < %s -asm-verbose=false -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -wasm-enable-unimplemented-simd -mattr=+simd128 -fast-isel | FileCheck %s --check-prefixes CHECK,SIMD128 -; RUN: llc < %s -asm-verbose=false -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -mattr=+simd128 | FileCheck %s --check-prefixes CHECK,SIMD128-VM -; RUN: llc < %s -asm-verbose=false -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -mattr=+simd128 -fast-isel | FileCheck %s --check-prefixes CHECK,SIMD128-VM -; RUN: llc < %s -asm-verbose=false -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -mattr=-simd128 | FileCheck %s --check-prefixes CHECK,NO-SIMD128 -; RUN: llc < %s -asm-verbose=false -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -mattr=-simd128 -fast-isel | FileCheck %s --check-prefixes CHECK,NO-SIMD128 +; RUN: llc < %s -asm-verbose=false -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -wasm-enable-unimplemented-simd -mattr=+simd128 | FileCheck %s --check-prefixes CHECK,SIMD128,SIMD128-SLOW +; RUN: llc < %s -asm-verbose=false -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -wasm-enable-unimplemented-simd -mattr=+simd128 -fast-isel | FileCheck %s --check-prefixes CHECK,SIMD128,SIMD128-FAST +; RUN: llc < %s -asm-verbose=false -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -mattr=+simd128 | FileCheck %s --check-prefixes CHECK,SIMD128-VM,SIMD128-VM-SLOW +; RUN: llc < %s -asm-verbose=false -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -mattr=+simd128 -fast-isel | FileCheck %s --check-prefixes CHECK,SIMD128-VM,SIMD128-VM-FAST +; RUN: llc < %s -asm-verbose=false -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -mattr=-simd128 | FileCheck %s --check-prefixes CHECK,NO-SIMD128,NO-SIMD128-SLOW +; RUN: llc < %s -asm-verbose=false -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -mattr=-simd128 -fast-isel | FileCheck %s --check-prefixes CHECK,NO-SIMD128,NO-SIMD128-FAST ; Test that basic SIMD128 arithmetic operations assemble as expected. @@ -165,6 +165,27 @@ define <16 x i8> @not_v16i8(<16 x i8> %x) { ret <16 x i8> %a } +; CHECK-LABEL: bitselect_v16i8: +; NO-SIMD128-NOT: v128 +; SIMD128-NEXT: .param v128, v128, v128{{$}} +; SIMD128-NEXT: .result v128{{$}} +; SIMD128-SLOW-NEXT: v128.bitselect $push[[R:[0-9]+]]=, $1, $2, $0{{$}} +; SIMD128-SLOW-NEXT: return $pop[[R]]{{$}} +; SIMD128-FAST-NEXT: v128.and +; SIMD128-FAST-NEXT: v128.not +; SIMD128-FAST-NEXT: v128.and +; SIMD128-FAST-NEXT: v128.or +; SIMD128-FAST-NEXT: return +define <16 x i8> @bitselect_v16i8(<16 x i8> %c, <16 x i8> %v1, <16 x i8> %v2) { + %masked_v1 = and <16 x i8> %c, %v1 + %inv_mask = xor <16 x i8> %c, + <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, + i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1> + %masked_v2 = and <16 x i8> %inv_mask, %v2 + %a = or <16 x i8> %masked_v1, %masked_v2 + ret <16 x i8> %a +} + ; ============================================================================== ; 8 x i16 ; ============================================================================== @@ -313,6 +334,27 @@ define <8 x i16> @not_v8i16(<8 x i16> %x) { ret <8 x i16> %a } +; CHECK-LABEL: bitselect_v8i16: +; NO-SIMD128-NOT: v128 +; SIMD128-NEXT: .param v128, v128, v128{{$}} +; SIMD128-NEXT: .result v128{{$}} +; SIMD128-SLOW-NEXT: v128.bitselect $push[[R:[0-9]+]]=, $1, $2, $0{{$}} +; SIMD128-SLOW-NEXT: return $pop[[R]]{{$}} +; SIMD128-FAST-NEXT: v128.and +; SIMD128-FAST-NEXT: v128.not +; SIMD128-FAST-NEXT: v128.and +; SIMD128-FAST-NEXT: v128.or +; SIMD128-FAST-NEXT: return +define <8 x i16> @bitselect_v8i16(<8 x i16> %c, <8 x i16> %v1, <8 x i16> %v2) { + %masked_v1 = and <8 x i16> %v1, %c + %inv_mask = xor <8 x i16> + <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>, + %c + %masked_v2 = and <8 x i16> %v2, %inv_mask + %a = or <8 x i16> %masked_v1, %masked_v2 + ret <8 x i16> %a +} + ; ============================================================================== ; 4 x i32 ; ============================================================================== @@ -458,6 +500,25 @@ define <4 x i32> @not_v4i32(<4 x i32> %x) { ret <4 x i32> %a } +; CHECK-LABEL: bitselect_v4i32: +; NO-SIMD128-NOT: v128 +; SIMD128-NEXT: .param v128, v128, v128{{$}} +; SIMD128-NEXT: .result v128{{$}} +; SIMD128-SLOW-NEXT: v128.bitselect $push[[R:[0-9]+]]=, $1, $2, $0{{$}} +; SIMD128-SLOW-NEXT: return $pop[[R]]{{$}} +; SIMD128-FAST-NEXT: v128.not +; SIMD128-FAST-NEXT: v128.and +; SIMD128-FAST-NEXT: v128.and +; SIMD128-FAST-NEXT: v128.or +; SIMD128-FAST-NEXT: return +define <4 x i32> @bitselect_v4i32(<4 x i32> %c, <4 x i32> %v1, <4 x i32> %v2) { + %masked_v1 = and <4 x i32> %c, %v1 + %inv_mask = xor <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, %c + %masked_v2 = and <4 x i32> %inv_mask, %v2 + %a = or <4 x i32> %masked_v2, %masked_v1 + ret <4 x i32> %a +} + ; ============================================================================== ; 2 x i64 ; ============================================================================== @@ -653,6 +714,26 @@ define <2 x i64> @not_v2i64(<2 x i64> %x) { ret <2 x i64> %a } +; CHECK-LABEL: bitselect_v2i64: +; NO-SIMD128-NOT: v128 +; SIMD128-VM-NOT: v128 +; SIMD128-NEXT: .param v128, v128, v128{{$}} +; SIMD128-NEXT: .result v128{{$}} +; SIMD128-SLOW-NEXT: v128.bitselect $push[[R:[0-9]+]]=, $1, $2, $0{{$}} +; SIMD128-SLOW-NEXT: return $pop[[R]]{{$}} +; SIMD128-FAST-NEXT: v128.not +; SIMD128-FAST-NEXT: v128.and +; SIMD128-FAST-NEXT: v128.and +; SIMD128-FAST-NEXT: v128.or +; SIMD128-FAST-NEXT: return +define <2 x i64> @bitselect_v2i64(<2 x i64> %c, <2 x i64> %v1, <2 x i64> %v2) { + %masked_v1 = and <2 x i64> %v1, %c + %inv_mask = xor <2 x i64> <i64 -1, i64 -1>, %c + %masked_v2 = and <2 x i64> %v2, %inv_mask + %a = or <2 x i64> %masked_v2, %masked_v1 + ret <2 x i64> %a +} + ; ============================================================================== ; 4 x float ; ============================================================================== @@ -761,7 +842,6 @@ define <2 x double> @abs_v2f64(<2 x double> %x) { ret <2 x double> %a } - ; CHECK-LABEL: add_v2f64: ; NO-SIMD128-NOT: f64x2 ; SIMD128-VM-NOT: f62x2 diff --git a/llvm/test/CodeGen/WebAssembly/simd-intrinsics.ll b/llvm/test/CodeGen/WebAssembly/simd-intrinsics.ll index 3b0223d46f9..1c693164305 100644 --- a/llvm/test/CodeGen/WebAssembly/simd-intrinsics.ll +++ b/llvm/test/CodeGen/WebAssembly/simd-intrinsics.ll @@ -33,6 +33,19 @@ define i32 @all_v16i8(<16 x i8> %x) { ret i32 %a } +; CHECK-LABEL: bitselect_v16i8: +; SIMD128-NEXT: .param v128, v128, v128{{$}} +; SIMD128-NEXT: .result v128{{$}} +; SIMD128-NEXT: v128.bitselect $push[[R:[0-9]+]]=, $1, $2, $0{{$}} +; SIMD128-NEXT: return $pop[[R]]{{$}} +declare <16 x i8> @llvm.wasm.bitselect.v16i8(<16 x i8>, <16 x i8>, <16 x i8>) +define <16 x i8> @bitselect_v16i8(<16 x i8> %c, <16 x i8> %v1, <16 x i8> %v2) { + %a = call <16 x i8> @llvm.wasm.bitselect.v16i8( + <16 x i8> %c, <16 x i8> %v1, <16 x i8> %v2 + ) + ret <16 x i8> %a +} + ; ============================================================================== ; 8 x i16 ; ============================================================================== @@ -58,6 +71,19 @@ define i32 @all_v8i16(<8 x i16> %x) { ret i32 %a } +; CHECK-LABEL: bitselect_v8i16: +; SIMD128-NEXT: .param v128, v128, v128{{$}} +; SIMD128-NEXT: .result v128{{$}} +; SIMD128-NEXT: v128.bitselect $push[[R:[0-9]+]]=, $1, $2, $0{{$}} +; SIMD128-NEXT: return $pop[[R]]{{$}} +declare <8 x i16> @llvm.wasm.bitselect.v8i16(<8 x i16>, <8 x i16>, <8 x i16>) +define <8 x i16> @bitselect_v8i16(<8 x i16> %c, <8 x i16> %v1, <8 x i16> %v2) { + %a = call <8 x i16> @llvm.wasm.bitselect.v8i16( + <8 x i16> %c, <8 x i16> %v1, <8 x i16> %v2 + ) + ret <8 x i16> %a +} + ; ============================================================================== ; 4 x i32 ; ============================================================================== @@ -83,6 +109,19 @@ define i32 @all_v4i32(<4 x i32> %x) { ret i32 %a } +; CHECK-LABEL: bitselect_v4i32: +; SIMD128-NEXT: .param v128, v128, v128{{$}} +; SIMD128-NEXT: .result v128{{$}} +; SIMD128-NEXT: v128.bitselect $push[[R:[0-9]+]]=, $1, $2, $0{{$}} +; SIMD128-NEXT: return $pop[[R]]{{$}} +declare <4 x i32> @llvm.wasm.bitselect.v4i32(<4 x i32>, <4 x i32>, <4 x i32>) +define <4 x i32> @bitselect_v4i32(<4 x i32> %c, <4 x i32> %v1, <4 x i32> %v2) { + %a = call <4 x i32> @llvm.wasm.bitselect.v4i32( + <4 x i32> %c, <4 x i32> %v1, <4 x i32> %v2 + ) + ret <4 x i32> %a +} + ; ============================================================================== ; 2 x i64 ; ============================================================================== @@ -107,3 +146,48 @@ define i32 @all_v2i64(<2 x i64> %x) { %a = call i32 @llvm.wasm.alltrue.v2i64(<2 x i64> %x) ret i32 %a } + +; CHECK-LABEL: bitselect_v2i64: +; SIMD128-NEXT: .param v128, v128, v128{{$}} +; SIMD128-NEXT: .result v128{{$}} +; SIMD128-NEXT: v128.bitselect $push[[R:[0-9]+]]=, $1, $2, $0{{$}} +; SIMD128-NEXT: return $pop[[R]]{{$}} +declare <2 x i64> @llvm.wasm.bitselect.v2i64(<2 x i64>, <2 x i64>, <2 x i64>) +define <2 x i64> @bitselect_v2i64(<2 x i64> %c, <2 x i64> %v1, <2 x i64> %v2) { + %a = call <2 x i64> @llvm.wasm.bitselect.v2i64( + <2 x i64> %c, <2 x i64> %v1, <2 x i64> %v2 + ) + ret <2 x i64> %a +} + +; ============================================================================== +; 4 x f32 +; ============================================================================== +; CHECK-LABEL: bitselect_v4f32: +; SIMD128-NEXT: .param v128, v128, v128{{$}} +; SIMD128-NEXT: .result v128{{$}} +; SIMD128-NEXT: v128.bitselect $push[[R:[0-9]+]]=, $1, $2, $0{{$}} +; SIMD128-NEXT: return $pop[[R]]{{$}} +declare <4 x float> @llvm.wasm.bitselect.v4f32(<4 x float>, <4 x float>, <4 x float>) +define <4 x float> @bitselect_v4f32(<4 x float> %c, <4 x float> %v1, <4 x float> %v2) { + %a = call <4 x float> @llvm.wasm.bitselect.v4f32( + <4 x float> %c, <4 x float> %v1, <4 x float> %v2 + ) + ret <4 x float> %a +} + +; ============================================================================== +; 2 x f64 +; ============================================================================== +; CHECK-LABEL: bitselect_v2f64: +; SIMD128-NEXT: .param v128, v128, v128{{$}} +; SIMD128-NEXT: .result v128{{$}} +; SIMD128-NEXT: v128.bitselect $push[[R:[0-9]+]]=, $1, $2, $0{{$}} +; SIMD128-NEXT: return $pop[[R]]{{$}} +declare <2 x double> @llvm.wasm.bitselect.v2f64(<2 x double>, <2 x double>, <2 x double>) +define <2 x double> @bitselect_v2f64(<2 x double> %c, <2 x double> %v1, <2 x double> %v2) { + %a = call <2 x double> @llvm.wasm.bitselect.v2f64( + <2 x double> %c, <2 x double> %v1, <2 x double> %v2 + ) + ret <2 x double> %a +} diff --git a/llvm/test/MC/WebAssembly/simd-encodings.s b/llvm/test/MC/WebAssembly/simd-encodings.s index beb2b5583a6..a8c9b6df445 100644 --- a/llvm/test/MC/WebAssembly/simd-encodings.s +++ b/llvm/test/MC/WebAssembly/simd-encodings.s @@ -193,6 +193,9 @@ # CHECK: v128.not # encoding: [0xfd,0x3f] v128.not + # CHECK: v128.bitselect # encoding: [0xfd,0x40] + v128.bitselect + # CHECK: i8x16.any_true # encoding: [0xfd,0x41] i8x16.any_true |