diff options
author | Simon Tatham <simon.tatham@arm.com> | 2020-01-08 13:37:12 +0000 |
---|---|---|
committer | Simon Tatham <simon.tatham@arm.com> | 2020-01-08 14:42:24 +0000 |
commit | dac7b23cc3efbb4ccb6a9ea101f367f866f334e2 (patch) | |
tree | 0f79d0c148858ffdc60745a4a7456c6ccec7219c /llvm/lib/Target/ARM | |
parent | 3100480925df10960c1e0a077dd9875037d3fe29 (diff) | |
download | bcm5719-llvm-dac7b23cc3efbb4ccb6a9ea101f367f866f334e2.tar.gz bcm5719-llvm-dac7b23cc3efbb4ccb6a9ea101f367f866f334e2.zip |
[ARM,MVE] Intrinsics for variable shift instructions.
This batch of intrinsics fills in all the shift instructions that take
a variable shift distance in a register, instead of an immediate. Some
of these instructions take a single shift distance in a scalar
register and apply it to all lanes; others take a vector of per-lane
distances.
These instructions are all basically one family, varying in whether
they saturate out-of-range values, and whether they round when bits
are shifted off the bottom. I've implemented them at the IR level by a
much smaller family of IR intrinsics, which take flag parameters to
indicate saturating and/or rounding (along with the usual one to
specify signed/unsigned integers).
An oddity is that all of them are //left// shift instructions – but if
you pass a negative shift count, they'll shift right. So the vector
shift distances are always vectors of //signed// integers, regardless
of whether you're considering the other input vector to be signed
or unsigned. Also, even the simplest `vshlq` instruction in this
family (neither saturating nor rounding) has to be implemented as an
IR intrinsic, because the ordinary LLVM IR `shl` operation would
consider an out-of-range shift count to be undefined behavior.
Reviewers: dmgreen, MarkMurrayARM, miyuki, ostannard
Reviewed By: dmgreen
Subscribers: kristof.beyls, hiraditya, cfe-commits, llvm-commits
Tags: #clang, #llvm
Differential Revision: https://reviews.llvm.org/D72329
Diffstat (limited to 'llvm/lib/Target/ARM')
-rw-r--r-- | llvm/lib/Target/ARM/ARMInstrMVE.td | 61 |
1 file changed, 49 insertions, 12 deletions
diff --git a/llvm/lib/Target/ARM/ARMInstrMVE.td b/llvm/lib/Target/ARM/ARMInstrMVE.td index c98f72b053a..19dadf229e5 100644 --- a/llvm/lib/Target/ARM/ARMInstrMVE.td +++ b/llvm/lib/Target/ARM/ARMInstrMVE.td @@ -2727,13 +2727,32 @@ class MVE_shift_by_vec<string iname, string suffix, bit U, let validForTailPredication = 1; } +multiclass MVE_shift_by_vec_p<string iname, MVEVectorVTInfo VTI, bit q, bit r> { + def "" : MVE_shift_by_vec<iname, VTI.Suffix, VTI.Unsigned, VTI.Size, q, r>; + + def : Pat<(VTI.Vec (int_arm_mve_vshl_vector + (VTI.Vec MQPR:$in), (VTI.Vec MQPR:$sh), + (i32 q), (i32 r), (i32 VTI.Unsigned))), + (VTI.Vec (!cast<Instruction>(NAME) + (VTI.Vec MQPR:$in), (VTI.Vec MQPR:$sh)))>; + + def : Pat<(VTI.Vec (int_arm_mve_vshl_vector_predicated + (VTI.Vec MQPR:$in), (VTI.Vec MQPR:$sh), + (i32 q), (i32 r), (i32 VTI.Unsigned), + (VTI.Pred VCCR:$mask), (VTI.Vec MQPR:$inactive))), + (VTI.Vec (!cast<Instruction>(NAME) + (VTI.Vec MQPR:$in), (VTI.Vec MQPR:$sh), + ARMVCCThen, (VTI.Pred VCCR:$mask), + (VTI.Vec MQPR:$inactive)))>; +} + multiclass mve_shift_by_vec_multi<string iname, bit bit_4, bit bit_8> { - def s8 : MVE_shift_by_vec<iname, "s8", 0b0, 0b00, bit_4, bit_8>; - def s16 : MVE_shift_by_vec<iname, "s16", 0b0, 0b01, bit_4, bit_8>; - def s32 : MVE_shift_by_vec<iname, "s32", 0b0, 0b10, bit_4, bit_8>; - def u8 : MVE_shift_by_vec<iname, "u8", 0b1, 0b00, bit_4, bit_8>; - def u16 : MVE_shift_by_vec<iname, "u16", 0b1, 0b01, bit_4, bit_8>; - def u32 : MVE_shift_by_vec<iname, "u32", 0b1, 0b10, bit_4, bit_8>; + defm s8 : MVE_shift_by_vec_p<iname, MVE_v16s8, bit_4, bit_8>; + defm s16 : MVE_shift_by_vec_p<iname, MVE_v8s16, bit_4, bit_8>; + defm s32 : MVE_shift_by_vec_p<iname, MVE_v4s32, bit_4, bit_8>; + defm u8 : MVE_shift_by_vec_p<iname, MVE_v16u8, bit_4, bit_8>; + defm u16 : MVE_shift_by_vec_p<iname, MVE_v8u16, bit_4, bit_8>; + defm u32 : MVE_shift_by_vec_p<iname, MVE_v4u32, bit_4, bit_8>; } defm MVE_VSHL_by_vec : mve_shift_by_vec_multi<"vshl", 0b0, 0b0>; @@ -4542,13 
+4561,31 @@ class MVE_VxSHL_qr<string iname, string suffix, bit U, bits<2> size, let validForTailPredication = 1; } +multiclass MVE_VxSHL_qr_p<string iname, MVEVectorVTInfo VTI, bit q, bit r> { + def "" : MVE_VxSHL_qr<iname, VTI.Suffix, VTI.Unsigned, VTI.Size, q, r>; + + def : Pat<(VTI.Vec (int_arm_mve_vshl_scalar + (VTI.Vec MQPR:$in), (i32 rGPR:$sh), + (i32 q), (i32 r), (i32 VTI.Unsigned))), + (VTI.Vec (!cast<Instruction>(NAME) + (VTI.Vec MQPR:$in), (i32 rGPR:$sh)))>; + + def : Pat<(VTI.Vec (int_arm_mve_vshl_scalar_predicated + (VTI.Vec MQPR:$in), (i32 rGPR:$sh), + (i32 q), (i32 r), (i32 VTI.Unsigned), + (VTI.Pred VCCR:$mask))), + (VTI.Vec (!cast<Instruction>(NAME) + (VTI.Vec MQPR:$in), (i32 rGPR:$sh), + ARMVCCThen, (VTI.Pred VCCR:$mask)))>; +} + multiclass MVE_VxSHL_qr_types<string iname, bit bit_7, bit bit_17> { - def s8 : MVE_VxSHL_qr<iname, "s8", 0b0, 0b00, bit_7, bit_17>; - def s16 : MVE_VxSHL_qr<iname, "s16", 0b0, 0b01, bit_7, bit_17>; - def s32 : MVE_VxSHL_qr<iname, "s32", 0b0, 0b10, bit_7, bit_17>; - def u8 : MVE_VxSHL_qr<iname, "u8", 0b1, 0b00, bit_7, bit_17>; - def u16 : MVE_VxSHL_qr<iname, "u16", 0b1, 0b01, bit_7, bit_17>; - def u32 : MVE_VxSHL_qr<iname, "u32", 0b1, 0b10, bit_7, bit_17>; + defm s8 : MVE_VxSHL_qr_p<iname, MVE_v16s8, bit_7, bit_17>; + defm s16 : MVE_VxSHL_qr_p<iname, MVE_v8s16, bit_7, bit_17>; + defm s32 : MVE_VxSHL_qr_p<iname, MVE_v4s32, bit_7, bit_17>; + defm u8 : MVE_VxSHL_qr_p<iname, MVE_v16u8, bit_7, bit_17>; + defm u16 : MVE_VxSHL_qr_p<iname, MVE_v8u16, bit_7, bit_17>; + defm u32 : MVE_VxSHL_qr_p<iname, MVE_v4u32, bit_7, bit_17>; } defm MVE_VSHL_qr : MVE_VxSHL_qr_types<"vshl", 0b0, 0b0>; |