summary | refs | log | tree | commit | diff | stats
path: root/llvm/lib/Target/ARM
diff options
context:
space:
mode:
author: Simon Tatham <simon.tatham@arm.com>, 2020-01-08 13:37:12 +0000
committer: Simon Tatham <simon.tatham@arm.com>, 2020-01-08 14:42:24 +0000
commit: dac7b23cc3efbb4ccb6a9ea101f367f866f334e2 (patch)
tree: 0f79d0c148858ffdc60745a4a7456c6ccec7219c /llvm/lib/Target/ARM
parent: 3100480925df10960c1e0a077dd9875037d3fe29 (diff)
download: bcm5719-llvm-dac7b23cc3efbb4ccb6a9ea101f367f866f334e2.tar.gz
download: bcm5719-llvm-dac7b23cc3efbb4ccb6a9ea101f367f866f334e2.zip
[ARM,MVE] Intrinsics for variable shift instructions.
This batch of intrinsics fills in all the shift instructions that take a variable shift distance in a register, instead of an immediate. Some of these instructions take a single shift distance in a scalar register and apply it to all lanes; others take a vector of per-lane distances.

These instructions are all basically one family, varying in whether they saturate out-of-range values, and whether they round when bits are shifted off the bottom. I've implemented them at the IR level by a much smaller family of IR intrinsics, which take flag parameters to indicate saturating and/or rounding (along with the usual one to specify signed/unsigned integers).

An oddity is that all of them are //left// shift instructions – but if you pass a negative shift count, they'll shift right. So the vector shift distances are always vectors of //signed// integers, regardless of whether you're considering the other input vector to be signed or unsigned. Also, even the simplest `vshlq` instruction in this family (neither saturating nor rounding) has to be implemented as an IR intrinsic, because the ordinary LLVM IR `shl` operation would consider an out-of-range shift count to be undefined behavior.

Reviewers: dmgreen, MarkMurrayARM, miyuki, ostannard
Reviewed By: dmgreen
Subscribers: kristof.beyls, hiraditya, cfe-commits, llvm-commits
Tags: #clang, #llvm
Differential Revision: https://reviews.llvm.org/D72329
Diffstat (limited to 'llvm/lib/Target/ARM')
-rw-r--r-- llvm/lib/Target/ARM/ARMInstrMVE.td 61
1 file changed, 49 insertions, 12 deletions
diff --git a/llvm/lib/Target/ARM/ARMInstrMVE.td b/llvm/lib/Target/ARM/ARMInstrMVE.td
index c98f72b053a..19dadf229e5 100644
--- a/llvm/lib/Target/ARM/ARMInstrMVE.td
+++ b/llvm/lib/Target/ARM/ARMInstrMVE.td
@@ -2727,13 +2727,32 @@ class MVE_shift_by_vec<string iname, string suffix, bit U,
let validForTailPredication = 1;
}
+multiclass MVE_shift_by_vec_p<string iname, MVEVectorVTInfo VTI, bit q, bit r> {
+ def "" : MVE_shift_by_vec<iname, VTI.Suffix, VTI.Unsigned, VTI.Size, q, r>;
+
+ def : Pat<(VTI.Vec (int_arm_mve_vshl_vector
+ (VTI.Vec MQPR:$in), (VTI.Vec MQPR:$sh),
+ (i32 q), (i32 r), (i32 VTI.Unsigned))),
+ (VTI.Vec (!cast<Instruction>(NAME)
+ (VTI.Vec MQPR:$in), (VTI.Vec MQPR:$sh)))>;
+
+ def : Pat<(VTI.Vec (int_arm_mve_vshl_vector_predicated
+ (VTI.Vec MQPR:$in), (VTI.Vec MQPR:$sh),
+ (i32 q), (i32 r), (i32 VTI.Unsigned),
+ (VTI.Pred VCCR:$mask), (VTI.Vec MQPR:$inactive))),
+ (VTI.Vec (!cast<Instruction>(NAME)
+ (VTI.Vec MQPR:$in), (VTI.Vec MQPR:$sh),
+ ARMVCCThen, (VTI.Pred VCCR:$mask),
+ (VTI.Vec MQPR:$inactive)))>;
+}
+
multiclass mve_shift_by_vec_multi<string iname, bit bit_4, bit bit_8> {
- def s8 : MVE_shift_by_vec<iname, "s8", 0b0, 0b00, bit_4, bit_8>;
- def s16 : MVE_shift_by_vec<iname, "s16", 0b0, 0b01, bit_4, bit_8>;
- def s32 : MVE_shift_by_vec<iname, "s32", 0b0, 0b10, bit_4, bit_8>;
- def u8 : MVE_shift_by_vec<iname, "u8", 0b1, 0b00, bit_4, bit_8>;
- def u16 : MVE_shift_by_vec<iname, "u16", 0b1, 0b01, bit_4, bit_8>;
- def u32 : MVE_shift_by_vec<iname, "u32", 0b1, 0b10, bit_4, bit_8>;
+ defm s8 : MVE_shift_by_vec_p<iname, MVE_v16s8, bit_4, bit_8>;
+ defm s16 : MVE_shift_by_vec_p<iname, MVE_v8s16, bit_4, bit_8>;
+ defm s32 : MVE_shift_by_vec_p<iname, MVE_v4s32, bit_4, bit_8>;
+ defm u8 : MVE_shift_by_vec_p<iname, MVE_v16u8, bit_4, bit_8>;
+ defm u16 : MVE_shift_by_vec_p<iname, MVE_v8u16, bit_4, bit_8>;
+ defm u32 : MVE_shift_by_vec_p<iname, MVE_v4u32, bit_4, bit_8>;
}
defm MVE_VSHL_by_vec : mve_shift_by_vec_multi<"vshl", 0b0, 0b0>;
@@ -4542,13 +4561,31 @@ class MVE_VxSHL_qr<string iname, string suffix, bit U, bits<2> size,
let validForTailPredication = 1;
}
+multiclass MVE_VxSHL_qr_p<string iname, MVEVectorVTInfo VTI, bit q, bit r> {
+ def "" : MVE_VxSHL_qr<iname, VTI.Suffix, VTI.Unsigned, VTI.Size, q, r>;
+
+ def : Pat<(VTI.Vec (int_arm_mve_vshl_scalar
+ (VTI.Vec MQPR:$in), (i32 rGPR:$sh),
+ (i32 q), (i32 r), (i32 VTI.Unsigned))),
+ (VTI.Vec (!cast<Instruction>(NAME)
+ (VTI.Vec MQPR:$in), (i32 rGPR:$sh)))>;
+
+ def : Pat<(VTI.Vec (int_arm_mve_vshl_scalar_predicated
+ (VTI.Vec MQPR:$in), (i32 rGPR:$sh),
+ (i32 q), (i32 r), (i32 VTI.Unsigned),
+ (VTI.Pred VCCR:$mask))),
+ (VTI.Vec (!cast<Instruction>(NAME)
+ (VTI.Vec MQPR:$in), (i32 rGPR:$sh),
+ ARMVCCThen, (VTI.Pred VCCR:$mask)))>;
+}
+
multiclass MVE_VxSHL_qr_types<string iname, bit bit_7, bit bit_17> {
- def s8 : MVE_VxSHL_qr<iname, "s8", 0b0, 0b00, bit_7, bit_17>;
- def s16 : MVE_VxSHL_qr<iname, "s16", 0b0, 0b01, bit_7, bit_17>;
- def s32 : MVE_VxSHL_qr<iname, "s32", 0b0, 0b10, bit_7, bit_17>;
- def u8 : MVE_VxSHL_qr<iname, "u8", 0b1, 0b00, bit_7, bit_17>;
- def u16 : MVE_VxSHL_qr<iname, "u16", 0b1, 0b01, bit_7, bit_17>;
- def u32 : MVE_VxSHL_qr<iname, "u32", 0b1, 0b10, bit_7, bit_17>;
+ defm s8 : MVE_VxSHL_qr_p<iname, MVE_v16s8, bit_7, bit_17>;
+ defm s16 : MVE_VxSHL_qr_p<iname, MVE_v8s16, bit_7, bit_17>;
+ defm s32 : MVE_VxSHL_qr_p<iname, MVE_v4s32, bit_7, bit_17>;
+ defm u8 : MVE_VxSHL_qr_p<iname, MVE_v16u8, bit_7, bit_17>;
+ defm u16 : MVE_VxSHL_qr_p<iname, MVE_v8u16, bit_7, bit_17>;
+ defm u32 : MVE_VxSHL_qr_p<iname, MVE_v4u32, bit_7, bit_17>;
}
defm MVE_VSHL_qr : MVE_VxSHL_qr_types<"vshl", 0b0, 0b0>;
OpenPOWER on IntegriCloud