From bd0f271c9e55ab69b45258e4922869099ed18307 Mon Sep 17 00:00:00 2001
From: Simon Tatham <simon.tatham@arm.com>
Date: Wed, 11 Dec 2019 10:04:14 +0000
Subject: [ARM][MVE] Add intrinsics for immediate shifts. (reland)

This adds the family of `vshlq_n` and `vshrq_n` ACLE intrinsics, which
shift every lane of a vector left or right by a compile-time
immediate. They mostly work by expanding to the IR `shl`, `lshr` and
`ashr` operations, with their second operand being a vector splat of
the immediate.

There's a fiddly special case, though. ACLE specifies that the
immediate in `vshrq_n` can take values up to //and including// the bit
size of the vector lane. But LLVM IR thinks that shifting right by the
full size of the lane is UB, and feels free to replace the `lshr` with
an `undef` half way through the optimization pipeline. Hence, to keep
this legal in source code, I have to detect it at codegen time.
Logical (unsigned) right shifts by the element size are handled by
simply emitting the zero vector; arithmetic ones are converted into a
shift of one bit less, which will always give the same output.

In order to do that check, I also had to enhance the tablegen
MveEmitter so that it can cope with converting a builtin function's
operand into a bare integer to pass to a code-generating subfunction.
Previously the only bare integers it knew how to handle were flags
generated from within `arm_mve.td`.

Reviewers: dmgreen, miyuki, MarkMurrayARM, ostannard

Reviewed By: dmgreen, MarkMurrayARM

Subscribers: echristo, hokein, rdhindsa, kristof.beyls, hiraditya, cfe-commits, llvm-commits

Tags: #clang, #llvm

Differential Revision: https://reviews.llvm.org/D71065
---
 llvm/lib/Target/ARM/ARMInstrMVE.td | 52 +++++++++++++++++++++++---------------
 1 file changed, 32 insertions(+), 20 deletions(-)

(limited to 'llvm/lib')
diff --git a/llvm/lib/Target/ARM/ARMInstrMVE.td b/llvm/lib/Target/ARM/ARMInstrMVE.td
index 7bc067c25ef..2a063e4ebde 100644
--- a/llvm/lib/Target/ARM/ARMInstrMVE.td
+++ b/llvm/lib/Target/ARM/ARMInstrMVE.td
@@ -2816,27 +2816,39 @@ def MVE_VSHL_immi32 : MVE_VSHL_imm<"i32", (ins imm0_31:$imm)> {
   let Inst{21} = 0b1;
 }
 
+multiclass MVE_immediate_shift_patterns_inner<
+    MVEVectorVTInfo VTI, Operand imm_operand_type, SDNode unpred_op,
+    Intrinsic pred_int, Instruction inst, list<int> unsignedFlag = []> {
+
+  def : Pat<(VTI.Vec (unpred_op (VTI.Vec MQPR:$src), imm_operand_type:$imm)),
+            (VTI.Vec (inst (VTI.Vec MQPR:$src), imm_operand_type:$imm))>;
+
+  def : Pat<(VTI.Vec !con((pred_int (VTI.Vec MQPR:$src), imm_operand_type:$imm),
+                          !dag(pred_int, unsignedFlag, ?),
+                          (pred_int (VTI.Pred VCCR:$mask),
+                                   (VTI.Vec MQPR:$inactive)))),
+            (VTI.Vec (inst (VTI.Vec MQPR:$src), imm_operand_type:$imm,
+                           ARMVCCThen, (VTI.Pred VCCR:$mask),
+                           (VTI.Vec MQPR:$inactive)))>;
+}
+
+multiclass MVE_immediate_shift_patterns<MVEVectorVTInfo VTI,
+                                        Operand imm_operand_type> {
+  defm : MVE_immediate_shift_patterns_inner<VTI, imm_operand_type,
+      ARMvshlImm, int_arm_mve_shl_imm_predicated,
+      !cast<Instruction>("MVE_VSHL_immi" # VTI.BitsSuffix)>;
+  defm : MVE_immediate_shift_patterns_inner<VTI, imm_operand_type,
+      ARMvshruImm, int_arm_mve_shr_imm_predicated,
+      !cast<Instruction>("MVE_VSHR_immu" # VTI.BitsSuffix), [1]>;
+  defm : MVE_immediate_shift_patterns_inner<VTI, imm_operand_type,
+      ARMvshrsImm, int_arm_mve_shr_imm_predicated,
+      !cast<Instruction>("MVE_VSHR_imms" # VTI.BitsSuffix), [0]>;
+}
+
 let Predicates = [HasMVEInt] in {
-  def : Pat<(v4i32 (ARMvshlImm (v4i32 MQPR:$src), imm0_31:$imm)),
-            (v4i32 (MVE_VSHL_immi32 (v4i32 MQPR:$src), imm0_31:$imm))>;
-  def : Pat<(v8i16 (ARMvshlImm (v8i16 MQPR:$src), imm0_15:$imm)),
-            (v8i16 (MVE_VSHL_immi16 (v8i16 MQPR:$src), imm0_15:$imm))>;
-  def : Pat<(v16i8 (ARMvshlImm (v16i8 MQPR:$src), imm0_7:$imm)),
-            (v16i8 (MVE_VSHL_immi8 (v16i8 MQPR:$src), imm0_7:$imm))>;
-
-  def : Pat<(v4i32 (ARMvshruImm (v4i32 MQPR:$src), imm0_31:$imm)),
-            (v4i32 (MVE_VSHR_immu32 (v4i32 MQPR:$src), imm0_31:$imm))>;
-  def : Pat<(v8i16 (ARMvshruImm (v8i16 MQPR:$src), imm0_15:$imm)),
-            (v8i16 (MVE_VSHR_immu16 (v8i16 MQPR:$src), imm0_15:$imm))>;
-  def : Pat<(v16i8 (ARMvshruImm (v16i8 MQPR:$src), imm0_7:$imm)),
-            (v16i8 (MVE_VSHR_immu8 (v16i8 MQPR:$src), imm0_7:$imm))>;
-
-  def : Pat<(v4i32 (ARMvshrsImm (v4i32 MQPR:$src), imm0_31:$imm)),
-            (v4i32 (MVE_VSHR_imms32 (v4i32 MQPR:$src), imm0_31:$imm))>;
-  def : Pat<(v8i16 (ARMvshrsImm (v8i16 MQPR:$src), imm0_15:$imm)),
-            (v8i16 (MVE_VSHR_imms16 (v8i16 MQPR:$src), imm0_15:$imm))>;
-  def : Pat<(v16i8 (ARMvshrsImm (v16i8 MQPR:$src), imm0_7:$imm)),
-            (v16i8 (MVE_VSHR_imms8 (v16i8 MQPR:$src), imm0_7:$imm))>;
+  defm : MVE_immediate_shift_patterns<MVE_v16i8, imm0_7>;
+  defm : MVE_immediate_shift_patterns<MVE_v8i16, imm0_15>;
+  defm : MVE_immediate_shift_patterns<MVE_v4i32, imm0_31>;
 }
 
 // end of mve_shift instructions
-- 
cgit v1.2.3