author     Simon Tatham <simon.tatham@arm.com>    2019-11-15 14:05:02 +0000
committer  Simon Tatham <simon.tatham@arm.com>    2019-11-15 14:06:00 +0000
commit     b0c1900820c3f0a94e3c74e6dcb1317b9cda5af8 (patch)
tree       b2858f1256172ef1a22c678bc88128e9b0665c77 /llvm/lib/Target/ARM
parent     c2f6efc732efa3921e34e24361d58e51d25460b1 (diff)
[ARM,MVE] Add reversed isel patterns for MVE `vcmp qN,rN`
Summary:

As well as vector/vector compare instructions, MVE also has a family of
comparisons taking a vector and a scalar, which compare every lane of the
vector against the same value. We generate those at isel time using isel
patterns that match `(ARMvcmp vector, (ARMvdup scalar))`.

This commit adds corresponding patterns for the operand-reversed form
`(ARMvcmp (ARMvdup scalar), vector)`, with condition codes swapped as
necessary. That way, we can still generate the vector/scalar compare
instruction if the IR happens to have been rearranged to put the operands
the other way round, which can happen in some optimization phases.
Previously, a vcmp the other way round was handled by emitting a `vdup`
instruction to explicitly replicate the scalar input into a vector, and
then doing a vector/vector comparison.

I haven't added a new test, because it turned out that several existing
tests were already exhibiting that failure mode. So just updating the
expected output in the existing MVE codegen tests demonstrates what's
been improved.

Reviewers: ostannard, MarkMurrayARM, dmgreen

Reviewed By: dmgreen

Subscribers: kristof.beyls, hiraditya, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D70296
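To make the failure mode concrete, here is a minimal sketch (not part of the
commit; the function name, invocation and expected assembly are illustrative
assumptions) of IR where the splat ends up as the first operand of the
compare, so the DAG contains `(ARMvcmp (ARMvdup scalar), vector)`:

    ; Assumed invocation: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve -o - %s
    define arm_aapcs_vfpcc <4 x i32> @cmp_scalar_lhs(<4 x i32> %v, i32 %x) {
    entry:
      ; splat %x, then compare with the splat on the *left*
      %ins   = insertelement <4 x i32> undef, i32 %x, i32 0
      %splat = shufflevector <4 x i32> %ins, <4 x i32> undef, <4 x i32> zeroinitializer
      %cmp   = icmp slt <4 x i32> %splat, %v          ; splat < vector
      %sel   = select <4 x i1> %cmp, <4 x i32> %v, <4 x i32> zeroinitializer
      ret <4 x i32> %sel
    }

Before this change, a compare of this shape would be expected to need a
`vdup.32` into a spare q register followed by a vector/vector `vcmp.s32`;
with the reversed patterns the backend can instead pick the vector/scalar
form with the condition swapped, e.g. something like `vcmp.s32 gt, q0, r0`
(LT with the operands reversed is GT, matching the `<"s", 11, 12>`
instantiation in the diff below).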
Diffstat (limited to 'llvm/lib/Target/ARM')
-rw-r--r--  llvm/lib/Target/ARM/ARMInstrMVE.td | 61
1 file changed, 48 insertions(+), 13 deletions(-)
diff --git a/llvm/lib/Target/ARM/ARMInstrMVE.td b/llvm/lib/Target/ARM/ARMInstrMVE.td
index f96520e37dc..40089daf411 100644
--- a/llvm/lib/Target/ARM/ARMInstrMVE.td
+++ b/llvm/lib/Target/ARM/ARMInstrMVE.td
@@ -3299,6 +3299,31 @@ multiclass unpred_vcmp_r<string suffix, int fc> {
(v4i1 (!cast<Instruction>("MVE_VCMP"#suffix#"32r") (v4i32 MQPR:$v1), (i32 GPR:$v2), fc, 1, VCCR:$p1))>;
}
+multiclass unpred_vcmp_r_reversible<string suffix, int fc, int fcReversed> {
+ defm "": unpred_vcmp_r<suffix, fc>;
+
+ // Additional patterns that match the vector/scalar comparisons the
+ // opposite way round, with the ARMvdup in the first operand of the
+ // ARMvcmp. These will usually need a different condition code
+ // (except for the symmetric conditions EQ and NE). They're in a
+ // separate multiclass because the unsigned CS and HI comparisons
+ // don't have reversed forms.
+
+ def : Pat<(v16i1 (ARMvcmp (v16i8 (ARMvdup GPR:$v1)), (v16i8 MQPR:$v2), (i32 fc))),
+ (v16i1 (!cast<Instruction>("MVE_VCMP"#suffix#"8r") (v16i8 MQPR:$v2), (i32 GPR:$v1), fcReversed))>;
+ def : Pat<(v8i1 (ARMvcmp (v8i16 (ARMvdup GPR:$v1)), (v8i16 MQPR:$v2), (i32 fc))),
+ (v8i1 (!cast<Instruction>("MVE_VCMP"#suffix#"16r") (v8i16 MQPR:$v2), (i32 GPR:$v1), fcReversed))>;
+ def : Pat<(v4i1 (ARMvcmp (v4i32 (ARMvdup GPR:$v1)), (v4i32 MQPR:$v2), (i32 fc))),
+ (v4i1 (!cast<Instruction>("MVE_VCMP"#suffix#"32r") (v4i32 MQPR:$v2), (i32 GPR:$v1), fcReversed))>;
+
+ def : Pat<(v16i1 (and (v16i1 VCCR:$p1), (v16i1 (ARMvcmp (v16i8 (ARMvdup GPR:$v1)), (v16i8 MQPR:$v2), (i32 fc))))),
+ (v16i1 (!cast<Instruction>("MVE_VCMP"#suffix#"8r") (v16i8 MQPR:$v2), (i32 GPR:$v1), fcReversed, 1, VCCR:$p1))>;
+ def : Pat<(v8i1 (and (v8i1 VCCR:$p1), (v8i1 (ARMvcmp (v8i16 (ARMvdup GPR:$v1)), (v8i16 MQPR:$v2), (i32 fc))))),
+ (v8i1 (!cast<Instruction>("MVE_VCMP"#suffix#"16r") (v8i16 MQPR:$v2), (i32 GPR:$v1), fcReversed, 1, VCCR:$p1))>;
+ def : Pat<(v4i1 (and (v4i1 VCCR:$p1), (v4i1 (ARMvcmp (v4i32 (ARMvdup GPR:$v1)), (v4i32 MQPR:$v2), (i32 fc))))),
+ (v4i1 (!cast<Instruction>("MVE_VCMP"#suffix#"32r") (v4i32 MQPR:$v2), (i32 GPR:$v1), fcReversed, 1, VCCR:$p1))>;
+}
+
multiclass unpred_vcmpf_z<int fc> {
def f16 : Pat<(v8i1 (ARMvcmpz (v8f16 MQPR:$v1), (i32 fc))),
(v8i1 (MVE_VCMPf16r (v8f16 MQPR:$v1), ZR, fc))>;
@@ -3311,7 +3336,7 @@ multiclass unpred_vcmpf_z<int fc> {
(v4i1 (MVE_VCMPf32r (v4f32 MQPR:$v1), ZR, fc, 1, VCCR:$p1))>;
}
-multiclass unpred_vcmpf_r<int fc> {
+multiclass unpred_vcmpf_r<int fc, int fcReversed> {
def f16 : Pat<(v8i1 (ARMvcmp (v8f16 MQPR:$v1), (v8f16 MQPR:$v2), (i32 fc))),
(v8i1 (MVE_VCMPf16 (v8f16 MQPR:$v1), (v8f16 MQPR:$v2), fc))>;
def f32 : Pat<(v4i1 (ARMvcmp (v4f32 MQPR:$v1), (v4f32 MQPR:$v2), (i32 fc))),
@@ -3322,6 +3347,11 @@ multiclass unpred_vcmpf_r<int fc> {
def f32r : Pat<(v4i1 (ARMvcmp (v4f32 MQPR:$v1), (v4f32 (ARMvdup SPR:$v2)), (i32 fc))),
(v4i1 (MVE_VCMPf32r (v4f32 MQPR:$v1), (i32 (COPY_TO_REGCLASS (f32 SPR:$v2), rGPR)), fc))>;
+ def : Pat<(v8i1 (ARMvcmp (v8f16 (ARMvdup HPR:$v1)), (v8f16 MQPR:$v2), (i32 fc))),
+ (v8i1 (MVE_VCMPf16r (v8f16 MQPR:$v2), (i32 (COPY_TO_REGCLASS (f16 HPR:$v1), rGPR)), fcReversed))>;
+ def : Pat<(v4i1 (ARMvcmp (v4f32 (ARMvdup SPR:$v1)), (v4f32 MQPR:$v2), (i32 fc))),
+ (v4i1 (MVE_VCMPf32r (v4f32 MQPR:$v2), (i32 (COPY_TO_REGCLASS (f32 SPR:$v1), rGPR)), fcReversed))>;
+
def : Pat<(v8i1 (and (v8i1 VCCR:$p1), (v8i1 (ARMvcmp (v8f16 MQPR:$v1), (v8f16 MQPR:$v2), (i32 fc))))),
(v8i1 (MVE_VCMPf16 (v8f16 MQPR:$v1), (v8f16 MQPR:$v2), fc, 1, VCCR:$p1))>;
def : Pat<(v4i1 (and (v4i1 VCCR:$p1), (v4i1 (ARMvcmp (v4f32 MQPR:$v1), (v4f32 MQPR:$v2), (i32 fc))))),
@@ -3331,6 +3361,11 @@ multiclass unpred_vcmpf_r<int fc> {
(v8i1 (MVE_VCMPf16r (v8f16 MQPR:$v1), (i32 (COPY_TO_REGCLASS (f16 HPR:$v2), rGPR)), fc, 1, VCCR:$p1))>;
def : Pat<(v4i1 (and (v4i1 VCCR:$p1), (v4i1 (ARMvcmp (v4f32 MQPR:$v1), (v4f32 (ARMvdup SPR:$v2)), (i32 fc))))),
(v4i1 (MVE_VCMPf32r (v4f32 MQPR:$v1), (i32 (COPY_TO_REGCLASS (f32 SPR:$v2), rGPR)), fc, 1, VCCR:$p1))>;
+
+ def : Pat<(v8i1 (and (v8i1 VCCR:$p1), (v8i1 (ARMvcmp (v8f16 (ARMvdup HPR:$v1)), (v8f16 MQPR:$v2), (i32 fc))))),
+ (v8i1 (MVE_VCMPf16r (v8f16 MQPR:$v2), (i32 (COPY_TO_REGCLASS (f16 HPR:$v1), rGPR)), fcReversed, 1, VCCR:$p1))>;
+ def : Pat<(v4i1 (and (v4i1 VCCR:$p1), (v4i1 (ARMvcmp (v4f32 (ARMvdup SPR:$v1)), (v4f32 MQPR:$v2), (i32 fc))))),
+ (v4i1 (MVE_VCMPf32r (v4f32 MQPR:$v2), (i32 (COPY_TO_REGCLASS (f32 SPR:$v1), rGPR)), fcReversed, 1, VCCR:$p1))>;
}
let Predicates = [HasMVEInt] in {
@@ -3343,12 +3378,12 @@ let Predicates = [HasMVEInt] in {
defm MVE_VCGTUZ : unpred_vcmp_z<"u", 8>;
defm MVE_VCGEUZ : unpred_vcmp_z<"u", 2>;
- defm MVE_VCEQ : unpred_vcmp_r<"i", 0>;
- defm MVE_VCNE : unpred_vcmp_r<"i", 1>;
- defm MVE_VCGE : unpred_vcmp_r<"s", 10>;
- defm MVE_VCLT : unpred_vcmp_r<"s", 11>;
- defm MVE_VCGT : unpred_vcmp_r<"s", 12>;
- defm MVE_VCLE : unpred_vcmp_r<"s", 13>;
+ defm MVE_VCEQ : unpred_vcmp_r_reversible<"i", 0, 0>;
+ defm MVE_VCNE : unpred_vcmp_r_reversible<"i", 1, 1>;
+ defm MVE_VCGE : unpred_vcmp_r_reversible<"s", 10, 13>;
+ defm MVE_VCLT : unpred_vcmp_r_reversible<"s", 11, 12>;
+ defm MVE_VCGT : unpred_vcmp_r_reversible<"s", 12, 11>;
+ defm MVE_VCLE : unpred_vcmp_r_reversible<"s", 13, 10>;
defm MVE_VCGTU : unpred_vcmp_r<"u", 8>;
defm MVE_VCGEU : unpred_vcmp_r<"u", 2>;
}
@@ -3361,12 +3396,12 @@ let Predicates = [HasMVEFloat] in {
defm MVE_VFCGTZ : unpred_vcmpf_z<12>;
defm MVE_VFCLEZ : unpred_vcmpf_z<13>;
- defm MVE_VFCEQ : unpred_vcmpf_r<0>;
- defm MVE_VFCNE : unpred_vcmpf_r<1>;
- defm MVE_VFCGE : unpred_vcmpf_r<10>;
- defm MVE_VFCLT : unpred_vcmpf_r<11>;
- defm MVE_VFCGT : unpred_vcmpf_r<12>;
- defm MVE_VFCLE : unpred_vcmpf_r<13>;
+ defm MVE_VFCEQ : unpred_vcmpf_r<0, 0>;
+ defm MVE_VFCNE : unpred_vcmpf_r<1, 1>;
+ defm MVE_VFCGE : unpred_vcmpf_r<10, 13>;
+ defm MVE_VFCLT : unpred_vcmpf_r<11, 12>;
+ defm MVE_VFCGT : unpred_vcmpf_r<12, 11>;
+ defm MVE_VFCLE : unpred_vcmpf_r<13, 10>;
}
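For the floating-point half of the change, a similar hedged sketch (again
illustrative only; the function name, invocation and expected output are
assumptions, not taken from the commit):

    ; Assumed invocation: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve.fp -o - %s
    define arm_aapcs_vfpcc <4 x float> @fcmp_scalar_lhs(<4 x float> %v, float %x) {
    entry:
      %ins   = insertelement <4 x float> undef, float %x, i32 0
      %splat = shufflevector <4 x float> %ins, <4 x float> undef, <4 x i32> zeroinitializer
      %cmp   = fcmp ogt <4 x float> %splat, %v        ; splat > vector, i.e. vector < splat
      %sel   = select <4 x i1> %cmp, <4 x float> %v, <4 x float> zeroinitializer
      ret <4 x float> %sel
    }

With the extra `unpred_vcmpf_r` patterns this can select the vector/scalar
`vcmp.f32` form with the condition swapped from GT to LT, rather than first
materialising the splat with a `vdup.32` and then doing a vector/vector
compare.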