| author | Simon Tatham <simon.tatham@arm.com> | 2019-11-15 14:05:02 +0000 |
|---|---|---|
| committer | Simon Tatham <simon.tatham@arm.com> | 2019-11-15 14:06:00 +0000 |
| commit | b0c1900820c3f0a94e3c74e6dcb1317b9cda5af8 | |
| tree | b2858f1256172ef1a22c678bc88128e9b0665c77 /llvm/lib/Target/ARM | |
| parent | c2f6efc732efa3921e34e24361d58e51d25460b1 | |
[ARM,MVE] Add reversed isel patterns for MVE `vcmp qN,rN`
Summary:
As well as vector/vector compare instructions, MVE also has a family
of comparisons taking a vector and a scalar, which compare every lane
of the vector against the same value. We generate those at isel time
using isel patterns that match `(ARMvcmp vector, (ARMvdup scalar))`.
This commit adds corresponding patterns for the operand-reversed form
`(ARMvcmp (ARMvdup scalar), vector)`, with condition codes swapped as
necessary. That way, we can still generate the vector/scalar compare
instruction if the IR happens to have been rearranged to put the
operands the other way round, which can happen in some optimization
phases. Previously, a vcmp the other way round was handled by emitting
a `vdup` instruction to *explicitly* replicate the scalar input into
a vector, and then doing a vector/vector comparison.
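
As a concrete, non-LLVM illustration of why only the condition code has to change when the operands swap sides, here is a small standalone C++ sketch; the lane values and scalar in it are made up, and it models the lane-wise semantics rather than anything in the backend:

```cpp
// Standalone illustration (not LLVM code): reversing the operands of a
// lane-wise vector/scalar compare only requires swapping the condition,
// e.g. (splat(s) > v[i]) holds exactly when (v[i] < s) holds.
#include <array>
#include <cassert>
#include <cstdint>

int main() {
  std::array<int32_t, 4> v = {-3, 0, 7, 42}; // one "q register" worth of lanes
  int32_t s = 5;                             // the scalar that vdup would splat

  for (int32_t lane : v) {
    assert((s >  lane) == (lane <  s)); // GT on (dup s, v)  ==  LT on (v, s)
    assert((s >= lane) == (lane <= s)); // GE                ==  LE
    assert((s == lane) == (lane == s)); // EQ is symmetric
    assert((s != lane) == (lane != s)); // NE is symmetric
  }
  return 0;
}
```

EQ and NE are symmetric, which is why the patch below passes the same condition code twice for them.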
I haven't added a new test, because it turned out that several
existing tests were already exhibiting that failure mode. So just
updating the expected output in the existing MVE codegen tests
demonstrates what's been improved.
Reviewers: ostannard, MarkMurrayARM, dmgreen
Reviewed By: dmgreen
Subscribers: kristof.beyls, hiraditya, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D70296
Diffstat (limited to 'llvm/lib/Target/ARM')

| File | Lines changed |
|---|---|
| llvm/lib/Target/ARM/ARMInstrMVE.td | 61 |

1 file changed, 48 insertions, 13 deletions
```diff
diff --git a/llvm/lib/Target/ARM/ARMInstrMVE.td b/llvm/lib/Target/ARM/ARMInstrMVE.td
index f96520e37dc..40089daf411 100644
--- a/llvm/lib/Target/ARM/ARMInstrMVE.td
+++ b/llvm/lib/Target/ARM/ARMInstrMVE.td
@@ -3299,6 +3299,31 @@ multiclass unpred_vcmp_r<string suffix, int fc> {
             (v4i1 (!cast<Instruction>("MVE_VCMP"#suffix#"32r") (v4i32 MQPR:$v1), (i32 GPR:$v2), fc, 1, VCCR:$p1))>;
 }
 
+multiclass unpred_vcmp_r_reversible<string suffix, int fc, int fcReversed> {
+  defm "": unpred_vcmp_r<suffix, fc>;
+
+  // Additional patterns that match the vector/scalar comparisons the
+  // opposite way round, with the ARMvdup in the first operand of the
+  // ARMvcmp. These will usually need a different condition code
+  // (except for the symmetric conditions EQ and NE). They're in a
+  // separate multiclass because the unsigned CS and HI comparisons
+  // don't have reversed forms.
+
+  def : Pat<(v16i1 (ARMvcmp (v16i8 (ARMvdup GPR:$v1)), (v16i8 MQPR:$v2), (i32 fc))),
+            (v16i1 (!cast<Instruction>("MVE_VCMP"#suffix#"8r") (v16i8 MQPR:$v2), (i32 GPR:$v1), fcReversed))>;
+  def : Pat<(v8i1 (ARMvcmp (v8i16 (ARMvdup GPR:$v1)), (v8i16 MQPR:$v2), (i32 fc))),
+            (v8i1 (!cast<Instruction>("MVE_VCMP"#suffix#"16r") (v8i16 MQPR:$v2), (i32 GPR:$v1), fcReversed))>;
+  def : Pat<(v4i1 (ARMvcmp (v4i32 (ARMvdup GPR:$v1)), (v4i32 MQPR:$v2), (i32 fc))),
+            (v4i1 (!cast<Instruction>("MVE_VCMP"#suffix#"32r") (v4i32 MQPR:$v2), (i32 GPR:$v1), fcReversed))>;
+
+  def : Pat<(v16i1 (and (v16i1 VCCR:$p1), (v16i1 (ARMvcmp (v16i8 (ARMvdup GPR:$v1)), (v16i8 MQPR:$v2), (i32 fc))))),
+            (v16i1 (!cast<Instruction>("MVE_VCMP"#suffix#"8r") (v16i8 MQPR:$v2), (i32 GPR:$v1), fcReversed, 1, VCCR:$p1))>;
+  def : Pat<(v8i1 (and (v8i1 VCCR:$p1), (v8i1 (ARMvcmp (v8i16 (ARMvdup GPR:$v1)), (v8i16 MQPR:$v2), (i32 fc))))),
+            (v8i1 (!cast<Instruction>("MVE_VCMP"#suffix#"16r") (v8i16 MQPR:$v2), (i32 GPR:$v1), fcReversed, 1, VCCR:$p1))>;
+  def : Pat<(v4i1 (and (v4i1 VCCR:$p1), (v4i1 (ARMvcmp (v4i32 (ARMvdup GPR:$v1)), (v4i32 MQPR:$v2), (i32 fc))))),
+            (v4i1 (!cast<Instruction>("MVE_VCMP"#suffix#"32r") (v4i32 MQPR:$v2), (i32 GPR:$v1), fcReversed, 1, VCCR:$p1))>;
+}
+
 multiclass unpred_vcmpf_z<int fc> {
   def f16 : Pat<(v8i1 (ARMvcmpz (v8f16 MQPR:$v1), (i32 fc))),
                 (v8i1 (MVE_VCMPf16r (v8f16 MQPR:$v1), ZR, fc))>;
@@ -3311,7 +3336,7 @@ multiclass unpred_vcmpf_z<int fc> {
                 (v4i1 (MVE_VCMPf32r (v4f32 MQPR:$v1), ZR, fc, 1, VCCR:$p1))>;
 }
 
-multiclass unpred_vcmpf_r<int fc> {
+multiclass unpred_vcmpf_r<int fc, int fcReversed> {
   def f16 : Pat<(v8i1 (ARMvcmp (v8f16 MQPR:$v1), (v8f16 MQPR:$v2), (i32 fc))),
                 (v8i1 (MVE_VCMPf16 (v8f16 MQPR:$v1), (v8f16 MQPR:$v2), fc))>;
   def f32 : Pat<(v4i1 (ARMvcmp (v4f32 MQPR:$v1), (v4f32 MQPR:$v2), (i32 fc))),
@@ -3322,6 +3347,11 @@ multiclass unpred_vcmpf_r<int fc> {
   def f32r : Pat<(v4i1 (ARMvcmp (v4f32 MQPR:$v1), (v4f32 (ARMvdup SPR:$v2)), (i32 fc))),
                  (v4i1 (MVE_VCMPf32r (v4f32 MQPR:$v1), (i32 (COPY_TO_REGCLASS (f32 SPR:$v2), rGPR)), fc))>;
 
+  def : Pat<(v8i1 (ARMvcmp (v8f16 (ARMvdup HPR:$v1)), (v8f16 MQPR:$v2), (i32 fc))),
+            (v8i1 (MVE_VCMPf16r (v8f16 MQPR:$v2), (i32 (COPY_TO_REGCLASS (f16 HPR:$v1), rGPR)), fcReversed))>;
+  def : Pat<(v4i1 (ARMvcmp (v4f32 (ARMvdup SPR:$v1)), (v4f32 MQPR:$v2), (i32 fc))),
+            (v4i1 (MVE_VCMPf32r (v4f32 MQPR:$v2), (i32 (COPY_TO_REGCLASS (f32 SPR:$v1), rGPR)), fcReversed))>;
+
   def : Pat<(v8i1 (and (v8i1 VCCR:$p1), (v8i1 (ARMvcmp (v8f16 MQPR:$v1), (v8f16 MQPR:$v2), (i32 fc))))),
             (v8i1 (MVE_VCMPf16 (v8f16 MQPR:$v1), (v8f16 MQPR:$v2), fc, 1, VCCR:$p1))>;
   def : Pat<(v4i1 (and (v4i1 VCCR:$p1), (v4i1 (ARMvcmp (v4f32 MQPR:$v1), (v4f32 MQPR:$v2), (i32 fc))))),
@@ -3331,6 +3361,11 @@ multiclass unpred_vcmpf_r<int fc> {
                 (v8i1 (MVE_VCMPf16r (v8f16 MQPR:$v1), (i32 (COPY_TO_REGCLASS (f16 HPR:$v2), rGPR)), fc, 1, VCCR:$p1))>;
   def : Pat<(v4i1 (and (v4i1 VCCR:$p1), (v4i1 (ARMvcmp (v4f32 MQPR:$v1), (v4f32 (ARMvdup SPR:$v2)), (i32 fc))))),
             (v4i1 (MVE_VCMPf32r (v4f32 MQPR:$v1), (i32 (COPY_TO_REGCLASS (f32 SPR:$v2), rGPR)), fc, 1, VCCR:$p1))>;
+
+  def : Pat<(v8i1 (and (v8i1 VCCR:$p1), (v8i1 (ARMvcmp (v8f16 (ARMvdup HPR:$v1)), (v8f16 MQPR:$v2), (i32 fc))))),
+            (v8i1 (MVE_VCMPf16r (v8f16 MQPR:$v2), (i32 (COPY_TO_REGCLASS (f16 HPR:$v1), rGPR)), fcReversed, 1, VCCR:$p1))>;
+  def : Pat<(v4i1 (and (v4i1 VCCR:$p1), (v4i1 (ARMvcmp (v4f32 (ARMvdup SPR:$v1)), (v4f32 MQPR:$v2), (i32 fc))))),
+            (v4i1 (MVE_VCMPf32r (v4f32 MQPR:$v2), (i32 (COPY_TO_REGCLASS (f32 SPR:$v1), rGPR)), fcReversed, 1, VCCR:$p1))>;
 }
 
 let Predicates = [HasMVEInt] in {
@@ -3343,12 +3378,12 @@ let Predicates = [HasMVEInt] in {
   defm MVE_VCGTUZ : unpred_vcmp_z<"u", 8>;
   defm MVE_VCGEUZ : unpred_vcmp_z<"u", 2>;
 
-  defm MVE_VCEQ   : unpred_vcmp_r<"i", 0>;
-  defm MVE_VCNE   : unpred_vcmp_r<"i", 1>;
-  defm MVE_VCGE   : unpred_vcmp_r<"s", 10>;
-  defm MVE_VCLT   : unpred_vcmp_r<"s", 11>;
-  defm MVE_VCGT   : unpred_vcmp_r<"s", 12>;
-  defm MVE_VCLE   : unpred_vcmp_r<"s", 13>;
+  defm MVE_VCEQ   : unpred_vcmp_r_reversible<"i", 0, 0>;
+  defm MVE_VCNE   : unpred_vcmp_r_reversible<"i", 1, 1>;
+  defm MVE_VCGE   : unpred_vcmp_r_reversible<"s", 10, 13>;
+  defm MVE_VCLT   : unpred_vcmp_r_reversible<"s", 11, 12>;
+  defm MVE_VCGT   : unpred_vcmp_r_reversible<"s", 12, 11>;
+  defm MVE_VCLE   : unpred_vcmp_r_reversible<"s", 13, 10>;
   defm MVE_VCGTU  : unpred_vcmp_r<"u", 8>;
   defm MVE_VCGEU  : unpred_vcmp_r<"u", 2>;
 }
@@ -3361,12 +3396,12 @@ let Predicates = [HasMVEFloat] in {
   defm MVE_VFCGTZ : unpred_vcmpf_z<12>;
   defm MVE_VFCLEZ : unpred_vcmpf_z<13>;
 
-  defm MVE_VFCEQ  : unpred_vcmpf_r<0>;
-  defm MVE_VFCNE  : unpred_vcmpf_r<1>;
-  defm MVE_VFCGE  : unpred_vcmpf_r<10>;
-  defm MVE_VFCLT  : unpred_vcmpf_r<11>;
-  defm MVE_VFCGT  : unpred_vcmpf_r<12>;
-  defm MVE_VFCLE  : unpred_vcmpf_r<13>;
+  defm MVE_VFCEQ  : unpred_vcmpf_r<0, 0>;
+  defm MVE_VFCNE  : unpred_vcmpf_r<1, 1>;
+  defm MVE_VFCGE  : unpred_vcmpf_r<10, 13>;
+  defm MVE_VFCLT  : unpred_vcmpf_r<11, 12>;
+  defm MVE_VFCGT  : unpred_vcmpf_r<12, 11>;
+  defm MVE_VFCLE  : unpred_vcmpf_r<13, 10>;
 }
```
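
For reference, the condition-code pairs passed as the new `fcReversed` argument can be read straight off the `defm` lines above. The sketch below is a hypothetical standalone helper (the function name and `std::optional` interface are mine, not an LLVM API) that simply tabulates those pairs:

```cpp
// Hypothetical helper (not an LLVM API): the fc -> reversed-fc pairs below
// mirror the extra argument passed to unpred_vcmp_r_reversible and
// unpred_vcmpf_r in this patch.
#include <optional>

std::optional<int> reverseMVECompareCode(int fc) {
  switch (fc) {
  case 0:  return 0;   // EQ -> EQ (symmetric)
  case 1:  return 1;   // NE -> NE (symmetric)
  case 10: return 13;  // GE -> LE
  case 11: return 12;  // LT -> GT
  case 12: return 11;  // GT -> LT
  case 13: return 10;  // LE -> GE
  default: return std::nullopt; // e.g. the unsigned CS (2) and HI (8) forms,
                                // which the patch deliberately leaves without
                                // reversed patterns.
  }
}
```

The unsigned comparisons (`MVE_VCGTU` with 8 and `MVE_VCGEU` with 2) stay on the plain `unpred_vcmp_r`, matching the comment in `unpred_vcmp_r_reversible` that CS and HI have no reversed forms.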

