| field | value | date |
|---|---|---|
| author | Cullen Rhodes <cullen.rhodes@arm.com> | 2019-12-06 10:04:23 +0000 |
| committer | Cullen Rhodes <cullen.rhodes@arm.com> | 2019-12-06 10:39:06 +0000 |
| commit | bb8c679f4bf2a2056b4fafb8b3a3d61254e39219 (patch) | |
| tree | da8e773996fdfd4356f4a9364ed79eb0954dde9b /llvm/lib | |
| parent | d8821adacbb02fe9f9707079087d35cb02ef4a6c (diff) | |
| download | bcm5719-llvm-bb8c679f4bf2a2056b4fafb8b3a3d61254e39219.tar.gz, bcm5719-llvm-bb8c679f4bf2a2056b4fafb8b3a3d61254e39219.zip | |
[AArch64][SVE] Implement integer compare intrinsics
Summary:
Adds intrinsics for the following (an illustrative IR call is sketched after the list):
* cmphs, cmphi
* cmpge, cmpgt
* cmpeq, cmpne
* cmplt, cmple
* cmplo, cmpls
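
As a concrete illustration, here is a minimal LLVM IR sketch (not part of the commit) calling the predicated `cmpgt` form; the `.nxv4i32` type mangling is assumed from the selection patterns added below:

```llvm
; Predicated signed greater-than on scalable <vscale x 4 x i32> vectors.
; Each active lane of %pg yields (%a > %b); inactive lanes are false.
define <vscale x 4 x i1> @cmpgt_example(<vscale x 4 x i1> %pg,
                                        <vscale x 4 x i32> %a,
                                        <vscale x 4 x i32> %b) {
  %cmp = call <vscale x 4 x i1> @llvm.aarch64.sve.cmpgt.nxv4i32(
                <vscale x 4 x i1> %pg,
                <vscale x 4 x i32> %a,
                <vscale x 4 x i32> %b)
  ret <vscale x 4 x i1> %cmp
}

declare <vscale x 4 x i1> @llvm.aarch64.sve.cmpgt.nxv4i32(<vscale x 4 x i1>,
                                                          <vscale x 4 x i32>,
                                                          <vscale x 4 x i32>)
```

With the `SVE_3_Op_Pat` patterns in this patch, this should select to a single `cmpgt p0.s, p0/z, z0.s, z1.s`.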
Includes a minor change to `TLI.getMemValueType` that fixes a crash due to the
scalable flag being dropped.
Reviewers: sdesmalen, efriedma, rengolin, rovka, dancgr, huntergr
Reviewed By: efriedma
Subscribers: tschuett, kristof.beyls, hiraditya, rkruppe, psnobl, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D70889
Diffstat (limited to 'llvm/lib')
| mode | file | lines changed |
|---|---|---|
| -rw-r--r-- | llvm/lib/Target/AArch64/AArch64InstrFormats.td | 7 |
| -rw-r--r-- | llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td | 56 |
| -rw-r--r-- | llvm/lib/Target/AArch64/SVEInstrFormats.td | 139 |

3 files changed, 168 insertions(+), 34 deletions(-)
```diff
diff --git a/llvm/lib/Target/AArch64/AArch64InstrFormats.td b/llvm/lib/Target/AArch64/AArch64InstrFormats.td
index fee825422ca..ed1ac25bddd 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrFormats.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrFormats.td
@@ -726,6 +726,13 @@ def imm0_127 : Operand<i32>, ImmLeaf<i32, [{
   let PrintMethod = "printImm";
 }
 
+def imm0_127_64b : Operand<i64>, ImmLeaf<i64, [{
+  return ((uint64_t)Imm) < 128;
+}]> {
+  let ParserMatchClass = Imm0_127Operand;
+  let PrintMethod = "printImm";
+}
+
 // NOTE: These imm0_N operands have to be of type i64 because i64 is the size
 // for all shift-amounts.
diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
index 0fb74f04984..e374ce4edd7 100644
--- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -735,34 +735,34 @@ let Predicates = [HasSVE] in {
   defm TRN1_PPP : sve_int_perm_bin_perm_pp<0b100, "trn1">;
   defm TRN2_PPP : sve_int_perm_bin_perm_pp<0b101, "trn2">;
 
-  defm CMPHS_PPzZZ : sve_int_cmp_0<0b000, "cmphs">;
-  defm CMPHI_PPzZZ : sve_int_cmp_0<0b001, "cmphi">;
-  defm CMPGE_PPzZZ : sve_int_cmp_0<0b100, "cmpge">;
-  defm CMPGT_PPzZZ : sve_int_cmp_0<0b101, "cmpgt">;
-  defm CMPEQ_PPzZZ : sve_int_cmp_0<0b110, "cmpeq">;
-  defm CMPNE_PPzZZ : sve_int_cmp_0<0b111, "cmpne">;
-
-  defm CMPEQ_WIDE_PPzZZ : sve_int_cmp_0_wide<0b010, "cmpeq">;
-  defm CMPNE_WIDE_PPzZZ : sve_int_cmp_0_wide<0b011, "cmpne">;
-  defm CMPGE_WIDE_PPzZZ : sve_int_cmp_1_wide<0b000, "cmpge">;
-  defm CMPGT_WIDE_PPzZZ : sve_int_cmp_1_wide<0b001, "cmpgt">;
-  defm CMPLT_WIDE_PPzZZ : sve_int_cmp_1_wide<0b010, "cmplt">;
-  defm CMPLE_WIDE_PPzZZ : sve_int_cmp_1_wide<0b011, "cmple">;
-  defm CMPHS_WIDE_PPzZZ : sve_int_cmp_1_wide<0b100, "cmphs">;
-  defm CMPHI_WIDE_PPzZZ : sve_int_cmp_1_wide<0b101, "cmphi">;
-  defm CMPLO_WIDE_PPzZZ : sve_int_cmp_1_wide<0b110, "cmplo">;
-  defm CMPLS_WIDE_PPzZZ : sve_int_cmp_1_wide<0b111, "cmpls">;
-
-  defm CMPGE_PPzZI : sve_int_scmp_vi<0b000, "cmpge">;
-  defm CMPGT_PPzZI : sve_int_scmp_vi<0b001, "cmpgt">;
-  defm CMPLT_PPzZI : sve_int_scmp_vi<0b010, "cmplt">;
-  defm CMPLE_PPzZI : sve_int_scmp_vi<0b011, "cmple">;
-  defm CMPEQ_PPzZI : sve_int_scmp_vi<0b100, "cmpeq">;
-  defm CMPNE_PPzZI : sve_int_scmp_vi<0b101, "cmpne">;
-  defm CMPHS_PPzZI : sve_int_ucmp_vi<0b00, "cmphs">;
-  defm CMPHI_PPzZI : sve_int_ucmp_vi<0b01, "cmphi">;
-  defm CMPLO_PPzZI : sve_int_ucmp_vi<0b10, "cmplo">;
-  defm CMPLS_PPzZI : sve_int_ucmp_vi<0b11, "cmpls">;
+  defm CMPHS_PPzZZ : sve_int_cmp_0<0b000, "cmphs", int_aarch64_sve_cmphs, SETUGE>;
+  defm CMPHI_PPzZZ : sve_int_cmp_0<0b001, "cmphi", int_aarch64_sve_cmphi, SETUGT>;
+  defm CMPGE_PPzZZ : sve_int_cmp_0<0b100, "cmpge", int_aarch64_sve_cmpge, SETGE>;
+  defm CMPGT_PPzZZ : sve_int_cmp_0<0b101, "cmpgt", int_aarch64_sve_cmpgt, SETGT>;
+  defm CMPEQ_PPzZZ : sve_int_cmp_0<0b110, "cmpeq", int_aarch64_sve_cmpeq, SETEQ>;
+  defm CMPNE_PPzZZ : sve_int_cmp_0<0b111, "cmpne", int_aarch64_sve_cmpne, SETNE>;
+
+  defm CMPEQ_WIDE_PPzZZ : sve_int_cmp_0_wide<0b010, "cmpeq", int_aarch64_sve_cmpeq_wide>;
+  defm CMPNE_WIDE_PPzZZ : sve_int_cmp_0_wide<0b011, "cmpne", int_aarch64_sve_cmpne_wide>;
+  defm CMPGE_WIDE_PPzZZ : sve_int_cmp_1_wide<0b000, "cmpge", int_aarch64_sve_cmpge_wide>;
+  defm CMPGT_WIDE_PPzZZ : sve_int_cmp_1_wide<0b001, "cmpgt", int_aarch64_sve_cmpgt_wide>;
+  defm CMPLT_WIDE_PPzZZ : sve_int_cmp_1_wide<0b010, "cmplt", int_aarch64_sve_cmplt_wide>;
+  defm CMPLE_WIDE_PPzZZ : sve_int_cmp_1_wide<0b011, "cmple", int_aarch64_sve_cmple_wide>;
+  defm CMPHS_WIDE_PPzZZ : sve_int_cmp_1_wide<0b100, "cmphs", int_aarch64_sve_cmphs_wide>;
+  defm CMPHI_WIDE_PPzZZ : sve_int_cmp_1_wide<0b101, "cmphi", int_aarch64_sve_cmphi_wide>;
+  defm CMPLO_WIDE_PPzZZ : sve_int_cmp_1_wide<0b110, "cmplo", int_aarch64_sve_cmplo_wide>;
+  defm CMPLS_WIDE_PPzZZ : sve_int_cmp_1_wide<0b111, "cmpls", int_aarch64_sve_cmpls_wide>;
+
+  defm CMPGE_PPzZI : sve_int_scmp_vi<0b000, "cmpge", SETGE, int_aarch64_sve_cmpge>;
+  defm CMPGT_PPzZI : sve_int_scmp_vi<0b001, "cmpgt", SETGT, int_aarch64_sve_cmpgt>;
+  defm CMPLT_PPzZI : sve_int_scmp_vi<0b010, "cmplt", SETLT, null_frag, int_aarch64_sve_cmpgt>;
+  defm CMPLE_PPzZI : sve_int_scmp_vi<0b011, "cmple", SETLE, null_frag, int_aarch64_sve_cmpge>;
+  defm CMPEQ_PPzZI : sve_int_scmp_vi<0b100, "cmpeq", SETEQ, int_aarch64_sve_cmpeq>;
+  defm CMPNE_PPzZI : sve_int_scmp_vi<0b101, "cmpne", SETNE, int_aarch64_sve_cmpne>;
+  defm CMPHS_PPzZI : sve_int_ucmp_vi<0b00, "cmphs", SETUGE, int_aarch64_sve_cmphs>;
+  defm CMPHI_PPzZI : sve_int_ucmp_vi<0b01, "cmphi", SETUGT, int_aarch64_sve_cmphi>;
+  defm CMPLO_PPzZI : sve_int_ucmp_vi<0b10, "cmplo", SETULT, null_frag, int_aarch64_sve_cmphi>;
+  defm CMPLS_PPzZI : sve_int_ucmp_vi<0b11, "cmpls", SETULE, null_frag, int_aarch64_sve_cmphs>;
 
   defm FCMGE_PPzZZ : sve_fp_3op_p_pd<0b000, "fcmge", int_aarch64_sve_fcmpge>;
   defm FCMGT_PPzZZ : sve_fp_3op_p_pd<0b001, "fcmgt", int_aarch64_sve_fcmpgt>;
diff --git a/llvm/lib/Target/AArch64/SVEInstrFormats.td b/llvm/lib/Target/AArch64/SVEInstrFormats.td
index 2581f611df2..5b0809aa7b5 100644
--- a/llvm/lib/Target/AArch64/SVEInstrFormats.td
+++ b/llvm/lib/Target/AArch64/SVEInstrFormats.td
@@ -3537,23 +3537,37 @@ class sve_int_cmp<bit cmp_1, bits<2> sz8_64, bits<3> opc, string asm,
   let Defs = [NZCV];
 }
 
-multiclass sve_int_cmp_0<bits<3> opc, string asm> {
+multiclass sve_int_cmp_0<bits<3> opc, string asm, SDPatternOperator op,
+                         CondCode cc> {
   def _B : sve_int_cmp<0b0, 0b00, opc, asm, PPR8, ZPR8, ZPR8>;
   def _H : sve_int_cmp<0b0, 0b01, opc, asm, PPR16, ZPR16, ZPR16>;
   def _S : sve_int_cmp<0b0, 0b10, opc, asm, PPR32, ZPR32, ZPR32>;
   def _D : sve_int_cmp<0b0, 0b11, opc, asm, PPR64, ZPR64, ZPR64>;
+
+  def : SVE_3_Op_Pat<nxv16i1, op, nxv16i1, nxv16i8, nxv16i8, !cast<Instruction>(NAME # _B)>;
+  def : SVE_3_Op_Pat<nxv8i1,  op, nxv8i1,  nxv8i16, nxv8i16, !cast<Instruction>(NAME # _H)>;
+  def : SVE_3_Op_Pat<nxv4i1,  op, nxv4i1,  nxv4i32, nxv4i32, !cast<Instruction>(NAME # _S)>;
+  def : SVE_3_Op_Pat<nxv2i1,  op, nxv2i1,  nxv2i64, nxv2i64, !cast<Instruction>(NAME # _D)>;
 }
 
-multiclass sve_int_cmp_0_wide<bits<3> opc, string asm> {
+multiclass sve_int_cmp_0_wide<bits<3> opc, string asm, SDPatternOperator op> {
   def _B : sve_int_cmp<0b0, 0b00, opc, asm, PPR8, ZPR8, ZPR64>;
   def _H : sve_int_cmp<0b0, 0b01, opc, asm, PPR16, ZPR16, ZPR64>;
   def _S : sve_int_cmp<0b0, 0b10, opc, asm, PPR32, ZPR32, ZPR64>;
+
+  def : SVE_3_Op_Pat<nxv16i1, op, nxv16i1, nxv16i8, nxv2i64, !cast<Instruction>(NAME # _B)>;
+  def : SVE_3_Op_Pat<nxv8i1,  op, nxv8i1,  nxv8i16, nxv2i64, !cast<Instruction>(NAME # _H)>;
+  def : SVE_3_Op_Pat<nxv4i1,  op, nxv4i1,  nxv4i32, nxv2i64, !cast<Instruction>(NAME # _S)>;
 }
 
-multiclass sve_int_cmp_1_wide<bits<3> opc, string asm> {
+multiclass sve_int_cmp_1_wide<bits<3> opc, string asm, SDPatternOperator op> {
   def _B : sve_int_cmp<0b1, 0b00, opc, asm, PPR8, ZPR8, ZPR64>;
   def _H : sve_int_cmp<0b1, 0b01, opc, asm, PPR16, ZPR16, ZPR64>;
   def _S : sve_int_cmp<0b1, 0b10, opc, asm, PPR32, ZPR32, ZPR64>;
+
+  def : SVE_3_Op_Pat<nxv16i1, op, nxv16i1, nxv16i8, nxv2i64, !cast<Instruction>(NAME # _B)>;
+  def : SVE_3_Op_Pat<nxv8i1,  op, nxv8i1,  nxv8i16, nxv2i64, !cast<Instruction>(NAME # _H)>;
+  def : SVE_3_Op_Pat<nxv4i1,  op, nxv4i1,  nxv4i32, nxv2i64, !cast<Instruction>(NAME # _S)>;
 }
@@ -3585,13 +3599,70 @@ class sve_int_scmp_vi<bits<2> sz8_64, bits<3> opc, string asm, PPRRegOp pprty,
   let Inst{3-0} = Pd;
 
   let Defs = [NZCV];
+  let ElementSize = pprty.ElementSize;
 }
 
-multiclass sve_int_scmp_vi<bits<3> opc, string asm> {
+multiclass sve_int_scmp_vi<bits<3> opc, string asm, CondCode cc,
+                           SDPatternOperator op = null_frag,
+                           SDPatternOperator inv_op = null_frag> {
   def _B : sve_int_scmp_vi<0b00, opc, asm, PPR8, ZPR8, simm5_32b>;
   def _H : sve_int_scmp_vi<0b01, opc, asm, PPR16, ZPR16, simm5_32b>;
   def _S : sve_int_scmp_vi<0b10, opc, asm, PPR32, ZPR32, simm5_32b>;
   def _D : sve_int_scmp_vi<0b11, opc, asm, PPR64, ZPR64, simm5_64b>;
+
+  // IR version
+  def : Pat<(nxv16i1 (setcc (nxv16i8 ZPR:$Zs1),
+                            (nxv16i8 (AArch64dup (simm5_32b:$imm))),
+                            cc)),
+            (!cast<Instruction>(NAME # "_B") (PTRUE_B 31), ZPR:$Zs1, simm5_32b:$imm)>;
+  def : Pat<(nxv8i1 (setcc (nxv8i16 ZPR:$Zs1),
+                           (nxv8i16 (AArch64dup (simm5_32b:$imm))),
+                           cc)),
+            (!cast<Instruction>(NAME # "_H") (PTRUE_H 31), ZPR:$Zs1, simm5_32b:$imm)>;
+  def : Pat<(nxv4i1 (setcc (nxv4i32 ZPR:$Zs1),
+                           (nxv4i32 (AArch64dup (simm5_32b:$imm))),
+                           cc)),
+            (!cast<Instruction>(NAME # "_S") (PTRUE_S 31), ZPR:$Zs1, simm5_32b:$imm)>;
+  def : Pat<(nxv2i1 (setcc (nxv2i64 ZPR:$Zs1),
+                           (nxv2i64 (AArch64dup (simm5_64b:$imm))),
+                           cc)),
+            (!cast<Instruction>(NAME # "_D") (PTRUE_D 31), ZPR:$Zs1, simm5_64b:$imm)>;
+
+  // Intrinsic version
+  def : Pat<(nxv16i1 (op (nxv16i1 PPR_3b:$Pg),
+                         (nxv16i8 ZPR:$Zs1),
+                         (nxv16i8 (AArch64dup (simm5_32b:$imm))))),
+            (!cast<Instruction>(NAME # "_B") PPR_3b:$Pg, ZPR:$Zs1, simm5_32b:$imm)>;
+  def : Pat<(nxv8i1 (op (nxv8i1 PPR_3b:$Pg),
+                        (nxv8i16 ZPR:$Zs1),
+                        (nxv8i16 (AArch64dup (simm5_32b:$imm))))),
+            (!cast<Instruction>(NAME # "_H") PPR_3b:$Pg, ZPR:$Zs1, simm5_32b:$imm)>;
+  def : Pat<(nxv4i1 (op (nxv4i1 PPR_3b:$Pg),
+                        (nxv4i32 ZPR:$Zs1),
+                        (nxv4i32 (AArch64dup (simm5_32b:$imm))))),
+            (!cast<Instruction>(NAME # "_S") PPR_3b:$Pg, ZPR:$Zs1, simm5_32b:$imm)>;
+  def : Pat<(nxv2i1 (op (nxv2i1 PPR_3b:$Pg),
+                        (nxv2i64 ZPR:$Zs1),
+                        (nxv2i64 (AArch64dup (simm5_64b:$imm))))),
+            (!cast<Instruction>(NAME # "_D") PPR_3b:$Pg, ZPR:$Zs1, simm5_64b:$imm)>;
+
+  // Inverted intrinsic version
+  def : Pat<(nxv16i1 (inv_op (nxv16i1 PPR_3b:$Pg),
+                             (nxv16i8 (AArch64dup (simm5_32b:$imm))),
+                             (nxv16i8 ZPR:$Zs1))),
+            (!cast<Instruction>(NAME # "_B") PPR_3b:$Pg, ZPR:$Zs1, simm5_32b:$imm)>;
+  def : Pat<(nxv8i1 (inv_op (nxv8i1 PPR_3b:$Pg),
+                            (nxv8i16 (AArch64dup (simm5_32b:$imm))),
+                            (nxv8i16 ZPR:$Zs1))),
+            (!cast<Instruction>(NAME # "_H") PPR_3b:$Pg, ZPR:$Zs1, simm5_32b:$imm)>;
+  def : Pat<(nxv4i1 (inv_op (nxv4i1 PPR_3b:$Pg),
+                            (nxv4i32 (AArch64dup (simm5_32b:$imm))),
+                            (nxv4i32 ZPR:$Zs1))),
+            (!cast<Instruction>(NAME # "_S") PPR_3b:$Pg, ZPR:$Zs1, simm5_32b:$imm)>;
+  def : Pat<(nxv2i1 (inv_op (nxv2i1 PPR_3b:$Pg),
+                            (nxv2i64 (AArch64dup (simm5_64b:$imm))),
+                            (nxv2i64 ZPR:$Zs1))),
+            (!cast<Instruction>(NAME # "_D") PPR_3b:$Pg, ZPR:$Zs1, simm5_64b:$imm)>;
 }
@@ -3622,11 +3693,67 @@ class sve_int_ucmp_vi<bits<2> sz8_64, bits<2> opc, string asm, PPRRegOp pprty,
   let Defs = [NZCV];
 }
 
-multiclass sve_int_ucmp_vi<bits<2> opc, string asm> {
+multiclass sve_int_ucmp_vi<bits<2> opc, string asm, CondCode cc,
+                           SDPatternOperator op = null_frag,
+                           SDPatternOperator inv_op = null_frag> {
   def _B : sve_int_ucmp_vi<0b00, opc, asm, PPR8, ZPR8, imm0_127>;
   def _H : sve_int_ucmp_vi<0b01, opc, asm, PPR16, ZPR16, imm0_127>;
   def _S : sve_int_ucmp_vi<0b10, opc, asm, PPR32, ZPR32, imm0_127>;
-  def _D : sve_int_ucmp_vi<0b11, opc, asm, PPR64, ZPR64, imm0_127>;
+  def _D : sve_int_ucmp_vi<0b11, opc, asm, PPR64, ZPR64, imm0_127_64b>;
+
+  // IR version
+  def : Pat<(nxv16i1 (setcc (nxv16i8 ZPR:$Zs1),
+                            (nxv16i8 (AArch64dup (imm0_127:$imm))),
+                            cc)),
+            (!cast<Instruction>(NAME # "_B") (PTRUE_B 31), ZPR:$Zs1, imm0_127:$imm)>;
+  def : Pat<(nxv8i1 (setcc (nxv8i16 ZPR:$Zs1),
+                           (nxv8i16 (AArch64dup (imm0_127:$imm))),
+                           cc)),
+            (!cast<Instruction>(NAME # "_H") (PTRUE_H 31), ZPR:$Zs1, imm0_127:$imm)>;
+  def : Pat<(nxv4i1 (setcc (nxv4i32 ZPR:$Zs1),
+                           (nxv4i32 (AArch64dup (imm0_127:$imm))),
+                           cc)),
+            (!cast<Instruction>(NAME # "_S") (PTRUE_S 31), ZPR:$Zs1, imm0_127:$imm)>;
+  def : Pat<(nxv2i1 (setcc (nxv2i64 ZPR:$Zs1),
+                           (nxv2i64 (AArch64dup (imm0_127_64b:$imm))),
+                           cc)),
+            (!cast<Instruction>(NAME # "_D") (PTRUE_D 31), ZPR:$Zs1, imm0_127_64b:$imm)>;
+
+  // Intrinsic version
+  def : Pat<(nxv16i1 (op (nxv16i1 PPR_3b:$Pg),
+                         (nxv16i8 ZPR:$Zs1),
+                         (nxv16i8 (AArch64dup (imm0_127:$imm))))),
+            (!cast<Instruction>(NAME # "_B") PPR_3b:$Pg, ZPR:$Zs1, imm0_127:$imm)>;
+  def : Pat<(nxv8i1 (op (nxv8i1 PPR_3b:$Pg),
+                        (nxv8i16 ZPR:$Zs1),
+                        (nxv8i16 (AArch64dup (imm0_127:$imm))))),
+            (!cast<Instruction>(NAME # "_H") PPR_3b:$Pg, ZPR:$Zs1, imm0_127:$imm)>;
+  def : Pat<(nxv4i1 (op (nxv4i1 PPR_3b:$Pg),
+                        (nxv4i32 ZPR:$Zs1),
+                        (nxv4i32 (AArch64dup (imm0_127:$imm))))),
+            (!cast<Instruction>(NAME # "_S") PPR_3b:$Pg, ZPR:$Zs1, imm0_127:$imm)>;
+  def : Pat<(nxv2i1 (op (nxv2i1 PPR_3b:$Pg),
+                        (nxv2i64 ZPR:$Zs1),
+                        (nxv2i64 (AArch64dup (imm0_127_64b:$imm))))),
+            (!cast<Instruction>(NAME # "_D") PPR_3b:$Pg, ZPR:$Zs1, imm0_127_64b:$imm)>;
+
+  // Inverted intrinsic version
+  def : Pat<(nxv16i1 (inv_op (nxv16i1 PPR_3b:$Pg),
+                             (nxv16i8 (AArch64dup (imm0_127:$imm))),
+                             (nxv16i8 ZPR:$Zs1))),
+            (!cast<Instruction>(NAME # "_B") PPR_3b:$Pg, ZPR:$Zs1, imm0_127:$imm)>;
+  def : Pat<(nxv8i1 (inv_op (nxv8i1 PPR_3b:$Pg),
+                            (nxv8i16 (AArch64dup (imm0_127:$imm))),
+                            (nxv8i16 ZPR:$Zs1))),
+            (!cast<Instruction>(NAME # "_H") PPR_3b:$Pg, ZPR:$Zs1, imm0_127:$imm)>;
+  def : Pat<(nxv4i1 (inv_op (nxv4i1 PPR_3b:$Pg),
+                            (nxv4i32 (AArch64dup (imm0_127:$imm))),
+                            (nxv4i32 ZPR:$Zs1))),
+            (!cast<Instruction>(NAME # "_S") PPR_3b:$Pg, ZPR:$Zs1, imm0_127:$imm)>;
+  def : Pat<(nxv2i1 (inv_op (nxv2i1 PPR_3b:$Pg),
+                            (nxv2i64 (AArch64dup (imm0_127_64b:$imm))),
+                            (nxv2i64 ZPR:$Zs1))),
+            (!cast<Instruction>(NAME # "_D") PPR_3b:$Pg, ZPR:$Zs1, imm0_127_64b:$imm)>;
 }
```
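
Two details of the patterns above may be easier to see in IR form. The `_wide` variants compare narrow lanes against 64-bit elements, so the second vector operand is always `nxv2i64`; and the immediate forms are also reachable through `inv_op`: for example `CMPLT_PPzZI` passes `int_aarch64_sve_cmpgt` as `inv_op`, so a `cmpgt` whose first operand is a splatted immediate can fold to `cmplt ..., #imm`. A hedged sketch (intrinsic manglings assumed from the patterns, not quoted from this commit):

```llvm
; Wide form: each i8 lane of %a is compared against the i64 lane of %b
; occupying the same doubleword (name assumed from int_aarch64_sve_cmpgt_wide).
define <vscale x 16 x i1> @cmpgt_wide_example(<vscale x 16 x i1> %pg,
                                              <vscale x 16 x i8> %a,
                                              <vscale x 2 x i64> %b) {
  %cmp = call <vscale x 16 x i1> @llvm.aarch64.sve.cmpgt.wide.nxv16i8(
                <vscale x 16 x i1> %pg,
                <vscale x 16 x i8> %a,
                <vscale x 2 x i64> %b)
  ret <vscale x 16 x i1> %cmp
}

; Inverted immediate form: splat(5) > %a is the same as %a < 5, which the
; "Inverted intrinsic version" patterns should select to cmplt ..., #5.
define <vscale x 4 x i1> @cmplt_imm_example(<vscale x 4 x i1> %pg,
                                            <vscale x 4 x i32> %a) {
  %ins   = insertelement <vscale x 4 x i32> undef, i32 5, i32 0
  %splat = shufflevector <vscale x 4 x i32> %ins, <vscale x 4 x i32> undef,
                         <vscale x 4 x i32> zeroinitializer
  %cmp   = call <vscale x 4 x i1> @llvm.aarch64.sve.cmpgt.nxv4i32(
                  <vscale x 4 x i1> %pg,
                  <vscale x 4 x i32> %splat,
                  <vscale x 4 x i32> %a)
  ret <vscale x 4 x i1> %cmp
}

declare <vscale x 16 x i1> @llvm.aarch64.sve.cmpgt.wide.nxv16i8(<vscale x 16 x i1>,
                                                                <vscale x 16 x i8>,
                                                                <vscale x 2 x i64>)
declare <vscale x 4 x i1> @llvm.aarch64.sve.cmpgt.nxv4i32(<vscale x 4 x i1>,
                                                          <vscale x 4 x i32>,
                                                          <vscale x 4 x i32>)
```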

