diff options
| author | Vladimir Sukharev <vladimir.sukharev@arm.com> | 2015-03-26 18:29:02 +0000 |
|---|---|---|
| committer | Vladimir Sukharev <vladimir.sukharev@arm.com> | 2015-03-26 18:29:02 +0000 |
| commit | 4b18c727a27156e0f92f8af442894dd078b464f0 (patch) | |
| tree | e3800ed68bdc728eba1d735cb79a9bf699805062 /llvm/lib/Target/ARM | |
| parent | edc71abedd4d32750f7d08751af12a873134d32b (diff) | |
| download | bcm5719-llvm-4b18c727a27156e0f92f8af442894dd078b464f0.tar.gz bcm5719-llvm-4b18c727a27156e0f92f8af442894dd078b464f0.zip | |
[ARM] Add v8.1a "Rounding Double Multiply Add/Subtract" extension
Reviewers: t.p.northover
Subscribers: llvm-commits
Differential Revision: http://reviews.llvm.org/D8503
llvm-svn: 233301
Diffstat (limited to 'llvm/lib/Target/ARM')
| -rw-r--r-- | llvm/lib/Target/ARM/ARMInstrInfo.td | 2 | ||||
| -rw-r--r-- | llvm/lib/Target/ARM/ARMInstrNEON.td | 180 |
2 files changed, 168 insertions, 14 deletions
diff --git a/llvm/lib/Target/ARM/ARMInstrInfo.td b/llvm/lib/Target/ARM/ARMInstrInfo.td index e9e8b8fa620..c3984cae2db 100644 --- a/llvm/lib/Target/ARM/ARMInstrInfo.td +++ b/llvm/lib/Target/ARM/ARMInstrInfo.td @@ -226,6 +226,8 @@ def HasCrypto : Predicate<"Subtarget->hasCrypto()">, AssemblerPredicate<"FeatureCrypto", "crypto">; def HasCRC : Predicate<"Subtarget->hasCRC()">, AssemblerPredicate<"FeatureCRC", "crc">; +def HasV8_1a : Predicate<"Subtarget->hasV8_1a()">, + AssemblerPredicate<"FeatureV8_1a", "v8.1a">; def HasFP16 : Predicate<"Subtarget->hasFP16()">, AssemblerPredicate<"FeatureFP16","half-float">; def HasDivide : Predicate<"Subtarget->hasDivide()">, diff --git a/llvm/lib/Target/ARM/ARMInstrNEON.td b/llvm/lib/Target/ARM/ARMInstrNEON.td index 655fc812f00..a6a07a8f02e 100644 --- a/llvm/lib/Target/ARM/ARMInstrNEON.td +++ b/llvm/lib/Target/ARM/ARMInstrNEON.td @@ -2790,7 +2790,7 @@ class N3VDMulOpSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin, imm:$lane)))))))]>; class N3VDMulOpSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin, string OpcodeStr, string Dt, - ValueType Ty, SDNode MulOp, SDNode ShOp> + ValueType Ty, SDPatternOperator MulOp, SDPatternOperator ShOp> : N3VLane16<0, 1, op21_20, op11_8, 1, 0, (outs DPR:$Vd), (ins DPR:$src1, DPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane), @@ -2826,7 +2826,7 @@ class N3VQMulOpSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin, class N3VQMulOpSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin, string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy, - SDNode MulOp, SDNode ShOp> + SDPatternOperator MulOp, SDPatternOperator ShOp> : N3VLane16<1, 1, op21_20, op11_8, 1, 0, (outs QPR:$Vd), (ins QPR:$src1, QPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane), @@ -3674,7 +3674,7 @@ multiclass N3VMulOp_QHS<bit op24, bit op23, bits<4> op11_8, bit op4, multiclass N3VMulOpSL_HS<bits<4> op11_8, InstrItinClass itinD16, InstrItinClass itinD32, InstrItinClass itinQ16, InstrItinClass itinQ32, - string OpcodeStr, string Dt, SDNode ShOp> { + string OpcodeStr, string Dt, SDPatternOperator ShOp> { def v4i16 : N3VDMulOpSL16<0b01, op11_8, itinD16, OpcodeStr, !strconcat(Dt, "16"), v4i16, mul, ShOp>; def v2i32 : N3VDMulOpSL<0b10, op11_8, itinD32, @@ -3711,27 +3711,38 @@ multiclass N3VIntOp_QHS<bit op24, bit op23, bits<4> op11_8, bit op4, } // Neon 3-argument intrinsics, -// element sizes of 8, 16 and 32 bits: -multiclass N3VInt3_QHS<bit op24, bit op23, bits<4> op11_8, bit op4, - InstrItinClass itinD, InstrItinClass itinQ, +// element sizes of 16 and 32 bits: +multiclass N3VInt3_HS<bit op24, bit op23, bits<4> op11_8, bit op4, + InstrItinClass itinD16, InstrItinClass itinD32, + InstrItinClass itinQ16, InstrItinClass itinQ32, string OpcodeStr, string Dt, SDPatternOperator IntOp> { // 64-bit vector types. - def v8i8 : N3VDInt3<op24, op23, 0b00, op11_8, op4, itinD, - OpcodeStr, !strconcat(Dt, "8"), v8i8, v8i8, IntOp>; - def v4i16 : N3VDInt3<op24, op23, 0b01, op11_8, op4, itinD, + def v4i16 : N3VDInt3<op24, op23, 0b01, op11_8, op4, itinD16, OpcodeStr, !strconcat(Dt, "16"), v4i16, v4i16, IntOp>; - def v2i32 : N3VDInt3<op24, op23, 0b10, op11_8, op4, itinD, + def v2i32 : N3VDInt3<op24, op23, 0b10, op11_8, op4, itinD32, OpcodeStr, !strconcat(Dt, "32"), v2i32, v2i32, IntOp>; // 128-bit vector types. - def v16i8 : N3VQInt3<op24, op23, 0b00, op11_8, op4, itinQ, - OpcodeStr, !strconcat(Dt, "8"), v16i8, v16i8, IntOp>; - def v8i16 : N3VQInt3<op24, op23, 0b01, op11_8, op4, itinQ, + def v8i16 : N3VQInt3<op24, op23, 0b01, op11_8, op4, itinQ16, OpcodeStr, !strconcat(Dt, "16"), v8i16, v8i16, IntOp>; - def v4i32 : N3VQInt3<op24, op23, 0b10, op11_8, op4, itinQ, + def v4i32 : N3VQInt3<op24, op23, 0b10, op11_8, op4, itinQ32, OpcodeStr, !strconcat(Dt, "32"), v4i32, v4i32, IntOp>; } +// element sizes of 8, 16 and 32 bits: +multiclass N3VInt3_QHS<bit op24, bit op23, bits<4> op11_8, bit op4, + InstrItinClass itinD16, InstrItinClass itinD32, + InstrItinClass itinQ16, InstrItinClass itinQ32, + string OpcodeStr, string Dt, SDPatternOperator IntOp> + :N3VInt3_HS <op24, op23, op11_8, op4, itinD16, itinD32, + itinQ16, itinQ32, OpcodeStr, Dt, IntOp>{ + // 64-bit vector types. + def v8i8 : N3VDInt3<op24, op23, 0b00, op11_8, op4, itinD16, + OpcodeStr, !strconcat(Dt, "8"), v8i8, v8i8, IntOp>; + // 128-bit vector types. + def v16i8 : N3VQInt3<op24, op23, 0b00, op11_8, op4, itinQ16, + OpcodeStr, !strconcat(Dt, "8"), v16i8, v16i8, IntOp>; +} // Neon Long Multiply-Op vector operations, // element sizes of 8, 16 and 32 bits: @@ -4305,6 +4316,147 @@ defm VMLALu : N3VLMulOp_QHS<1,1,0b1000,0, IIC_VMACi16D, IIC_VMACi32D, defm VMLALsls : N3VLMulOpSL_HS<0, 0b0010, "vmlal", "s", NEONvmulls, add>; defm VMLALslu : N3VLMulOpSL_HS<1, 0b0010, "vmlal", "u", NEONvmullu, add>; +let Predicates = [HasNEON, HasV8_1a] in { + // v8.1a Neon Rounding Double Multiply-Op vector operations, + // VQRDMLAH : Vector Saturating Rounding Doubling Multiply Accumulate Long + // (Q += D * D) + defm VQRDMLAH : N3VInt3_HS<1, 0, 0b1011, 1, IIC_VMACi16D, IIC_VMACi32D, + IIC_VMACi16Q, IIC_VMACi32Q, "vqrdmlah", "s", + null_frag>; + def : Pat<(v4i16 (int_arm_neon_vqadds + (v4i16 DPR:$src1), + (v4i16 (int_arm_neon_vqrdmulh (v4i16 DPR:$Vn), + (v4i16 DPR:$Vm))))), + (v4i16 (VQRDMLAHv4i16 DPR:$src1, DPR:$Vn, DPR:$Vm))>; + def : Pat<(v2i32 (int_arm_neon_vqadds + (v2i32 DPR:$src1), + (v2i32 (int_arm_neon_vqrdmulh (v2i32 DPR:$Vn), + (v2i32 DPR:$Vm))))), + (v2i32 (VQRDMLAHv2i32 DPR:$src1, DPR:$Vn, DPR:$Vm))>; + def : Pat<(v8i16 (int_arm_neon_vqadds + (v8i16 QPR:$src1), + (v8i16 (int_arm_neon_vqrdmulh (v8i16 QPR:$Vn), + (v8i16 QPR:$Vm))))), + (v8i16 (VQRDMLAHv8i16 QPR:$src1, QPR:$Vn, QPR:$Vm))>; + def : Pat<(v4i32 (int_arm_neon_vqadds + (v4i32 QPR:$src1), + (v4i32 (int_arm_neon_vqrdmulh (v4i32 QPR:$Vn), + (v4i32 QPR:$Vm))))), + (v4i32 (VQRDMLAHv4i32 QPR:$src1, QPR:$Vn, QPR:$Vm))>; + + defm VQRDMLAHsl : N3VMulOpSL_HS<0b1110, IIC_VMACi16D, IIC_VMACi32D, + IIC_VMACi16Q, IIC_VMACi32Q, "vqrdmlah", "s", + null_frag>; + def : Pat<(v4i16 (int_arm_neon_vqadds + (v4i16 DPR:$src1), + (v4i16 (int_arm_neon_vqrdmulh + (v4i16 DPR:$Vn), + (v4i16 (NEONvduplane (v4i16 DPR_8:$Vm), + imm:$lane)))))), + (v4i16 (VQRDMLAHslv4i16 DPR:$src1, DPR:$Vn, DPR_8:$Vm, + imm:$lane))>; + def : Pat<(v2i32 (int_arm_neon_vqadds + (v2i32 DPR:$src1), + (v2i32 (int_arm_neon_vqrdmulh + (v2i32 DPR:$Vn), + (v2i32 (NEONvduplane (v2i32 DPR_VFP2:$Vm), + imm:$lane)))))), + (v2i32 (VQRDMLAHslv2i32 DPR:$src1, DPR:$Vn, DPR_VFP2:$Vm, + imm:$lane))>; + def : Pat<(v8i16 (int_arm_neon_vqadds + (v8i16 QPR:$src1), + (v8i16 (int_arm_neon_vqrdmulh + (v8i16 QPR:$src2), + (v8i16 (NEONvduplane (v8i16 QPR:$src3), + imm:$lane)))))), + (v8i16 (VQRDMLAHslv8i16 (v8i16 QPR:$src1), + (v8i16 QPR:$src2), + (v4i16 (EXTRACT_SUBREG + QPR:$src3, + (DSubReg_i16_reg imm:$lane))), + (SubReg_i16_lane imm:$lane)))>; + def : Pat<(v4i32 (int_arm_neon_vqadds + (v4i32 QPR:$src1), + (v4i32 (int_arm_neon_vqrdmulh + (v4i32 QPR:$src2), + (v4i32 (NEONvduplane (v4i32 QPR:$src3), + imm:$lane)))))), + (v4i32 (VQRDMLAHslv4i32 (v4i32 QPR:$src1), + (v4i32 QPR:$src2), + (v2i32 (EXTRACT_SUBREG + QPR:$src3, + (DSubReg_i32_reg imm:$lane))), + (SubReg_i32_lane imm:$lane)))>; + + // VQRDMLSH : Vector Saturating Rounding Doubling Multiply Subtract Long + // (Q -= D * D) + defm VQRDMLSH : N3VInt3_HS<1, 0, 0b1100, 1, IIC_VMACi16D, IIC_VMACi32D, + IIC_VMACi16Q, IIC_VMACi32Q, "vqrdmlsh", "s", + null_frag>; + def : Pat<(v4i16 (int_arm_neon_vqsubs + (v4i16 DPR:$src1), + (v4i16 (int_arm_neon_vqrdmulh (v4i16 DPR:$Vn), + (v4i16 DPR:$Vm))))), + (v4i16 (VQRDMLSHv4i16 DPR:$src1, DPR:$Vn, DPR:$Vm))>; + def : Pat<(v2i32 (int_arm_neon_vqsubs + (v2i32 DPR:$src1), + (v2i32 (int_arm_neon_vqrdmulh (v2i32 DPR:$Vn), + (v2i32 DPR:$Vm))))), + (v2i32 (VQRDMLSHv2i32 DPR:$src1, DPR:$Vn, DPR:$Vm))>; + def : Pat<(v8i16 (int_arm_neon_vqsubs + (v8i16 QPR:$src1), + (v8i16 (int_arm_neon_vqrdmulh (v8i16 QPR:$Vn), + (v8i16 QPR:$Vm))))), + (v8i16 (VQRDMLSHv8i16 QPR:$src1, QPR:$Vn, QPR:$Vm))>; + def : Pat<(v4i32 (int_arm_neon_vqsubs + (v4i32 QPR:$src1), + (v4i32 (int_arm_neon_vqrdmulh (v4i32 QPR:$Vn), + (v4i32 QPR:$Vm))))), + (v4i32 (VQRDMLSHv4i32 QPR:$src1, QPR:$Vn, QPR:$Vm))>; + + defm VQRDMLSHsl : N3VMulOpSL_HS<0b1111, IIC_VMACi16D, IIC_VMACi32D, + IIC_VMACi16Q, IIC_VMACi32Q, "vqrdmlsh", "s", + null_frag>; + def : Pat<(v4i16 (int_arm_neon_vqsubs + (v4i16 DPR:$src1), + (v4i16 (int_arm_neon_vqrdmulh + (v4i16 DPR:$Vn), + (v4i16 (NEONvduplane (v4i16 DPR_8:$Vm), + imm:$lane)))))), + (v4i16 (VQRDMLSHslv4i16 DPR:$src1, DPR:$Vn, DPR_8:$Vm, imm:$lane))>; + def : Pat<(v2i32 (int_arm_neon_vqsubs + (v2i32 DPR:$src1), + (v2i32 (int_arm_neon_vqrdmulh + (v2i32 DPR:$Vn), + (v2i32 (NEONvduplane (v2i32 DPR_VFP2:$Vm), + imm:$lane)))))), + (v2i32 (VQRDMLSHslv2i32 DPR:$src1, DPR:$Vn, DPR_VFP2:$Vm, + imm:$lane))>; + def : Pat<(v8i16 (int_arm_neon_vqsubs + (v8i16 QPR:$src1), + (v8i16 (int_arm_neon_vqrdmulh + (v8i16 QPR:$src2), + (v8i16 (NEONvduplane (v8i16 QPR:$src3), + imm:$lane)))))), + (v8i16 (VQRDMLSHslv8i16 (v8i16 QPR:$src1), + (v8i16 QPR:$src2), + (v4i16 (EXTRACT_SUBREG + QPR:$src3, + (DSubReg_i16_reg imm:$lane))), + (SubReg_i16_lane imm:$lane)))>; + def : Pat<(v4i32 (int_arm_neon_vqsubs + (v4i32 QPR:$src1), + (v4i32 (int_arm_neon_vqrdmulh + (v4i32 QPR:$src2), + (v4i32 (NEONvduplane (v4i32 QPR:$src3), + imm:$lane)))))), + (v4i32 (VQRDMLSHslv4i32 (v4i32 QPR:$src1), + (v4i32 QPR:$src2), + (v2i32 (EXTRACT_SUBREG + QPR:$src3, + (DSubReg_i32_reg imm:$lane))), + (SubReg_i32_lane imm:$lane)))>; +} // VQDMLAL : Vector Saturating Doubling Multiply Accumulate Long (Q += D * D) defm VQDMLAL : N3VLInt3_HS<0, 1, 0b1001, 0, IIC_VMACi16D, IIC_VMACi32D, "vqdmlal", "s", null_frag>; |

