summaryrefslogtreecommitdiffstats
path: root/llvm/lib
diff options
context:
space:
mode:
authorSilviu Baranga <silviu.baranga@arm.com>2016-02-04 16:47:09 +0000
committerSilviu Baranga <silviu.baranga@arm.com>2016-02-04 16:47:09 +0000
commit33b3bd17dd1cc6a00de7ff267b466909dfba186d (patch)
tree37754da1f758087b3473e43125b0a5770679a273 /llvm/lib
parente4dff62f64082770ac35bf556142b89e8d00d500 (diff)
downloadbcm5719-llvm-33b3bd17dd1cc6a00de7ff267b466909dfba186d.tar.gz
bcm5719-llvm-33b3bd17dd1cc6a00de7ff267b466909dfba186d.zip
[AArch64] Multiply extended 32-bit ints with `[U|S]MADDL'
During instruction selection, the AArch64 backend can recognise the following pattern and generate an [U|S]MADDL instruction, i.e. a multiply of two 32-bit operands with a 64-bit result: (mul (sext i32), (sext i32)) However, when one of the operands is constant, the sign extension gets folded into the constant in SelectionDAG::getNode(). This means that the instruction selection sees this: (mul (sext i32), i64) ...which doesn't match the pattern. Sign-extension and 64-bit multiply instructions are generated, which are slower than one 32-bit multiply. Add a pattern to match this and generate the correct instruction, for both signed and unsigned multiplies. Patch by Chris Diamand! llvm-svn: 259800
Diffstat (limited to 'llvm/lib')
-rw-r--r--llvm/lib/Target/AArch64/AArch64InstrInfo.td40
1 files changed, 40 insertions, 0 deletions
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index 4f052e81de2..53999b9a6fc 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -528,6 +528,12 @@ def i64imm_32bit : ImmLeaf<i64, [{
return (Imm & 0xffffffffULL) == static_cast<uint64_t>(Imm);
}]>;
+def s64imm_32bit : ImmLeaf<i64, [{
+ int64_t Imm64 = static_cast<int64_t>(Imm);
+ return Imm64 >= std::numeric_limits<int32_t>::min() &&
+ Imm64 <= std::numeric_limits<int32_t>::max();
+}]>;
+
def trunc_imm : SDNodeXForm<imm, [{
return CurDAG->getTargetConstant(N->getZExtValue(), SDLoc(N), MVT::i32);
}]>;
@@ -734,6 +740,40 @@ def : Pat<(i64 (ineg (mul (sext GPR32:$Rn), (sext GPR32:$Rm)))),
(SMSUBLrrr GPR32:$Rn, GPR32:$Rm, XZR)>;
def : Pat<(i64 (ineg (mul (zext GPR32:$Rn), (zext GPR32:$Rm)))),
(UMSUBLrrr GPR32:$Rn, GPR32:$Rm, XZR)>;
+
+def : Pat<(i64 (mul (sext GPR32:$Rn), (s64imm_32bit:$C))),
+ (SMADDLrrr GPR32:$Rn, (MOVi32imm (trunc_imm imm:$C)), XZR)>;
+def : Pat<(i64 (mul (zext GPR32:$Rn), (i64imm_32bit:$C))),
+ (UMADDLrrr GPR32:$Rn, (MOVi32imm (trunc_imm imm:$C)), XZR)>;
+def : Pat<(i64 (mul (sext_inreg GPR64:$Rn, i32), (s64imm_32bit:$C))),
+ (SMADDLrrr (i32 (EXTRACT_SUBREG GPR64:$Rn, sub_32)),
+ (MOVi32imm (trunc_imm imm:$C)), XZR)>;
+
+def : Pat<(i64 (ineg (mul (sext GPR32:$Rn), (s64imm_32bit:$C)))),
+ (SMSUBLrrr GPR32:$Rn, (MOVi32imm (trunc_imm imm:$C)), XZR)>;
+def : Pat<(i64 (ineg (mul (zext GPR32:$Rn), (i64imm_32bit:$C)))),
+ (UMSUBLrrr GPR32:$Rn, (MOVi32imm (trunc_imm imm:$C)), XZR)>;
+def : Pat<(i64 (ineg (mul (sext_inreg GPR64:$Rn, i32), (s64imm_32bit:$C)))),
+ (SMSUBLrrr (i32 (EXTRACT_SUBREG GPR64:$Rn, sub_32)),
+ (MOVi32imm (trunc_imm imm:$C)), XZR)>;
+
+def : Pat<(i64 (add (mul (sext GPR32:$Rn), (s64imm_32bit:$C)), GPR64:$Ra)),
+ (SMADDLrrr GPR32:$Rn, (MOVi32imm (trunc_imm imm:$C)), GPR64:$Ra)>;
+def : Pat<(i64 (add (mul (zext GPR32:$Rn), (i64imm_32bit:$C)), GPR64:$Ra)),
+ (UMADDLrrr GPR32:$Rn, (MOVi32imm (trunc_imm imm:$C)), GPR64:$Ra)>;
+def : Pat<(i64 (add (mul (sext_inreg GPR64:$Rn, i32), (s64imm_32bit:$C)),
+ GPR64:$Ra)),
+ (SMADDLrrr (i32 (EXTRACT_SUBREG GPR64:$Rn, sub_32)),
+ (MOVi32imm (trunc_imm imm:$C)), GPR64:$Ra)>;
+
+def : Pat<(i64 (sub (mul (sext GPR32:$Rn), (s64imm_32bit:$C)), GPR64:$Ra)),
+ (SMSUBLrrr GPR32:$Rn, (MOVi32imm (trunc_imm imm:$C)), GPR64:$Ra)>;
+def : Pat<(i64 (sub (mul (zext GPR32:$Rn), (i64imm_32bit:$C)), GPR64:$Ra)),
+ (UMSUBLrrr GPR32:$Rn, (MOVi32imm (trunc_imm imm:$C)), GPR64:$Ra)>;
+def : Pat<(i64 (sub (mul (sext_inreg GPR64:$Rn, i32), (s64imm_32bit:$C)),
+ GPR64:$Ra)),
+ (SMSUBLrrr (i32 (EXTRACT_SUBREG GPR64:$Rn, sub_32)),
+ (MOVi32imm (trunc_imm imm:$C)), GPR64:$Ra)>;
} // AddedComplexity = 5
def : MulAccumWAlias<"mul", MADDWrrr>;
OpenPOWER on IntegriCloud