summaryrefslogtreecommitdiffstats
path: root/llvm/lib/Target/ARM
diff options
context:
space:
mode:
authorEvan Cheng <evan.cheng@apple.com>2012-04-11 06:59:47 +0000
committerEvan Cheng <evan.cheng@apple.com>2012-04-11 06:59:47 +0000
commit5efc442290d45d4e5b27f9d90a83be29f222a811 (patch)
tree2c564f8e1bce9b3d5ff944c050619ebb9a1b9e72 /llvm/lib/Target/ARM
parent9bc178ac5c8f103c328d626f8307756e4b7d753d (diff)
downloadbcm5719-llvm-5efc442290d45d4e5b27f9d90a83be29f222a811.tar.gz
bcm5719-llvm-5efc442290d45d4e5b27f9d90a83be29f222a811.zip
Add more fused mul+add/sub patterns. rdar://10139676
llvm-svn: 154484
Diffstat (limited to 'llvm/lib/Target/ARM')
-rw-r--r--llvm/lib/Target/ARM/ARMInstrNEON.td10
-rw-r--r--llvm/lib/Target/ARM/ARMInstrVFP.td44
2 files changed, 50 insertions, 4 deletions
diff --git a/llvm/lib/Target/ARM/ARMInstrNEON.td b/llvm/lib/Target/ARM/ARMInstrNEON.td
index ea16dcd8e38..c2b8ce466da 100644
--- a/llvm/lib/Target/ARM/ARMInstrNEON.td
+++ b/llvm/lib/Target/ARM/ARMInstrNEON.td
@@ -4133,12 +4133,18 @@ def VFMSfq : N3VQMulOp<0, 0, 0b10, 0b1100, 1, IIC_VFMACQ, "vfms", "f32",
Requires<[HasVFP4,UseFusedMAC]>;
// Match @llvm.fma.* intrinsics
-def : Pat<(fma (v2f32 DPR:$src1), (v2f32 DPR:$Vn), (v2f32 DPR:$Vm)),
+def : Pat<(v2f32 (fma DPR:$src1, DPR:$Vn, DPR:$Vm)),
(VFMAfd DPR:$src1, DPR:$Vn, DPR:$Vm)>,
Requires<[HasVFP4]>;
-def : Pat<(fma (v4f32 QPR:$src1), (v4f32 QPR:$Vn), (v4f32 QPR:$Vm)),
+def : Pat<(v4f32 (fma QPR:$src1, QPR:$Vn, QPR:$Vm)),
(VFMAfq QPR:$src1, QPR:$Vn, QPR:$Vm)>,
Requires<[HasVFP4]>;
+def : Pat<(v2f32 (fma (fneg DPR:$src1), DPR:$Vn, DPR:$Vm)),
+ (VFMSfd DPR:$src1, DPR:$Vn, DPR:$Vm)>,
+ Requires<[HasVFP4]>;
+def : Pat<(v4f32 (fma (fneg QPR:$src1), QPR:$Vn, QPR:$Vm)),
+ (VFMSfq QPR:$src1, QPR:$Vn, QPR:$Vm)>,
+ Requires<[HasVFP4]>;
// Vector Subtract Operations.
diff --git a/llvm/lib/Target/ARM/ARMInstrVFP.td b/llvm/lib/Target/ARM/ARMInstrVFP.td
index 28cf460f874..3600b889d6f 100644
--- a/llvm/lib/Target/ARM/ARMInstrVFP.td
+++ b/llvm/lib/Target/ARM/ARMInstrVFP.td
@@ -1081,10 +1081,10 @@ def : Pat<(fadd_mlx SPR:$dstin, (fmul_su SPR:$a, SPR:$b)),
Requires<[HasVFP4,DontUseNEONForFP,UseFusedMAC]>;
// Match @llvm.fma.* intrinsics
-def : Pat<(fma (f64 DPR:$Ddin), (f64 DPR:$Dn), (f64 DPR:$Dm)),
+def : Pat<(f64 (fma DPR:$Ddin, DPR:$Dn, DPR:$Dm)),
(VFMAD DPR:$Ddin, DPR:$Dn, DPR:$Dm)>,
Requires<[HasVFP4]>;
-def : Pat<(fma (f32 SPR:$Sdin), (f32 SPR:$Sn), (f32 SPR:$Sm)),
+def : Pat<(f32 (fma SPR:$Sdin, SPR:$Sn, SPR:$Sm)),
(VFMAS SPR:$Sdin, SPR:$Sn, SPR:$Sm)>,
Requires<[HasVFP4]>;
@@ -1114,6 +1114,22 @@ def : Pat<(fsub_mlx SPR:$dstin, (fmul_su SPR:$a, SPR:$b)),
(VFMSS SPR:$dstin, SPR:$a, SPR:$b)>,
Requires<[HasVFP4,DontUseNEONForFP,UseFusedMAC]>;
+// Match @llvm.fma.* intrinsics
+// (fma (fneg x), y, z) -> (vfms x, y, z)
+def : Pat<(f64 (fma (fneg DPR:$Ddin), DPR:$Dn, DPR:$Dm)),
+ (VFMSD DPR:$Ddin, DPR:$Dn, DPR:$Dm)>,
+ Requires<[HasVFP4]>;
+def : Pat<(f32 (fma (fneg SPR:$Sdin), SPR:$Sn, SPR:$Sm)),
+ (VFMSS SPR:$Sdin, SPR:$Sn, SPR:$Sm)>,
+ Requires<[HasVFP4]>;
+// (fneg (fma x, (fneg y), z) -> (vfms x, y, z)
+def : Pat<(fneg (f64 (fma DPR:$Ddin, (fneg DPR:$Dn), DPR:$Dm))),
+ (VFMSD DPR:$Ddin, DPR:$Dn, DPR:$Dm)>,
+ Requires<[HasVFP4]>;
+def : Pat<(fneg (f32 (fma SPR:$Sdin, (fneg SPR:$Sn), SPR:$Sm))),
+ (VFMSS SPR:$Sdin, SPR:$Sn, SPR:$Sm)>,
+ Requires<[HasVFP4]>;
+
def VFNMAD : ADbI<0b11101, 0b01, 1, 0,
(outs DPR:$Dd), (ins DPR:$Ddin, DPR:$Dn, DPR:$Dm),
IIC_fpFMAC64, "vfnma", ".f64\t$Dd, $Dn, $Dm",
@@ -1141,12 +1157,20 @@ def : Pat<(fsub_mlx (fneg (fmul_su SPR:$a, SPR:$b)), SPR:$dstin),
Requires<[HasVFP4,DontUseNEONForFP,UseFusedMAC]>;
// Match @llvm.fma.* intrinsics
+// (fneg (fma x, y, z)) -> (vfnma x, y, z)
def : Pat<(fneg (fma (f64 DPR:$Ddin), (f64 DPR:$Dn), (f64 DPR:$Dm))),
(VFNMAD DPR:$Ddin, DPR:$Dn, DPR:$Dm)>,
Requires<[HasVFP4]>;
def : Pat<(fneg (fma (f32 SPR:$Sdin), (f32 SPR:$Sn), (f32 SPR:$Sm))),
(VFNMAS SPR:$Sdin, SPR:$Sn, SPR:$Sm)>,
Requires<[HasVFP4]>;
+// (fma (fneg x), y, (fneg z)) -> (vfnma x, y, z)
+def : Pat<(f64 (fma (fneg DPR:$Ddin), DPR:$Dn, (fneg DPR:$Dm))),
+ (VFNMAD DPR:$Ddin, DPR:$Dn, DPR:$Dm)>,
+ Requires<[HasVFP4]>;
+def : Pat<(f32 (fma (fneg SPR:$Sdin), SPR:$Sn, (fneg SPR:$Sm))),
+ (VFNMAS SPR:$Sdin, SPR:$Sn, SPR:$Sm)>,
+ Requires<[HasVFP4]>;
def VFNMSD : ADbI<0b11101, 0b01, 0, 0,
(outs DPR:$Dd), (ins DPR:$Ddin, DPR:$Dn, DPR:$Dm),
@@ -1173,6 +1197,22 @@ def : Pat<(fsub_mlx (fmul_su SPR:$a, SPR:$b), SPR:$dstin),
(VFNMSS SPR:$dstin, SPR:$a, SPR:$b)>,
Requires<[HasVFP4,DontUseNEONForFP,UseFusedMAC]>;
+// Match @llvm.fma.* intrinsics
+// (fneg (fma (fneg x), y, z)) -> (vnfms x, y, z)
+def : Pat<(fneg (f64 (fma (fneg DPR:$Ddin), DPR:$Dn, DPR:$Dm))),
+ (VFNMSD DPR:$Ddin, DPR:$Dn, DPR:$Dm)>,
+ Requires<[HasVFP4]>;
+def : Pat<(fneg (f32 (fma (fneg SPR:$Sdin), SPR:$Sn, SPR:$Sm))),
+ (VFNMSS SPR:$Sdin, SPR:$Sn, SPR:$Sm)>,
+ Requires<[HasVFP4]>;
+// (fma x, (fneg y), z) -> (vnfms x, y, z)
+def : Pat<(f64 (fma DPR:$Ddin, (fneg DPR:$Dn), DPR:$Dm)),
+ (VFNMSD DPR:$Ddin, DPR:$Dn, DPR:$Dm)>,
+ Requires<[HasVFP4]>;
+def : Pat<(f32 (fma SPR:$Sdin, (fneg SPR:$Sn), SPR:$Sm)),
+ (VFNMSS SPR:$Sdin, SPR:$Sn, SPR:$Sm)>,
+ Requires<[HasVFP4]>;
+
//===----------------------------------------------------------------------===//
// FP Conditional moves.
//
OpenPOWER on IntegriCloud