Revert r312724 ("[ARM] Remove redundant vcvt patterns.").

The reverted change (r312724) leads to some improvements, but also to a regression in the simple case, so it's not clearly a good idea. test/CodeGen/ARM/vcvt.ll now has test coverage to show the difference.

Ultimately, the right solution is probably to custom-lower fp-to-int conversions to something like ARMISD::VCVT_F32_S32 plus a bitcast. It's hard to do the right thing when the implicit bitcast isn't visible to DAG transforms.

llvm-svn: 314169
author: Eli Friedman <efriedma@codeaurora.org> 2017-09-25 22:07:33 +0000
committer: Eli Friedman <efriedma@codeaurora.org> 2017-09-25 22:07:33 +0000
commit: edee9999c4de706f29272eb9757260a188cae59e (patch)
tree: e81a8a09974bc86be79b0132832f7e339f484a4d
parent: b7f45eb60960f44259c88d3b266039e009994958 (diff)
download: bcm5719-llvm-edee9999c4de706f29272eb9757260a188cae59e.tar.gz
bcm5719-llvm-edee9999c4de706f29272eb9757260a188cae59e.zip
2 files changed, 28 insertions, 14 deletions
diff --git a/llvm/lib/Target/ARM/ARMInstrVFP.td b/llvm/lib/Target/ARM/ARMInstrVFP.td
index 0549a198b5b..362ffedf32c 100644
--- a/llvm/lib/Target/ARM/ARMInstrVFP.td
+++ b/llvm/lib/Target/ARM/ARMInstrVFP.td
@@ -1435,6 +1435,9 @@ def VTOSIZD : AVConv1IsD_Encode<0b11101, 0b11, 0b1101, 0b1011,
 let Predicates=[HasVFP2, HasDPVFP] in {
   def : VFPPat<(i32 (fp_to_sint (f64 DPR:$a))),
                (COPY_TO_REGCLASS (VTOSIZD DPR:$a), GPR)>;
+
+  def : VFPPat<(alignedstore32 (i32 (fp_to_sint (f64 DPR:$a))), addrmode5:$ptr),
+               (VSTRS (VTOSIZD DPR:$a), addrmode5:$ptr)>;
 }
 
 def VTOSIZS : AVConv1InsS_Encode<0b11101, 0b11, 0b1101, 0b1010,
@@ -1452,6 +1455,10 @@ def VTOSIZS : AVConv1InsS_Encode<0b11101, 0b11, 0b1101, 0b1010,
 def : VFPNoNEONPat<(i32 (fp_to_sint SPR:$a)),
                    (COPY_TO_REGCLASS (VTOSIZS SPR:$a), GPR)>;
 
+def : VFPNoNEONPat<(alignedstore32 (i32 (fp_to_sint (f32 SPR:$a))),
+                                   addrmode5:$ptr),
+                   (VSTRS (VTOSIZS SPR:$a), addrmode5:$ptr)>;
+
 def VTOSIZH : AVConv1IsH_Encode<0b11101, 0b11, 0b1101, 0b1001,
                                  (outs SPR:$Sd), (ins SPR:$Sm),
                                  IIC_fpCVTHI, "vcvt", ".s32.f16\t$Sd, $Sm",
@@ -1471,6 +1478,9 @@ def VTOUIZD : AVConv1IsD_Encode<0b11101, 0b11, 0b1100, 0b1011,
 let Predicates=[HasVFP2, HasDPVFP] in {
   def : VFPPat<(i32 (fp_to_uint (f64 DPR:$a))),
                (COPY_TO_REGCLASS (VTOUIZD DPR:$a), GPR)>;
+
+  def : VFPPat<(alignedstore32 (i32 (fp_to_uint (f64 DPR:$a))), addrmode5:$ptr),
+               (VSTRS (VTOUIZD DPR:$a), addrmode5:$ptr)>;
 }
 
 def VTOUIZS : AVConv1InsS_Encode<0b11101, 0b11, 0b1100, 0b1010,
@@ -1488,6 +1498,10 @@ def VTOUIZS : AVConv1InsS_Encode<0b11101, 0b11, 0b1100, 0b1010,
 def : VFPNoNEONPat<(i32 (fp_to_uint SPR:$a)),
                    (COPY_TO_REGCLASS (VTOUIZS SPR:$a), GPR)>;
 
+def : VFPNoNEONPat<(alignedstore32 (i32 (fp_to_uint (f32 SPR:$a))),
+                                   addrmode5:$ptr),
+                  (VSTRS (VTOUIZS SPR:$a), addrmode5:$ptr)>;
+
 def VTOUIZH : AVConv1IsH_Encode<0b11101, 0b11, 0b1100, 0b1001,
                                  (outs SPR:$Sd), (ins SPR:$Sm),
                                  IIC_fpCVTHI, "vcvt", ".u32.f16\t$Sd, $Sm",
diff --git a/llvm/test/CodeGen/ARM/vcvt.ll b/llvm/test/CodeGen/ARM/vcvt.ll
index 884a1a47ae2..5f470d60707 100644
--- a/llvm/test/CodeGen/ARM/vcvt.ll
+++ b/llvm/test/CodeGen/ARM/vcvt.ll
@@ -355,9 +355,11 @@ define i32 @multi_sint(double %c, i32* nocapture %p, i32* nocapture %q) {
 ; CHECK:       @ BB#0:
 ; CHECK-NEXT:    vmov d16, r0, r1
 ; CHECK-NEXT:    vcvt.s32.f64 s0, d16
-; CHECK-NEXT:    vmov r0, s0
-; CHECK-NEXT:    str r0, [r2]
-; CHECK-NEXT:    str r0, [r3]
+; CHECK-NEXT:    vstr s0, [r2]
+; CHECK-NEXT:    vcvt.s32.f64 s0, d16
+; CHECK-NEXT:    vcvt.s32.f64 s2, d16
+; CHECK-NEXT:    vmov r0, s2
+; CHECK-NEXT:    vstr s0, [r3]
 ; CHECK-NEXT:    mov pc, lr
   %conv = fptosi double %c to i32
   store i32 %conv, i32* %p, align 4
@@ -370,9 +372,11 @@ define i32 @multi_uint(double %c, i32* nocapture %p, i32* nocapture %q) {
 ; CHECK:       @ BB#0:
 ; CHECK-NEXT:    vmov d16, r0, r1
 ; CHECK-NEXT:    vcvt.u32.f64 s0, d16
-; CHECK-NEXT:    vmov r0, s0
-; CHECK-NEXT:    str r0, [r2]
-; CHECK-NEXT:    str r0, [r3]
+; CHECK-NEXT:    vstr s0, [r2]
+; CHECK-NEXT:    vcvt.u32.f64 s0, d16
+; CHECK-NEXT:    vcvt.u32.f64 s2, d16
+; CHECK-NEXT:    vmov r0, s2
+; CHECK-NEXT:    vstr s0, [r3]
 ; CHECK-NEXT:    mov pc, lr
   %conv = fptoui double %c to i32
   store i32 %conv, i32* %p, align 4
@@ -385,8 +389,7 @@ define void @double_to_sint_store(double %c, i32* nocapture %p) {
 ; CHECK:       @ BB#0:
 ; CHECK-NEXT:    vmov d16, r0, r1
 ; CHECK-NEXT:    vcvt.s32.f64 s0, d16
-; CHECK-NEXT:    vmov r0, s0
-; CHECK-NEXT:    str r0, [r2]
+; CHECK-NEXT:    vstr s0, [r2]
 ; CHECK-NEXT:    mov pc, lr
   %conv = fptosi double %c to i32
   store i32 %conv, i32* %p, align 4
@@ -398,8 +401,7 @@ define void @double_to_uint_store(double %c, i32* nocapture %p) {
 ; CHECK:       @ BB#0:
 ; CHECK-NEXT:    vmov d16, r0, r1
 ; CHECK-NEXT:    vcvt.u32.f64 s0, d16
-; CHECK-NEXT:    vmov r0, s0
-; CHECK-NEXT:    str r0, [r2]
+; CHECK-NEXT:    vstr s0, [r2]
 ; CHECK-NEXT:    mov pc, lr
   %conv = fptoui double %c to i32
   store i32 %conv, i32* %p, align 4
@@ -411,8 +413,7 @@ define void @float_to_sint_store(float %c, i32* nocapture %p) {
 ; CHECK:       @ BB#0:
 ; CHECK-NEXT:    vmov s0, r0
 ; CHECK-NEXT:    vcvt.s32.f32 s0, s0
-; CHECK-NEXT:    vmov r0, s0
-; CHECK-NEXT:    str r0, [r1]
+; CHECK-NEXT:    vstr s0, [r1]
 ; CHECK-NEXT:    mov pc, lr
   %conv = fptosi float %c to i32
   store i32 %conv, i32* %p, align 4
@@ -424,8 +425,7 @@ define void @float_to_uint_store(float %c, i32* nocapture %p) {
 ; CHECK:       @ BB#0:
 ; CHECK-NEXT:    vmov s0, r0
 ; CHECK-NEXT:    vcvt.u32.f32 s0, s0
-; CHECK-NEXT:    vmov r0, s0
-; CHECK-NEXT:    str r0, [r1]
+; CHECK-NEXT:    vstr s0, [r1]
 ; CHECK-NEXT:    mov pc, lr
   %conv = fptoui float %c to i32
   store i32 %conv, i32* %p, align 4
author	Eli Friedman <efriedma@codeaurora.org>	2017-09-25 22:07:33 +0000
committer	Eli Friedman <efriedma@codeaurora.org>	2017-09-25 22:07:33 +0000
commit	edee9999c4de706f29272eb9757260a188cae59e (patch)
tree	e81a8a09974bc86be79b0132832f7e339f484a4d
parent	b7f45eb60960f44259c88d3b266039e009994958 (diff)
download	bcm5719-llvm-edee9999c4de706f29272eb9757260a188cae59e.tar.gz bcm5719-llvm-edee9999c4de706f29272eb9757260a188cae59e.zip