[ARM][DAGCOMBINE][FIX] PerformVMOVRRDCombine

Summary: PerformVMOVRRDCombine omits adding an offset of 4 to the PointerInfo when converting a f64 = load[M] to {i32, i32} = {load[M], load[M + 4]}, which would allow the machine scheduler to break dependencies with the second load. - pr42638 Reviewers: eli.friedman, dmgreen, ostannard Reviewed By: ostannard Subscribers: ostannard, javed.absar, kristof.beyls, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D64870 llvm-svn: 366423
author: Diogo N. Sampaio <diogo.sampaio@arm.com> 2019-07-18 10:05:56 +0000
committer: Diogo N. Sampaio <diogo.sampaio@arm.com> 2019-07-18 10:05:56 +0000
commit: 11512e742b283a2845f1afa6242c63efcd2ac102 (patch)
tree: d4ebfac998cc9697973fa7b6f68eb6e59fb76c40 /llvm
parent: 83748cc5abc199a5219b0e7d9ba308984a8df613 (diff)
download: bcm5719-llvm-11512e742b283a2845f1afa6242c63efcd2ac102.tar.gz
bcm5719-llvm-11512e742b283a2845f1afa6242c63efcd2ac102.zip
2 files changed, 38 insertions, 3 deletions
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp
index 09b78115f2e..18bb9bf3ecc 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -11748,9 +11748,11 @@ static SDValue PerformVMOVRRDCombine(SDNode *N,
 
     SDValue OffsetPtr = DAG.getNode(ISD::ADD, DL, MVT::i32, BasePtr,
                                     DAG.getConstant(4, DL, MVT::i32));
-    SDValue NewLD2 = DAG.getLoad(
-        MVT::i32, DL, NewLD1.getValue(1), OffsetPtr, LD->getPointerInfo(),
-        std::min(4U, LD->getAlignment() / 2), LD->getMemOperand()->getFlags());
+
+    SDValue NewLD2 = DAG.getLoad(MVT::i32, DL, LD->getChain(), OffsetPtr,
+                                 LD->getPointerInfo().getWithOffset(4),
+                                 std::min(4U, LD->getAlignment()),
+                                 LD->getMemOperand()->getFlags());
 
     DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 1), NewLD2.getValue(1));
     if (DCI.DAG.getDataLayout().isBigEndian())
diff --git a/llvm/test/CodeGen/ARM/pr42638-VMOVRRDCombine.ll b/llvm/test/CodeGen/ARM/pr42638-VMOVRRDCombine.ll
new file mode 100644
index 00000000000..aac5de4ce5e
--- /dev/null
+++ b/llvm/test/CodeGen/ARM/pr42638-VMOVRRDCombine.ll
@@ -0,0 +1,33 @@
+; RUN: llc -stop-after=machine-scheduler -debug-only dagcombine,selectiondag -o - %s 2>&1 | FileCheck %s
+; REQUIRES: asserts
+; pr42638
+target triple = "armv8r-arm-none-eabi"
+%struct.__va_list = type { i8* }
+define double @foo(i32 %P0, ...) #0 {
+entry:
+  %V1 = alloca [8 x i8], align 8
+  %vl = alloca %struct.__va_list, align 4
+  %0 = getelementptr inbounds [8 x i8], [8 x i8]* %V1, i32 0, i32 0
+  call void asm sideeffect "", "r"(i8* nonnull %0)
+  %1 = bitcast %struct.__va_list* %vl to i8*
+  call void @llvm.va_start(i8* nonnull %1)
+  %2 = bitcast %struct.__va_list* %vl to double**
+  %argp.cur3 = load double*, double** %2, align 4
+  %v.sroa.0.0.copyload = load double, double* %argp.cur3, align 4
+  ret double %v.sroa.0.0.copyload
+}
+
+declare void @llvm.va_start(i8*)
+
+attributes #0 = { "target-cpu"="cortex-r52" "target-features"="-fp64"  }
+
+; Ensures that the machine scheduler does not move accessing the upper
+; 32 bits of the double to before actually storing it to memory
+
+; CHECK: Creating new node: {{.*}} = add FrameIndex:i32<2>, Constant:i32<4>
+; CHECK-NEXT: Creating new node: {{.*}} i32,ch = load<(load 4 from [[MEM:%.*]] + 4)>
+; CHECK: INLINEASM
+; CHECK: (load 4 from [[MEM]] + 4)
+; CHECK-NOT: (store 4 into [[MEM]] + 4)
+
+
author	Diogo N. Sampaio <diogo.sampaio@arm.com>	2019-07-18 10:05:56 +0000
committer	Diogo N. Sampaio <diogo.sampaio@arm.com>	2019-07-18 10:05:56 +0000
commit	11512e742b283a2845f1afa6242c63efcd2ac102 (patch)
tree	d4ebfac998cc9697973fa7b6f68eb6e59fb76c40 /llvm
parent	83748cc5abc199a5219b0e7d9ba308984a8df613 (diff)
download	bcm5719-llvm-11512e742b283a2845f1afa6242c63efcd2ac102.tar.gz bcm5719-llvm-11512e742b283a2845f1afa6242c63efcd2ac102.zip