ARM has a peephole optimization which looks for a def / use pair. The def produces a 32-bit immediate which is consumed by the use. It tries to fold the immediate by breaking it into two parts and folding them into the immediate fields of two uses, e.g.

    movw    r2, #40885
    movt    r3, #46540
    add     r0, r0, r3
=>
    add.w   r0, r0, #3019898880
    add.w   r0, r0, #30146560

However, this transformation is incorrect if the user produces a flag, e.g.

    movw    r2, #40885
    movt    r3, #46540
    adds    r0, r0, r3
=>
    add.w   r0, r0, #3019898880
    adds.w  r0, r0, #30146560

Note the adds.w may not set the carry flag even if the original sequence would.

rdar://11116189

llvm-svn: 153484
author: Evan Cheng <evan.cheng@apple.com> 2012-03-26 23:31:00 +0000
committer: Evan Cheng <evan.cheng@apple.com> 2012-03-26 23:31:00 +0000
commit: a2b48d985b46f2400ac7183e64a38b32bd00edd2 (patch)
tree: 892ea7a69770545902895ab9c9de5b6e9ef66449
parent: 95e021faf57e1f68d6b862323b048e3fcf83a15a (diff)
download: bcm5719-llvm-a2b48d985b46f2400ac7183e64a38b32bd00edd2.tar.gz
bcm5719-llvm-a2b48d985b46f2400ac7183e64a38b32bd00edd2.zip
2 files changed, 52 insertions, 0 deletions
diff --git a/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp b/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp
index 6c97261cf90..17ff7192b1e 100644
--- a/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp
+++ b/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp
@@ -1916,6 +1916,25 @@ bool ARMBaseInstrInfo::FoldImmediate(MachineInstr *UseMI,
   if (!MRI->hasOneNonDBGUse(Reg))
     return false;
 
+  const MCInstrDesc &DefMCID = DefMI->getDesc();
+  if (DefMCID.hasOptionalDef()) {
+    unsigned NumOps = DefMCID.getNumOperands();
+    const MachineOperand &MO = DefMI->getOperand(NumOps-1);
+    if (MO.getReg() == ARM::CPSR && !MO.isDead())
+      // If DefMI defines CPSR and it is not dead, it's obviously not safe
+      // to delete DefMI.
+      return false;
+  }
+
+  const MCInstrDesc &UseMCID = UseMI->getDesc();
+  if (UseMCID.hasOptionalDef()) {
+    unsigned NumOps = UseMCID.getNumOperands();
+    if (UseMI->getOperand(NumOps-1).getReg() == ARM::CPSR)
+      // If the instruction sets the flag, do not attempt this optimization
+      // since it may change the semantics of the code.
+      return false;
+  }
+
   unsigned UseOpc = UseMI->getOpcode();
   unsigned NewUseOpc = 0;
   uint32_t ImmVal = (uint32_t)DefMI->getOperand(1).getImm();
diff --git a/llvm/test/CodeGen/ARM/2012-03-26-FoldImmBug.ll b/llvm/test/CodeGen/ARM/2012-03-26-FoldImmBug.ll
new file mode 100644
index 00000000000..0ff4f510eb3
--- /dev/null
+++ b/llvm/test/CodeGen/ARM/2012-03-26-FoldImmBug.ll
@@ -0,0 +1,33 @@
+; RUN: llc < %s -mtriple=thumbv7-apple-ios -mcpu=cortex-a8 | FileCheck %s
+
+; ARM has a peephole optimization which looks for a def / use pair. The def
+; produces a 32-bit immediate which is consumed by the use. It tries to 
+; fold the immediate by breaking it into two parts and fold them into the
+; immediate fields of two uses. e.g.
+;        movw    r2, #40885
+;        movt    r3, #46540
+;        add     r0, r0, r3
+; =>
+;        add.w   r0, r0, #3019898880
+;        add.w   r0, r0, #30146560
+;
+; However, this transformation is incorrect if the user produces a flag. e.g.
+;        movw    r2, #40885
+;        movt    r3, #46540
+;        adds    r0, r0, r3
+; =>
+;        add.w   r0, r0, #3019898880
+;        adds.w  r0, r0, #30146560
+; Note the adds.w may not set the carry flag even if the original sequence
+; would.
+;
+; rdar://11116189
+define i64 @t(i64 %aInput) nounwind {
+; CHECK: t:
+; CHECK: movs [[REG:(r[0-9]+)]], #0
+; CHECK: movt [[REG]], #46540
+; CHECK: adds r{{[0-9]+}}, r{{[0-9]+}}, [[REG]]
+  %1 = mul i64 %aInput, 1000000
+  %2 = add i64 %1, -7952618389194932224
+  ret i64 %2
+}
author	Evan Cheng <evan.cheng@apple.com>	2012-03-26 23:31:00 +0000
committer	Evan Cheng <evan.cheng@apple.com>	2012-03-26 23:31:00 +0000
commit	a2b48d985b46f2400ac7183e64a38b32bd00edd2 (patch)
tree	892ea7a69770545902895ab9c9de5b6e9ef66449
parent	95e021faf57e1f68d6b862323b048e3fcf83a15a (diff)
download	bcm5719-llvm-a2b48d985b46f2400ac7183e64a38b32bd00edd2.tar.gz bcm5719-llvm-a2b48d985b46f2400ac7183e64a38b32bd00edd2.zip