[FastISel][AArch64] Add custom lowering for GEPs.

This is mostly a copy of the existing FastISel GEP code, but on AArch64 we bail out even for simple cases, because the standard fastEmit functions don't cover MUL and ADD is lowered inefficientily. llvm-svn: 219726
author: Juergen Ributzka <juergen@apple.com> 2014-10-14 21:41:23 +0000
committer: Juergen Ributzka <juergen@apple.com> 2014-10-14 21:41:23 +0000
commit: 4dfd590eaaa38aaa2ce7e09058d52780e445ae96 (patch)
tree: 7b015c20216cc65cd696fc7f0fd0460234c16a96 /llvm
parent: f6aafeee6029bece1133250f3d9a65bd059c68fa (diff)
download: bcm5719-llvm-4dfd590eaaa38aaa2ce7e09058d52780e445ae96.tar.gz
bcm5719-llvm-4dfd590eaaa38aaa2ce7e09058d52780e445ae96.zip
3 files changed, 105 insertions, 3 deletions
diff --git a/llvm/lib/Target/AArch64/AArch64FastISel.cpp b/llvm/lib/Target/AArch64/AArch64FastISel.cpp
index faf8ded4541..b074be9419c 100644
--- a/llvm/lib/Target/AArch64/AArch64FastISel.cpp
+++ b/llvm/lib/Target/AArch64/AArch64FastISel.cpp
@@ -134,6 +134,7 @@ private:
   bool selectBitCast(const Instruction *I);
   bool selectFRem(const Instruction *I);
   bool selectSDiv(const Instruction *I);
+  bool selectGetElementPtr(const Instruction *I);
 
   // Utility helper routines.
   bool isTypeLegal(Type *Ty, MVT &VT);
@@ -4541,6 +4542,88 @@ bool AArch64FastISel::selectSDiv(const Instruction *I) {
   return true;
 }
 
+bool AArch64FastISel::selectGetElementPtr(const Instruction *I) {
+  unsigned N = getRegForValue(I->getOperand(0));
+  if (!N)
+    return false;
+  bool NIsKill = hasTrivialKill(I->getOperand(0));
+
+  // Keep a running tab of the total offset to coalesce multiple N = N + Offset
+  // into a single N = N + TotalOffset.
+  uint64_t TotalOffs = 0;
+  Type *Ty = I->getOperand(0)->getType();
+  MVT VT = TLI.getPointerTy();
+  for (auto OI = std::next(I->op_begin()), E = I->op_end(); OI != E; ++OI) {
+    const Value *Idx = *OI;
+    if (auto *StTy = dyn_cast<StructType>(Ty)) {
+      unsigned Field = cast<ConstantInt>(Idx)->getZExtValue();
+      // N = N + Offset
+      if (Field)
+        TotalOffs += DL.getStructLayout(StTy)->getElementOffset(Field);
+      Ty = StTy->getElementType(Field);
+    } else {
+      Ty = cast<SequentialType>(Ty)->getElementType();
+      // If this is a constant subscript, handle it quickly.
+      if (const auto *CI = dyn_cast<ConstantInt>(Idx)) {
+        if (CI->isZero())
+          continue;
+        // N = N + Offset
+        TotalOffs +=
+            DL.getTypeAllocSize(Ty) * cast<ConstantInt>(CI)->getSExtValue();
+        continue;
+      }
+      if (TotalOffs) {
+        N = emitAddSub_ri(/*UseAdd=*/true, VT, N, NIsKill, TotalOffs);
+        if (!N) {
+          unsigned C = fastEmit_i(VT, VT, ISD::Constant, TotalOffs);
+          if (!C)
+            return false;
+          N = emitAddSub_rr(/*UseAdd=*/true, VT, N, NIsKill, C, true);
+          if (!N)
+            return false;
+        }
+        NIsKill = true;
+        TotalOffs = 0;
+      }
+
+      // N = N + Idx * ElementSize;
+      uint64_t ElementSize = DL.getTypeAllocSize(Ty);
+      std::pair<unsigned, bool> Pair = getRegForGEPIndex(Idx);
+      unsigned IdxN = Pair.first;
+      bool IdxNIsKill = Pair.second;
+      if (!IdxN)
+        return false;
+
+      if (ElementSize != 1) {
+        unsigned C = fastEmit_i(VT, VT, ISD::Constant, ElementSize);
+        if (!C)
+          return false;
+        IdxN = emitMul_rr(VT, IdxN, IdxNIsKill, C, true);
+        if (!IdxN)
+          return false;
+        IdxNIsKill = true;
+      }
+      N = fastEmit_rr(VT, VT, ISD::ADD, N, NIsKill, IdxN, IdxNIsKill);
+      if (!N)
+        return false;
+    }
+  }
+  if (TotalOffs) {
+    N = emitAddSub_ri(/*UseAdd=*/true, VT, N, NIsKill, TotalOffs);
+    if (!N) {
+      unsigned C = fastEmit_i(VT, VT, ISD::Constant, TotalOffs);
+      if (!C)
+        return false;
+      N = emitAddSub_rr(/*UseAdd=*/true, VT, N, NIsKill, C, true);
+      if (!N)
+        return false;
+    }
+  }
+
+  updateValueMap(I, N);
+  return true;
+}
+
 bool AArch64FastISel::fastSelectInstruction(const Instruction *I) {
   switch (I->getOpcode()) {
   default:
@@ -4612,6 +4695,8 @@ bool AArch64FastISel::fastSelectInstruction(const Instruction *I) {
     return selectRet(I);
   case Instruction::FRem:
     return selectFRem(I);
+  case Instruction::GetElementPtr:
+    return selectGetElementPtr(I);
   }
 
   // fall-back to target-independent instruction selection.
diff --git a/llvm/test/CodeGen/AArch64/arm64-fast-isel-alloca.ll b/llvm/test/CodeGen/AArch64/arm64-fast-isel-alloca.ll
index 34394b2af0a..a8417027ce2 100644
--- a/llvm/test/CodeGen/AArch64/arm64-fast-isel-alloca.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-fast-isel-alloca.ll
@@ -15,9 +15,8 @@ define void @main() nounwind {
 entry:
 ; CHECK: main
 ; CHECK: mov x29, sp
-; CHECK: mov x[[REG:[0-9]+]], sp
-; CHECK-NEXT: orr x[[REG1:[0-9]+]], xzr, #0x8
-; CHECK-NEXT: add x0, x[[REG]], x[[REG1]]
+; CHECK: mov [[REG:x[0-9]+]], sp
+; CHECK-NEXT: add x0, [[REG]], #8
   %E = alloca %struct.S2Ty, align 4
   %B = getelementptr inbounds %struct.S2Ty* %E, i32 0, i32 1
   call void @takeS1(%struct.S1Ty* %B)
diff --git a/llvm/test/CodeGen/AArch64/fast-isel-gep.ll b/llvm/test/CodeGen/AArch64/fast-isel-gep.ll
new file mode 100644
index 00000000000..d72019801ba
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/fast-isel-gep.ll
@@ -0,0 +1,18 @@
+; RUN: llc -mtriple=aarch64-apple-darwin -fast-isel -fast-isel-abort -verify-machineinstrs < %s | FileCheck %s
+
+%struct.foo = type { i32, i64, float, double }
+
+define double* @test_struct(%struct.foo* %f) {
+; CHECK-LABEL: test_struct
+; CHECK:       add x0, x0, #24
+  %1 = getelementptr inbounds %struct.foo* %f, i64 0, i32 3
+  ret double* %1
+}
+
+define i32* @test_array(i32* %a, i64 %i) {
+; CHECK-LABEL: test_array
+; CHECK:       orr [[REG:x[0-9]+]], xzr, #0x4
+; CHECK-NEXT:  madd  x0, x1, [[REG]], x0
+  %1 = getelementptr inbounds i32* %a, i64 %i
+  ret i32* %1
+}
author	Juergen Ributzka <juergen@apple.com>	2014-10-14 21:41:23 +0000
committer	Juergen Ributzka <juergen@apple.com>	2014-10-14 21:41:23 +0000
commit	4dfd590eaaa38aaa2ce7e09058d52780e445ae96 (patch)
tree	7b015c20216cc65cd696fc7f0fd0460234c16a96 /llvm
parent	f6aafeee6029bece1133250f3d9a65bd059c68fa (diff)
download	bcm5719-llvm-4dfd590eaaa38aaa2ce7e09058d52780e445ae96.tar.gz bcm5719-llvm-4dfd590eaaa38aaa2ce7e09058d52780e445ae96.zip