Reapply "[FastISel][AArch64] Add custom lowering for GEPs."

This is mostly a copy of the existing FastISel GEP code, but we have to duplicate it for AArch64, because otherwise we would bail out even for simple cases. This is because the standard fastEmit functions don't cover MUL at all and ADD is lowered very inefficientily. The original commit had a bug in the add emit logic, which has been fixed. llvm-svn: 219831
author: Juergen Ributzka <juergen@apple.com> 2014-10-15 18:58:07 +0000
committer: Juergen Ributzka <juergen@apple.com> 2014-10-15 18:58:07 +0000
commit: f82c987a5cc1ecce7b5e3e11fff974e278e89c81 (patch)
tree: 08cda2e4da29f9a9f9d0743045d87a4f029217af
parent: 6780f0f7a09add4fb38067c06c28e05742ee1f7d (diff)
download: bcm5719-llvm-f82c987a5cc1ecce7b5e3e11fff974e278e89c81.tar.gz
bcm5719-llvm-f82c987a5cc1ecce7b5e3e11fff974e278e89c81.zip
3 files changed, 119 insertions, 3 deletions
diff --git a/llvm/lib/Target/AArch64/AArch64FastISel.cpp b/llvm/lib/Target/AArch64/AArch64FastISel.cpp
index ed6b5a53c5b..46263020300 100644
--- a/llvm/lib/Target/AArch64/AArch64FastISel.cpp
+++ b/llvm/lib/Target/AArch64/AArch64FastISel.cpp
@@ -134,6 +134,7 @@ private:
   bool selectBitCast(const Instruction *I);
   bool selectFRem(const Instruction *I);
   bool selectSDiv(const Instruction *I);
+  bool selectGetElementPtr(const Instruction *I);
 
   // Utility helper routines.
   bool isTypeLegal(Type *Ty, MVT &VT);
@@ -4556,6 +4557,79 @@ bool AArch64FastISel::selectSDiv(const Instruction *I) {
   return true;
 }
 
+/// This is mostly a copy of the existing FastISel GEP code, but we have to
+/// duplicate it for AArch64, because otherwise we would bail out even for
+/// simple cases. This is because the standard fastEmit functions don't cover
+/// MUL at all and ADD is lowered very inefficientily.
+bool AArch64FastISel::selectGetElementPtr(const Instruction *I) {
+  unsigned N = getRegForValue(I->getOperand(0));
+  if (!N)
+    return false;
+  bool NIsKill = hasTrivialKill(I->getOperand(0));
+
+  // Keep a running tab of the total offset to coalesce multiple N = N + Offset
+  // into a single N = N + TotalOffset.
+  uint64_t TotalOffs = 0;
+  Type *Ty = I->getOperand(0)->getType();
+  MVT VT = TLI.getPointerTy();
+  for (auto OI = std::next(I->op_begin()), E = I->op_end(); OI != E; ++OI) {
+    const Value *Idx = *OI;
+    if (auto *StTy = dyn_cast<StructType>(Ty)) {
+      unsigned Field = cast<ConstantInt>(Idx)->getZExtValue();
+      // N = N + Offset
+      if (Field)
+        TotalOffs += DL.getStructLayout(StTy)->getElementOffset(Field);
+      Ty = StTy->getElementType(Field);
+    } else {
+      Ty = cast<SequentialType>(Ty)->getElementType();
+      // If this is a constant subscript, handle it quickly.
+      if (const auto *CI = dyn_cast<ConstantInt>(Idx)) {
+        if (CI->isZero())
+          continue;
+        // N = N + Offset
+        TotalOffs +=
+            DL.getTypeAllocSize(Ty) * cast<ConstantInt>(CI)->getSExtValue();
+        continue;
+      }
+      if (TotalOffs) {
+        N = emitAdd_ri_(VT, N, NIsKill, TotalOffs);
+        if (!N)
+          return false;
+        NIsKill = true;
+        TotalOffs = 0;
+      }
+
+      // N = N + Idx * ElementSize;
+      uint64_t ElementSize = DL.getTypeAllocSize(Ty);
+      std::pair<unsigned, bool> Pair = getRegForGEPIndex(Idx);
+      unsigned IdxN = Pair.first;
+      bool IdxNIsKill = Pair.second;
+      if (!IdxN)
+        return false;
+
+      if (ElementSize != 1) {
+        unsigned C = fastEmit_i(VT, VT, ISD::Constant, ElementSize);
+        if (!C)
+          return false;
+        IdxN = emitMul_rr(VT, IdxN, IdxNIsKill, C, true);
+        if (!IdxN)
+          return false;
+        IdxNIsKill = true;
+      }
+      N = fastEmit_rr(VT, VT, ISD::ADD, N, NIsKill, IdxN, IdxNIsKill);
+      if (!N)
+        return false;
+    }
+  }
+  if (TotalOffs) {
+    N = emitAdd_ri_(VT, N, NIsKill, TotalOffs);
+    if (!N)
+      return false;
+  }
+  updateValueMap(I, N);
+  return true;
+}
+
 bool AArch64FastISel::fastSelectInstruction(const Instruction *I) {
   switch (I->getOpcode()) {
   default:
@@ -4627,6 +4701,8 @@ bool AArch64FastISel::fastSelectInstruction(const Instruction *I) {
     return selectRet(I);
   case Instruction::FRem:
     return selectFRem(I);
+  case Instruction::GetElementPtr:
+    return selectGetElementPtr(I);
   }
 
   // fall-back to target-independent instruction selection.
diff --git a/llvm/test/CodeGen/AArch64/arm64-fast-isel-alloca.ll b/llvm/test/CodeGen/AArch64/arm64-fast-isel-alloca.ll
index 34394b2af0a..a8417027ce2 100644
--- a/llvm/test/CodeGen/AArch64/arm64-fast-isel-alloca.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-fast-isel-alloca.ll
@@ -15,9 +15,8 @@ define void @main() nounwind {
 entry:
 ; CHECK: main
 ; CHECK: mov x29, sp
-; CHECK: mov x[[REG:[0-9]+]], sp
-; CHECK-NEXT: orr x[[REG1:[0-9]+]], xzr, #0x8
-; CHECK-NEXT: add x0, x[[REG]], x[[REG1]]
+; CHECK: mov [[REG:x[0-9]+]], sp
+; CHECK-NEXT: add x0, [[REG]], #8
   %E = alloca %struct.S2Ty, align 4
   %B = getelementptr inbounds %struct.S2Ty* %E, i32 0, i32 1
   call void @takeS1(%struct.S1Ty* %B)
diff --git a/llvm/test/CodeGen/AArch64/fast-isel-gep.ll b/llvm/test/CodeGen/AArch64/fast-isel-gep.ll
new file mode 100644
index 00000000000..465f595245c
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/fast-isel-gep.ll
@@ -0,0 +1,41 @@
+; RUN: llc -mtriple=aarch64-apple-darwin -fast-isel -fast-isel-abort -verify-machineinstrs < %s | FileCheck %s
+
+%struct.foo = type { i32, i64, float, double }
+
+define double* @test_struct(%struct.foo* %f) {
+; CHECK-LABEL: test_struct
+; CHECK:       add x0, x0, #24
+  %1 = getelementptr inbounds %struct.foo* %f, i64 0, i32 3
+  ret double* %1
+}
+
+define i32* @test_array1(i32* %a, i64 %i) {
+; CHECK-LABEL: test_array1
+; CHECK:       orr [[REG:x[0-9]+]], xzr, #0x4
+; CHECK-NEXT:  madd  x0, x1, [[REG]], x0
+  %1 = getelementptr inbounds i32* %a, i64 %i
+  ret i32* %1
+}
+
+define i32* @test_array2(i32* %a) {
+; CHECK-LABEL: test_array2
+; CHECK:       add  x0, x0, #16
+  %1 = getelementptr inbounds i32* %a, i64 4
+  ret i32* %1
+}
+
+define i32* @test_array3(i32* %a) {
+; CHECK-LABEL: test_array3
+; CHECK:       add x0, x0, #1, lsl #12
+  %1 = getelementptr inbounds i32* %a, i64 1024
+  ret i32* %1
+}
+
+define i32* @test_array4(i32* %a) {
+; CHECK-LABEL: test_array4
+; CHECK:       movz [[REG:x[0-9]+]], #0x1008
+; CHECK-NEXR:  add x0, x0, [[REG]]
+  %1 = getelementptr inbounds i32* %a, i64 1026
+  ret i32* %1
+}
+
author	Juergen Ributzka <juergen@apple.com>	2014-10-15 18:58:07 +0000
committer	Juergen Ributzka <juergen@apple.com>	2014-10-15 18:58:07 +0000
commit	f82c987a5cc1ecce7b5e3e11fff974e278e89c81 (patch)
tree	08cda2e4da29f9a9f9d0743045d87a4f029217af
parent	6780f0f7a09add4fb38067c06c28e05742ee1f7d (diff)
download	bcm5719-llvm-f82c987a5cc1ecce7b5e3e11fff974e278e89c81.tar.gz bcm5719-llvm-f82c987a5cc1ecce7b5e3e11fff974e278e89c81.zip