Diffstat (limited to 'llvm')
 llvm/lib/Target/AArch64/AArch64FastISel.cpp             | 457 +++++++++++++++---------
 llvm/test/CodeGen/AArch64/fast-isel-addressing-modes.ll | 425 ++++++++++++++++++++++++
 2 files changed, 714 insertions(+), 168 deletions(-)
diff --git a/llvm/lib/Target/AArch64/AArch64FastISel.cpp b/llvm/lib/Target/AArch64/AArch64FastISel.cpp
index 41588d78bb6..25970b2378e 100644
--- a/llvm/lib/Target/AArch64/AArch64FastISel.cpp
+++ b/llvm/lib/Target/AArch64/AArch64FastISel.cpp
@@ -41,7 +41,6 @@ using namespace llvm;
 namespace {
 
 class AArch64FastISel : public FastISel {
-
   class Address {
   public:
     typedef enum {
@@ -51,17 +50,23 @@ class AArch64FastISel : public FastISel {
 
   private:
     BaseKind Kind;
+    AArch64_AM::ShiftExtendType ExtType;
     union {
       unsigned Reg;
       int FI;
     } Base;
+    unsigned OffsetReg;
+    unsigned Shift;
     int64_t Offset;
     const GlobalValue *GV;
 
   public:
-    Address() : Kind(RegBase), Offset(0), GV(nullptr) { Base.Reg = 0; }
+    Address() : Kind(RegBase), ExtType(AArch64_AM::InvalidShiftExtend),
+      OffsetReg(0), Shift(0), Offset(0), GV(nullptr) { Base.Reg = 0; }
     void setKind(BaseKind K) { Kind = K; }
     BaseKind getKind() const { return Kind; }
+    void setExtendType(AArch64_AM::ShiftExtendType E) { ExtType = E; }
+    AArch64_AM::ShiftExtendType getExtendType() const { return ExtType; }
     bool isRegBase() const { return Kind == RegBase; }
     bool isFIBase() const { return Kind == FrameIndexBase; }
     void setReg(unsigned Reg) {
@@ -72,6 +77,14 @@ class AArch64FastISel : public FastISel {
       assert(isRegBase() && "Invalid base register access!");
       return Base.Reg;
     }
+    void setOffsetReg(unsigned Reg) {
+      assert(isRegBase() && "Invalid offset register access!");
+      OffsetReg = Reg;
+    }
+    unsigned getOffsetReg() const {
+      assert(isRegBase() && "Invalid offset register access!");
+      return OffsetReg;
+    }
     void setFI(unsigned FI) {
       assert(isFIBase() && "Invalid base frame index access!");
       Base.FI = FI;
     }
@@ -82,11 +95,11 @@ class AArch64FastISel : public FastISel {
     }
     void setOffset(int64_t O) { Offset = O; }
     int64_t getOffset() { return Offset; }
+    void setShift(unsigned S) { Shift = S; }
+    unsigned getShift() { return Shift; }
     void setGlobalValue(const GlobalValue *G) { GV = G; }
     const GlobalValue *getGlobalValue() { return GV; }
-
-    bool isValid() { return isFIBase() || (isRegBase() && getReg() != 0); }
   };
 
   /// Subtarget - Keep a pointer to the AArch64Subtarget around so that we can
@@ -121,13 +134,12 @@ private:
   // Utility helper routines.
   bool isTypeLegal(Type *Ty, MVT &VT);
   bool isLoadStoreTypeLegal(Type *Ty, MVT &VT);
-  bool ComputeAddress(const Value *Obj, Address &Addr);
+  bool ComputeAddress(const Value *Obj, Address &Addr, Type *Ty = nullptr);
   bool ComputeCallAddress(const Value *V, Address &Addr);
-  bool SimplifyAddress(Address &Addr, MVT VT, int64_t ScaleFactor,
-                       bool UseUnscaled);
+  bool SimplifyAddress(Address &Addr, MVT VT);
   void AddLoadStoreOperands(Address &Addr, const MachineInstrBuilder &MIB,
-                            unsigned Flags, MachineMemOperand *MMO,
-                            bool UseUnscaled);
+                            unsigned Flags, unsigned ScaleFactor,
+                            MachineMemOperand *MMO);
   bool IsMemCpySmall(uint64_t Len, unsigned Alignment);
   bool TryEmitSmallMemCpy(Address Dest, Address Src, uint64_t Len,
                           unsigned Alignment);
@@ -137,9 +149,9 @@ private:
 
   // Emit functions.
   bool EmitCmp(Value *Src1Value, Value *Src2Value, bool isZExt);
   bool EmitLoad(MVT VT, unsigned &ResultReg, Address Addr,
-                MachineMemOperand *MMO = nullptr, bool UseUnscaled = false);
+                MachineMemOperand *MMO = nullptr);
   bool EmitStore(MVT VT, unsigned SrcReg, Address Addr,
-                 MachineMemOperand *MMO = nullptr, bool UseUnscaled = false);
+                 MachineMemOperand *MMO = nullptr);
   unsigned EmitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT, bool isZExt);
   unsigned Emiti1Ext(unsigned SrcReg, MVT DestVT, bool isZExt);
   unsigned Emit_MUL_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
@@ -346,7 +358,8 @@ unsigned AArch64FastISel::TargetMaterializeConstant(const Constant *C) {
 }
 
 // Computes the address to get to an object.
-bool AArch64FastISel::ComputeAddress(const Value *Obj, Address &Addr) {
+bool AArch64FastISel::ComputeAddress(const Value *Obj, Address &Addr, Type *Ty)
+{
   const User *U = nullptr;
   unsigned Opcode = Instruction::UserOp1;
   if (const Instruction *I = dyn_cast<Instruction>(Obj)) {
@@ -373,18 +386,18 @@ bool AArch64FastISel::ComputeAddress(const Value *Obj, Address &Addr) {
     break;
   case Instruction::BitCast: {
     // Look through bitcasts.
-    return ComputeAddress(U->getOperand(0), Addr);
+    return ComputeAddress(U->getOperand(0), Addr, Ty);
   }
   case Instruction::IntToPtr: {
     // Look past no-op inttoptrs.
     if (TLI.getValueType(U->getOperand(0)->getType()) == TLI.getPointerTy())
-      return ComputeAddress(U->getOperand(0), Addr);
+      return ComputeAddress(U->getOperand(0), Addr, Ty);
     break;
   }
   case Instruction::PtrToInt: {
     // Look past no-op ptrtoints.
     if (TLI.getValueType(U->getType()) == TLI.getPointerTy())
-      return ComputeAddress(U->getOperand(0), Addr);
+      return ComputeAddress(U->getOperand(0), Addr, Ty);
     break;
   }
   case Instruction::GetElementPtr: {
@@ -426,7 +439,7 @@ bool AArch64FastISel::ComputeAddress(const Value *Obj, Address &Addr) {
 
       // Try to grab the base operand now.
       Addr.setOffset(TmpOffset);
-      if (ComputeAddress(U->getOperand(0), Addr))
+      if (ComputeAddress(U->getOperand(0), Addr, Ty))
         return true;
 
       // We failed, restore everything and try the other options.
@@ -446,19 +459,86 @@ bool AArch64FastISel::ComputeAddress(const Value *Obj, Address &Addr) {
     }
     break;
   }
-  case Instruction::Add:
+  case Instruction::Add: {
     // Adds of constants are common and easy enough.
-    if (const ConstantInt *CI = dyn_cast<ConstantInt>(U->getOperand(1))) {
+    const Value *LHS = U->getOperand(0);
+    const Value *RHS = U->getOperand(1);
+
+    if (isa<ConstantInt>(LHS))
+      std::swap(LHS, RHS);
+
+    if (const ConstantInt *CI = dyn_cast<ConstantInt>(RHS)) {
       Addr.setOffset(Addr.getOffset() + (uint64_t)CI->getSExtValue());
-      return ComputeAddress(U->getOperand(0), Addr);
+      return ComputeAddress(LHS, Addr, Ty);
+    }
+
+    Address Backup = Addr;
+    if (ComputeAddress(LHS, Addr, Ty) && ComputeAddress(RHS, Addr, Ty))
+      return true;
+    Addr = Backup;
+
+    break;
+  }
+  case Instruction::Shl:
+    if (Addr.getOffsetReg())
+      break;
+
+    if (const auto *CI = dyn_cast<ConstantInt>(U->getOperand(1))) {
+      unsigned Val = CI->getZExtValue();
+      if (Val < 1 || Val > 3)
+        break;
+
+      uint64_t NumBytes = 0;
+      if (Ty && Ty->isSized()) {
+        uint64_t NumBits = DL.getTypeSizeInBits(Ty);
+        NumBytes = NumBits / 8;
+        if (!isPowerOf2_64(NumBits))
+          NumBytes = 0;
+      }
+
+      if (NumBytes != (1UL << Val))
+        break;
+
+      Addr.setShift(Val);
+      Addr.setExtendType(AArch64_AM::LSL);
+
+      if (const auto *I = dyn_cast<Instruction>(U->getOperand(0)))
+        if (FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB)
+          U = I;
+
+      if (const auto *ZE = dyn_cast<ZExtInst>(U))
+        if (ZE->getOperand(0)->getType()->isIntegerTy(32))
+          Addr.setExtendType(AArch64_AM::UXTW);
+
+      if (const auto *SE = dyn_cast<SExtInst>(U))
+        if (SE->getOperand(0)->getType()->isIntegerTy(32))
+          Addr.setExtendType(AArch64_AM::SXTW);
+
+      unsigned Reg = getRegForValue(U->getOperand(0));
+      if (!Reg)
+        return false;
+      Addr.setOffsetReg(Reg);
+      return true;
     }
     break;
   }
 
-  // Try to get this in a register if nothing else has worked.
-  if (!Addr.isValid())
-    Addr.setReg(getRegForValue(Obj));
-  return Addr.isValid();
+  if (Addr.getReg()) {
+    if (!Addr.getOffsetReg()) {
+      unsigned Reg = getRegForValue(Obj);
+      if (!Reg)
+        return false;
+      Addr.setOffsetReg(Reg);
+      return true;
+    }
+    return false;
+  }
+
+  unsigned Reg = getRegForValue(Obj);
+  if (!Reg)
+    return false;
+  Addr.setReg(Reg);
+  return true;
 }
 
 bool AArch64FastISel::ComputeCallAddress(const Value *V, Address &Addr) {
@@ -540,50 +620,80 @@ bool AArch64FastISel::isLoadStoreTypeLegal(Type *Ty, MVT &VT) {
   return false;
 }
 
-bool AArch64FastISel::SimplifyAddress(Address &Addr, MVT VT,
-                                      int64_t ScaleFactor, bool UseUnscaled) {
-  bool needsLowering = false;
-  int64_t Offset = Addr.getOffset();
+bool AArch64FastISel::SimplifyAddress(Address &Addr, MVT VT) {
+  unsigned ScaleFactor;
   switch (VT.SimpleTy) {
-  default:
-    return false;
-  case MVT::i1:
-  case MVT::i8:
-  case MVT::i16:
-  case MVT::i32:
-  case MVT::i64:
-  case MVT::f32:
-  case MVT::f64:
-    if (!UseUnscaled)
-      // Using scaled, 12-bit, unsigned immediate offsets.
-      needsLowering = ((Offset & 0xfff) != Offset);
-    else
-      // Using unscaled, 9-bit, signed immediate offsets.
-      needsLowering = (Offset > 256 || Offset < -256);
-    break;
+  default: return false;
+  case MVT::i1:  // fall-through
+  case MVT::i8:  ScaleFactor = 1; break;
+  case MVT::i16: ScaleFactor = 2; break;
+  case MVT::i32: // fall-through
+  case MVT::f32: ScaleFactor = 4; break;
+  case MVT::i64: // fall-through
+  case MVT::f64: ScaleFactor = 8; break;
   }
 
-  //If this is a stack pointer and the offset needs to be simplified then put
+  bool ImmediateOffsetNeedsLowering = false;
+  bool RegisterOffsetNeedsLowering = false;
+  int64_t Offset = Addr.getOffset();
+  if (((Offset < 0) || (Offset & (ScaleFactor - 1))) && !isInt<9>(Offset))
+    ImmediateOffsetNeedsLowering = true;
+  else if (Offset > 0 && !(Offset & (ScaleFactor - 1)) &&
+           !isUInt<12>(Offset / ScaleFactor))
+    ImmediateOffsetNeedsLowering = true;
+
+  // Cannot encode an offset register and an immediate offset in the same
+  // instruction. Fold the immediate offset into the load/store instruction and
+  // emit an additional add to take care of the offset register.
+  if (!ImmediateOffsetNeedsLowering && Addr.getOffset() && Addr.isRegBase() &&
+      Addr.getOffsetReg())
+    RegisterOffsetNeedsLowering = true;
+
+  // If this is a stack pointer and the offset needs to be simplified then put
   // the alloca address into a register, set the base type back to register and
   // continue. This should almost never happen.
-  if (needsLowering && Addr.getKind() == Address::FrameIndexBase) {
+  if (ImmediateOffsetNeedsLowering && Addr.isFIBase()) {
     unsigned ResultReg = createResultReg(&AArch64::GPR64RegClass);
     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADDXri),
             ResultReg)
-      .addFrameIndex(Addr.getFI())
-      .addImm(0)
-      .addImm(0);
+        .addFrameIndex(Addr.getFI())
+        .addImm(0)
+        .addImm(0);
     Addr.setKind(Address::RegBase);
     Addr.setReg(ResultReg);
   }
 
+  if (RegisterOffsetNeedsLowering) {
+    unsigned ResultReg = 0;
+    if (Addr.getReg()) {
+      ResultReg = createResultReg(&AArch64::GPR64RegClass);
+      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+              TII.get(AArch64::ADDXrs), ResultReg)
+          .addReg(Addr.getReg())
+          .addReg(Addr.getOffsetReg())
+          .addImm(Addr.getShift());
+    } else
+      ResultReg = Emit_LSL_ri(MVT::i64, Addr.getOffsetReg(),
+                              /*Op0IsKill=*/false, Addr.getShift());
+    if (!ResultReg)
+      return false;
+
+    Addr.setReg(ResultReg);
+    Addr.setOffsetReg(0);
+    Addr.setShift(0);
+  }
+
   // Since the offset is too large for the load/store instruction get the
   // reg+offset into a register.
-  if (needsLowering) {
-    uint64_t UnscaledOffset = Addr.getOffset() * ScaleFactor;
-    unsigned ResultReg = FastEmit_ri_(MVT::i64, ISD::ADD, Addr.getReg(), false,
-                                      UnscaledOffset, MVT::i64);
-    if (ResultReg == 0)
+  if (ImmediateOffsetNeedsLowering) {
+    unsigned ResultReg = 0;
+    if (Addr.getReg())
+      ResultReg = FastEmit_ri_(MVT::i64, ISD::ADD, Addr.getReg(),
+                               /*IsKill=*/false, Offset, MVT::i64);
+    else
+      ResultReg = FastEmit_i(MVT::i64, MVT::i64, ISD::Constant, Offset);
+
+    if (!ResultReg)
       return false;
     Addr.setReg(ResultReg);
     Addr.setOffset(0);
@@ -594,11 +704,11 @@ bool AArch64FastISel::SimplifyAddress(Address &Addr, MVT VT,
 void AArch64FastISel::AddLoadStoreOperands(Address &Addr,
                                            const MachineInstrBuilder &MIB,
                                            unsigned Flags,
-                                           MachineMemOperand *MMO,
-                                           bool UseUnscaled) {
-  int64_t Offset = Addr.getOffset();
+                                           unsigned ScaleFactor,
+                                           MachineMemOperand *MMO) {
+  int64_t Offset = Addr.getOffset() / ScaleFactor;
   // Frame base works a bit differently. Handle it separately.
-  if (Addr.getKind() == Address::FrameIndexBase) {
+  if (Addr.isFIBase()) {
     int FI = Addr.getFI();
     // FIXME: We shouldn't be using getObjectSize/getObjectAlignment. The size
     // and alignment should be based on the VT.
@@ -608,9 +718,19 @@ void AArch64FastISel::AddLoadStoreOperands(Address &Addr,
     // Now add the rest of the operands.
     MIB.addFrameIndex(FI).addImm(Offset);
   } else {
-    // Now add the rest of the operands.
-    MIB.addReg(Addr.getReg());
-    MIB.addImm(Offset);
+    assert(Addr.isRegBase() && "Unexpected address kind.");
+    if (Addr.getOffsetReg()) {
+      assert(Addr.getOffset() == 0 && "Unexpected offset");
+      bool IsSigned = Addr.getExtendType() == AArch64_AM::SXTW ||
+                      Addr.getExtendType() == AArch64_AM::SXTX;
+      MIB.addReg(Addr.getReg());
+      MIB.addReg(Addr.getOffsetReg());
+      MIB.addImm(IsSigned);
+      MIB.addImm(Addr.getShift() != 0);
+    } else {
+      MIB.addReg(Addr.getReg());
+      MIB.addImm(Offset);
+    }
   }
 
   if (MMO)
@@ -618,72 +738,68 @@ void AArch64FastISel::AddLoadStoreOperands(Address &Addr,
 }
 
 bool AArch64FastISel::EmitLoad(MVT VT, unsigned &ResultReg, Address Addr,
-                               MachineMemOperand *MMO, bool UseUnscaled) {
+                               MachineMemOperand *MMO) {
+  // Simplify this down to something we can handle.
+  if (!SimplifyAddress(Addr, VT))
+    return false;
+
+  unsigned ScaleFactor;
+  switch (VT.SimpleTy) {
+  default: llvm_unreachable("Unexpected value type.");
+  case MVT::i1:  // fall-through
+  case MVT::i8:  ScaleFactor = 1; break;
+  case MVT::i16: ScaleFactor = 2; break;
+  case MVT::i32: // fall-through
+  case MVT::f32: ScaleFactor = 4; break;
+  case MVT::i64: // fall-through
+  case MVT::f64: ScaleFactor = 8; break;
+  }
+
   // Negative offsets require unscaled, 9-bit, signed immediate offsets.
   // Otherwise, we try using scaled, 12-bit, unsigned immediate offsets.
-  if (!UseUnscaled && Addr.getOffset() < 0)
-    UseUnscaled = true;
+  bool UseScaled = true;
+  if ((Addr.getOffset() < 0) || (Addr.getOffset() & (ScaleFactor - 1))) {
+    UseScaled = false;
+    ScaleFactor = 1;
+  }
+
+  static const unsigned OpcTable[4][6] = {
+    { AArch64::LDURBBi, AArch64::LDURHHi, AArch64::LDURWi, AArch64::LDURXi,
+      AArch64::LDURSi, AArch64::LDURDi },
+    { AArch64::LDRBBui, AArch64::LDRHHui, AArch64::LDRWui, AArch64::LDRXui,
+      AArch64::LDRSui, AArch64::LDRDui },
+    { AArch64::LDRBBroX, AArch64::LDRHHroX, AArch64::LDRWroX, AArch64::LDRXroX,
+      AArch64::LDRSroX, AArch64::LDRDroX },
+    { AArch64::LDRBBroW, AArch64::LDRHHroW, AArch64::LDRWroW, AArch64::LDRXroW,
+      AArch64::LDRSroW, AArch64::LDRDroW }
+  };
 
   unsigned Opc;
   const TargetRegisterClass *RC;
   bool VTIsi1 = false;
-  int64_t ScaleFactor = 0;
-  switch (VT.SimpleTy) {
-  default:
-    return false;
-  case MVT::i1:
-    VTIsi1 = true;
-  // Intentional fall-through.
-  case MVT::i8:
-    Opc = UseUnscaled ? AArch64::LDURBBi : AArch64::LDRBBui;
-    RC = &AArch64::GPR32RegClass;
-    ScaleFactor = 1;
-    break;
-  case MVT::i16:
-    Opc = UseUnscaled ? AArch64::LDURHHi : AArch64::LDRHHui;
-    RC = &AArch64::GPR32RegClass;
-    ScaleFactor = 2;
-    break;
-  case MVT::i32:
-    Opc = UseUnscaled ? AArch64::LDURWi : AArch64::LDRWui;
-    RC = &AArch64::GPR32RegClass;
-    ScaleFactor = 4;
-    break;
-  case MVT::i64:
-    Opc = UseUnscaled ? AArch64::LDURXi : AArch64::LDRXui;
-    RC = &AArch64::GPR64RegClass;
-    ScaleFactor = 8;
-    break;
-  case MVT::f32:
-    Opc = UseUnscaled ? AArch64::LDURSi : AArch64::LDRSui;
-    RC = TLI.getRegClassFor(VT);
-    ScaleFactor = 4;
-    break;
-  case MVT::f64:
-    Opc = UseUnscaled ? AArch64::LDURDi : AArch64::LDRDui;
-    RC = TLI.getRegClassFor(VT);
-    ScaleFactor = 8;
-    break;
-  }
-  // Scale the offset.
-  if (!UseUnscaled) {
-    int64_t Offset = Addr.getOffset();
-    if (Offset & (ScaleFactor - 1))
-      // Retry using an unscaled, 9-bit, signed immediate offset.
-      return EmitLoad(VT, ResultReg, Addr, MMO, /*UseUnscaled*/ true);
+  bool UseRegOffset = Addr.isRegBase() && !Addr.getOffset() && Addr.getReg() &&
+                      Addr.getOffsetReg();
+  unsigned Idx = UseRegOffset ? 2 : UseScaled ? 1 : 0;
+  if (Addr.getExtendType() == AArch64_AM::UXTW ||
+      Addr.getExtendType() == AArch64_AM::SXTW)
+    Idx++;
 
-    Addr.setOffset(Offset / ScaleFactor);
+  switch (VT.SimpleTy) {
+  default: llvm_unreachable("Unexpected value type.");
+  case MVT::i1:  VTIsi1 = true; // Intentional fall-through.
+  case MVT::i8:  Opc = OpcTable[Idx][0]; RC = &AArch64::GPR32RegClass; break;
+  case MVT::i16: Opc = OpcTable[Idx][1]; RC = &AArch64::GPR32RegClass; break;
+  case MVT::i32: Opc = OpcTable[Idx][2]; RC = &AArch64::GPR32RegClass; break;
+  case MVT::i64: Opc = OpcTable[Idx][3]; RC = &AArch64::GPR64RegClass; break;
+  case MVT::f32: Opc = OpcTable[Idx][4]; RC = &AArch64::FPR32RegClass; break;
+  case MVT::f64: Opc = OpcTable[Idx][5]; RC = &AArch64::FPR64RegClass; break;
   }
 
-  // Simplify this down to something we can handle.
-  if (!SimplifyAddress(Addr, VT, UseUnscaled ? 1 : ScaleFactor, UseUnscaled))
-    return false;
-
   // Create the base instruction, then add the operands.
   ResultReg = createResultReg(RC);
   MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
                                     TII.get(Opc), ResultReg);
-  AddLoadStoreOperands(Addr, MIB, MachineMemOperand::MOLoad, MMO, UseUnscaled);
+  AddLoadStoreOperands(Addr, MIB, MachineMemOperand::MOLoad, ScaleFactor, MMO);
 
   // Loading an i1 requires special handling.
   if (VTIsi1) {
@@ -691,8 +807,8 @@ bool AArch64FastISel::EmitLoad(MVT VT, unsigned &ResultReg, Address Addr,
     unsigned ANDReg = createResultReg(&AArch64::GPR32spRegClass);
     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ANDWri),
             ANDReg)
-      .addReg(ResultReg)
-      .addImm(AArch64_AM::encodeLogicalImmediate(1, 32));
+        .addReg(ResultReg)
+        .addImm(AArch64_AM::encodeLogicalImmediate(1, 32));
     ResultReg = ANDReg;
   }
   return true;
@@ -708,7 +824,7 @@ bool AArch64FastISel::SelectLoad(const Instruction *I) {
 
   // See if we can handle this address.
   Address Addr;
-  if (!ComputeAddress(I->getOperand(0), Addr))
+  if (!ComputeAddress(I->getOperand(0), Addr, I->getType()))
     return false;
 
   unsigned ResultReg;
@@ -720,59 +836,63 @@ bool AArch64FastISel::SelectLoad(const Instruction *I) {
 }
 
 bool AArch64FastISel::EmitStore(MVT VT, unsigned SrcReg, Address Addr,
-                                MachineMemOperand *MMO, bool UseUnscaled) {
-  // Negative offsets require unscaled, 9-bit, signed immediate offsets.
-  // Otherwise, we try using scaled, 12-bit, unsigned immediate offsets.
-  if (!UseUnscaled && Addr.getOffset() < 0)
-    UseUnscaled = true;
+                                MachineMemOperand *MMO) {
+  // Simplify this down to something we can handle.
+  if (!SimplifyAddress(Addr, VT))
+    return false;
 
-  unsigned StrOpc;
-  bool VTIsi1 = false;
-  int64_t ScaleFactor = 0;
-  // Using scaled, 12-bit, unsigned immediate offsets.
+  unsigned ScaleFactor;
   switch (VT.SimpleTy) {
-  default:
-    return false;
-  case MVT::i1:
-    VTIsi1 = true;
-  case MVT::i8:
-    StrOpc = UseUnscaled ? AArch64::STURBBi : AArch64::STRBBui;
-    ScaleFactor = 1;
-    break;
-  case MVT::i16:
-    StrOpc = UseUnscaled ? AArch64::STURHHi : AArch64::STRHHui;
-    ScaleFactor = 2;
-    break;
-  case MVT::i32:
-    StrOpc = UseUnscaled ? AArch64::STURWi : AArch64::STRWui;
-    ScaleFactor = 4;
-    break;
-  case MVT::i64:
-    StrOpc = UseUnscaled ? AArch64::STURXi : AArch64::STRXui;
-    ScaleFactor = 8;
-    break;
-  case MVT::f32:
-    StrOpc = UseUnscaled ? AArch64::STURSi : AArch64::STRSui;
-    ScaleFactor = 4;
-    break;
-  case MVT::f64:
-    StrOpc = UseUnscaled ? AArch64::STURDi : AArch64::STRDui;
-    ScaleFactor = 8;
-    break;
+  default: llvm_unreachable("Unexpected value type.");
+  case MVT::i1:  // fall-through
+  case MVT::i8:  ScaleFactor = 1; break;
+  case MVT::i16: ScaleFactor = 2; break;
+  case MVT::i32: // fall-through
+  case MVT::f32: ScaleFactor = 4; break;
+  case MVT::i64: // fall-through
+  case MVT::f64: ScaleFactor = 8; break;
   }
-  // Scale the offset.
-  if (!UseUnscaled) {
-    int64_t Offset = Addr.getOffset();
-    if (Offset & (ScaleFactor - 1))
-      // Retry using an unscaled, 9-bit, signed immediate offset.
-      return EmitStore(VT, SrcReg, Addr, MMO, /*UseUnscaled*/ true);
-    Addr.setOffset(Offset / ScaleFactor);
+
+  // Negative offsets require unscaled, 9-bit, signed immediate offsets.
+  // Otherwise, we try using scaled, 12-bit, unsigned immediate offsets.
+  bool UseScaled = true;
+  if ((Addr.getOffset() < 0) || (Addr.getOffset() & (ScaleFactor - 1))) {
+    UseScaled = false;
+    ScaleFactor = 1;
   }
-  // Simplify this down to something we can handle.
-  if (!SimplifyAddress(Addr, VT, UseUnscaled ? 1 : ScaleFactor, UseUnscaled))
-    return false;
+
+  static const unsigned OpcTable[4][6] = {
+    { AArch64::STURBBi, AArch64::STURHHi, AArch64::STURWi, AArch64::STURXi,
+      AArch64::STURSi, AArch64::STURDi },
+    { AArch64::STRBBui, AArch64::STRHHui, AArch64::STRWui, AArch64::STRXui,
+      AArch64::STRSui, AArch64::STRDui },
+    { AArch64::STRBBroX, AArch64::STRHHroX, AArch64::STRWroX, AArch64::STRXroX,
+      AArch64::STRSroX, AArch64::STRDroX },
+    { AArch64::STRBBroW, AArch64::STRHHroW, AArch64::STRWroW, AArch64::STRXroW,
+      AArch64::STRSroW, AArch64::STRDroW }
+
+  };
+
+  unsigned Opc;
+  bool VTIsi1 = false;
+  bool UseRegOffset = Addr.isRegBase() && !Addr.getOffset() && Addr.getReg() &&
+                      Addr.getOffsetReg();
+  unsigned Idx = UseRegOffset ? 2 : UseScaled ? 1 : 0;
+  if (Addr.getExtendType() == AArch64_AM::UXTW ||
+      Addr.getExtendType() == AArch64_AM::SXTW)
+    Idx++;
+
+  switch (VT.SimpleTy) {
+  default: llvm_unreachable("Unexpected value type.");
+  case MVT::i1:  VTIsi1 = true;
+  case MVT::i8:  Opc = OpcTable[Idx][0]; break;
+  case MVT::i16: Opc = OpcTable[Idx][1]; break;
+  case MVT::i32: Opc = OpcTable[Idx][2]; break;
+  case MVT::i64: Opc = OpcTable[Idx][3]; break;
+  case MVT::f32: Opc = OpcTable[Idx][4]; break;
+  case MVT::f64: Opc = OpcTable[Idx][5]; break;
+  }
 
   // Storing an i1 requires special handling.
   if (VTIsi1) {
@@ -780,14 +900,15 @@ bool AArch64FastISel::EmitStore(MVT VT, unsigned SrcReg, Address Addr,
     unsigned ANDReg = createResultReg(&AArch64::GPR32spRegClass);
     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ANDWri),
             ANDReg)
-      .addReg(SrcReg)
-      .addImm(AArch64_AM::encodeLogicalImmediate(1, 32));
+        .addReg(SrcReg)
+        .addImm(AArch64_AM::encodeLogicalImmediate(1, 32));
     SrcReg = ANDReg;
   }
   // Create the base instruction, then add the operands.
   MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
-                                    TII.get(StrOpc)).addReg(SrcReg);
-  AddLoadStoreOperands(Addr, MIB, MachineMemOperand::MOStore, MMO, UseUnscaled);
+                                    TII.get(Opc))
+                                .addReg(SrcReg);
+  AddLoadStoreOperands(Addr, MIB, MachineMemOperand::MOStore, ScaleFactor, MMO);
 
   return true;
 }
@@ -809,7 +930,7 @@ bool AArch64FastISel::SelectStore(const Instruction *I) {
 
   // See if we can handle this address.
   Address Addr;
-  if (!ComputeAddress(I->getOperand(1), Addr))
+  if (!ComputeAddress(I->getOperand(1), Addr, I->getOperand(0)->getType()))
     return false;
 
   if (!EmitStore(VT, SrcReg, Addr, createMachineMemOperandFor(I)))
diff --git a/llvm/test/CodeGen/AArch64/fast-isel-addressing-modes.ll b/llvm/test/CodeGen/AArch64/fast-isel-addressing-modes.ll
new file mode 100644
index 00000000000..11b95f92bf2
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/fast-isel-addressing-modes.ll
@@ -0,0 +1,425 @@
+; RUN: llc -mtriple=aarch64-apple-darwin < %s | FileCheck %s --check-prefix=CHECK --check-prefix=SDAG
+; RUN: llc -mtriple=aarch64-apple-darwin -fast-isel -fast-isel-abort < %s | FileCheck %s --check-prefix=CHECK --check-prefix=FAST
+
+; Load / Store Base Register only
+define zeroext i1 @load_breg_i1(i1* %a) {
+; CHECK-LABEL: load_breg_i1
+; CHECK: ldrb {{w[0-9]+}}, [x0]
+  %1 = load i1* %a
+  ret i1 %1
+}
+
+define zeroext i8 @load_breg_i8(i8* %a) {
+; CHECK-LABEL: load_breg_i8
+; CHECK: ldrb {{w[0-9]+}}, [x0]
+  %1 = load i8* %a
+  ret i8 %1
+}
+
+define zeroext i16 @load_breg_i16(i16* %a) {
+; CHECK-LABEL: load_breg_i16
+; CHECK: ldrh {{w[0-9]+}}, [x0]
+  %1 = load i16* %a
+  ret i16 %1
+}
+
+define i32 @load_breg_i32(i32* %a) {
+; CHECK-LABEL: load_breg_i32
+; CHECK: ldr {{w[0-9]+}}, [x0]
+  %1 = load i32* %a
+  ret i32 %1
+}
+
+define i64 @load_breg_i64(i64* %a) {
+; CHECK-LABEL: load_breg_i64
+; CHECK: ldr {{x[0-9]+}}, [x0]
+  %1 = load i64* %a
+  ret i64 %1
+}
+
+define float @load_breg_f32(float* %a) {
+; CHECK-LABEL: load_breg_f32
+; CHECK: ldr {{s[0-9]+}}, [x0]
+  %1 = load float* %a
+  ret float %1
+}
+
+define double @load_breg_f64(double* %a) {
+; CHECK-LABEL: load_breg_f64
+; CHECK: ldr {{d[0-9]+}}, [x0]
+  %1 = load double* %a
+  ret double %1
+}
+
+define void @store_breg_i1(i1* %a) {
+; CHECK-LABEL: store_breg_i1
+; CHECK: strb {{wzr|w[0-9]+}}, [x0]
+  store i1 0, i1* %a
+  ret void
+}
+
+define void @store_breg_i8(i8* %a) {
+; CHECK-LABEL: store_breg_i8
+; CHECK: strb wzr, [x0]
+  store i8 0, i8* %a
+  ret void
+}
+
+define void @store_breg_i16(i16* %a) {
+; CHECK-LABEL: store_breg_i16
+; CHECK: strh wzr, [x0]
+  store i16 0, i16* %a
+  ret void
+}
+
+define void @store_breg_i32(i32* %a) {
+; CHECK-LABEL: store_breg_i32
+; CHECK: str wzr, [x0]
+  store i32 0, i32* %a
+  ret void
+}
+
+define void @store_breg_i64(i64* %a) {
+; CHECK-LABEL: store_breg_i64
+; CHECK: str xzr, [x0]
+  store i64 0, i64* %a
+  ret void
+}
+
+define void @store_breg_f32(float* %a) {
+; CHECK-LABEL: store_breg_f32
+; CHECK: str {{wzr|s[0-9]+}}, [x0]
+  store float 0.0, float* %a
+  ret void
+}
+
+define void @store_breg_f64(double* %a) {
+; CHECK-LABEL: store_breg_f64
+; CHECK: str {{xzr|d[0-9]+}}, [x0]
+  store double 0.0, double* %a
+  ret void
+}
+
+; Load / Store Base Register + Immediate Offset
+; Max supported negative offset
+define i32 @load_breg_immoff_1(i64 %a) {
+; CHECK-LABEL: load_breg_immoff_1
+; CHECK: ldur {{w[0-9]+}}, [x0, #-256]
+  %1 = add i64 %a, -256
+  %2 = inttoptr i64 %1 to i32*
+  %3 = load i32* %2
+  ret i32 %3
+}
+
+; Min not-supported negative offset
+define i32 @load_breg_immoff_2(i64 %a) {
+; SDAG-LABEL: load_breg_immoff_2
+; SDAG: sub [[REG:x[0-9]+]], x0, #257
+; SDAG-NEXT: ldr {{w[0-9]+}}, {{\[}}[[REG]]{{\]}}
+; FAST-LABEL: load_breg_immoff_2
+; FAST: add [[REG:x[0-9]+]], x0, {{x[0-9]+}}
+; FAST-NEXT: ldr {{w[0-9]+}}, {{\[}}[[REG]]{{\]}}
+  %1 = add i64 %a, -257
+  %2 = inttoptr i64 %1 to i32*
+  %3 = load i32* %2
+  ret i32 %3
+}
+
+; Max supported unscaled offset
+define i32 @load_breg_immoff_3(i64 %a) {
+; CHECK-LABEL: load_breg_immoff_3
+; CHECK: ldur {{w[0-9]+}}, [x0, #255]
+  %1 = add i64 %a, 255
+  %2 = inttoptr i64 %1 to i32*
+  %3 = load i32* %2
+  ret i32 %3
+}
+
+; Min un-supported unscaled offset
+define i32 @load_breg_immoff_4(i64 %a) {
+; SDAG-LABEL: load_breg_immoff_4
+; SDAG: add [[REG:x[0-9]+]], x0, #257
+; SDAG-NEXT: ldr {{w[0-9]+}}, {{\[}}[[REG]]{{\]}}
+; FAST-LABEL: load_breg_immoff_4
+; FAST: add [[REG:x[0-9]+]], x0, {{x[0-9]+}}
+; FAST-NEXT: ldr {{w[0-9]+}}, {{\[}}[[REG]]{{\]}}
+  %1 = add i64 %a, 257
+  %2 = inttoptr i64 %1 to i32*
+  %3 = load i32* %2
+  ret i32 %3
+}
+
+; Max supported scaled offset
+define i32 @load_breg_immoff_5(i64 %a) {
+; CHECK-LABEL: load_breg_immoff_5
+; CHECK: ldr {{w[0-9]+}}, [x0, #16380]
+  %1 = add i64 %a, 16380
+  %2 = inttoptr i64 %1 to i32*
+  %3 = load i32* %2
+  ret i32 %3
+}
+
+; Min un-supported scaled offset
+define i32 @load_breg_immoff_6(i64 %a) {
+; SDAG-LABEL: load_breg_immoff_6
+; SDAG: add [[REG:x[0-9]+]], x0, #4, lsl #12
+; SDAG-NEXT: ldr {{w[0-9]+}}, {{\[}}[[REG]]{{\]}}
+; FAST-LABEL: load_breg_immoff_6
+; FAST: add [[REG:x[0-9]+]], x0, {{x[0-9]+}}
+; FAST-NEXT: ldr {{w[0-9]+}}, {{\[}}[[REG]]{{\]}}
+  %1 = add i64 %a, 16384
+  %2 = inttoptr i64 %1 to i32*
+  %3 = load i32* %2
+  ret i32 %3
+}
+
+; Max supported negative offset
+define void @store_breg_immoff_1(i64 %a) {
+; CHECK-LABEL: store_breg_immoff_1
+; CHECK: stur wzr, [x0, #-256]
+  %1 = add i64 %a, -256
+  %2 = inttoptr i64 %1 to i32*
+  store i32 0, i32* %2
+  ret void
+}
+
+; Min not-supported negative offset
+define void @store_breg_immoff_2(i64 %a) {
+; SDAG-LABEL: store_breg_immoff_2
+; SDAG: sub [[REG:x[0-9]+]], x0, #257
+; SDAG-NEXT: str wzr, {{\[}}[[REG]]{{\]}}
+; FAST-LABEL: store_breg_immoff_2
+; FAST: add [[REG:x[0-9]+]], x0, {{x[0-9]+}}
+; FAST-NEXT: str wzr, {{\[}}[[REG]]{{\]}}
+  %1 = add i64 %a, -257
+  %2 = inttoptr i64 %1 to i32*
+  store i32 0, i32* %2
+  ret void
+}
+
+; Max supported unscaled offset
+define void @store_breg_immoff_3(i64 %a) {
+; CHECK-LABEL: store_breg_immoff_3
+; CHECK: stur wzr, [x0, #255]
+  %1 = add i64 %a, 255
+  %2 = inttoptr i64 %1 to i32*
+  store i32 0, i32* %2
+  ret void
+}
+
+; Min un-supported unscaled offset
+define void @store_breg_immoff_4(i64 %a) {
+; SDAG-LABEL: store_breg_immoff_4
+; SDAG: add [[REG:x[0-9]+]], x0, #257
+; SDAG-NEXT: str wzr, {{\[}}[[REG]]{{\]}}
+; FAST-LABEL: store_breg_immoff_4
+; FAST: add [[REG:x[0-9]+]], x0, {{x[0-9]+}}
+; FAST-NEXT: str wzr, {{\[}}[[REG]]{{\]}}
+  %1 = add i64 %a, 257
+  %2 = inttoptr i64 %1 to i32*
+  store i32 0, i32* %2
+  ret void
+}
+
+; Max supported scaled offset
+define void @store_breg_immoff_5(i64 %a) {
+; CHECK-LABEL: store_breg_immoff_5
+; CHECK: str wzr, [x0, #16380]
+  %1 = add i64 %a, 16380
+  %2 = inttoptr i64 %1 to i32*
+  store i32 0, i32* %2
+  ret void
+}
+
+; Min un-supported scaled offset
+define void @store_breg_immoff_6(i64 %a) {
+; SDAG-LABEL: store_breg_immoff_6
+; SDAG: add [[REG:x[0-9]+]], x0, #4, lsl #12
+; SDAG-NEXT: str wzr, {{\[}}[[REG]]{{\]}}
+; FAST-LABEL: store_breg_immoff_6
+; FAST: add [[REG:x[0-9]+]], x0, {{x[0-9]+}}
+; FAST-NEXT: str wzr, {{\[}}[[REG]]{{\]}}
+  %1 = add i64 %a, 16384
+  %2 = inttoptr i64 %1 to i32*
+  store i32 0, i32* %2
+  ret void
+}
+
+define i64 @load_breg_immoff_7(i64 %a) {
+; CHECK-LABEL: load_breg_immoff_7
+; CHECK: ldr {{x[0-9]+}}, [x0, #48]
+  %1 = add i64 %a, 48
+  %2 = inttoptr i64 %1 to i64*
+  %3 = load i64* %2
+  ret i64 %3
+}
+
+; Flip add operands
+define i64 @load_breg_immoff_8(i64 %a) {
+; CHECK-LABEL: load_breg_immoff_8
+; CHECK: ldr {{x[0-9]+}}, [x0, #48]
+  %1 = add i64 48, %a
+  %2 = inttoptr i64 %1 to i64*
+  %3 = load i64* %2
+  ret i64 %3
+}
+
+; Load Base Register + Register Offset
+define i64 @load_breg_offreg_1(i64 %a, i64 %b) {
+; CHECK-LABEL: load_breg_offreg_1
+; CHECK: ldr {{x[0-9]+}}, [x0, x1]
+  %1 = add i64 %a, %b
+  %2 = inttoptr i64 %1 to i64*
+  %3 = load i64* %2
+  ret i64 %3
+}
+
+; Flip add operands
+define i64 @load_breg_offreg_2(i64 %a, i64 %b) {
+; CHECK-LABEL: load_breg_offreg_2
+; CHECK: ldr {{x[0-9]+}}, [x1, x0]
+  %1 = add i64 %b, %a
+  %2 = inttoptr i64 %1 to i64*
+  %3 = load i64* %2
+  ret i64 %3
+}
+
+; Load Base Register + Register Offset + Immediate Offset
+define i64 @load_breg_offreg_immoff_1(i64 %a, i64 %b) {
+; CHECK-LABEL: load_breg_offreg_immoff_1
+; CHECK: add [[REG:x[0-9]+]], x0, x1
+; CHECK-NEXT: ldr x0, {{\[}}[[REG]], #48{{\]}}
+  %1 = add i64 %a, %b
+  %2 = add i64 %1, 48
+  %3 = inttoptr i64 %2 to i64*
+  %4 = load i64* %3
+  ret i64 %4
+}
+
+define i64 @load_breg_offreg_immoff_2(i64 %a, i64 %b) {
+; SDAG-LABEL: load_breg_offreg_immoff_2
+; SDAG: add [[REG1:x[0-9]+]], x0, x1
+; SDAG-NEXT: add [[REG2:x[0-9]+]], [[REG1]], #15, lsl #12
+; SDAG-NEXT: ldr x0, {{\[}}[[REG2]]{{\]}}
+; FAST-LABEL: load_breg_offreg_immoff_2
+; FAST: add [[REG:x[0-9]+]], x0, {{x[0-9]+}}
+; FAST-NEXT: ldr x0, {{\[}}[[REG]], x1{{\]}}
+  %1 = add i64 %a, %b
+  %2 = add i64 %1, 61440
+  %3 = inttoptr i64 %2 to i64*
+  %4 = load i64* %3
+  ret i64 %4
+}
+
+; Load Base Register + Scaled Register Offset
+define i32 @load_breg_shift_offreg_1(i64 %a, i64 %b) {
+; CHECK-LABEL: load_breg_shift_offreg_1
+; CHECK: ldr {{w[0-9]+}}, [x1, x0, lsl #2]
+  %1 = shl i64 %a, 2
+  %2 = add i64 %1, %b
+  %3 = inttoptr i64 %2 to i32*
+  %4 = load i32* %3
+  ret i32 %4
+}
+
+define i32 @load_breg_shift_offreg_2(i64 %a, i64 %b) {
+; CHECK-LABEL: load_breg_shift_offreg_2
+; CHECK: ldr {{w[0-9]+}}, [x1, x0, lsl #2]
+  %1 = shl i64 %a, 2
+  %2 = add i64 %b, %1
+  %3 = inttoptr i64 %2 to i32*
+  %4 = load i32* %3
+  ret i32 %4
+}
+
+define i32 @load_breg_shift_offreg_3(i64 %a, i64 %b) {
+; SDAG-LABEL: load_breg_shift_offreg_3
+; SDAG: lsl [[REG:x[0-9]+]], x0, #2
+; SDAG-NEXT: ldr {{w[0-9]+}}, {{\[}}[[REG]], x1, lsl #2{{\]}}
+; FAST-LABEL: load_breg_shift_offreg_3
+; FAST: lsl [[REG:x[0-9]+]], x1, {{x[0-9]+}}
+; FAST-NEXT: ldr {{w[0-9]+}}, {{\[}}[[REG]], x0, lsl #2{{\]}}
+  %1 = shl i64 %a, 2
+  %2 = shl i64 %b, 2
+  %3 = add i64 %1, %2
+  %4 = inttoptr i64 %3 to i32*
+  %5 = load i32* %4
+  ret i32 %5
+}
+
+define i32 @load_breg_shift_offreg_4(i64 %a, i64 %b) {
+; SDAG-LABEL: load_breg_shift_offreg_4
+; SDAG: lsl [[REG:x[0-9]+]], x1, #2
+; SDAG-NEXT: ldr {{w[0-9]+}}, {{\[}}[[REG]], x0, lsl #2{{\]}}
+; FAST-LABEL: load_breg_shift_offreg_4
+; FAST: lsl [[REG:x[0-9]+]], x0, {{x[0-9]+}}
+; FAST-NEXT: ldr {{w[0-9]+}}, {{\[}}[[REG]], x1, lsl #2{{\]}}
+  %1 = shl i64 %a, 2
+  %2 = shl i64 %b, 2
+  %3 = add i64 %2, %1
+  %4 = inttoptr i64 %3 to i32*
+  %5 = load i32* %4
+  ret i32 %5
+}
+
+define i32 @load_breg_shift_offreg_5(i64 %a, i64 %b) {
+; SDAG-LABEL: load_breg_shift_offreg_5
+; SDAG: lsl [[REG:x[0-9]+]], x1, #3
+; SDAG-NEXT: ldr {{w[0-9]+}}, {{\[}}[[REG]], x0, lsl #2{{\]}}
+; FAST-LABEL: load_breg_shift_offreg_5
+; FAST: lsl [[REG:x[0-9]+]], x1, {{x[0-9]+}}
+; FAST-NEXT: ldr {{w[0-9]+}}, {{\[}}[[REG]], x0, lsl #2{{\]}}
+  %1 = shl i64 %a, 2
+  %2 = shl i64 %b, 3
+  %3 = add i64 %1, %2
+  %4 = inttoptr i64 %3 to i32*
+  %5 = load i32* %4
+  ret i32 %5
+}
+
+
+; Load Base Register + Scaled Register Offset + Sign/Zero extension
+define i32 @load_breg_zext_shift_offreg_1(i32 %a, i64 %b) {
+; CHECK-LABEL: load_breg_zext_shift_offreg_1
+; CHECK: ldr {{w[0-9]+}}, [x1, w0, uxtw #2]
+  %1 = zext i32 %a to i64
+  %2 = shl i64 %1, 2
+  %3 = add i64 %2, %b
+  %4 = inttoptr i64 %3 to i32*
+  %5 = load i32* %4
+  ret i32 %5
+}
+
+define i32 @load_breg_zext_shift_offreg_2(i32 %a, i64 %b) {
+; CHECK-LABEL: load_breg_zext_shift_offreg_2
+; CHECK: ldr {{w[0-9]+}}, [x1, w0, uxtw #2]
+  %1 = zext i32 %a to i64
+  %2 = shl i64 %1, 2
+  %3 = add i64 %b, %2
+  %4 = inttoptr i64 %3 to i32*
+  %5 = load i32* %4
+  ret i32 %5
+}
+
+define i32 @load_breg_sext_shift_offreg_1(i32 %a, i64 %b) {
+; CHECK-LABEL: load_breg_sext_shift_offreg_1
+; CHECK: ldr {{w[0-9]+}}, [x1, w0, sxtw #2]
+  %1 = sext i32 %a to i64
+  %2 = shl i64 %1, 2
+  %3 = add i64 %2, %b
+  %4 = inttoptr i64 %3 to i32*
+  %5 = load i32* %4
+  ret i32 %5
+}
+
+define i32 @load_breg_sext_shift_offreg_2(i32 %a, i64 %b) {
+; CHECK-LABEL: load_breg_sext_shift_offreg_2
+; CHECK: ldr {{w[0-9]+}}, [x1, w0, sxtw #2]
+  %1 = sext i32 %a to i64
+  %2 = shl i64 %1, 2
+  %3 = add i64 %b, %2
+  %4 = inttoptr i64 %3 to i32*
+  %5 = load i32* %4
+  ret i32 %5
+}

