diff options
Diffstat (limited to 'llvm/lib/Target')
-rw-r--r-- | llvm/lib/Target/AArch64/AArch64ISelLowering.cpp | 47 | ||||
-rw-r--r-- | llvm/lib/Target/AArch64/AArch64Subtarget.cpp | 18 | ||||
-rw-r--r-- | llvm/lib/Target/AArch64/AArch64Subtarget.h | 4 |
3 files changed, 64 insertions, 5 deletions
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index 54ede3e2d04..14d2f6fb61a 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -489,6 +489,8 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM, setTargetDAGCombine(ISD::BITCAST); setTargetDAGCombine(ISD::CONCAT_VECTORS); setTargetDAGCombine(ISD::STORE); + if (Subtarget->supportsAddressTopByteIgnored()) + setTargetDAGCombine(ISD::LOAD); setTargetDAGCombine(ISD::MUL); @@ -8555,10 +8557,9 @@ static SDValue replaceSplatVectorStore(SelectionDAG &DAG, StoreSDNode *St) { return NewST1; } -static SDValue performSTORECombine(SDNode *N, - TargetLowering::DAGCombinerInfo &DCI, - SelectionDAG &DAG, - const AArch64Subtarget *Subtarget) { +static SDValue split16BStores(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, + SelectionDAG &DAG, + const AArch64Subtarget *Subtarget) { if (!DCI.isBeforeLegalize()) return SDValue(); @@ -8720,7 +8721,39 @@ static SDValue performPostLD1Combine(SDNode *N, return SDValue(); } -/// This function handles the log2-shuffle pattern produced by the +/// Simplify \Addr given that the top byte of it is ignored by HW during +/// address translation. +static bool performTBISimplification(SDValue Addr, + TargetLowering::DAGCombinerInfo &DCI, + SelectionDAG &DAG) { + APInt DemandedMask = APInt::getLowBitsSet(64, 56); + APInt KnownZero, KnownOne; + TargetLowering::TargetLoweringOpt TLO(DAG, DCI.isBeforeLegalize(), + DCI.isBeforeLegalizeOps()); + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + if (TLI.SimplifyDemandedBits(Addr, DemandedMask, KnownZero, KnownOne, TLO)) { + DCI.CommitTargetLoweringOpt(TLO); + return true; + } + return false; +} + +static SDValue performSTORECombine(SDNode *N, + TargetLowering::DAGCombinerInfo &DCI, + SelectionDAG &DAG, + const AArch64Subtarget *Subtarget) { + SDValue Split = split16BStores(N, DCI, DAG, Subtarget); + if (Split.getNode()) + return Split; + + if (Subtarget->supportsAddressTopByteIgnored() && + performTBISimplification(N->getOperand(2), DCI, DAG)) + return SDValue(N, 0); + + return SDValue(); +} + + /// This function handles the log2-shuffle pattern produced by the /// LoopVectorizer for the across vector reduction. It consists of /// log2(NumVectorElements) steps and, in each step, 2^(s) elements /// are reduced, where s is an induction variable from 0 to @@ -9575,6 +9608,10 @@ SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N, } case ISD::VSELECT: return performVSelectCombine(N, DCI.DAG); + case ISD::LOAD: + if (performTBISimplification(N->getOperand(1), DCI, DAG)) + return SDValue(N, 0); + break; case ISD::STORE: return performSTORECombine(N, DCI, DAG, Subtarget); case AArch64ISD::BRCOND: diff --git a/llvm/lib/Target/AArch64/AArch64Subtarget.cpp b/llvm/lib/Target/AArch64/AArch64Subtarget.cpp index e6ef6dc80e2..88af9602322 100644 --- a/llvm/lib/Target/AArch64/AArch64Subtarget.cpp +++ b/llvm/lib/Target/AArch64/AArch64Subtarget.cpp @@ -31,6 +31,11 @@ static cl::opt<bool> EnableEarlyIfConvert("aarch64-early-ifcvt", cl::desc("Enable the early if " "converter pass"), cl::init(true), cl::Hidden); +// If OS supports TBI, use this flag to enable it. +static cl::opt<bool> +UseAddressTopByteIgnored("aarch64-use-tbi", cl::desc("Assume that top byte of " + "an address is ignored"), cl::init(false), cl::Hidden); + AArch64Subtarget & AArch64Subtarget::initializeSubtargetDependencies(StringRef FS) { // Determine default and user-specified characteristics @@ -125,6 +130,19 @@ bool AArch64Subtarget::enableEarlyIfConversion() const { return EnableEarlyIfConvert; } +bool AArch64Subtarget::supportsAddressTopByteIgnored() const { + if (!UseAddressTopByteIgnored) + return false; + + if (TargetTriple.isiOS()) { + unsigned Major, Minor, Micro; + TargetTriple.getiOSVersion(Major, Minor, Micro); + return Major >= 8; + } + + return false; +} + std::unique_ptr<PBQPRAConstraint> AArch64Subtarget::getCustomPBQPConstraints() const { if (!isCortexA57()) diff --git a/llvm/lib/Target/AArch64/AArch64Subtarget.h b/llvm/lib/Target/AArch64/AArch64Subtarget.h index 78af28829e8..af617fe7220 100644 --- a/llvm/lib/Target/AArch64/AArch64Subtarget.h +++ b/llvm/lib/Target/AArch64/AArch64Subtarget.h @@ -115,6 +115,10 @@ public: bool hasNEON() const { return HasNEON; } bool hasCrypto() const { return HasCrypto; } bool hasCRC() const { return HasCRC; } + /// CPU has TBI (top byte of addresses is ignored during HW address + /// translation) and OS enables it. + bool supportsAddressTopByteIgnored() const; + bool hasPerfMon() const { return HasPerfMon; } bool isLittleEndian() const { return IsLittle; } |