diff options
| author | Amara Emerson <aemerson@apple.com> | 2019-04-15 05:04:20 +0000 |
|---|---|---|
| committer | Amara Emerson <aemerson@apple.com> | 2019-04-15 05:04:20 +0000 |
| commit | 946b1246d6c8fa95e827770e1d287184fc3dfd55 (patch) | |
| tree | 7ac5551da962b92b876d9e37104ab3a5384291cd /llvm/lib | |
| parent | d189680baa07e4ed6300cf07c18a1cd0ee5f4be9 (diff) | |
| download | bcm5719-llvm-946b1246d6c8fa95e827770e1d287184fc3dfd55.tar.gz bcm5719-llvm-946b1246d6c8fa95e827770e1d287184fc3dfd55.zip | |
[GlobalISel] Enable CSE in the IRTranslator & legalizer for -O0 with constants only.
Other opcodes shouldn't be CSE'd at -O0 until we can be sure that doing so
won't degrade debug info quality.
This change also updates the IRTranslator so that in most (but not all) places
it creates constants through the MIRBuilder directly, rather than first
creating a new destination vreg and then building the constant into it. This
way, the buildConstant() method can simply return the vreg of an existing
G_CONSTANT instead of having to create a COPY from it.
I measured a 0.2% improvement in compile time and a 0.9% improvement in code
size at -O0 on ARM64 (geomean over the CTMark programs listed below).
Compile time:
Program base cse diff
test-suite...ark/tramp3d-v4/tramp3d-v4.test 9.04 9.12 0.8%
test-suite...Mark/mafft/pairlocalalign.test 2.68 2.66 -0.7%
test-suite...-typeset/consumer-typeset.test 5.53 5.51 -0.4%
test-suite :: CTMark/lencod/lencod.test 5.30 5.28 -0.3%
test-suite :: CTMark/Bullet/bullet.test 25.82 25.76 -0.2%
test-suite...:: CTMark/ClamAV/clamscan.test 6.92 6.90 -0.2%
test-suite...TMark/7zip/7zip-benchmark.test 34.24 34.17 -0.2%
test-suite :: CTMark/SPASS/SPASS.test 6.25 6.24 -0.1%
test-suite...:: CTMark/sqlite3/sqlite3.test 1.66 1.66 -0.1%
test-suite :: CTMark/kimwitu++/kc.test 13.61 13.60 -0.0%
Geomean difference -0.2%
Code size:
Program base cse diff
test-suite...-typeset/consumer-typeset.test 1315632 1266480 -3.7%
test-suite...:: CTMark/ClamAV/clamscan.test 1313892 1297508 -1.2%
test-suite :: CTMark/lencod/lencod.test 1439504 1423112 -1.1%
test-suite...TMark/7zip/7zip-benchmark.test 2936980 2904172 -1.1%
test-suite :: CTMark/Bullet/bullet.test 3478276 3445460 -0.9%
test-suite...ark/tramp3d-v4/tramp3d-v4.test 8082868 8033492 -0.6%
test-suite :: CTMark/kimwitu++/kc.test 3870380 3853972 -0.4%
test-suite :: CTMark/SPASS/SPASS.test 1434904 1434896 -0.0%
test-suite...Mark/mafft/pairlocalalign.test 764528 764528 0.0%
test-suite...:: CTMark/sqlite3/sqlite3.test 782092 782092 0.0%
Geomean difference -0.9%
Differential Revision: https://reviews.llvm.org/D60580
llvm-svn: 358369
Diffstat (limited to 'llvm/lib')
| -rw-r--r-- | llvm/lib/CodeGen/GlobalISel/CSEInfo.cpp | 2 | ||||
| -rw-r--r-- | llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp | 18 | ||||
| -rw-r--r-- | llvm/lib/CodeGen/GlobalISel/Legalizer.cpp | 1 | ||||
| -rw-r--r-- | llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp | 6 | ||||
| -rw-r--r-- | llvm/lib/CodeGen/TargetPassConfig.cpp | 2 | ||||
| -rw-r--r-- | llvm/lib/Target/AArch64/AArch64LegalizerInfo.cpp | 5 | ||||
| -rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp | 28 |
7 files changed, 28 insertions, 34 deletions
diff --git a/llvm/lib/CodeGen/GlobalISel/CSEInfo.cpp b/llvm/lib/CodeGen/GlobalISel/CSEInfo.cpp index a87ef20a3be..4518dbee1a9 100644 --- a/llvm/lib/CodeGen/GlobalISel/CSEInfo.cpp +++ b/llvm/lib/CodeGen/GlobalISel/CSEInfo.cpp @@ -65,7 +65,7 @@ std::unique_ptr<CSEConfigBase> llvm::getStandardCSEConfigForOpt(CodeGenOpt::Level Level) { std::unique_ptr<CSEConfigBase> Config; if (Level == CodeGenOpt::None) - Config = make_unique<CSEConfigBase>(); + Config = make_unique<CSEConfigConstantOnly>(); else Config = make_unique<CSEConfigFull>(); return Config; diff --git a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp index 2e268ed27a9..d368be89703 100644 --- a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp +++ b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp @@ -645,9 +645,9 @@ bool IRTranslator::translateGetElementPtr(const User &U, if (Offset != 0) { unsigned NewBaseReg = MRI->createGenericVirtualRegister(PtrTy); - unsigned OffsetReg = - getOrCreateVReg(*ConstantInt::get(OffsetIRTy, Offset)); - MIRBuilder.buildGEP(NewBaseReg, BaseReg, OffsetReg); + LLT OffsetTy = getLLTForType(*OffsetIRTy, *DL); + auto OffsetMIB = MIRBuilder.buildConstant({OffsetTy}, Offset); + MIRBuilder.buildGEP(NewBaseReg, BaseReg, OffsetMIB.getReg(0)); BaseReg = NewBaseReg; Offset = 0; @@ -664,11 +664,10 @@ bool IRTranslator::translateGetElementPtr(const User &U, // Avoid doing it for ElementSize of 1. 
unsigned GepOffsetReg; if (ElementSize != 1) { - unsigned ElementSizeReg = - getOrCreateVReg(*ConstantInt::get(OffsetIRTy, ElementSize)); - GepOffsetReg = MRI->createGenericVirtualRegister(OffsetTy); - MIRBuilder.buildMul(GepOffsetReg, ElementSizeReg, IdxReg); + auto ElementSizeMIB = MIRBuilder.buildConstant( + getLLTForType(*OffsetIRTy, *DL), ElementSize); + MIRBuilder.buildMul(GepOffsetReg, ElementSizeMIB.getReg(0), IdxReg); } else GepOffsetReg = IdxReg; @@ -679,8 +678,9 @@ bool IRTranslator::translateGetElementPtr(const User &U, } if (Offset != 0) { - unsigned OffsetReg = getOrCreateVReg(*ConstantInt::get(OffsetIRTy, Offset)); - MIRBuilder.buildGEP(getOrCreateVReg(U), BaseReg, OffsetReg); + auto OffsetMIB = + MIRBuilder.buildConstant(getLLTForType(*OffsetIRTy, *DL), Offset); + MIRBuilder.buildGEP(getOrCreateVReg(U), BaseReg, OffsetMIB.getReg(0)); return true; } diff --git a/llvm/lib/CodeGen/GlobalISel/Legalizer.cpp b/llvm/lib/CodeGen/GlobalISel/Legalizer.cpp index efdae5790ab..52a6bb66570 100644 --- a/llvm/lib/CodeGen/GlobalISel/Legalizer.cpp +++ b/llvm/lib/CodeGen/GlobalISel/Legalizer.cpp @@ -27,6 +27,7 @@ #include "llvm/CodeGen/TargetPassConfig.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/Support/Debug.h" +#include "llvm/Target/TargetMachine.h" #include <iterator> diff --git a/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp b/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp index 110165d4316..735d4419083 100644 --- a/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp +++ b/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp @@ -215,10 +215,8 @@ MachineIRBuilder::materializeGEP(unsigned &Res, unsigned Op0, } Res = getMRI()->createGenericVirtualRegister(getMRI()->getType(Op0)); - unsigned TmpReg = getMRI()->createGenericVirtualRegister(ValueTy); - - buildConstant(TmpReg, Value); - return buildGEP(Res, Op0, TmpReg); + auto Cst = buildConstant(ValueTy, Value); + return buildGEP(Res, Op0, Cst.getReg(0)); } MachineInstrBuilder 
MachineIRBuilder::buildPtrMask(unsigned Res, unsigned Op0, diff --git a/llvm/lib/CodeGen/TargetPassConfig.cpp b/llvm/lib/CodeGen/TargetPassConfig.cpp index e5c7ceff112..09ae7bc02e4 100644 --- a/llvm/lib/CodeGen/TargetPassConfig.cpp +++ b/llvm/lib/CodeGen/TargetPassConfig.cpp @@ -1226,7 +1226,7 @@ bool TargetPassConfig::reportDiagnosticWhenGlobalISelFallback() const { } bool TargetPassConfig::isGISelCSEEnabled() const { - return getOptLevel() != CodeGenOpt::Level::None; + return true; } std::unique_ptr<CSEConfigBase> TargetPassConfig::getCSEConfig() const { diff --git a/llvm/lib/Target/AArch64/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/AArch64LegalizerInfo.cpp index 3a42ef11ab5..85110b2ec76 100644 --- a/llvm/lib/Target/AArch64/AArch64LegalizerInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64LegalizerInfo.cpp @@ -648,11 +648,10 @@ bool AArch64LegalizerInfo::legalizeVaArg(MachineInstr &MI, *MF.getMachineMemOperand(MachinePointerInfo(), MachineMemOperand::MOLoad, ValSize, std::max(Align, PtrSize))); - unsigned SizeReg = MRI.createGenericVirtualRegister(IntPtrTy); - MIRBuilder.buildConstant(SizeReg, alignTo(ValSize, PtrSize)); + auto Size = MIRBuilder.buildConstant(IntPtrTy, alignTo(ValSize, PtrSize)); unsigned NewList = MRI.createGenericVirtualRegister(PtrTy); - MIRBuilder.buildGEP(NewList, DstPtr, SizeReg); + MIRBuilder.buildGEP(NewList, DstPtr, Size.getReg(0)); MIRBuilder.buildStore( NewList, ListPtr, diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp index 5835ccec579..79499a4f3f6 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp @@ -698,7 +698,6 @@ unsigned AMDGPULegalizerInfo::getSegmentAperture( Offset << AMDGPU::Hwreg::OFFSET_SHIFT_ | WidthM1 << AMDGPU::Hwreg::WIDTH_M1_SHIFT_; - unsigned ShiftAmt = MRI.createGenericVirtualRegister(S32); unsigned ApertureReg = MRI.createGenericVirtualRegister(S32); unsigned GetReg = 
MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass); @@ -707,11 +706,11 @@ unsigned AMDGPULegalizerInfo::getSegmentAperture( .addImm(Encoding); MRI.setType(GetReg, S32); - MIRBuilder.buildConstant(ShiftAmt, WidthM1 + 1); + auto ShiftAmt = MIRBuilder.buildConstant(S32, WidthM1 + 1); MIRBuilder.buildInstr(TargetOpcode::G_SHL) .addDef(ApertureReg) .addUse(GetReg) - .addUse(ShiftAmt); + .addUse(ShiftAmt.getReg(0)); return ApertureReg; } @@ -781,11 +780,8 @@ bool AMDGPULegalizerInfo::legalizeAddrSpaceCast( DestAS == AMDGPUAS::PRIVATE_ADDRESS); unsigned NullVal = TM.getNullPointerValue(DestAS); - unsigned SegmentNullReg = MRI.createGenericVirtualRegister(DstTy); - unsigned FlatNullReg = MRI.createGenericVirtualRegister(SrcTy); - - MIRBuilder.buildConstant(SegmentNullReg, NullVal); - MIRBuilder.buildConstant(FlatNullReg, 0); + auto SegmentNull = MIRBuilder.buildConstant(DstTy, NullVal); + auto FlatNull = MIRBuilder.buildConstant(SrcTy, 0); unsigned PtrLo32 = MRI.createGenericVirtualRegister(DstTy); @@ -793,8 +789,8 @@ bool AMDGPULegalizerInfo::legalizeAddrSpaceCast( MIRBuilder.buildExtract(PtrLo32, Src, 0); unsigned CmpRes = MRI.createGenericVirtualRegister(LLT::scalar(1)); - MIRBuilder.buildICmp(CmpInst::ICMP_NE, CmpRes, Src, FlatNullReg); - MIRBuilder.buildSelect(Dst, CmpRes, PtrLo32, SegmentNullReg); + MIRBuilder.buildICmp(CmpInst::ICMP_NE, CmpRes, Src, FlatNull.getReg(0)); + MIRBuilder.buildSelect(Dst, CmpRes, PtrLo32, SegmentNull.getReg(0)); MI.eraseFromParent(); return true; @@ -803,15 +799,15 @@ bool AMDGPULegalizerInfo::legalizeAddrSpaceCast( assert(SrcAS == AMDGPUAS::LOCAL_ADDRESS || SrcAS == AMDGPUAS::PRIVATE_ADDRESS); - unsigned FlatNullReg = MRI.createGenericVirtualRegister(DstTy); - unsigned SegmentNullReg = MRI.createGenericVirtualRegister(SrcTy); - MIRBuilder.buildConstant(SegmentNullReg, TM.getNullPointerValue(SrcAS)); - MIRBuilder.buildConstant(FlatNullReg, TM.getNullPointerValue(DestAS)); + auto SegmentNull = + MIRBuilder.buildConstant(SrcTy, 
TM.getNullPointerValue(SrcAS)); + auto FlatNull = + MIRBuilder.buildConstant(DstTy, TM.getNullPointerValue(DestAS)); unsigned ApertureReg = getSegmentAperture(DestAS, MRI, MIRBuilder); unsigned CmpRes = MRI.createGenericVirtualRegister(LLT::scalar(1)); - MIRBuilder.buildICmp(CmpInst::ICMP_NE, CmpRes, Src, SegmentNullReg); + MIRBuilder.buildICmp(CmpInst::ICMP_NE, CmpRes, Src, SegmentNull.getReg(0)); unsigned BuildPtr = MRI.createGenericVirtualRegister(DstTy); @@ -824,7 +820,7 @@ bool AMDGPULegalizerInfo::legalizeAddrSpaceCast( // TODO: Should we allow mismatched types but matching sizes in merges to // avoid the ptrtoint? MIRBuilder.buildMerge(BuildPtr, {SrcAsInt, ApertureReg}); - MIRBuilder.buildSelect(Dst, CmpRes, BuildPtr, FlatNullReg); + MIRBuilder.buildSelect(Dst, CmpRes, BuildPtr, FlatNull.getReg(0)); MI.eraseFromParent(); return true; |

