diff options
author | Krzysztof Parzyszek <kparzysz@codeaurora.org> | 2016-07-26 18:30:11 +0000 |
---|---|---|
committer | Krzysztof Parzyszek <kparzysz@codeaurora.org> | 2016-07-26 18:30:11 +0000 |
commit | 1adca30c39d347ef1a5b5bcdc109eaa6741cc511 (patch) | |
tree | 40772bb20b9994b68a8f9befaf9d71e2f3df16fc | |
parent | 16da82f4d2c2f7b8140d4296859124d583e239c2 (diff) | |
download | bcm5719-llvm-1adca30c39d347ef1a5b5bcdc109eaa6741cc511.tar.gz bcm5719-llvm-1adca30c39d347ef1a5b5bcdc109eaa6741cc511.zip |
[Hexagon] Bitwise operations for insert/extract word not simplified
Change the bit simplifier to generate REG_SEQUENCE instructions in
addition to COPY; the REG_SEQUENCE form handles cases of word insert/extract.
llvm-svn: 276787
-rw-r--r-- | llvm/lib/Target/Hexagon/HexagonBitSimplify.cpp | 89 | ||||
-rw-r--r-- | llvm/lib/Target/Hexagon/HexagonBitTracker.cpp | 21 | ||||
-rw-r--r-- | llvm/test/CodeGen/Hexagon/bit-gen-rseq.ll | 43 | ||||
-rw-r--r-- | llvm/test/CodeGen/Hexagon/struct_args.ll | 8 |
4 files changed, 132 insertions, 29 deletions
diff --git a/llvm/lib/Target/Hexagon/HexagonBitSimplify.cpp b/llvm/lib/Target/Hexagon/HexagonBitSimplify.cpp index c8b4a4cf938..9a8bc0e5b45 100644 --- a/llvm/lib/Target/Hexagon/HexagonBitSimplify.cpp +++ b/llvm/lib/Target/Hexagon/HexagonBitSimplify.cpp @@ -1249,6 +1249,8 @@ bool RedundantInstrElimination::usedBitsEqual(BitTracker::RegisterRef RD, bool RedundantInstrElimination::processBlock(MachineBasicBlock &B, const RegisterSet&) { + if (!BT.reached(&B)) + return false; bool Changed = false; for (auto I = B.begin(), E = B.end(), NextI = I; I != E; ++I) { @@ -1295,7 +1297,15 @@ bool RedundantInstrElimination::processBlock(MachineBasicBlock &B, BuildMI(B, At, DL, HII.get(TargetOpcode::COPY), NewR) .addReg(RS.Reg, 0, RS.Sub); HBS::replaceSubWithSub(RD.Reg, RD.Sub, NewR, 0, MRI); - BT.put(BitTracker::RegisterRef(NewR), SC); + // Do not update the bit tracker. This pass can create copies between + // registers that don't have the exact same values. Updating the + // tracker here may be tricky. E.g. + // vreg1 = inst vreg2 ; vreg1 != vreg2, but used bits are equal + // + // vreg3 = copy vreg2 ; <- inserted + // ... = vreg3 ; <- replaced from vreg2 + // Indirectly, we can create a "copy" between vreg1 and vreg2 even + // though their exact values do not match. 
Changed = true; break; } @@ -1317,8 +1327,8 @@ namespace { MachineRegisterInfo &mri) : Transformation(true), HII(hii), MRI(mri), BT(bt) {} bool processBlock(MachineBasicBlock &B, const RegisterSet &AVs) override; + static bool isTfrConst(const MachineInstr &MI); private: - bool isTfrConst(const MachineInstr &MI) const; bool isConst(unsigned R, int64_t &V) const; unsigned genTfrConst(const TargetRegisterClass *RC, int64_t C, MachineBasicBlock &B, MachineBasicBlock::iterator At, DebugLoc &DL); @@ -1346,7 +1356,7 @@ bool ConstGeneration::isConst(unsigned R, int64_t &C) const { return true; } -bool ConstGeneration::isTfrConst(const MachineInstr &MI) const { +bool ConstGeneration::isTfrConst(const MachineInstr &MI) { unsigned Opc = MI.getOpcode(); switch (Opc) { case Hexagon::A2_combineii: @@ -1413,6 +1423,8 @@ unsigned ConstGeneration::genTfrConst(const TargetRegisterClass *RC, int64_t C, bool ConstGeneration::processBlock(MachineBasicBlock &B, const RegisterSet&) { + if (!BT.reached(&B)) + return false; bool Changed = false; RegisterSet Defs; @@ -1426,14 +1438,16 @@ bool ConstGeneration::processBlock(MachineBasicBlock &B, const RegisterSet&) { unsigned DR = Defs.find_first(); if (!TargetRegisterInfo::isVirtualRegister(DR)) continue; - int64_t C; - if (isConst(DR, C)) { + uint64_t U; + const BitTracker::RegisterCell &DRC = BT.lookup(DR); + if (HBS::getConst(DRC, 0, DRC.width(), U)) { + int64_t C = U; DebugLoc DL = I->getDebugLoc(); auto At = I->isPHI() ? 
B.getFirstNonPHI() : I; unsigned ImmReg = genTfrConst(MRI.getRegClass(DR), C, B, At, DL); if (ImmReg) { HBS::replaceReg(DR, ImmReg, MRI); - BT.put(ImmReg, BT.lookup(DR)); + BT.put(ImmReg, DRC); Changed = true; } } @@ -1467,6 +1481,7 @@ namespace { const HexagonInstrInfo &HII; MachineRegisterInfo &MRI; BitTracker &BT; + RegisterSet Forbidden; }; class CopyPropagation : public Transformation { @@ -1491,17 +1506,20 @@ bool CopyGeneration::findMatch(const BitTracker::RegisterRef &Inp, if (!BT.has(Inp.Reg)) return false; const BitTracker::RegisterCell &InpRC = BT.lookup(Inp.Reg); + auto *FRC = HBS::getFinalVRegClass(Inp, MRI); unsigned B, W; if (!HBS::getSubregMask(Inp, B, W, MRI)) return false; for (unsigned R = AVs.find_first(); R; R = AVs.find_next(R)) { - if (!BT.has(R) || !HBS::isTransparentCopy(R, Inp, MRI)) + if (!BT.has(R) || Forbidden[R]) continue; const BitTracker::RegisterCell &RC = BT.lookup(R); unsigned RW = RC.width(); if (W == RW) { - if (MRI.getRegClass(Inp.Reg) != MRI.getRegClass(R)) + if (FRC != MRI.getRegClass(R)) + continue; + if (!HBS::isTransparentCopy(R, Inp, MRI)) continue; if (!HBS::isEqual(InpRC, B, RC, 0, W)) continue; @@ -1524,7 +1542,8 @@ bool CopyGeneration::findMatch(const BitTracker::RegisterRef &Inp, else continue; Out.Reg = R; - return true; + if (HBS::isTransparentCopy(Out, Inp, MRI)) + return true; } return false; } @@ -1532,6 +1551,8 @@ bool CopyGeneration::findMatch(const BitTracker::RegisterRef &Inp, bool CopyGeneration::processBlock(MachineBasicBlock &B, const RegisterSet &AVs) { + if (!BT.reached(&B)) + return false; RegisterSet AVB(AVs); bool Changed = false; RegisterSet Defs; @@ -1543,20 +1564,44 @@ bool CopyGeneration::processBlock(MachineBasicBlock &B, HBS::getInstrDefs(*I, Defs); unsigned Opc = I->getOpcode(); - if (CopyPropagation::isCopyReg(Opc)) + if (CopyPropagation::isCopyReg(Opc) || ConstGeneration::isTfrConst(*I)) continue; + DebugLoc DL = I->getDebugLoc(); + auto At = I->isPHI() ? 
B.getFirstNonPHI() : I; + for (unsigned R = Defs.find_first(); R; R = Defs.find_next(R)) { BitTracker::RegisterRef MR; - if (!findMatch(R, MR, AVB)) + auto *FRC = HBS::getFinalVRegClass(R, MRI); + + if (findMatch(R, MR, AVB)) { + unsigned NewR = MRI.createVirtualRegister(FRC); + BuildMI(B, At, DL, HII.get(TargetOpcode::COPY), NewR) + .addReg(MR.Reg, 0, MR.Sub); + BT.put(BitTracker::RegisterRef(NewR), BT.get(MR)); + HBS::replaceReg(R, NewR, MRI); + Forbidden.insert(R); continue; - DebugLoc DL = I->getDebugLoc(); - auto *FRC = HBS::getFinalVRegClass(MR, MRI); - unsigned NewR = MRI.createVirtualRegister(FRC); - auto At = I->isPHI() ? B.getFirstNonPHI() : I; - BuildMI(B, At, DL, HII.get(TargetOpcode::COPY), NewR) - .addReg(MR.Reg, 0, MR.Sub); - BT.put(BitTracker::RegisterRef(NewR), BT.get(MR)); + } + + if (FRC == &Hexagon::DoubleRegsRegClass) { + // Try to generate REG_SEQUENCE. + BitTracker::RegisterRef TL = { R, Hexagon::subreg_loreg }; + BitTracker::RegisterRef TH = { R, Hexagon::subreg_hireg }; + BitTracker::RegisterRef ML, MH; + if (findMatch(TL, ML, AVB) && findMatch(TH, MH, AVB)) { + auto *FRC = HBS::getFinalVRegClass(R, MRI); + unsigned NewR = MRI.createVirtualRegister(FRC); + BuildMI(B, At, DL, HII.get(TargetOpcode::REG_SEQUENCE), NewR) + .addReg(ML.Reg, 0, ML.Sub) + .addImm(Hexagon::subreg_loreg) + .addReg(MH.Reg, 0, MH.Sub) + .addImm(Hexagon::subreg_hireg); + BT.put(BitTracker::RegisterRef(NewR), BT.get(R)); + HBS::replaceReg(R, NewR, MRI); + Forbidden.insert(R); + } + } } } @@ -2121,6 +2166,8 @@ bool BitSimplification::simplifyTstbit(MachineInstr *MI, bool BitSimplification::processBlock(MachineBasicBlock &B, const RegisterSet &AVs) { + if (!BT.reached(&B)) + return false; bool Changed = false; RegisterSet AVB = AVs; RegisterSet Defs; @@ -2203,7 +2250,11 @@ bool HexagonBitSimplify::runOnMachineFunction(MachineFunction &MF) { RegisterSet ARE; // Available registers for RIE. 
RedundantInstrElimination RIE(BT, HII, MRI); - Changed |= visitBlock(Entry, RIE, ARE); + bool Ried = visitBlock(Entry, RIE, ARE); + if (Ried) { + Changed = true; + BT.run(); + } RegisterSet ACG; // Available registers for CG. CopyGeneration CopyG(BT, HII, MRI); diff --git a/llvm/lib/Target/Hexagon/HexagonBitTracker.cpp b/llvm/lib/Target/Hexagon/HexagonBitTracker.cpp index 78b57d27ad5..d0842e5f260 100644 --- a/llvm/lib/Target/Hexagon/HexagonBitTracker.cpp +++ b/llvm/lib/Target/Hexagon/HexagonBitTracker.cpp @@ -138,8 +138,21 @@ bool HexagonEvaluator::evaluate(const MachineInstr &MI, if (NumDefs == 0) return false; - if (MI.mayLoad()) - return evaluateLoad(MI, Inputs, Outputs); + using namespace Hexagon; + unsigned Opc = MI.getOpcode(); + + if (MI.mayLoad()) { + switch (Opc) { + // These instructions may be marked as mayLoad, but they are generating + // immediate values, so skip them. + case CONST32: + case CONST32_Int_Real: + case CONST64_Int_Real: + break; + default: + return evaluateLoad(MI, Inputs, Outputs); + } + } // Check COPY instructions that copy formal parameters into virtual // registers. 
Such parameters can be sign- or zero-extended at the @@ -174,8 +187,6 @@ bool HexagonEvaluator::evaluate(const MachineInstr &MI, } RegisterRefs Reg(MI); - unsigned Opc = MI.getOpcode(); - using namespace Hexagon; #define op(i) MI.getOperand(i) #define rc(i) RegisterCell::ref(getCell(Reg[i], Inputs)) #define im(i) MI.getOperand(i).getImm() @@ -246,9 +257,7 @@ bool HexagonEvaluator::evaluate(const MachineInstr &MI, case A2_tfrsi: case A2_tfrpi: case CONST32: - case CONST32_Float_Real: case CONST32_Int_Real: - case CONST64_Float_Real: case CONST64_Int_Real: return rr0(eIMM(im(1), W0), Outputs); case TFR_PdFalse: diff --git a/llvm/test/CodeGen/Hexagon/bit-gen-rseq.ll b/llvm/test/CodeGen/Hexagon/bit-gen-rseq.ll new file mode 100644 index 00000000000..6b5a5ad8ec3 --- /dev/null +++ b/llvm/test/CodeGen/Hexagon/bit-gen-rseq.ll @@ -0,0 +1,43 @@ +; RUN: llc -march=hexagon -disable-hsdr < %s | FileCheck %s +; Check that we don't generate any bitwise operations. + +; CHECK-NOT: = or( +; CHECK-NOT: = and( + +target triple = "hexagon" + +define i32 @fred(i32* nocapture readonly %p, i32 %n) #0 { +entry: + %t.sroa.0.048 = load i32, i32* %p, align 4 + %cmp49 = icmp ugt i32 %n, 1 + br i1 %cmp49, label %for.body, label %for.end + +for.body: ; preds = %entry, %for.body + %t.sroa.0.052 = phi i32 [ %t.sroa.0.0, %for.body ], [ %t.sroa.0.048, %entry ] + %t.sroa.11.051 = phi i64 [ %t.sroa.11.0.extract.shift, %for.body ], [ 0, %entry ] + %i.050 = phi i32 [ %inc, %for.body ], [ 1, %entry ] + %t.sroa.0.0.insert.ext = zext i32 %t.sroa.0.052 to i64 + %t.sroa.0.0.insert.insert = or i64 %t.sroa.0.0.insert.ext, %t.sroa.11.051 + %0 = tail call i64 @llvm.hexagon.A2.addp(i64 %t.sroa.0.0.insert.insert, i64 %t.sroa.0.0.insert.insert) + %t.sroa.11.0.extract.shift = and i64 %0, -4294967296 + %arrayidx4 = getelementptr inbounds i32, i32* %p, i32 %i.050 + %inc = add nuw i32 %i.050, 1 + %t.sroa.0.0 = load i32, i32* %arrayidx4, align 4 + %exitcond = icmp eq i32 %inc, %n + br i1 %exitcond, label %for.end, 
label %for.body + +for.end: ; preds = %for.body, %entry + %t.sroa.0.0.lcssa = phi i32 [ %t.sroa.0.048, %entry ], [ %t.sroa.0.0, %for.body ] + %t.sroa.11.0.lcssa = phi i64 [ 0, %entry ], [ %t.sroa.11.0.extract.shift, %for.body ] + %t.sroa.0.0.insert.ext17 = zext i32 %t.sroa.0.0.lcssa to i64 + %t.sroa.0.0.insert.insert19 = or i64 %t.sroa.0.0.insert.ext17, %t.sroa.11.0.lcssa + %1 = tail call i64 @llvm.hexagon.A2.addp(i64 %t.sroa.0.0.insert.insert19, i64 %t.sroa.0.0.insert.insert19) + %t.sroa.11.0.extract.shift41 = lshr i64 %1, 32 + %t.sroa.11.0.extract.trunc42 = trunc i64 %t.sroa.11.0.extract.shift41 to i32 + ret i32 %t.sroa.11.0.extract.trunc42 +} + +declare i64 @llvm.hexagon.A2.addp(i64, i64) #1 + +attributes #0 = { norecurse nounwind readonly } +attributes #1 = { nounwind readnone } diff --git a/llvm/test/CodeGen/Hexagon/struct_args.ll b/llvm/test/CodeGen/Hexagon/struct_args.ll index 2ac1f8eadbb..11c23b82ec4 100644 --- a/llvm/test/CodeGen/Hexagon/struct_args.ll +++ b/llvm/test/CodeGen/Hexagon/struct_args.ll @@ -1,6 +1,6 @@ -; RUN: llc -march=hexagon -mcpu=hexagonv4 -disable-hsdr < %s | FileCheck %s -; CHECK: r{{[0-9]}}:{{[0-9]}} = combine({{r[0-9]|#0}}, r{{[0-9]}}) -; CHECK: r{{[0-9]}}:{{[0-9]}} |= asl(r{{[0-9]}}:{{[0-9]}}, #32) +; RUN: llc -march=hexagon -disable-hsdr < %s | FileCheck %s +; CHECK-DAG: r0 = memw +; CHECK-DAG: r1 = memw %struct.small = type { i32, i32 } @@ -8,7 +8,7 @@ define void @foo() nounwind { entry: - %0 = load i64, i64* bitcast (%struct.small* @s1 to i64*), align 1 + %0 = load i64, i64* bitcast (%struct.small* @s1 to i64*), align 4 call void @bar(i64 %0) ret void } |