1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
|
//===-- RISCVISelDAGToDAG.cpp - A dag to dag inst selector for RISCV ------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines an instruction selector for the RISCV target.
//
//===----------------------------------------------------------------------===//
#include "RISCV.h"
#include "MCTargetDesc/RISCVMCTargetDesc.h"
#include "RISCVTargetMachine.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
#define DEBUG_TYPE "riscv-isel"
// RISCV-specific code to select RISCV machine instructions for
// SelectionDAG operations.
namespace {
class RISCVDAGToDAGISel final : public SelectionDAGISel {
const RISCVSubtarget *Subtarget;
public:
explicit RISCVDAGToDAGISel(RISCVTargetMachine &TargetMachine)
: SelectionDAGISel(TargetMachine) {}
StringRef getPassName() const override {
return "RISCV DAG->DAG Pattern Instruction Selection";
}
bool runOnMachineFunction(MachineFunction &MF) override {
Subtarget = &MF.getSubtarget<RISCVSubtarget>();
return SelectionDAGISel::runOnMachineFunction(MF);
}
void PostprocessISelDAG() override;
void Select(SDNode *Node) override;
bool SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID,
std::vector<SDValue> &OutOps) override;
bool SelectAddrFI(SDValue Addr, SDValue &Base);
// Include the pieces autogenerated from the target description.
#include "RISCVGenDAGISel.inc"
private:
void doPeepholeLoadStoreADDI();
void doPeepholeGlobalAddiLuiOffset();
void doPeepholeBuildPairF64SplitF64();
};
}
void RISCVDAGToDAGISel::PostprocessISelDAG() {
doPeepholeLoadStoreADDI();
doPeepholeGlobalAddiLuiOffset();
doPeepholeBuildPairF64SplitF64();
}
void RISCVDAGToDAGISel::Select(SDNode *Node) {
unsigned Opcode = Node->getOpcode();
MVT XLenVT = Subtarget->getXLenVT();
// If we have a custom node, we have already selected
if (Node->isMachineOpcode()) {
LLVM_DEBUG(dbgs() << "== "; Node->dump(CurDAG); dbgs() << "\n");
Node->setNodeId(-1);
return;
}
// Instruction Selection not handled by the auto-generated tablegen selection
// should be handled here.
EVT VT = Node->getValueType(0);
if (Opcode == ISD::Constant && VT == XLenVT) {
auto *ConstNode = cast<ConstantSDNode>(Node);
// Materialize zero constants as copies from X0. This allows the coalescer
// to propagate these into other instructions.
if (ConstNode->isNullValue()) {
SDValue New = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), SDLoc(Node),
RISCV::X0, XLenVT);
ReplaceNode(Node, New.getNode());
return;
}
}
if (Opcode == ISD::FrameIndex) {
SDLoc DL(Node);
SDValue Imm = CurDAG->getTargetConstant(0, DL, XLenVT);
int FI = cast<FrameIndexSDNode>(Node)->getIndex();
EVT VT = Node->getValueType(0);
SDValue TFI = CurDAG->getTargetFrameIndex(FI, VT);
ReplaceNode(Node, CurDAG->getMachineNode(RISCV::ADDI, DL, VT, TFI, Imm));
return;
}
// Select the default instruction.
SelectCode(Node);
}
bool RISCVDAGToDAGISel::SelectInlineAsmMemoryOperand(
const SDValue &Op, unsigned ConstraintID, std::vector<SDValue> &OutOps) {
switch (ConstraintID) {
case InlineAsm::Constraint_i:
case InlineAsm::Constraint_m:
// We just support simple memory operands that have a single address
// operand and need no special handling.
OutOps.push_back(Op);
return false;
default:
break;
}
return true;
}
bool RISCVDAGToDAGISel::SelectAddrFI(SDValue Addr, SDValue &Base) {
if (auto FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), Subtarget->getXLenVT());
return true;
}
return false;
}
// Detect the pattern lui %hi(global) --> ADDI %lo(global)
// HiLUI LoADDI
static bool detectLuiAddiGlobal(SDNode *Tail, unsigned &Idx, SDValue &LoADDI,
SDValue &HiLUI, GlobalAddressSDNode *&GAlo,
GlobalAddressSDNode *&GAhi) {
// Try to detect the pattern on every operand of the tail instruction.
for (Idx = 0; Idx < Tail->getNumOperands(); Idx++) {
LoADDI = Tail->getOperand(Idx);
// LoADDI should only be used by one instruction (Tail).
if (!LoADDI->isMachineOpcode() ||
!(LoADDI->getMachineOpcode() == RISCV::ADDI) ||
!isa<GlobalAddressSDNode>(LoADDI->getOperand(1)) ||
!LoADDI->hasOneUse())
continue;
// Check for existence of %lo target flag.
GAlo = cast<GlobalAddressSDNode>(LoADDI->getOperand(1));
if (!(GAlo->getTargetFlags() == RISCVII::MO_LO) ||
!(GAlo->getOffset() == 0))
return false;
// Check for existence of %hi target flag.
HiLUI = LoADDI->getOperand(0);
if (!HiLUI->isMachineOpcode() ||
!(HiLUI->getMachineOpcode() == RISCV::LUI) ||
!isa<GlobalAddressSDNode>(HiLUI->getOperand(0)) || !HiLUI->hasOneUse())
return false;
GAhi = cast<GlobalAddressSDNode>(HiLUI->getOperand(0));
if (!(GAhi->getTargetFlags() == RISCVII::MO_HI) ||
!(GAhi->getOffset() == 0))
return false;
return true;
}
return false;
}
static bool matchLuiOffset(SDValue &OffsetLUI, int64_t &Offset) {
if (!OffsetLUI->isMachineOpcode() ||
!(OffsetLUI->getMachineOpcode() == RISCV::LUI) ||
!isa<ConstantSDNode>(OffsetLUI->getOperand(0)))
return false;
Offset = cast<ConstantSDNode>(OffsetLUI->getOperand(0))->getSExtValue();
Offset = Offset << 12;
LLVM_DEBUG(dbgs() << " Detected \" LUI Offset_hi\"\n");
return true;
}
static bool matchAddiLuiOffset(SDValue &OffsetLoADDI, int64_t &Offset) {
// LoADDI should only be used by the tail instruction only.
if (!OffsetLoADDI->isMachineOpcode() ||
!(OffsetLoADDI->getMachineOpcode() == RISCV::ADDI) ||
!isa<ConstantSDNode>(OffsetLoADDI->getOperand(1)) ||
!OffsetLoADDI->hasOneUse())
return false;
int64_t OffLo =
cast<ConstantSDNode>(OffsetLoADDI->getOperand(1))->getZExtValue();
// HiLUI should only be used by the loADDI.
SDValue OffsetHiLUI = (OffsetLoADDI->getOperand(0));
if (!OffsetHiLUI->isMachineOpcode() ||
!(OffsetHiLUI->getMachineOpcode() == RISCV::LUI) ||
!isa<ConstantSDNode>(OffsetHiLUI->getOperand(0)) ||
!OffsetHiLUI->hasOneUse())
return false;
int64_t OffHi =
cast<ConstantSDNode>(OffsetHiLUI->getOperand(0))->getSExtValue();
Offset = (OffHi << 12) + OffLo;
LLVM_DEBUG(dbgs() << " Detected \" ADDI (LUI Offset_hi), Offset_lo\"\n");
return true;
}
static void updateTailInstrUsers(SDNode *Tail, SelectionDAG *CurDAG,
GlobalAddressSDNode *GAhi,
GlobalAddressSDNode *GAlo,
SDValue &GlobalHiLUI, SDValue &GlobalLoADDI,
int64_t Offset) {
// Update the offset in GAhi and GAlo.
SDLoc DL(Tail->getOperand(1));
SDValue GAHiNew = CurDAG->getTargetGlobalAddress(GAhi->getGlobal(), DL,
GlobalHiLUI.getValueType(),
Offset, RISCVII::MO_HI);
SDValue GALoNew = CurDAG->getTargetGlobalAddress(GAlo->getGlobal(), DL,
GlobalLoADDI.getValueType(),
Offset, RISCVII::MO_LO);
CurDAG->UpdateNodeOperands(GlobalHiLUI.getNode(), GAHiNew);
CurDAG->UpdateNodeOperands(GlobalLoADDI.getNode(), GlobalHiLUI, GALoNew);
// Update all uses of the Tail with the GlobalLoADDI. After
// this Tail will be a dead node.
SDValue From = SDValue(Tail, 0);
CurDAG->ReplaceAllUsesOfValuesWith(&From, &GlobalLoADDI, 1);
}
// TODO: This transformation might be better implemeted in a Machine Funtion
// Pass as discussed here: https://reviews.llvm.org/D45748.
//
// Merge the offset of address calculation into the offset field
// of a global address node in a global address lowering sequence ("LUI
// %hi(global) --> add %lo(global)") under the following conditions: 1) The
// offset field in the global address lowering sequence is zero. 2) The lowered
// global address is only used in one node, referred to as "Tail".
// This peephole does the following transformations to merge the offset:
// 1) ADDI (ADDI (LUI %hi(global)) %lo(global)), offset
// --->
// ADDI (LUI %hi(global + offset)) %lo(global + offset).
//
// This generates:
// lui a0, hi (global + offset)
// add a0, a0, lo (global + offset)
// Instead of
// lui a0, hi (global)
// addi a0, hi (global)
// addi a0, offset
// This pattern is for cases when the offset is small enough to fit in the
// immediate filed of ADDI (less than 12 bits).
// 2) ADD ((ADDI (LUI %hi(global)) %lo(global)), (LUI hi_offset))
// --->
// offset = hi_offset << 12
// ADDI (LUI %hi(global + offset)) %lo(global + offset)
// Which generates the ASM:
// lui a0, hi(global + offset)
// addi a0, lo(global + offset)
// Instead of:
// lui a0, hi(global)
// addi a0, lo(global)
// lui a1, (offset)
// add a0, a0, a1
// This pattern is for cases when the offset doesn't fit in an immediate field
// of ADDI but the lower 12 bits are all zeros.
// 3) ADD ((ADDI (LUI %hi(global)) %lo(global)), (ADDI lo_offset, (LUI
// hi_offset)))
// --->
// ADDI (LUI %hi(global + offset)) %lo(global + offset)
// Which generates the ASM:
// lui a1, %hi(global + offhi20<<12 + offlo12)
// addi a1, %lo(global + offhi20<<12 + offlo12)
// Instead of:
// lui a0, hi(global)
// addi a0, lo(global)
// lui a1, (offhi20)
// addi a1, (offlo12)
// add a0, a0, a1
// This pattern is for cases when the offset doesn't fit in an immediate field
// of ADDI and both the lower 1 bits and high 20 bits are non zero.
void RISCVDAGToDAGISel::doPeepholeGlobalAddiLuiOffset() {
SelectionDAG::allnodes_iterator Position(CurDAG->getRoot().getNode());
++Position;
SelectionDAG::allnodes_iterator Begin(CurDAG->allnodes_begin());
while (Position != Begin) {
SDNode *Tail = &*--Position;
// Skip dead nodes and any non-machine opcodes.
if (Tail->use_empty() || !Tail->isMachineOpcode())
continue;
// The tail instruction can be an ADD or an ADDI.
if (!Tail->isMachineOpcode() || !(Tail->getMachineOpcode() == RISCV::ADD ||
Tail->getMachineOpcode() == RISCV::ADDI))
continue;
// First detect the global address part of pattern:
// (lui %hi(global) --> Addi %lo(global))
unsigned GlobalLoADDiIdx;
SDValue GlobalLoADDI;
SDValue GlobalHiLUI;
GlobalAddressSDNode *GAhi;
GlobalAddressSDNode *GAlo;
if (!detectLuiAddiGlobal(Tail, GlobalLoADDiIdx, GlobalLoADDI, GlobalHiLUI,
GAlo, GAhi))
continue;
LLVM_DEBUG(dbgs() << " Detected \"ADDI LUI %hi(global), %lo(global)\n");
// Detect the offset part for the address calculation by looking at the
// other operand of the tail instruction:
int64_t Offset;
if (Tail->getMachineOpcode() == RISCV::ADD) {
// If the Tail is an ADD instruction, the offset can be in two forms:
// 1) LUI hi_Offset followed by:
// ADDI lo_offset
// This happens in case the offset has non zero bits in
// both hi 20 and lo 12 bits.
// 2) LUI (offset20)
// This happens in case the lower 12 bits of the offset are zeros.
SDValue OffsetVal = Tail->getOperand(1 - GlobalLoADDiIdx);
if (!matchAddiLuiOffset(OffsetVal, Offset) &&
!matchLuiOffset(OffsetVal, Offset))
continue;
} else
// The Tail is an ADDI instruction:
Offset = cast<ConstantSDNode>(Tail->getOperand(1 - GlobalLoADDiIdx))
->getSExtValue();
LLVM_DEBUG(
dbgs()
<< " Fold offset value into global offset of LUI %hi and ADDI %lo\n");
LLVM_DEBUG(dbgs() << "\tTail:");
LLVM_DEBUG(Tail->dump(CurDAG));
LLVM_DEBUG(dbgs() << "\tGlobalHiLUI:");
LLVM_DEBUG(GlobalHiLUI->dump(CurDAG));
LLVM_DEBUG(dbgs() << "\tGlobalLoADDI:");
LLVM_DEBUG(GlobalLoADDI->dump(CurDAG));
LLVM_DEBUG(dbgs() << "\n");
updateTailInstrUsers(Tail, CurDAG, GAhi, GAlo, GlobalHiLUI, GlobalLoADDI,
Offset);
}
CurDAG->RemoveDeadNodes();
}
// Merge an ADDI into the offset of a load/store instruction where possible.
// (load (add base, off), 0) -> (load base, off)
// (store val, (add base, off)) -> (store val, base, off)
void RISCVDAGToDAGISel::doPeepholeLoadStoreADDI() {
SelectionDAG::allnodes_iterator Position(CurDAG->getRoot().getNode());
++Position;
while (Position != CurDAG->allnodes_begin()) {
SDNode *N = &*--Position;
// Skip dead nodes and any non-machine opcodes.
if (N->use_empty() || !N->isMachineOpcode())
continue;
int OffsetOpIdx;
int BaseOpIdx;
// Only attempt this optimisation for I-type loads and S-type stores.
switch (N->getMachineOpcode()) {
default:
continue;
case RISCV::LB:
case RISCV::LH:
case RISCV::LW:
case RISCV::LBU:
case RISCV::LHU:
case RISCV::LWU:
case RISCV::LD:
case RISCV::FLW:
case RISCV::FLD:
BaseOpIdx = 0;
OffsetOpIdx = 1;
break;
case RISCV::SB:
case RISCV::SH:
case RISCV::SW:
case RISCV::SD:
case RISCV::FSW:
case RISCV::FSD:
BaseOpIdx = 1;
OffsetOpIdx = 2;
break;
}
// Currently, the load/store offset must be 0 to be considered for this
// peephole optimisation.
if (!isa<ConstantSDNode>(N->getOperand(OffsetOpIdx)) ||
N->getConstantOperandVal(OffsetOpIdx) != 0)
continue;
SDValue Base = N->getOperand(BaseOpIdx);
// If the base is an ADDI, we can merge it in to the load/store.
if (!Base.isMachineOpcode() || Base.getMachineOpcode() != RISCV::ADDI)
continue;
SDValue ImmOperand = Base.getOperand(1);
if (auto Const = dyn_cast<ConstantSDNode>(ImmOperand)) {
ImmOperand = CurDAG->getTargetConstant(
Const->getSExtValue(), SDLoc(ImmOperand), ImmOperand.getValueType());
} else if (auto GA = dyn_cast<GlobalAddressSDNode>(ImmOperand)) {
ImmOperand = CurDAG->getTargetGlobalAddress(
GA->getGlobal(), SDLoc(ImmOperand), ImmOperand.getValueType(),
GA->getOffset(), GA->getTargetFlags());
} else {
continue;
}
LLVM_DEBUG(dbgs() << "Folding add-immediate into mem-op:\nBase: ");
LLVM_DEBUG(Base->dump(CurDAG));
LLVM_DEBUG(dbgs() << "\nN: ");
LLVM_DEBUG(N->dump(CurDAG));
LLVM_DEBUG(dbgs() << "\n");
// Modify the offset operand of the load/store.
if (BaseOpIdx == 0) // Load
CurDAG->UpdateNodeOperands(N, Base.getOperand(0), ImmOperand,
N->getOperand(2));
else // Store
CurDAG->UpdateNodeOperands(N, N->getOperand(0), Base.getOperand(0),
ImmOperand, N->getOperand(3));
// The add-immediate may now be dead, in which case remove it.
if (Base.getNode()->use_empty())
CurDAG->RemoveDeadNode(Base.getNode());
}
}
// Remove redundant BuildPairF64+SplitF64 pairs. i.e. cases where an f64 is
// built of two i32 values, only to be split apart again. This must be done
// here as a peephole optimisation as the DAG has not been fully legalized at
// the point BuildPairF64/SplitF64 nodes are created in RISCVISelLowering, so
// some nodes would not yet have been replaced with libcalls.
void RISCVDAGToDAGISel::doPeepholeBuildPairF64SplitF64() {
SelectionDAG::allnodes_iterator Position(CurDAG->getRoot().getNode());
++Position;
while (Position != CurDAG->allnodes_begin()) {
SDNode *N = &*--Position;
// Skip dead nodes and any nodes other than SplitF64Pseudo.
if (N->use_empty() || !N->isMachineOpcode() ||
!(N->getMachineOpcode() == RISCV::SplitF64Pseudo))
continue;
// If the operand to SplitF64 is a BuildPairF64, the split operation is
// redundant. Just use the operands to BuildPairF64 as the result.
SDValue F64Val = N->getOperand(0);
if (F64Val.isMachineOpcode() &&
F64Val.getMachineOpcode() == RISCV::BuildPairF64Pseudo) {
LLVM_DEBUG(
dbgs() << "Removing redundant SplitF64Pseudo and replacing uses "
"with BuildPairF64Pseudo operands:\n");
LLVM_DEBUG(dbgs() << "N: ");
LLVM_DEBUG(N->dump(CurDAG));
LLVM_DEBUG(dbgs() << "F64Val: ");
LLVM_DEBUG(F64Val->dump(CurDAG));
LLVM_DEBUG(dbgs() << "\n");
SDValue From[] = {SDValue(N, 0), SDValue(N, 1)};
SDValue To[] = {F64Val.getOperand(0), F64Val.getOperand(1)};
CurDAG->ReplaceAllUsesOfValuesWith(From, To, 2);
}
}
CurDAG->RemoveDeadNodes();
}
// This pass converts a legalized DAG into a RISCV-specific DAG, ready
// for instruction scheduling.
FunctionPass *llvm::createRISCVISelDag(RISCVTargetMachine &TM) {
return new RISCVDAGToDAGISel(TM);
}
|