summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--llvm/lib/Target/RISCV/RISCVISelLowering.cpp69
-rw-r--r--llvm/test/CodeGen/RISCV/fastcc-float.ll71
-rw-r--r--llvm/test/CodeGen/RISCV/fastcc-int.ll85
3 files changed, 223 insertions, 2 deletions
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 5c20057c999..dc829fce901 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -1796,6 +1796,63 @@ static SDValue unpackF64OnRV32DSoftABI(SelectionDAG &DAG, SDValue Chain,
return DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, Lo, Hi);
}
+// FastCC has less than 1% performance improvement for some particular
+// benchmark. But theoretically, it may have benefit for some cases.
+static bool CC_RISCV_FastCC(unsigned ValNo, MVT ValVT, MVT LocVT,
+ CCValAssign::LocInfo LocInfo,
+ ISD::ArgFlagsTy ArgFlags, CCState &State) {
+
+ if (LocVT == MVT::i32 || LocVT == MVT::i64) {
+ // X5 and X6 might be used for save-restore libcall.
+ static const MCPhysReg GPRList[] = {
+ RISCV::X10, RISCV::X11, RISCV::X12, RISCV::X13, RISCV::X14,
+ RISCV::X15, RISCV::X16, RISCV::X17, RISCV::X7, RISCV::X28,
+ RISCV::X29, RISCV::X30, RISCV::X31};
+ if (unsigned Reg = State.AllocateReg(GPRList)) {
+ State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
+ return false;
+ }
+ }
+
+ if (LocVT == MVT::f32) {
+ static const MCPhysReg FPR32List[] = {
+ RISCV::F10_F, RISCV::F11_F, RISCV::F12_F, RISCV::F13_F, RISCV::F14_F,
+ RISCV::F15_F, RISCV::F16_F, RISCV::F17_F, RISCV::F0_F, RISCV::F1_F,
+ RISCV::F2_F, RISCV::F3_F, RISCV::F4_F, RISCV::F5_F, RISCV::F6_F,
+ RISCV::F7_F, RISCV::F28_F, RISCV::F29_F, RISCV::F30_F, RISCV::F31_F};
+ if (unsigned Reg = State.AllocateReg(FPR32List)) {
+ State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
+ return false;
+ }
+ }
+
+ if (LocVT == MVT::f64) {
+ static const MCPhysReg FPR64List[] = {
+ RISCV::F10_D, RISCV::F11_D, RISCV::F12_D, RISCV::F13_D, RISCV::F14_D,
+ RISCV::F15_D, RISCV::F16_D, RISCV::F17_D, RISCV::F0_D, RISCV::F1_D,
+ RISCV::F2_D, RISCV::F3_D, RISCV::F4_D, RISCV::F5_D, RISCV::F6_D,
+ RISCV::F7_D, RISCV::F28_D, RISCV::F29_D, RISCV::F30_D, RISCV::F31_D};
+ if (unsigned Reg = State.AllocateReg(FPR64List)) {
+ State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
+ return false;
+ }
+ }
+
+ if (LocVT == MVT::i32 || LocVT == MVT::f32) {
+ unsigned Offset4 = State.AllocateStack(4, 4);
+ State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset4, LocVT, LocInfo));
+ return false;
+ }
+
+ if (LocVT == MVT::i64 || LocVT == MVT::f64) {
+ unsigned Offset5 = State.AllocateStack(8, 8);
+ State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset5, LocVT, LocInfo));
+ return false;
+ }
+
+ return true; // CC didn't match.
+}
+
// Transform physical registers into virtual registers.
SDValue RISCVTargetLowering::LowerFormalArguments(
SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
@@ -1835,7 +1892,11 @@ SDValue RISCVTargetLowering::LowerFormalArguments(
// Assign locations to all of the incoming arguments.
SmallVector<CCValAssign, 16> ArgLocs;
CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
- analyzeInputArgs(MF, CCInfo, Ins, /*IsRet=*/false);
+
+ if (CallConv == CallingConv::Fast)
+ CCInfo.AnalyzeFormalArguments(Ins, CC_RISCV_FastCC);
+ else
+ analyzeInputArgs(MF, CCInfo, Ins, /*IsRet=*/false);
for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
CCValAssign &VA = ArgLocs[i];
@@ -2035,7 +2096,11 @@ SDValue RISCVTargetLowering::LowerCall(CallLoweringInfo &CLI,
// Analyze the operands of the call, assigning locations to each operand.
SmallVector<CCValAssign, 16> ArgLocs;
CCState ArgCCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
- analyzeOutputArgs(MF, ArgCCInfo, Outs, /*IsRet=*/false, &CLI);
+
+ if (CallConv == CallingConv::Fast)
+ ArgCCInfo.AnalyzeCallOperands(Outs, CC_RISCV_FastCC);
+ else
+ analyzeOutputArgs(MF, ArgCCInfo, Outs, /*IsRet=*/false, &CLI);
// Check if it's really possible to do a tail call.
if (IsTailCall)
diff --git a/llvm/test/CodeGen/RISCV/fastcc-float.ll b/llvm/test/CodeGen/RISCV/fastcc-float.ll
new file mode 100644
index 00000000000..a70c26bf62f
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/fastcc-float.ll
@@ -0,0 +1,71 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv32 -mattr=+f,+d -verify-machineinstrs < %s \
+; RUN: | FileCheck %s
+
+define fastcc float @callee(<32 x float> %A) nounwind {
+; CHECK-LABEL: callee:
+; CHECK: # %bb.0:
+; CHECK-NEXT: fmv.x.w a0, fa0
+; CHECK-NEXT: ret
+ %B = extractelement <32 x float> %A, i32 0
+ ret float %B
+}
+
+; With the fastcc, arguments will be passed in fa0-fa7 and ft0-ft11.
+; The rest will be pushed on the stack.
+define float @caller(<32 x float> %A) nounwind {
+; CHECK-LABEL: caller:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -64
+; CHECK-NEXT: sw ra, 60(sp)
+; CHECK-NEXT: flw fa0, 0(a0)
+; CHECK-NEXT: flw fa1, 4(a0)
+; CHECK-NEXT: flw fa2, 8(a0)
+; CHECK-NEXT: flw fa3, 12(a0)
+; CHECK-NEXT: flw fa4, 16(a0)
+; CHECK-NEXT: flw fa5, 20(a0)
+; CHECK-NEXT: flw fa6, 24(a0)
+; CHECK-NEXT: flw fa7, 28(a0)
+; CHECK-NEXT: flw ft0, 32(a0)
+; CHECK-NEXT: flw ft1, 36(a0)
+; CHECK-NEXT: flw ft2, 40(a0)
+; CHECK-NEXT: flw ft3, 44(a0)
+; CHECK-NEXT: flw ft4, 48(a0)
+; CHECK-NEXT: flw ft5, 52(a0)
+; CHECK-NEXT: flw ft6, 56(a0)
+; CHECK-NEXT: flw ft7, 60(a0)
+; CHECK-NEXT: flw ft8, 64(a0)
+; CHECK-NEXT: flw ft9, 68(a0)
+; CHECK-NEXT: flw ft10, 72(a0)
+; CHECK-NEXT: flw ft11, 76(a0)
+; CHECK-NEXT: flw fs0, 80(a0)
+; CHECK-NEXT: flw fs1, 84(a0)
+; CHECK-NEXT: flw fs2, 88(a0)
+; CHECK-NEXT: flw fs3, 92(a0)
+; CHECK-NEXT: flw fs4, 96(a0)
+; CHECK-NEXT: flw fs5, 100(a0)
+; CHECK-NEXT: flw fs6, 104(a0)
+; CHECK-NEXT: flw fs7, 108(a0)
+; CHECK-NEXT: flw fs8, 112(a0)
+; CHECK-NEXT: flw fs9, 116(a0)
+; CHECK-NEXT: flw fs10, 120(a0)
+; CHECK-NEXT: flw fs11, 124(a0)
+; CHECK-NEXT: fsw fs11, 44(sp)
+; CHECK-NEXT: fsw fs10, 40(sp)
+; CHECK-NEXT: fsw fs9, 36(sp)
+; CHECK-NEXT: fsw fs8, 32(sp)
+; CHECK-NEXT: fsw fs7, 28(sp)
+; CHECK-NEXT: fsw fs6, 24(sp)
+; CHECK-NEXT: fsw fs5, 20(sp)
+; CHECK-NEXT: fsw fs4, 16(sp)
+; CHECK-NEXT: fsw fs3, 12(sp)
+; CHECK-NEXT: fsw fs2, 8(sp)
+; CHECK-NEXT: fsw fs1, 4(sp)
+; CHECK-NEXT: fsw fs0, 0(sp)
+; CHECK-NEXT: call callee
+; CHECK-NEXT: lw ra, 60(sp)
+; CHECK-NEXT: addi sp, sp, 64
+; CHECK-NEXT: ret
+ %C = call fastcc float @callee(<32 x float> %A)
+ ret float %C
+}
diff --git a/llvm/test/CodeGen/RISCV/fastcc-int.ll b/llvm/test/CodeGen/RISCV/fastcc-int.ll
new file mode 100644
index 00000000000..a48639d6626
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/fastcc-int.ll
@@ -0,0 +1,85 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv32 -verify-machineinstrs < %s \
+; RUN: | FileCheck -check-prefix=RV32 %s
+; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s \
+; RUN: | FileCheck -check-prefix=RV64 %s
+
+define fastcc i32 @callee(<16 x i32> %A) nounwind {
+; RV32-LABEL: callee:
+; RV32: # %bb.0:
+; RV32-NEXT: ret
+;
+; RV64-LABEL: callee:
+; RV64: # %bb.0:
+; RV64-NEXT: ret
+ %B = extractelement <16 x i32> %A, i32 0
+ ret i32 %B
+}
+
+; With the fastcc, arguments will be passed in a0-a7 and t2-t6.
+; The rest will be pushed on the stack.
+define i32 @caller(<16 x i32> %A) nounwind {
+; RV32-LABEL: caller:
+; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -32
+; RV32-NEXT: sw ra, 28(sp)
+; RV32-NEXT: sw s0, 24(sp)
+; RV32-NEXT: lw t0, 0(a0)
+; RV32-NEXT: lw a1, 4(a0)
+; RV32-NEXT: lw a2, 8(a0)
+; RV32-NEXT: lw a3, 12(a0)
+; RV32-NEXT: lw a4, 16(a0)
+; RV32-NEXT: lw a5, 20(a0)
+; RV32-NEXT: lw a6, 24(a0)
+; RV32-NEXT: lw a7, 28(a0)
+; RV32-NEXT: lw t2, 32(a0)
+; RV32-NEXT: lw t3, 36(a0)
+; RV32-NEXT: lw t4, 40(a0)
+; RV32-NEXT: lw t5, 44(a0)
+; RV32-NEXT: lw t6, 48(a0)
+; RV32-NEXT: lw t1, 52(a0)
+; RV32-NEXT: lw s0, 56(a0)
+; RV32-NEXT: lw a0, 60(a0)
+; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw s0, 4(sp)
+; RV32-NEXT: sw t1, 0(sp)
+; RV32-NEXT: mv a0, t0
+; RV32-NEXT: call callee
+; RV32-NEXT: lw s0, 24(sp)
+; RV32-NEXT: lw ra, 28(sp)
+; RV32-NEXT: addi sp, sp, 32
+; RV32-NEXT: ret
+;
+; RV64-LABEL: caller:
+; RV64: # %bb.0:
+; RV64-NEXT: addi sp, sp, -48
+; RV64-NEXT: sd ra, 40(sp)
+; RV64-NEXT: sd s0, 32(sp)
+; RV64-NEXT: ld t0, 0(a0)
+; RV64-NEXT: ld a1, 8(a0)
+; RV64-NEXT: ld a2, 16(a0)
+; RV64-NEXT: ld a3, 24(a0)
+; RV64-NEXT: ld a4, 32(a0)
+; RV64-NEXT: ld a5, 40(a0)
+; RV64-NEXT: ld a6, 48(a0)
+; RV64-NEXT: ld a7, 56(a0)
+; RV64-NEXT: ld t2, 64(a0)
+; RV64-NEXT: ld t3, 72(a0)
+; RV64-NEXT: ld t4, 80(a0)
+; RV64-NEXT: ld t5, 88(a0)
+; RV64-NEXT: ld t6, 96(a0)
+; RV64-NEXT: ld t1, 104(a0)
+; RV64-NEXT: ld s0, 112(a0)
+; RV64-NEXT: ld a0, 120(a0)
+; RV64-NEXT: sd a0, 16(sp)
+; RV64-NEXT: sd s0, 8(sp)
+; RV64-NEXT: sd t1, 0(sp)
+; RV64-NEXT: mv a0, t0
+; RV64-NEXT: call callee
+; RV64-NEXT: ld s0, 32(sp)
+; RV64-NEXT: ld ra, 40(sp)
+; RV64-NEXT: addi sp, sp, 48
+; RV64-NEXT: ret
+ %C = call fastcc i32 @callee(<16 x i32> %A)
+ ret i32 %C
+}
OpenPOWER on IntegriCloud