 llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 69
 llvm/test/CodeGen/RISCV/fastcc-float.ll     | 71
 llvm/test/CodeGen/RISCV/fastcc-int.ll       | 85
 3 files changed, 223 insertions(+), 2 deletions(-)
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 5c20057c999..dc829fce901 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -1796,6 +1796,63 @@ static SDValue unpackF64OnRV32DSoftABI(SelectionDAG &DAG, SDValue Chain,
   return DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, Lo, Hi);
 }
 
+// FastCC showed less than a 1% performance improvement on particular
+// benchmarks, but in theory it may benefit other cases.
+static bool CC_RISCV_FastCC(unsigned ValNo, MVT ValVT, MVT LocVT,
+                            CCValAssign::LocInfo LocInfo,
+                            ISD::ArgFlagsTy ArgFlags, CCState &State) {
+
+  if (LocVT == MVT::i32 || LocVT == MVT::i64) {
+    // X5 and X6 might be used by the save-restore libcalls.
+    static const MCPhysReg GPRList[] = {
+        RISCV::X10, RISCV::X11, RISCV::X12, RISCV::X13, RISCV::X14,
+        RISCV::X15, RISCV::X16, RISCV::X17, RISCV::X7,  RISCV::X28,
+        RISCV::X29, RISCV::X30, RISCV::X31};
+    if (unsigned Reg = State.AllocateReg(GPRList)) {
+      State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
+      return false;
+    }
+  }
+
+  if (LocVT == MVT::f32) {
+    static const MCPhysReg FPR32List[] = {
+        RISCV::F10_F, RISCV::F11_F, RISCV::F12_F, RISCV::F13_F, RISCV::F14_F,
+        RISCV::F15_F, RISCV::F16_F, RISCV::F17_F, RISCV::F0_F,  RISCV::F1_F,
+        RISCV::F2_F,  RISCV::F3_F,  RISCV::F4_F,  RISCV::F5_F,  RISCV::F6_F,
+        RISCV::F7_F,  RISCV::F28_F, RISCV::F29_F, RISCV::F30_F, RISCV::F31_F};
+    if (unsigned Reg = State.AllocateReg(FPR32List)) {
+      State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
+      return false;
+    }
+  }
+
+  if (LocVT == MVT::f64) {
+    static const MCPhysReg FPR64List[] = {
+        RISCV::F10_D, RISCV::F11_D, RISCV::F12_D, RISCV::F13_D, RISCV::F14_D,
+        RISCV::F15_D, RISCV::F16_D, RISCV::F17_D, RISCV::F0_D,  RISCV::F1_D,
+        RISCV::F2_D,  RISCV::F3_D,  RISCV::F4_D,  RISCV::F5_D,  RISCV::F6_D,
+        RISCV::F7_D,  RISCV::F28_D, RISCV::F29_D, RISCV::F30_D, RISCV::F31_D};
+    if (unsigned Reg = State.AllocateReg(FPR64List)) {
+      State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
+      return false;
+    }
+  }
+
+  if (LocVT == MVT::i32 || LocVT == MVT::f32) {
+    unsigned Offset4 = State.AllocateStack(4, 4);
+    State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset4, LocVT, LocInfo));
+    return false;
+  }
+
+  if (LocVT == MVT::i64 || LocVT == MVT::f64) {
+    unsigned Offset5 = State.AllocateStack(8, 8);
+    State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset5, LocVT, LocInfo));
+    return false;
+  }
+
+  return true; // CC didn't match.
+}
+
 // Transform physical registers into virtual registers.
 SDValue RISCVTargetLowering::LowerFormalArguments(
     SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
@@ -1835,7 +1892,11 @@ SDValue RISCVTargetLowering::LowerFormalArguments(
   // Assign locations to all of the incoming arguments.
   SmallVector<CCValAssign, 16> ArgLocs;
   CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
-  analyzeInputArgs(MF, CCInfo, Ins, /*IsRet=*/false);
+
+  if (CallConv == CallingConv::Fast)
+    CCInfo.AnalyzeFormalArguments(Ins, CC_RISCV_FastCC);
+  else
+    analyzeInputArgs(MF, CCInfo, Ins, /*IsRet=*/false);
 
   for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
     CCValAssign &VA = ArgLocs[i];
@@ -2035,7 +2096,11 @@ SDValue RISCVTargetLowering::LowerCall(CallLoweringInfo &CLI,
   // Analyze the operands of the call, assigning locations to each operand.
   SmallVector<CCValAssign, 16> ArgLocs;
   CCState ArgCCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
-  analyzeOutputArgs(MF, ArgCCInfo, Outs, /*IsRet=*/false, &CLI);
+
+  if (CallConv == CallingConv::Fast)
+    ArgCCInfo.AnalyzeCallOperands(Outs, CC_RISCV_FastCC);
+  else
+    analyzeOutputArgs(MF, ArgCCInfo, Outs, /*IsRet=*/false, &CLI);
 
   // Check if it's really possible to do a tail call.
   if (IsTailCall)
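
The hook above only fires when the IR marks both the function and its call
sites with the fast calling convention; everything else falls through to the
existing analyzeInputArgs/analyzeOutputArgs path. A minimal sketch of opting
in through the LLVM C++ API (the helper name markFastCC is illustrative, not
part of this patch):

    #include "llvm/IR/CallingConv.h"
    #include "llvm/IR/Function.h"
    #include "llvm/IR/Instructions.h"

    // Tag a function and one of its call sites as fastcc so the RISC-V
    // backend routes argument assignment through CC_RISCV_FastCC.
    void markFastCC(llvm::Function *Callee, llvm::CallInst *Call) {
      Callee->setCallingConv(llvm::CallingConv::Fast);
      // The call site's convention must match the callee's.
      Call->setCallingConv(llvm::CallingConv::Fast);
    }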
diff --git a/llvm/test/CodeGen/RISCV/fastcc-float.ll b/llvm/test/CodeGen/RISCV/fastcc-float.ll
new file mode 100644
index 00000000000..a70c26bf62f
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/fastcc-float.ll
@@ -0,0 +1,71 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv32 -mattr=+f,+d -verify-machineinstrs < %s \
+; RUN:     | FileCheck %s
+
+define fastcc float @callee(<32 x float> %A) nounwind {
+; CHECK-LABEL: callee:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    fmv.x.w a0, fa0
+; CHECK-NEXT:    ret
+  %B = extractelement <32 x float> %A, i32 0
+  ret float %B
+}
+
+; With fastcc, arguments are passed in fa0-fa7 and ft0-ft11;
+; the rest are passed on the stack.
+define float @caller(<32 x float> %A) nounwind {
+; CHECK-LABEL: caller:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi sp, sp, -64
+; CHECK-NEXT:    sw ra, 60(sp)
+; CHECK-NEXT:    flw fa0, 0(a0)
+; CHECK-NEXT:    flw fa1, 4(a0)
+; CHECK-NEXT:    flw fa2, 8(a0)
+; CHECK-NEXT:    flw fa3, 12(a0)
+; CHECK-NEXT:    flw fa4, 16(a0)
+; CHECK-NEXT:    flw fa5, 20(a0)
+; CHECK-NEXT:    flw fa6, 24(a0)
+; CHECK-NEXT:    flw fa7, 28(a0)
+; CHECK-NEXT:    flw ft0, 32(a0)
+; CHECK-NEXT:    flw ft1, 36(a0)
+; CHECK-NEXT:    flw ft2, 40(a0)
+; CHECK-NEXT:    flw ft3, 44(a0)
+; CHECK-NEXT:    flw ft4, 48(a0)
+; CHECK-NEXT:    flw ft5, 52(a0)
+; CHECK-NEXT:    flw ft6, 56(a0)
+; CHECK-NEXT:    flw ft7, 60(a0)
+; CHECK-NEXT:    flw ft8, 64(a0)
+; CHECK-NEXT:    flw ft9, 68(a0)
+; CHECK-NEXT:    flw ft10, 72(a0)
+; CHECK-NEXT:    flw ft11, 76(a0)
+; CHECK-NEXT:    flw fs0, 80(a0)
+; CHECK-NEXT:    flw fs1, 84(a0)
+; CHECK-NEXT:    flw fs2, 88(a0)
+; CHECK-NEXT:    flw fs3, 92(a0)
+; CHECK-NEXT:    flw fs4, 96(a0)
+; CHECK-NEXT:    flw fs5, 100(a0)
+; CHECK-NEXT:    flw fs6, 104(a0)
+; CHECK-NEXT:    flw fs7, 108(a0)
+; CHECK-NEXT:    flw fs8, 112(a0)
+; CHECK-NEXT:    flw fs9, 116(a0)
+; CHECK-NEXT:    flw fs10, 120(a0)
+; CHECK-NEXT:    flw fs11, 124(a0)
+; CHECK-NEXT:    fsw fs11, 44(sp)
+; CHECK-NEXT:    fsw fs10, 40(sp)
+; CHECK-NEXT:    fsw fs9, 36(sp)
+; CHECK-NEXT:    fsw fs8, 32(sp)
+; CHECK-NEXT:    fsw fs7, 28(sp)
+; CHECK-NEXT:    fsw fs6, 24(sp)
+; CHECK-NEXT:    fsw fs5, 20(sp)
+; CHECK-NEXT:    fsw fs4, 16(sp)
+; CHECK-NEXT:    fsw fs3, 12(sp)
+; CHECK-NEXT:    fsw fs2, 8(sp)
+; CHECK-NEXT:    fsw fs1, 4(sp)
+; CHECK-NEXT:    fsw fs0, 0(sp)
+; CHECK-NEXT:    call callee
+; CHECK-NEXT:    lw ra, 60(sp)
+; CHECK-NEXT:    addi sp, sp, 64
+; CHECK-NEXT:    ret
+  %C = call fastcc float @callee(<32 x float> %A)
+  ret float %C
+}
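
As a sanity check on the CHECK lines above: CC_RISCV_FastCC hands out fa0-fa7
first and then ft0-ft11, so 20 of the 32 scalarized floats travel in registers
and the remaining 12 land in 4-byte stack slots. A compile-time sketch of that
arithmetic (constant names are illustrative):

    constexpr unsigned NumFPRs = 8 + 12;            // fa0-fa7 plus ft0-ft11
    constexpr unsigned NumArgs = 32;                // <32 x float>, scalarized
    constexpr unsigned OnStack = NumArgs - NumFPRs; // 12 values
    // 12 values * 4 bytes -> offsets 0(sp) through 44(sp), matching the
    // twelve fsw stores in the caller above.
    static_assert(OnStack * 4 == 48, "48 bytes of outgoing f32 arguments");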
diff --git a/llvm/test/CodeGen/RISCV/fastcc-int.ll b/llvm/test/CodeGen/RISCV/fastcc-int.ll
new file mode 100644
index 00000000000..a48639d6626
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/fastcc-int.ll
@@ -0,0 +1,85 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv32 -verify-machineinstrs < %s \
+; RUN:   | FileCheck -check-prefix=RV32 %s
+; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s \
+; RUN:   | FileCheck -check-prefix=RV64 %s
+
+define fastcc i32 @callee(<16 x i32> %A) nounwind {
+; RV32-LABEL: callee:
+; RV32:       # %bb.0:
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: callee:
+; RV64:       # %bb.0:
+; RV64-NEXT:    ret
+  %B = extractelement <16 x i32> %A, i32 0
+  ret i32 %B
+}
+
+; With fastcc, arguments are passed in a0-a7 and t2-t6;
+; the rest are passed on the stack.
+define i32 @caller(<16 x i32> %A) nounwind {
+; RV32-LABEL: caller:
+; RV32:       # %bb.0:
+; RV32-NEXT:    addi sp, sp, -32
+; RV32-NEXT:    sw ra, 28(sp)
+; RV32-NEXT:    sw s0, 24(sp)
+; RV32-NEXT:    lw t0, 0(a0)
+; RV32-NEXT:    lw a1, 4(a0)
+; RV32-NEXT:    lw a2, 8(a0)
+; RV32-NEXT:    lw a3, 12(a0)
+; RV32-NEXT:    lw a4, 16(a0)
+; RV32-NEXT:    lw a5, 20(a0)
+; RV32-NEXT:    lw a6, 24(a0)
+; RV32-NEXT:    lw a7, 28(a0)
+; RV32-NEXT:    lw t2, 32(a0)
+; RV32-NEXT:    lw t3, 36(a0)
+; RV32-NEXT:    lw t4, 40(a0)
+; RV32-NEXT:    lw t5, 44(a0)
+; RV32-NEXT:    lw t6, 48(a0)
+; RV32-NEXT:    lw t1, 52(a0)
+; RV32-NEXT:    lw s0, 56(a0)
+; RV32-NEXT:    lw a0, 60(a0)
+; RV32-NEXT:    sw a0, 8(sp)
+; RV32-NEXT:    sw s0, 4(sp)
+; RV32-NEXT:    sw t1, 0(sp)
+; RV32-NEXT:    mv a0, t0
+; RV32-NEXT:    call callee
+; RV32-NEXT:    lw s0, 24(sp)
+; RV32-NEXT:    lw ra, 28(sp)
+; RV32-NEXT:    addi sp, sp, 32
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: caller:
+; RV64:       # %bb.0:
+; RV64-NEXT:    addi sp, sp, -48
+; RV64-NEXT:    sd ra, 40(sp)
+; RV64-NEXT:    sd s0, 32(sp)
+; RV64-NEXT:    ld t0, 0(a0)
+; RV64-NEXT:    ld a1, 8(a0)
+; RV64-NEXT:    ld a2, 16(a0)
+; RV64-NEXT:    ld a3, 24(a0)
+; RV64-NEXT:    ld a4, 32(a0)
+; RV64-NEXT:    ld a5, 40(a0)
+; RV64-NEXT:    ld a6, 48(a0)
+; RV64-NEXT:    ld a7, 56(a0)
+; RV64-NEXT:    ld t2, 64(a0)
+; RV64-NEXT:    ld t3, 72(a0)
+; RV64-NEXT:    ld t4, 80(a0)
+; RV64-NEXT:    ld t5, 88(a0)
+; RV64-NEXT:    ld t6, 96(a0)
+; RV64-NEXT:    ld t1, 104(a0)
+; RV64-NEXT:    ld s0, 112(a0)
+; RV64-NEXT:    ld a0, 120(a0)
+; RV64-NEXT:    sd a0, 16(sp)
+; RV64-NEXT:    sd s0, 8(sp)
+; RV64-NEXT:    sd t1, 0(sp)
+; RV64-NEXT:    mv a0, t0
+; RV64-NEXT:    call callee
+; RV64-NEXT:    ld s0, 32(sp)
+; RV64-NEXT:    ld ra, 40(sp)
+; RV64-NEXT:    addi sp, sp, 48
+; RV64-NEXT:    ret
+  %C = call fastcc i32 @callee(<16 x i32> %A)
+  ret i32 %C
+}
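
The integer test follows the same pattern, except that the GPR list
deliberately skips t0/t1 (X5/X6), which the save-restore libcalls may use,
leaving a0-a7 plus t2-t6. A matching sketch (constant names are illustrative):

    constexpr unsigned NumGPRs = 8 + 5;             // a0-a7 plus t2-t6
    constexpr unsigned NumArgs = 16;                // <16 x i32>, scalarized
    constexpr unsigned OnStack = NumArgs - NumGPRs; // 3 values
    // Three slots: 0/4/8(sp) via sw on RV32, 0/8/16(sp) via sd on RV64,
    // matching the three stores before each call above.
    static_assert(OnStack == 3, "three integer arguments spill to the stack");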

