diff options
| author | Tom Stellard <thomas.stellard@amd.com> | 2014-02-25 21:36:18 +0000 | 
|---|---|---|
| committer | Tom Stellard <thomas.stellard@amd.com> | 2014-02-25 21:36:18 +0000 | 
| commit | 1f15bff0dfbc704565e5fcfaa3a5336a67bf9705 (patch) | |
| tree | 3a3b3b18339687dc9269c00af195bb4d212659ff | |
| parent | 3b79b2ab4e35353e63ba323a3de4b0a70c61a5f1 (diff) | |
| download | bcm5719-llvm-1f15bff0dfbc704565e5fcfaa3a5336a67bf9705.tar.gz bcm5719-llvm-1f15bff0dfbc704565e5fcfaa3a5336a67bf9705.zip  | |
R600/SI: Custom select 64-bit ADD
llvm-svn: 202194
| -rw-r--r-- | llvm/lib/Target/R600/AMDGPUISelDAGToDAG.cpp | 48 | ||||
| -rw-r--r-- | llvm/lib/Target/R600/SIISelLowering.cpp | 29 | ||||
| -rw-r--r-- | llvm/lib/Target/R600/SIISelLowering.h | 1 | ||||
| -rw-r--r-- | llvm/test/CodeGen/R600/add.ll | 10 | ||||
| -rw-r--r-- | llvm/test/CodeGen/R600/add_i64.ll | 23 | 
5 files changed, 75 insertions, 36 deletions
diff --git a/llvm/lib/Target/R600/AMDGPUISelDAGToDAG.cpp b/llvm/lib/Target/R600/AMDGPUISelDAGToDAG.cpp index a9891350e57..fea875c3e75 100644 --- a/llvm/lib/Target/R600/AMDGPUISelDAGToDAG.cpp +++ b/llvm/lib/Target/R600/AMDGPUISelDAGToDAG.cpp @@ -200,6 +200,54 @@ SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) {    }    switch (Opc) {    default: break; +  // We are selecting i64 ADD here instead of custom lower it during +  // DAG legalization, so we can fold some i64 ADDs used for address +  // calculation into the LOAD and STORE instructions. +  case ISD::ADD: { +    const AMDGPUSubtarget &ST = TM.getSubtarget<AMDGPUSubtarget>(); +    if (N->getValueType(0) != MVT::i64 || +        ST.getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS) +      break; + +    SDLoc DL(N); +    SDValue LHS = N->getOperand(0); +    SDValue RHS = N->getOperand(1); + +    SDValue Sub0 = CurDAG->getTargetConstant(AMDGPU::sub0, MVT::i32); +    SDValue Sub1 = CurDAG->getTargetConstant(AMDGPU::sub1, MVT::i32); + +    SDNode *Lo0 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, +                                         DL, MVT::i32, LHS, Sub0); +    SDNode *Hi0 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, +                                         DL, MVT::i32, LHS, Sub1); + +    SDNode *Lo1 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, +                                         DL, MVT::i32, RHS, Sub0); +    SDNode *Hi1 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, +                                         DL, MVT::i32, RHS, Sub1); + +    SDVTList VTList = CurDAG->getVTList(MVT::i32, MVT::Glue); + +    SmallVector<SDValue, 8> AddLoArgs; +    AddLoArgs.push_back(SDValue(Lo0, 0)); +    AddLoArgs.push_back(SDValue(Lo1, 0)); + +    SDNode *AddLo = CurDAG->getMachineNode(AMDGPU::S_ADD_I32, DL, +                                           VTList, AddLoArgs); +    SDValue Carry = SDValue(AddLo, 1); +    SDNode *AddHi = CurDAG->getMachineNode(AMDGPU::S_ADDC_U32, DL, +                                           MVT::i32, SDValue(Hi0, 0), +                                           SDValue(Hi1, 0), Carry); + +    SDValue Args[5] = { +      CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, MVT::i32), +      SDValue(AddLo,0), +      Sub0, +      SDValue(AddHi,0), +      Sub1, +    }; +    return CurDAG->SelectNodeTo(N, AMDGPU::REG_SEQUENCE, MVT::i64, Args, 5); +  }    case ISD::BUILD_VECTOR: {      unsigned RegClassID;      const AMDGPUSubtarget &ST = TM.getSubtarget<AMDGPUSubtarget>(); diff --git a/llvm/lib/Target/R600/SIISelLowering.cpp b/llvm/lib/Target/R600/SIISelLowering.cpp index 9b6c4f4ed85..e1256415309 100644 --- a/llvm/lib/Target/R600/SIISelLowering.cpp +++ b/llvm/lib/Target/R600/SIISelLowering.cpp @@ -76,7 +76,6 @@ SITargetLowering::SITargetLowering(TargetMachine &TM) :    setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v16i32, Expand);    setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v16f32, Expand); -  setOperationAction(ISD::ADD, MVT::i64, Legal);    setOperationAction(ISD::ADD, MVT::i32, Legal);    setOperationAction(ISD::ADDC, MVT::i32, Legal);    setOperationAction(ISD::ADDE, MVT::i32, Legal); @@ -488,7 +487,6 @@ SDValue SITargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {    SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();    switch (Op.getOpcode()) {    default: return AMDGPUTargetLowering::LowerOperation(Op, DAG); -  case ISD::ADD: return LowerADD(Op, DAG);    case ISD::BRCOND: return LowerBRCOND(Op, DAG);    case ISD::LOAD: {      LoadSDNode *Load = dyn_cast<LoadSDNode>(Op); @@ -628,33 +626,6 @@ SDValue SITargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {    return SDValue();  } -SDValue SITargetLowering::LowerADD(SDValue Op, -                                   SelectionDAG &DAG) const { -  if (Op.getValueType() != MVT::i64) -    return SDValue(); - -  SDLoc DL(Op); -  SDValue LHS = Op.getOperand(0); -  SDValue RHS = Op.getOperand(1); - -  SDValue Zero = DAG.getConstant(0, MVT::i32); -  SDValue One = DAG.getConstant(1, MVT::i32); - -  SDValue Lo0 = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, LHS, Zero); -  SDValue Hi0 = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, LHS, One); - -  SDValue Lo1 = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, RHS, Zero); -  SDValue Hi1 = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, RHS, One); - -  SDVTList VTList = DAG.getVTList(MVT::i32, MVT::Glue); - -  SDValue AddLo = DAG.getNode(ISD::ADDC, DL, VTList, Lo0, Lo1); -  SDValue Carry = AddLo.getValue(1); -  SDValue AddHi = DAG.getNode(ISD::ADDE, DL, VTList, Hi0, Hi1, Carry); - -  return DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, AddLo, AddHi.getValue(0)); -} -  /// \brief Helper function for LowerBRCOND  static SDNode *findUser(SDValue Value, unsigned Opcode) { diff --git a/llvm/lib/Target/R600/SIISelLowering.h b/llvm/lib/Target/R600/SIISelLowering.h index c7cd0d1c130..025f23869af 100644 --- a/llvm/lib/Target/R600/SIISelLowering.h +++ b/llvm/lib/Target/R600/SIISelLowering.h @@ -31,7 +31,6 @@ class SITargetLowering : public AMDGPUTargetLowering {    SDValue LowerSIGN_EXTEND(SDValue Op, SelectionDAG &DAG) const;    SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG) const;    SDValue LowerZERO_EXTEND(SDValue Op, SelectionDAG &DAG) const; -  SDValue LowerADD(SDValue Op, SelectionDAG &DAG) const;    SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG) const;    SDValue ResourceDescriptorToi128(SDValue Op, SelectionDAG &DAG) const; diff --git a/llvm/test/CodeGen/R600/add.ll b/llvm/test/CodeGen/R600/add.ll index e4e7bc6c7f8..8de87f4999f 100644 --- a/llvm/test/CodeGen/R600/add.ll +++ b/llvm/test/CodeGen/R600/add.ll @@ -75,3 +75,13 @@ entry:    store <8 x i32> %0, <8 x i32> addrspace(1)* %out    ret void  } + +; FUNC-LABEL: @add64 +; SI-CHECK: S_ADD_I32 +; SI-CHECK: S_ADDC_U32 +define void @add64(i64 addrspace(1)* %out, i64 %a, i64 %b) { +entry: +  %0 = add i64 %a, %b +  store i64 %0, i64 addrspace(1)* %out +  ret void +} diff --git a/llvm/test/CodeGen/R600/add_i64.ll b/llvm/test/CodeGen/R600/add_i64.ll index 303a1cb0391..82cd8a9170d 100644 --- a/llvm/test/CodeGen/R600/add_i64.ll +++ b/llvm/test/CodeGen/R600/add_i64.ll @@ -1,14 +1,13 @@ -; XFAIL: * -; This will fail until i64 add is enabled -  ; RUN: llc < %s -march=r600 -mcpu=SI | FileCheck --check-prefix=SI %s -declare i32 @llvm.SI.tid() readnone +declare i32 @llvm.r600.read.tidig.x() readnone  ; SI-LABEL: @test_i64_vreg: +; SI: V_ADD_I32 +; SI: V_ADDC_U32  define void @test_i64_vreg(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %inA, i64 addrspace(1)* noalias %inB) { -  %tid = call i32 @llvm.SI.tid() readnone +  %tid = call i32 @llvm.r600.read.tidig.x() readnone    %a_ptr = getelementptr i64 addrspace(1)* %inA, i32 %tid    %b_ptr = getelementptr i64 addrspace(1)* %inB, i32 %tid    %a = load i64 addrspace(1)* %a_ptr @@ -20,6 +19,8 @@ define void @test_i64_vreg(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noa  ; Check that the SGPR add operand is correctly moved to a VGPR.  ; SI-LABEL: @sgpr_operand: +; SI: V_ADD_I32 +; SI: V_ADDC_U32  define void @sgpr_operand(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %in, i64 addrspace(1)* noalias %in_bar, i64 %a) {    %foo = load i64 addrspace(1)* %in, align 8    %result = add i64 %foo, %a @@ -31,6 +32,8 @@ define void @sgpr_operand(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noal  ; SGPR as other operand.  ;  ; SI-LABEL: @sgpr_operand_reversed: +; SI: V_ADD_I32 +; SI: V_ADDC_U32  define void @sgpr_operand_reversed(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %in, i64 %a) {    %foo = load i64 addrspace(1)* %in, align 8    %result = add i64 %a, %foo @@ -40,6 +43,10 @@ define void @sgpr_operand_reversed(i64 addrspace(1)* noalias %out, i64 addrspace  ; SI-LABEL: @test_v2i64_sreg: +; SI: S_ADD_I32 +; SI: S_ADDC_U32 +; SI: S_ADD_I32 +; SI: S_ADDC_U32  define void @test_v2i64_sreg(<2 x i64> addrspace(1)* noalias %out, <2 x i64> %a, <2 x i64> %b) {    %result = add <2 x i64> %a, %b    store <2 x i64> %result, <2 x i64> addrspace(1)* %out @@ -47,8 +54,12 @@ define void @test_v2i64_sreg(<2 x i64> addrspace(1)* noalias %out, <2 x i64> %a,  }  ; SI-LABEL: @test_v2i64_vreg: +; SI: V_ADD_I32 +; SI: V_ADDC_U32 +; SI: V_ADD_I32 +; SI: V_ADDC_U32  define void @test_v2i64_vreg(<2 x i64> addrspace(1)* noalias %out, <2 x i64> addrspace(1)* noalias %inA, <2 x i64> addrspace(1)* noalias %inB) { -  %tid = call i32 @llvm.SI.tid() readnone +  %tid = call i32 @llvm.r600.read.tidig.x() readnone    %a_ptr = getelementptr <2 x i64> addrspace(1)* %inA, i32 %tid    %b_ptr = getelementptr <2 x i64> addrspace(1)* %inB, i32 %tid    %a = load <2 x i64> addrspace(1)* %a_ptr  | 

