diff options
Diffstat (limited to 'llvm/lib')
-rw-r--r-- | llvm/lib/Target/R600/AMDGPUISelLowering.cpp | 69 | ||||
-rw-r--r-- | llvm/lib/Target/R600/AMDGPUISelLowering.h | 13 | ||||
-rw-r--r-- | llvm/lib/Target/R600/AMDGPUInstrInfo.td | 17 | ||||
-rw-r--r-- | llvm/lib/Target/R600/R600Instructions.td | 5 | ||||
-rw-r--r-- | llvm/lib/Target/R600/SIInstructions.td | 4 |
5 files changed, 75 insertions, 33 deletions
diff --git a/llvm/lib/Target/R600/AMDGPUISelLowering.cpp b/llvm/lib/Target/R600/AMDGPUISelLowering.cpp index 5561bf8976c..6d608d130ff 100644 --- a/llvm/lib/Target/R600/AMDGPUISelLowering.cpp +++ b/llvm/lib/Target/R600/AMDGPUISelLowering.cpp @@ -378,6 +378,7 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM) : setOperationAction(ISD::FNEARBYINT, MVT::f64, Custom); setTargetDAGCombine(ISD::MUL); + setTargetDAGCombine(ISD::SELECT); setTargetDAGCombine(ISD::SELECT_CC); setTargetDAGCombine(ISD::STORE); @@ -999,21 +1000,21 @@ SDValue AMDGPUTargetLowering::LowerIntrinsicLRP(SDValue Op, } /// \brief Generate Min/Max node -SDValue AMDGPUTargetLowering::CombineMinMax(SDNode *N, +SDValue AMDGPUTargetLowering::CombineMinMax(SDLoc DL, + EVT VT, + SDValue LHS, + SDValue RHS, + SDValue True, + SDValue False, + SDValue CC, SelectionDAG &DAG) const { - SDLoc DL(N); - EVT VT = N->getValueType(0); - - SDValue LHS = N->getOperand(0); - SDValue RHS = N->getOperand(1); - SDValue True = N->getOperand(2); - SDValue False = N->getOperand(3); - SDValue CC = N->getOperand(4); + if (VT != MVT::f32 && + (VT != MVT::f64 || + Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS)) + return SDValue(); - if (VT != MVT::f32 || - !((LHS == True && RHS == False) || (LHS == False && RHS == True))) { + if (!(LHS == True && RHS == False) && !(LHS == False && RHS == True)) return SDValue(); - } ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get(); switch (CCOpcode) { @@ -1029,14 +1030,15 @@ SDValue AMDGPUTargetLowering::CombineMinMax(SDNode *N, case ISD::SETTRUE2: case ISD::SETUO: case ISD::SETO: - llvm_unreachable("Operation should already be optimised!"); + break; case ISD::SETULE: case ISD::SETULT: case ISD::SETOLE: case ISD::SETOLT: case ISD::SETLE: case ISD::SETLT: { - unsigned Opc = (LHS == True) ? AMDGPUISD::FMIN : AMDGPUISD::FMAX; + unsigned Opc + = (LHS == True) ? AMDGPUISD::FMIN_LEGACY : AMDGPUISD::FMAX_LEGACY; return DAG.getNode(Opc, DL, VT, LHS, RHS); } case ISD::SETGT: @@ -1045,7 +1047,8 @@ SDValue AMDGPUTargetLowering::CombineMinMax(SDNode *N, case ISD::SETOGE: case ISD::SETUGT: case ISD::SETOGT: { - unsigned Opc = (LHS == True) ? AMDGPUISD::FMAX : AMDGPUISD::FMIN; + unsigned Opc + = (LHS == True) ? AMDGPUISD::FMAX_LEGACY : AMDGPUISD::FMIN_LEGACY; return DAG.getNode(Opc, DL, VT, LHS, RHS); } case ISD::SETCC_INVALID: @@ -2110,9 +2113,37 @@ SDValue AMDGPUTargetLowering::PerformDAGCombine(SDNode *N, simplifyI24(N1, DCI); return SDValue(); } - case ISD::SELECT_CC: { - return CombineMinMax(N, DAG); + case ISD::SELECT_CC: { + SDLoc DL(N); + EVT VT = N->getValueType(0); + + SDValue LHS = N->getOperand(0); + SDValue RHS = N->getOperand(1); + SDValue True = N->getOperand(2); + SDValue False = N->getOperand(3); + SDValue CC = N->getOperand(4); + + return CombineMinMax(DL, VT, LHS, RHS, True, False, CC, DAG); + } + case ISD::SELECT: { + SDValue Cond = N->getOperand(0); + if (Cond.getOpcode() == ISD::SETCC) { + SDLoc DL(N); + EVT VT = N->getValueType(0); + + SDValue LHS = Cond.getOperand(0); + SDValue RHS = Cond.getOperand(1); + SDValue CC = Cond.getOperand(2); + + SDValue True = N->getOperand(1); + SDValue False = N->getOperand(2); + + + return CombineMinMax(DL, VT, LHS, RHS, True, False, CC, DAG); } + + break; + } case AMDGPUISD::BFE_I32: case AMDGPUISD::BFE_U32: { assert(!N->getValueType(0).isVector() && @@ -2289,10 +2320,10 @@ const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const { NODE_NAME_CASE(FRACT) NODE_NAME_CASE(CLAMP) NODE_NAME_CASE(MAD) - NODE_NAME_CASE(FMAX) + NODE_NAME_CASE(FMAX_LEGACY) NODE_NAME_CASE(SMAX) NODE_NAME_CASE(UMAX) - NODE_NAME_CASE(FMIN) + NODE_NAME_CASE(FMIN_LEGACY) NODE_NAME_CASE(SMIN) NODE_NAME_CASE(UMIN) NODE_NAME_CASE(URECIP) diff --git a/llvm/lib/Target/R600/AMDGPUISelLowering.h b/llvm/lib/Target/R600/AMDGPUISelLowering.h index ea60c1b9e8d..793c84754a3 100644 --- a/llvm/lib/Target/R600/AMDGPUISelLowering.h +++ b/llvm/lib/Target/R600/AMDGPUISelLowering.h @@ -140,7 +140,14 @@ public: SDValue LowerIntrinsicIABS(SDValue Op, SelectionDAG &DAG) const; SDValue LowerIntrinsicLRP(SDValue Op, SelectionDAG &DAG) const; - SDValue CombineMinMax(SDNode *N, SelectionDAG &DAG) const; + SDValue CombineMinMax(SDLoc DL, + EVT VT, + SDValue LHS, + SDValue RHS, + SDValue True, + SDValue False, + SDValue CC, + SelectionDAG &DAG) const; const char* getTargetNodeName(unsigned Opcode) const override; virtual SDNode *PostISelFolding(MachineSDNode *N, @@ -188,10 +195,10 @@ enum { // Denormals handled on some parts. COS_HW, SIN_HW, - FMAX, + FMAX_LEGACY, SMAX, UMAX, - FMIN, + FMIN_LEGACY, SMIN, UMIN, URECIP, diff --git a/llvm/lib/Target/R600/AMDGPUInstrInfo.td b/llvm/lib/Target/R600/AMDGPUInstrInfo.td index 3d70791bd5a..037767d1087 100644 --- a/llvm/lib/Target/R600/AMDGPUInstrInfo.td +++ b/llvm/lib/Target/R600/AMDGPUInstrInfo.td @@ -58,9 +58,12 @@ def AMDGPUrsq_clamped : SDNode<"AMDGPUISD::RSQ_CLAMPED", SDTFPUnaryOp>; def AMDGPUldexp : SDNode<"AMDGPUISD::LDEXP", AMDGPULdExpOp>; -// out = max(a, b) a and b are floats -def AMDGPUfmax : SDNode<"AMDGPUISD::FMAX", SDTFPBinOp, - [SDNPCommutative, SDNPAssociative] +// out = max(a, b) a and b are floats, where a nan comparison fails. +// This is not commutative because this gives the second operand: +// x < nan ? x : nan -> nan +// nan < x ? nan : x -> x +def AMDGPUfmax_legacy : SDNode<"AMDGPUISD::FMAX_LEGACY", SDTFPBinOp, + [SDNPAssociative] >; def AMDGPUclamp : SDNode<"AMDGPUISD::CLAMP", SDTFPTernaryOp, []>; @@ -76,9 +79,9 @@ def AMDGPUumax : SDNode<"AMDGPUISD::UMAX", SDTIntBinOp, [SDNPCommutative, SDNPAssociative] >; -// out = min(a, b) a and b are floats -def AMDGPUfmin : SDNode<"AMDGPUISD::FMIN", SDTFPBinOp, - [SDNPCommutative, SDNPAssociative] +// out = min(a, b) a and b are floats, where a nan comparison fails. +def AMDGPUfmin_legacy : SDNode<"AMDGPUISD::FMIN_LEGACY", SDTFPBinOp, + [SDNPAssociative] >; // out = min(a, b) a snd b are signed ints @@ -137,7 +140,7 @@ def AMDGPUregister_store : SDNode<"AMDGPUISD::REGISTER_STORE", // MSKOR(dst, mask, src) MEM[dst] = ((MEM[dst] & ~mask) | src) // // src0: vec4(src, 0, 0, mask) -// src1: dst - rat offset (aka pointer) in dwords +// src1: dst - rat offset (aka pointer) in dwords def AMDGPUstore_mskor : SDNode<"AMDGPUISD::STORE_MSKOR", SDTypeProfile<0, 2, []>, [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>; diff --git a/llvm/lib/Target/R600/R600Instructions.td b/llvm/lib/Target/R600/R600Instructions.td index 7ce05a21062..69695a3bce3 100644 --- a/llvm/lib/Target/R600/R600Instructions.td +++ b/llvm/lib/Target/R600/R600Instructions.td @@ -674,8 +674,9 @@ def ADD : R600_2OP_Helper <0x0, "ADD", fadd>; // Non-IEEE MUL: 0 * anything = 0 def MUL : R600_2OP_Helper <0x1, "MUL NON-IEEE", int_AMDGPU_mul>; def MUL_IEEE : R600_2OP_Helper <0x2, "MUL_IEEE", fmul>; -def MAX : R600_2OP_Helper <0x3, "MAX", AMDGPUfmax>; -def MIN : R600_2OP_Helper <0x4, "MIN", AMDGPUfmin>; +// TODO: Do these actually match the regular fmin/fmax behavior? +def MAX : R600_2OP_Helper <0x3, "MAX", AMDGPUfmax_legacy>; +def MIN : R600_2OP_Helper <0x4, "MIN", AMDGPUfmin_legacy>; // For the SET* instructions there is a naming conflict in TargetSelectionDAG.td, // so some of the instruction names don't match the asm string. diff --git a/llvm/lib/Target/R600/SIInstructions.td b/llvm/lib/Target/R600/SIInstructions.td index 14ad4529ab8..9d4b8bb200e 100644 --- a/llvm/lib/Target/R600/SIInstructions.td +++ b/llvm/lib/Target/R600/SIInstructions.td @@ -1398,11 +1398,11 @@ defm V_MUL_U32_U24 : VOP2Inst <vop2<0xb>, "v_mul_u32_u24", defm V_MIN_LEGACY_F32 : VOP2Inst <vop2<0xd>, "v_min_legacy_f32", - VOP_F32_F32_F32, AMDGPUfmin + VOP_F32_F32_F32, AMDGPUfmin_legacy >; defm V_MAX_LEGACY_F32 : VOP2Inst <vop2<0xe>, "v_max_legacy_f32", - VOP_F32_F32_F32, AMDGPUfmax + VOP_F32_F32_F32, AMDGPUfmax_legacy >; defm V_MIN_F32 : VOP2Inst <vop2<0xf>, "v_min_f32", VOP_F32_F32_F32, fminnum>; |