diff options
-rw-r--r-- | llvm/include/llvm/IR/IntrinsicsAMDGPU.td | 2 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp | 2 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h | 2 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td | 2 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/R600ISelLowering.cpp | 2 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/R600Instructions.td | 2 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 4 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/SIInstructions.td | 4 | ||||
-rw-r--r-- | llvm/test/CodeGen/AMDGPU/llvm.amdgcn.rsq.clamp.ll | 43 | ||||
-rw-r--r-- | llvm/test/CodeGen/AMDGPU/llvm.amdgcn.rsq.clamped.ll | 23 |
10 files changed, 53 insertions, 33 deletions
diff --git a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td index 712dcb03538..64f6fea87a9 100644 --- a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td +++ b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td @@ -100,7 +100,7 @@ def int_amdgcn_rsq : Intrinsic< [llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem] >; -def int_amdgcn_rsq_clamped : Intrinsic< +def int_amdgcn_rsq_clamp : Intrinsic< [llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem]>; def int_amdgcn_ldexp : Intrinsic< diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp index c9d52fdc3ba..424f4d0e6e6 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp @@ -2791,7 +2791,7 @@ const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const { NODE_NAME_CASE(RCP) NODE_NAME_CASE(RSQ) NODE_NAME_CASE(RSQ_LEGACY) - NODE_NAME_CASE(RSQ_CLAMPED) + NODE_NAME_CASE(RSQ_CLAMP) NODE_NAME_CASE(LDEXP) NODE_NAME_CASE(FP_CLASS) NODE_NAME_CASE(DOT4) diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h index 104b9ff9eb0..4965a565be7 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h @@ -269,7 +269,7 @@ enum NodeType : unsigned { RCP, RSQ, RSQ_LEGACY, - RSQ_CLAMPED, + RSQ_CLAMP, LDEXP, FP_CLASS, DOT4, diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td b/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td index 5e6d3102027..19932419ebe 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td +++ b/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td @@ -63,7 +63,7 @@ def AMDGPUrsq : SDNode<"AMDGPUISD::RSQ", SDTFPUnaryOp>; def AMDGPUrsq_legacy : SDNode<"AMDGPUISD::RSQ_LEGACY", SDTFPUnaryOp>; // out = 1.0 / sqrt(a) result clamped to +/- max_float. -def AMDGPUrsq_clamped : SDNode<"AMDGPUISD::RSQ_CLAMPED", SDTFPUnaryOp>; +def AMDGPUrsq_clamp : SDNode<"AMDGPUISD::RSQ_CLAMP", SDTFPUnaryOp>; def AMDGPUldexp : SDNode<"AMDGPUISD::LDEXP", AMDGPULdExpOp>; diff --git a/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp b/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp index 64a3f4d862b..f87ddeefe0a 100644 --- a/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp @@ -802,7 +802,7 @@ SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const // FIXME: Should be renamed to r600 prefix case AMDGPUIntrinsic::AMDGPU_rsq_clamped: - return DAG.getNode(AMDGPUISD::RSQ_CLAMPED, DL, VT, Op.getOperand(1)); + return DAG.getNode(AMDGPUISD::RSQ_CLAMP, DL, VT, Op.getOperand(1)); case Intrinsic::r600_rsq: case AMDGPUIntrinsic::AMDGPU_rsq: // Legacy name diff --git a/llvm/lib/Target/AMDGPU/R600Instructions.td b/llvm/lib/Target/AMDGPU/R600Instructions.td index 82eb223b873..2bd7c16e9ac 100644 --- a/llvm/lib/Target/AMDGPU/R600Instructions.td +++ b/llvm/lib/Target/AMDGPU/R600Instructions.td @@ -1101,7 +1101,7 @@ class RECIP_UINT_Common <bits<11> inst> : R600_1OP_Helper < // Clamped to maximum. class RECIPSQRT_CLAMPED_Common <bits<11> inst> : R600_1OP_Helper < - inst, "RECIPSQRT_CLAMPED", AMDGPUrsq_clamped + inst, "RECIPSQRT_CLAMPED", AMDGPUrsq_clamp > { let Itinerary = TransALU; } diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp index bb9a5a9205b..298100553e0 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -1379,10 +1379,10 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, case Intrinsic::amdgcn_rsq: case AMDGPUIntrinsic::AMDGPU_rsq: // Legacy name return DAG.getNode(AMDGPUISD::RSQ, DL, VT, Op.getOperand(1)); - case Intrinsic::amdgcn_rsq_clamped: + case Intrinsic::amdgcn_rsq_clamp: case AMDGPUIntrinsic::AMDGPU_rsq_clamped: { // Legacy name if (Subtarget->getGeneration() < AMDGPUSubtarget::VOLCANIC_ISLANDS) - return DAG.getNode(AMDGPUISD::RSQ_CLAMPED, DL, VT, Op.getOperand(1)); + return DAG.getNode(AMDGPUISD::RSQ_CLAMP, DL, VT, Op.getOperand(1)); Type *Type = VT.getTypeForEVT(*DAG.getContext()); APFloat Max = APFloat::getLargest(Type->getFltSemantics()); diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td index 37dce31c9e9..06113273594 100644 --- a/llvm/lib/Target/AMDGPU/SIInstructions.td +++ b/llvm/lib/Target/AMDGPU/SIInstructions.td @@ -1353,7 +1353,7 @@ defm V_LOG_CLAMP_F32 : VOP1InstSI <vop1<0x26>, "v_log_clamp_f32", VOP_F32_F32>; defm V_RCP_CLAMP_F32 : VOP1InstSI <vop1<0x28>, "v_rcp_clamp_f32", VOP_F32_F32>; defm V_RCP_LEGACY_F32 : VOP1InstSI <vop1<0x29>, "v_rcp_legacy_f32", VOP_F32_F32>; defm V_RSQ_CLAMP_F32 : VOP1InstSI <vop1<0x2c>, "v_rsq_clamp_f32", - VOP_F32_F32, AMDGPUrsq_clamped + VOP_F32_F32, AMDGPUrsq_clamp >; defm V_RSQ_LEGACY_F32 : VOP1InstSI <vop1<0x2d>, "v_rsq_legacy_f32", VOP_F32_F32, AMDGPUrsq_legacy @@ -1365,7 +1365,7 @@ let SchedRW = [WriteDouble] in { defm V_RCP_CLAMP_F64 : VOP1InstSI <vop1<0x30>, "v_rcp_clamp_f64", VOP_F64_F64>; defm V_RSQ_CLAMP_F64 : VOP1InstSI <vop1<0x32>, "v_rsq_clamp_f64", - VOP_F64_F64, AMDGPUrsq_clamped + VOP_F64_F64, AMDGPUrsq_clamp >; } // End SchedRW = [WriteDouble] diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.rsq.clamp.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.rsq.clamp.ll new file mode 100644 index 00000000000..ede1df76f2e --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.rsq.clamp.ll @@ -0,0 +1,43 @@ +; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=FUNC %s + +declare float @llvm.amdgcn.rsq.clamp.f32(float) #1 +declare double @llvm.amdgcn.rsq.clamp.f64(double) #1 + +; FUNC-LABEL: {{^}}rsq_clamp_f32: +; SI: v_rsq_clamp_f32_e32 + +; VI: s_load_dword [[SRC:s[0-9]+]] +; VI: v_rsq_f32_e32 [[RSQ:v[0-9]+]], [[SRC]] +; VI: v_min_f32_e32 [[MIN:v[0-9]+]], 0x7f7fffff, [[RSQ]] +; TODO: this constant should be folded: +; VI: v_mov_b32_e32 [[K:v[0-9]+]], 0xff7fffff +; VI: v_max_f32_e32 [[RESULT:v[0-9]+]], [[MIN]], [[K]] +; VI: buffer_store_dword [[RESULT]] +define void @rsq_clamp_f32(float addrspace(1)* %out, float %src) #0 { + %rsq_clamp = call float @llvm.amdgcn.rsq.clamp.f32(float %src) + store float %rsq_clamp, float addrspace(1)* %out + ret void +} + + +; FUNC-LABEL: {{^}}rsq_clamp_f64: +; SI: v_rsq_clamp_f64_e32 + +; VI: v_rsq_f64_e32 [[RSQ:v\[[0-9]+:[0-9]+\]]], s[2:3] +; TODO: this constant should be folded: +; VI: s_mov_b32 s[[ALLBITS:[0-9+]]], -1 +; VI: s_mov_b32 s[[HIGH1:[0-9+]]], 0x7fefffff +; VI: s_mov_b32 s[[LOW1:[0-9+]]], s[[ALLBITS]] +; VI: v_min_f64 v[0:1], [[RSQ]], s{{\[}}[[LOW1]]:[[HIGH1]]] +; VI: s_mov_b32 s[[HIGH2:[0-9+]]], 0xffefffff +; VI: s_mov_b32 s[[LOW2:[0-9+]]], s[[ALLBITS]] +; VI: v_max_f64 v[0:1], v[0:1], s{{\[}}[[LOW2]]:[[HIGH2]]] +define void @rsq_clamp_f64(double addrspace(1)* %out, double %src) #0 { + %rsq_clamp = call double @llvm.amdgcn.rsq.clamp.f64(double %src) + store double %rsq_clamp, double addrspace(1)* %out + ret void +} + +attributes #0 = { nounwind } +attributes #1 = { nounwind readnone } diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.rsq.clamped.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.rsq.clamped.ll deleted file mode 100644 index be7398cbb85..00000000000 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.rsq.clamped.ll +++ /dev/null @@ -1,23 +0,0 @@ -; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s -; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=FUNC %s - -declare double @llvm.amdgcn.rsq.clamped.f64(double) nounwind readnone - -; FUNC-LABEL: {{^}}rsq_clamped_f64: -; SI: v_rsq_clamp_f64_e32 - -; VI: v_rsq_f64_e32 [[RSQ:v\[[0-9]+:[0-9]+\]]], s[2:3] -; TODO: this constant should be folded: -; VI: s_mov_b32 s[[ALLBITS:[0-9+]]], -1 -; VI: s_mov_b32 s[[HIGH1:[0-9+]]], 0x7fefffff -; VI: s_mov_b32 s[[LOW1:[0-9+]]], s[[ALLBITS]] -; VI: v_min_f64 v[0:1], [[RSQ]], s{{\[}}[[LOW1]]:[[HIGH1]]] -; VI: s_mov_b32 s[[HIGH2:[0-9+]]], 0xffefffff -; VI: s_mov_b32 s[[LOW2:[0-9+]]], s[[ALLBITS]] -; VI: v_max_f64 v[0:1], v[0:1], s{{\[}}[[LOW2]]:[[HIGH2]]] - -define void @rsq_clamped_f64(double addrspace(1)* %out, double %src) nounwind { - %rsq_clamped = call double @llvm.amdgcn.rsq.clamped.f64(double %src) nounwind readnone - store double %rsq_clamped, double addrspace(1)* %out, align 8 - ret void -} |