diff options
| author | Matt Arsenault <Matthew.Arsenault@amd.com> | 2014-03-31 19:35:33 +0000 |
|---|---|---|
| committer | Matt Arsenault <Matthew.Arsenault@amd.com> | 2014-03-31 19:35:33 +0000 |
| commit | 378bf9c68b41c1a8044d4b261c85b19806a6770f (patch) | |
| tree | fafedb122758c926a8f3ac25c957e7fc2c05af09 | |
| parent | 801335cc647582c03ceccdaa1138be4ead514eb7 (diff) | |
| download | bcm5719-llvm-378bf9c68b41c1a8044d4b261c85b19806a6770f.tar.gz bcm5719-llvm-378bf9c68b41c1a8044d4b261c85b19806a6770f.zip | |
R600: Compute masked bits for min and max
llvm-svn: 205242
| -rw-r--r-- | llvm/lib/Target/R600/AMDGPUISelLowering.cpp | 44 | ||||
| -rw-r--r-- | llvm/test/CodeGen/R600/llvm.AMDGPU.umax.ll | 15 | ||||
| -rw-r--r-- | llvm/test/CodeGen/R600/llvm.AMDGPU.umin.ll | 15 |
3 files changed, 74 insertions, 0 deletions
diff --git a/llvm/lib/Target/R600/AMDGPUISelLowering.cpp b/llvm/lib/Target/R600/AMDGPUISelLowering.cpp index 54ef2c403f2..183725cc217 100644 --- a/llvm/lib/Target/R600/AMDGPUISelLowering.cpp +++ b/llvm/lib/Target/R600/AMDGPUISelLowering.cpp @@ -1219,11 +1219,55 @@ const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const { } } +static void computeMaskedBitsForMinMax(const SDValue Op0, + const SDValue Op1, + APInt &KnownZero, + APInt &KnownOne, + const SelectionDAG &DAG, + unsigned Depth) { + APInt Op0Zero, Op0One; + APInt Op1Zero, Op1One; + DAG.ComputeMaskedBits(Op0, Op0Zero, Op0One, Depth); + DAG.ComputeMaskedBits(Op1, Op1Zero, Op1One, Depth); + + KnownZero = Op0Zero & Op1Zero; + KnownOne = Op0One & Op1One; +} + void AMDGPUTargetLowering::computeMaskedBitsForTargetNode( const SDValue Op, APInt &KnownZero, APInt &KnownOne, const SelectionDAG &DAG, unsigned Depth) const { + KnownZero = KnownOne = APInt(KnownOne.getBitWidth(), 0); // Don't know anything. + unsigned Opc = Op.getOpcode(); + switch (Opc) { + case ISD::INTRINSIC_WO_CHAIN: { + // FIXME: The intrinsic should just use the node. + switch (cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue()) { + case AMDGPUIntrinsic::AMDGPU_imax: + case AMDGPUIntrinsic::AMDGPU_umax: + case AMDGPUIntrinsic::AMDGPU_imin: + case AMDGPUIntrinsic::AMDGPU_umin: + computeMaskedBitsForMinMax(Op.getOperand(1), Op.getOperand(2), + KnownZero, KnownOne, DAG, Depth); + break; + default: + break; + } + + break; + } + case AMDGPUISD::SMAX: + case AMDGPUISD::UMAX: + case AMDGPUISD::SMIN: + case AMDGPUISD::UMIN: + computeMaskedBitsForMinMax(Op.getOperand(0), Op.getOperand(1), + KnownZero, KnownOne, DAG, Depth); + break; + default: + break; + } } diff --git a/llvm/test/CodeGen/R600/llvm.AMDGPU.umax.ll b/llvm/test/CodeGen/R600/llvm.AMDGPU.umax.ll index c3e1cfe9019..1b8da2e1553 100644 --- a/llvm/test/CodeGen/R600/llvm.AMDGPU.umax.ll +++ b/llvm/test/CodeGen/R600/llvm.AMDGPU.umax.ll @@ -21,6 +21,21 @@ entry: ret void } +; SI-LABEL: @trunc_zext_umax +; SI: BUFFER_LOAD_UBYTE [[VREG:v[0-9]+]], +; SI: V_MAX_U32_e32 [[RESULT:v[0-9]+]], 0, [[VREG]] +; SI-NOT: AND +; SI: BUFFER_STORE_SHORT [[RESULT]], +define void @trunc_zext_umax(i16 addrspace(1)* nocapture %out, i8 addrspace(1)* nocapture %src) nounwind { + %tmp5 = load i8 addrspace(1)* %src, align 1 + %tmp2 = zext i8 %tmp5 to i32 + %tmp3 = tail call i32 @llvm.AMDGPU.umax(i32 %tmp2, i32 0) nounwind readnone + %tmp4 = trunc i32 %tmp3 to i8 + %tmp6 = zext i8 %tmp4 to i16 + store i16 %tmp6, i16 addrspace(1)* %out, align 2 + ret void +} + ; Function Attrs: readnone declare i32 @llvm.AMDGPU.umax(i32, i32) #1 diff --git a/llvm/test/CodeGen/R600/llvm.AMDGPU.umin.ll b/llvm/test/CodeGen/R600/llvm.AMDGPU.umin.ll index 460a7b2d425..08397f8356c 100644 --- a/llvm/test/CodeGen/R600/llvm.AMDGPU.umin.ll +++ b/llvm/test/CodeGen/R600/llvm.AMDGPU.umin.ll @@ -21,6 +21,21 @@ entry: ret void } +; SI-LABEL: @trunc_zext_umin +; SI: BUFFER_LOAD_UBYTE [[VREG:v[0-9]+]], +; SI: V_MIN_U32_e32 [[RESULT:v[0-9]+]], 0, [[VREG]] +; SI-NOT: AND +; SI: BUFFER_STORE_SHORT [[RESULT]], +define void @trunc_zext_umin(i16 addrspace(1)* nocapture %out, i8 addrspace(1)* nocapture %src) nounwind { + %tmp5 = load i8 addrspace(1)* %src, align 1 + %tmp2 = zext i8 %tmp5 to i32 + %tmp3 = tail call i32 @llvm.AMDGPU.umin(i32 %tmp2, i32 0) nounwind readnone + %tmp4 = trunc i32 %tmp3 to i8 + %tmp6 = zext i8 %tmp4 to i16 + store i16 %tmp6, i16 addrspace(1)* %out, align 2 + ret void +} + ; Function Attrs: readnone declare i32 @llvm.AMDGPU.umin(i32, i32) #1 |

