diff options
-rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp | 16 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPUInstructions.td | 24 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPUIntrinsics.td | 4 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/CaymanInstructions.td | 1 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/EvergreenInstructions.td | 2 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/R600Instructions.td | 6 | ||||
-rw-r--r-- | llvm/test/CodeGen/AMDGPU/llvm.AMDGPU.imad24.ll | 22 | ||||
-rw-r--r-- | llvm/test/CodeGen/AMDGPU/llvm.AMDGPU.imul24.ll | 16 | ||||
-rw-r--r-- | llvm/test/CodeGen/AMDGPU/llvm.AMDGPU.umad24.ll | 38 | ||||
-rw-r--r-- | llvm/test/CodeGen/AMDGPU/llvm.AMDGPU.umul24.ll | 18 | ||||
-rw-r--r-- | llvm/test/CodeGen/AMDGPU/mad_int24.ll | 11 | ||||
-rw-r--r-- | llvm/test/CodeGen/AMDGPU/use-sgpr-multiple-times.ll | 10 |
12 files changed, 5 insertions, 163 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp index ce9d5b42ec0..86c823a50c5 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp @@ -926,22 +926,6 @@ SDValue AMDGPUTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, return DAG.getNode(AMDGPUISD::LDEXP, DL, VT, Op.getOperand(1), Op.getOperand(2)); - case AMDGPUIntrinsic::AMDGPU_umul24: - return DAG.getNode(AMDGPUISD::MUL_U24, DL, VT, - Op.getOperand(1), Op.getOperand(2)); - - case AMDGPUIntrinsic::AMDGPU_imul24: - return DAG.getNode(AMDGPUISD::MUL_I24, DL, VT, - Op.getOperand(1), Op.getOperand(2)); - - case AMDGPUIntrinsic::AMDGPU_umad24: - return DAG.getNode(AMDGPUISD::MAD_U24, DL, VT, - Op.getOperand(1), Op.getOperand(2), Op.getOperand(3)); - - case AMDGPUIntrinsic::AMDGPU_imad24: - return DAG.getNode(AMDGPUISD::MAD_I24, DL, VT, - Op.getOperand(1), Op.getOperand(2), Op.getOperand(3)); - case AMDGPUIntrinsic::AMDGPU_bfe_i32: return DAG.getNode(AMDGPUISD::BFE_I32, DL, VT, Op.getOperand(1), diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td b/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td index f145dfd259b..c8b7bb2e8ac 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td +++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td @@ -597,30 +597,6 @@ class UMad24Pat<Instruction Inst> : Pat < (Inst $src0, $src1, $src2) >; -multiclass Expand24IBitOps<Instruction MulInst, Instruction AddInst> { - def _expand_imad24 : Pat < - (AMDGPUmad_i24 i32:$src0, i32:$src1, i32:$src2), - (AddInst (MulInst $src0, $src1), $src2) - >; - - def _expand_imul24 : Pat < - (AMDGPUmul_i24 i32:$src0, i32:$src1), - (MulInst $src0, $src1) - >; -} - -multiclass Expand24UBitOps<Instruction MulInst, Instruction AddInst> { - def _expand_umad24 : Pat < - (AMDGPUmad_u24 i32:$src0, i32:$src1, i32:$src2), - (AddInst (MulInst $src0, $src1), $src2) - >; - - def _expand_umul24 : Pat < - (AMDGPUmul_u24 i32:$src0, i32:$src1), - (MulInst $src0, $src1) - >; -} - class RcpPat<Instruction RcpInst, ValueType vt> : Pat < (fdiv FP_ONE, vt:$src), (RcpInst $src) diff --git a/llvm/lib/Target/AMDGPU/AMDGPUIntrinsics.td b/llvm/lib/Target/AMDGPU/AMDGPUIntrinsics.td index 8fb372d71fa..753ce8eb9e2 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUIntrinsics.td +++ b/llvm/lib/Target/AMDGPU/AMDGPUIntrinsics.td @@ -16,10 +16,6 @@ let TargetPrefix = "AMDGPU", isTarget = 1 in { def int_AMDGPU_kill : Intrinsic<[], [llvm_float_ty], []>; def int_AMDGPU_kilp : Intrinsic<[], [], []>; - def int_AMDGPU_umul24 : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; - def int_AMDGPU_imul24 : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; - def int_AMDGPU_imad24 : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; - def int_AMDGPU_umad24 : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; def int_AMDGPU_cvt_f32_ubyte0 : Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrNoMem]>; def int_AMDGPU_cvt_f32_ubyte1 : Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrNoMem]>; def int_AMDGPU_cvt_f32_ubyte2 : Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrNoMem]>; diff --git a/llvm/lib/Target/AMDGPU/CaymanInstructions.td b/llvm/lib/Target/AMDGPU/CaymanInstructions.td index a6c3785c815..ffbe7ce93d8 100644 --- a/llvm/lib/Target/AMDGPU/CaymanInstructions.td +++ b/llvm/lib/Target/AMDGPU/CaymanInstructions.td @@ -51,7 +51,6 @@ def : RsqPat<RECIPSQRT_IEEE_cm, f32>; def : POW_Common <LOG_IEEE_cm, EXP_IEEE_cm, MUL>; defm DIV_cm : DIV_Common<RECIP_IEEE_cm>; -defm : Expand24UBitOps<MULLO_UINT_cm, ADD_INT>; // RECIP_UINT emulation for Cayman // The multiplication scales from [0,1] to the unsigned integer range diff --git a/llvm/lib/Target/AMDGPU/EvergreenInstructions.td b/llvm/lib/Target/AMDGPU/EvergreenInstructions.td index 4a7810bcf5d..7fd6ca90336 100644 --- a/llvm/lib/Target/AMDGPU/EvergreenInstructions.td +++ b/llvm/lib/Target/AMDGPU/EvergreenInstructions.td @@ -85,8 +85,6 @@ def COS_eg : COS_Common<0x8E>; def : POW_Common <LOG_IEEE_eg, EXP_IEEE_eg, MUL>; def : Pat<(fsqrt f32:$src), (MUL $src, (RECIPSQRT_CLAMPED_eg $src))>; -defm : Expand24IBitOps<MULLO_INT_eg, ADD_INT>; - //===----------------------------------------------------------------------===// // Memory read/write instructions //===----------------------------------------------------------------------===// diff --git a/llvm/lib/Target/AMDGPU/R600Instructions.td b/llvm/lib/Target/AMDGPU/R600Instructions.td index 605062d3a54..82eb223b873 100644 --- a/llvm/lib/Target/AMDGPU/R600Instructions.td +++ b/llvm/lib/Target/AMDGPU/R600Instructions.td @@ -1719,12 +1719,6 @@ def : DwordAddrPat <i32, R600_Reg32>; } // End isR600toCayman Predicate -let Predicates = [isR600] in { -// Intrinsic patterns -defm : Expand24IBitOps<MULLO_INT_r600, ADD_INT>; -defm : Expand24UBitOps<MULLO_UINT_r600, ADD_INT>; -} // End isR600 - def getLDSNoRetOp : InstrMapping { let FilterClass = "R600_LDS_1A1D"; let RowFields = ["BaseOp"]; diff --git a/llvm/test/CodeGen/AMDGPU/llvm.AMDGPU.imad24.ll b/llvm/test/CodeGen/AMDGPU/llvm.AMDGPU.imad24.ll deleted file mode 100644 index 42102e30f07..00000000000 --- a/llvm/test/CodeGen/AMDGPU/llvm.AMDGPU.imad24.ll +++ /dev/null @@ -1,22 +0,0 @@ -; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s -; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s -; RUN: llc -march=r600 -mcpu=cayman -verify-machineinstrs < %s | FileCheck -check-prefix=CM -check-prefix=FUNC %s -; RUN: llc -march=r600 -mcpu=redwood -verify-machineinstrs < %s | FileCheck -check-prefix=R600 -check-prefix=FUNC %s -; XUN: llc -march=r600 -mcpu=r600 -verify-machineinstrs < %s | FileCheck -check-prefix=R600 -check-prefix=FUNC %s -; XUN: llc -march=r600 -mcpu=r770 -verify-machineinstrs < %s | FileCheck -check-prefix=R600 -check-prefix=FUNC %s - -; FIXME: Store of i32 seems to be broken pre-EG somehow? - -declare i32 @llvm.AMDGPU.imad24(i32, i32, i32) nounwind readnone - -; FUNC-LABEL: {{^}}test_imad24: -; SI: v_mad_i32_i24 -; CM: MULADD_INT24 -; R600: MULLO_INT -; R600: ADD_INT -define void @test_imad24(i32 addrspace(1)* %out, i32 %src0, i32 %src1, i32 %src2) nounwind { - %mad = call i32 @llvm.AMDGPU.imad24(i32 %src0, i32 %src1, i32 %src2) nounwind readnone - store i32 %mad, i32 addrspace(1)* %out, align 4 - ret void -} - diff --git a/llvm/test/CodeGen/AMDGPU/llvm.AMDGPU.imul24.ll b/llvm/test/CodeGen/AMDGPU/llvm.AMDGPU.imul24.ll deleted file mode 100644 index fdc1172260b..00000000000 --- a/llvm/test/CodeGen/AMDGPU/llvm.AMDGPU.imul24.ll +++ /dev/null @@ -1,16 +0,0 @@ -; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s -; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s -; RUN: llc -march=r600 -mcpu=cayman -verify-machineinstrs < %s | FileCheck -check-prefix=CM -check-prefix=FUNC %s -; RUN: llc -march=r600 -mcpu=redwood -verify-machineinstrs < %s | FileCheck -check-prefix=R600 -check-prefix=FUNC %s - -declare i32 @llvm.AMDGPU.imul24(i32, i32) nounwind readnone - -; FUNC-LABEL: {{^}}test_imul24: -; SI: v_mul_i32_i24 -; CM: MUL_INT24 -; R600: MULLO_INT -define void @test_imul24(i32 addrspace(1)* %out, i32 %src0, i32 %src1) nounwind { - %mul = call i32 @llvm.AMDGPU.imul24(i32 %src0, i32 %src1) nounwind readnone - store i32 %mul, i32 addrspace(1)* %out, align 4 - ret void -} diff --git a/llvm/test/CodeGen/AMDGPU/llvm.AMDGPU.umad24.ll b/llvm/test/CodeGen/AMDGPU/llvm.AMDGPU.umad24.ll deleted file mode 100644 index 77a073b0cb0..00000000000 --- a/llvm/test/CodeGen/AMDGPU/llvm.AMDGPU.umad24.ll +++ /dev/null @@ -1,38 +0,0 @@ -; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s -; RUN: llc -march=r600 -mcpu=cayman -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s -; RUN: llc -march=r600 -mcpu=redwood -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s -; XUN: llc -march=r600 -mcpu=r600 -verify-machineinstrs < %s | FileCheck -check-prefix=R600 -check-prefix=FUNC %s -; XUN: llc -march=r600 -mcpu=rv770 -verify-machineinstrs < %s | FileCheck -check-prefix=R600 -check-prefix=FUNC %s - -declare i32 @llvm.AMDGPU.umad24(i32, i32, i32) nounwind readnone -declare i32 @llvm.r600.read.tidig.x() nounwind readnone - -; FUNC-LABEL: {{^}}test_umad24: -; SI: v_mad_u32_u24 -; EG: MULADD_UINT24 -; R600: MULLO_UINT -; R600: ADD_INT -define void @test_umad24(i32 addrspace(1)* %out, i32 %src0, i32 %src1, i32 %src2) nounwind { - %mad = call i32 @llvm.AMDGPU.umad24(i32 %src0, i32 %src1, i32 %src2) nounwind readnone - store i32 %mad, i32 addrspace(1)* %out, align 4 - ret void -} - -; FUNC-LABEL: {{^}}commute_umad24: -; SI-DAG: buffer_load_dword [[SRC0:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}} -; SI-DAG: buffer_load_dword [[SRC2:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4 -; SI: v_mad_u32_u24 [[RESULT:v[0-9]+]], 4, [[SRC0]], [[SRC2]] -; SI: buffer_store_dword [[RESULT]] -define void @commute_umad24(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind { - %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone - %out.gep = getelementptr i32, i32 addrspace(1)* %out, i32 %tid - %src0.gep = getelementptr i32, i32 addrspace(1)* %out, i32 %tid - %src2.gep = getelementptr i32, i32 addrspace(1)* %src0.gep, i32 1 - - %src0 = load i32, i32 addrspace(1)* %src0.gep, align 4 - %src2 = load i32, i32 addrspace(1)* %src2.gep, align 4 - %mad = call i32 @llvm.AMDGPU.umad24(i32 %src0, i32 4, i32 %src2) nounwind readnone - store i32 %mad, i32 addrspace(1)* %out.gep, align 4 - ret void -} - diff --git a/llvm/test/CodeGen/AMDGPU/llvm.AMDGPU.umul24.ll b/llvm/test/CodeGen/AMDGPU/llvm.AMDGPU.umul24.ll deleted file mode 100644 index 76624a078b3..00000000000 --- a/llvm/test/CodeGen/AMDGPU/llvm.AMDGPU.umul24.ll +++ /dev/null @@ -1,18 +0,0 @@ -; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s -; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s -; RUN: llc -march=r600 -mcpu=cayman -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s -; RUN: llc -march=r600 -mcpu=redwood -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s -; XUN: llc -march=r600 -mcpu=r600 -verify-machineinstrs < %s | FileCheck -check-prefix=R600 -check-prefix=FUNC %s -; XUN: llc -march=r600 -mcpu=r770 -verify-machineinstrs < %s | FileCheck -check-prefix=R600 -check-prefix=FUNC %s - -declare i32 @llvm.AMDGPU.umul24(i32, i32) nounwind readnone - -; FUNC-LABEL: {{^}}test_umul24: -; SI: v_mul_u32_u24 -; R600: MUL_UINT24 -; R600: MULLO_UINT -define void @test_umul24(i32 addrspace(1)* %out, i32 %src0, i32 %src1) nounwind { - %mul = call i32 @llvm.AMDGPU.umul24(i32 %src0, i32 %src1) nounwind readnone - store i32 %mul, i32 addrspace(1)* %out, align 4 - ret void -} diff --git a/llvm/test/CodeGen/AMDGPU/mad_int24.ll b/llvm/test/CodeGen/AMDGPU/mad_int24.ll index 86d75a63ca4..def14c10d42 100644 --- a/llvm/test/CodeGen/AMDGPU/mad_int24.ll +++ b/llvm/test/CodeGen/AMDGPU/mad_int24.ll @@ -3,8 +3,6 @@ ; RUN: llc < %s -march=amdgcn -mcpu=SI -verify-machineinstrs | FileCheck %s --check-prefix=SI --check-prefix=FUNC ; RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck %s --check-prefix=SI --check-prefix=FUNC -declare i32 @llvm.AMDGPU.imul24(i32, i32) nounwind readnone - ; FUNC-LABEL: {{^}}i32_mad24: ; Signed 24-bit multiply is not supported on pre-Cayman GPUs. ; EG: MULLO_INT @@ -24,12 +22,3 @@ entry: store i32 %3, i32 addrspace(1)* %out ret void } - -; FUNC-LABEL: @test_imul24 -; SI: v_mad_i32_i24 -define void @test_imul24(i32 addrspace(1)* %out, i32 %src0, i32 %src1, i32 %src2) nounwind { - %mul = call i32 @llvm.AMDGPU.imul24(i32 %src0, i32 %src1) nounwind readnone - %add = add i32 %mul, %src2 - store i32 %add, i32 addrspace(1)* %out, align 4 - ret void -} diff --git a/llvm/test/CodeGen/AMDGPU/use-sgpr-multiple-times.ll b/llvm/test/CodeGen/AMDGPU/use-sgpr-multiple-times.ll index 87b925a24a0..e92a4fda10b 100644 --- a/llvm/test/CodeGen/AMDGPU/use-sgpr-multiple-times.ll +++ b/llvm/test/CodeGen/AMDGPU/use-sgpr-multiple-times.ll @@ -4,7 +4,7 @@ declare float @llvm.fma.f32(float, float, float) #1 declare double @llvm.fma.f64(double, double, double) #1 declare float @llvm.fmuladd.f32(float, float, float) #1 -declare i32 @llvm.AMDGPU.imad24(i32, i32, i32) #1 +declare float @llvm.amdgcn.div.fixup.f32(float, float, float) #1 ; GCN-LABEL: {{^}}test_sgpr_use_twice_binop: @@ -118,11 +118,11 @@ define void @test_sgpr_use_twice_ternary_op_a_imm_a(float addrspace(1)* %out, fl ; Don't use fma since fma c, x, y is canonicalized to fma x, c, y ; GCN-LABEL: {{^}}test_sgpr_use_twice_ternary_op_imm_a_a: ; GCN: s_load_dword [[SGPR:s[0-9]+]] -; GCN: v_mad_i32_i24 [[RESULT:v[0-9]+]], 2, [[SGPR]], [[SGPR]] +; GCN: v_div_fixup_f32 [[RESULT:v[0-9]+]], 2.0, [[SGPR]], [[SGPR]] ; GCN: buffer_store_dword [[RESULT]] -define void @test_sgpr_use_twice_ternary_op_imm_a_a(i32 addrspace(1)* %out, i32 %a) #0 { - %fma = call i32 @llvm.AMDGPU.imad24(i32 2, i32 %a, i32 %a) #1 - store i32 %fma, i32 addrspace(1)* %out, align 4 +define void @test_sgpr_use_twice_ternary_op_imm_a_a(float addrspace(1)* %out, float %a) #0 { + %val = call float @llvm.amdgcn.div.fixup.f32(float 2.0, float %a, float %a) #1 + store float %val, float addrspace(1)* %out, align 4 ret void } |