diff options
| author | Tom Stellard <thomas.stellard@amd.com> | 2013-07-23 01:48:35 +0000 |
|---|---|---|
| committer | Tom Stellard <thomas.stellard@amd.com> | 2013-07-23 01:48:35 +0000 |
| commit | 9f95033d3345bd7a86a5e132fafd72d51e30eef9 (patch) | |
| tree | e51b4d6065c155b88a538e4d3c344cdd30eef594 | |
| parent | ba30932908ab09f5bb9fa33ba793816e2b1ea398 (diff) | |
| download | bcm5719-llvm-9f95033d3345bd7a86a5e132fafd72d51e30eef9.tar.gz bcm5719-llvm-9f95033d3345bd7a86a5e132fafd72d51e30eef9.zip | |
R600: Improve support for < 32-bit loads
Reviewed-by: Vincent Lejeune <vljn at ovi.com>
llvm-svn: 186921
| -rw-r--r-- | llvm/lib/Target/R600/AMDGPUInstructions.td | 20 | ||||
| -rw-r--r-- | llvm/lib/Target/R600/R600Instructions.td | 8 | ||||
| -rw-r--r-- | llvm/lib/Target/R600/SIISelLowering.cpp | 4 | ||||
| -rw-r--r-- | llvm/lib/Target/R600/SIInstructions.td | 18 | ||||
| -rw-r--r-- | llvm/test/CodeGen/R600/load.ll | 45 | ||||
| -rw-r--r-- | llvm/test/CodeGen/R600/short-args.ll | 36 |
6 files changed, 106 insertions, 25 deletions
diff --git a/llvm/lib/Target/R600/AMDGPUInstructions.td b/llvm/lib/Target/R600/AMDGPUInstructions.td index b3cbe992d73..04618f27e17 100644 --- a/llvm/lib/Target/R600/AMDGPUInstructions.td +++ b/llvm/lib/Target/R600/AMDGPUInstructions.td @@ -96,11 +96,19 @@ def az_extloadi8 : PatFrag<(ops node:$ptr), (az_extload node:$ptr), [{ return cast<LoadSDNode>(N)->getMemoryVT() == MVT::i8; }]>; -def az_extloadi8_global : PatFrag<(ops node:$ptr), (az_extloadi8 node:$ptr), [{ +def sextloadi8_global : PatFrag<(ops node:$ptr), (sextloadi8 node:$ptr), [{ return isGlobalLoad(dyn_cast<LoadSDNode>(N)); }]>; def az_extloadi8_constant : PatFrag<(ops node:$ptr), (az_extloadi8 node:$ptr), [{ + return isConstantLoad(dyn_cast<LoadSDNode>(N), -1); +}]>; + +def sextloadi8_constant : PatFrag<(ops node:$ptr), (sextloadi8 node:$ptr), [{ + return isConstantLoad(dyn_cast<LoadSDNode>(N), -1); +}]>; + +def az_extloadi8_global : PatFrag<(ops node:$ptr), (az_extloadi8 node:$ptr), [{ return isGlobalLoad(dyn_cast<LoadSDNode>(N)); }]>; @@ -112,10 +120,18 @@ def az_extloadi16_global : PatFrag<(ops node:$ptr), (az_extloadi16 node:$ptr), [ return isGlobalLoad(dyn_cast<LoadSDNode>(N)); }]>; -def az_extloadi16_constant : PatFrag<(ops node:$ptr), (az_extloadi16 node:$ptr), [{ +def sextloadi16_global : PatFrag<(ops node:$ptr), (sextloadi16 node:$ptr), [{ return isGlobalLoad(dyn_cast<LoadSDNode>(N)); }]>; +def az_extloadi16_constant : PatFrag<(ops node:$ptr), (az_extloadi16 node:$ptr), [{ + return isConstantLoad(dyn_cast<LoadSDNode>(N), -1); +}]>; + +def sextloadi16_constant : PatFrag<(ops node:$ptr), (sextloadi16 node:$ptr), [{ + return isConstantLoad(dyn_cast<LoadSDNode>(N), -1); +}]>; + def az_extloadi32 : PatFrag<(ops node:$ptr), (az_extload node:$ptr), [{ return cast<LoadSDNode>(N)->getMemoryVT() == MVT::i32; }]>; diff --git a/llvm/lib/Target/R600/R600Instructions.td b/llvm/lib/Target/R600/R600Instructions.td index 632cbcf48a7..9aeebc94361 100644 --- a/llvm/lib/Target/R600/R600Instructions.td +++ b/llvm/lib/Target/R600/R600Instructions.td @@ -1404,6 +1404,10 @@ def VTX_READ_GLOBAL_8_eg : VTX_READ_8_eg <1, [(set i32:$dst_gpr, (az_extloadi8_global ADDRVTX_READ:$src_gpr))] >; +def VTX_READ_GLOBAL_16_eg : VTX_READ_16_eg <1, + [(set i32:$dst_gpr, (az_extloadi16_global ADDRVTX_READ:$src_gpr))] +>; + // 32-bit reads def VTX_READ_GLOBAL_32_eg : VTX_READ_32_eg <1, [(set i32:$dst_gpr, (global_load ADDRVTX_READ:$src_gpr))] @@ -1852,6 +1856,10 @@ def VTX_READ_GLOBAL_8_cm : VTX_READ_8_cm <1, [(set i32:$dst_gpr, (az_extloadi8_global ADDRVTX_READ:$src_gpr))] >; +def VTX_READ_GLOBAL_16_cm : VTX_READ_16_cm <1, + [(set i32:$dst_gpr, (az_extloadi16_global ADDRVTX_READ:$src_gpr))] +>; + // 32-bit reads def VTX_READ_GLOBAL_32_cm : VTX_READ_32_cm <1, [(set i32:$dst_gpr, (global_load ADDRVTX_READ:$src_gpr))] diff --git a/llvm/lib/Target/R600/SIISelLowering.cpp b/llvm/lib/Target/R600/SIISelLowering.cpp index 316567cef46..e4a37c2fb9f 100644 --- a/llvm/lib/Target/R600/SIISelLowering.cpp +++ b/llvm/lib/Target/R600/SIISelLowering.cpp @@ -116,9 +116,9 @@ SDValue SITargetLowering::LowerParameter(SelectionDAG &DAG, EVT VT, MRI.getLiveInVirtReg(AMDGPU::SGPR0_SGPR1), MVT::i64); SDValue Ptr = DAG.getNode(ISD::ADD, DL, MVT::i64, BasePtr, DAG.getConstant(Offset, MVT::i64)); - return DAG.getExtLoad(ISD::ZEXTLOAD, DL, VT, Chain, Ptr, + return DAG.getLoad(VT, DL, Chain, Ptr, MachinePointerInfo(UndefValue::get(PtrTy)), - VT, false, false, ArgVT.getSizeInBits() >> 3); + false, false, false, ArgVT.getSizeInBits() >> 3); } diff --git a/llvm/lib/Target/R600/SIInstructions.td b/llvm/lib/Target/R600/SIInstructions.td index 1ddbd072502..61163c2982d 100644 --- a/llvm/lib/Target/R600/SIInstructions.td +++ b/llvm/lib/Target/R600/SIInstructions.td @@ -403,9 +403,9 @@ defm BUFFER_LOAD_FORMAT_XYZW : MUBUF_Load_Helper <0x00000003, "BUFFER_LOAD_FORMA //def BUFFER_STORE_FORMAT_XYZ : MUBUF_ <0x00000006, "BUFFER_STORE_FORMAT_XYZ", []>; //def BUFFER_STORE_FORMAT_XYZW : MUBUF_ <0x00000007, "BUFFER_STORE_FORMAT_XYZW", []>; defm BUFFER_LOAD_UBYTE : MUBUF_Load_Helper <0x00000008, "BUFFER_LOAD_UBYTE", VReg_32>; -//def BUFFER_LOAD_SBYTE : MUBUF_ <0x00000009, "BUFFER_LOAD_SBYTE", []>; -//def BUFFER_LOAD_USHORT : MUBUF_ <0x0000000a, "BUFFER_LOAD_USHORT", []>; -//def BUFFER_LOAD_SSHORT : MUBUF_ <0x0000000b, "BUFFER_LOAD_SSHORT", []>; +defm BUFFER_LOAD_SBYTE : MUBUF_Load_Helper <0x00000009, "BUFFER_LOAD_SBYTE", VReg_32>; +defm BUFFER_LOAD_USHORT : MUBUF_Load_Helper <0x0000000a, "BUFFER_LOAD_USHORT", VReg_32>; +defm BUFFER_LOAD_SSHORT : MUBUF_Load_Helper <0x0000000b, "BUFFER_LOAD_SSHORT", VReg_32>; defm BUFFER_LOAD_DWORD : MUBUF_Load_Helper <0x0000000c, "BUFFER_LOAD_DWORD", VReg_32>; defm BUFFER_LOAD_DWORDX2 : MUBUF_Load_Helper <0x0000000d, "BUFFER_LOAD_DWORDX2", VReg_64>; defm BUFFER_LOAD_DWORDX4 : MUBUF_Load_Helper <0x0000000e, "BUFFER_LOAD_DWORDX4", VReg_128>; @@ -1741,12 +1741,16 @@ multiclass MUBUFLoad_Pattern <MUBUF Instr_ADDR64, ValueType vt, >; } -defm : MUBUFLoad_Pattern <BUFFER_LOAD_DWORDX2_ADDR64, i64, - global_load, constant_load>; -defm : MUBUFLoad_Pattern <BUFFER_LOAD_DWORD_ADDR64, i32, - global_load, constant_load>; +defm : MUBUFLoad_Pattern <BUFFER_LOAD_SBYTE_ADDR64, i32, + sextloadi8_global, sextloadi8_constant>; defm : MUBUFLoad_Pattern <BUFFER_LOAD_UBYTE_ADDR64, i32, az_extloadi8_global, az_extloadi8_constant>; +defm : MUBUFLoad_Pattern <BUFFER_LOAD_SSHORT_ADDR64, i32, + sextloadi16_global, sextloadi16_constant>; +defm : MUBUFLoad_Pattern <BUFFER_LOAD_USHORT_ADDR64, i32, + az_extloadi16_global, az_extloadi16_constant>; +defm : MUBUFLoad_Pattern <BUFFER_LOAD_DWORD_ADDR64, i32, + global_load, constant_load>; defm : MUBUFLoad_Pattern <BUFFER_LOAD_DWORDX2_ADDR64, i64, global_load, constant_load>; defm : MUBUFLoad_Pattern <BUFFER_LOAD_DWORDX2_ADDR64, i64, diff --git a/llvm/test/CodeGen/R600/load.ll b/llvm/test/CodeGen/R600/load.ll index a1b15bd0b05..f36f20c63d5 100644 --- a/llvm/test/CodeGen/R600/load.ll +++ b/llvm/test/CodeGen/R600/load.ll @@ -15,6 +15,51 @@ define void @load_i8(i32 addrspace(1)* %out, i8 addrspace(1)* %in) { ret void } +; R600-CHECK: @load_i8_sext +; R600-CHECK: VTX_READ_8 [[DST:T[0-9]\.[XYZW]]], [[DST]] +; R600-CHECK: LSHL {{[* ]*}}T{{[0-9]}}.[[LSHL_CHAN:[XYZW]]], [[DST]] +; R600-CHECK: 24 +; R600-CHECK: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[LSHL_CHAN]] +; R600-CHECK: 24 +; SI-CHECK: @load_i8_sext +; SI-CHECK: BUFFER_LOAD_SBYTE +define void @load_i8_sext(i32 addrspace(1)* %out, i8 addrspace(1)* %in) { +entry: + %0 = load i8 addrspace(1)* %in + %1 = sext i8 %0 to i32 + store i32 %1, i32 addrspace(1)* %out + ret void +} + +; Load an i16 value from the global address space. +; R600-CHECK: @load_i16 +; R600-CHECK: VTX_READ_16 T{{[0-9]+\.X, T[0-9]+\.X}} +; SI-CHECK: @load_i16 +; SI-CHECK: BUFFER_LOAD_USHORT +define void @load_i16(i32 addrspace(1)* %out, i16 addrspace(1)* %in) { +entry: + %0 = load i16 addrspace(1)* %in + %1 = zext i16 %0 to i32 + store i32 %1, i32 addrspace(1)* %out + ret void +} + +; R600-CHECK: @load_i16_sext +; R600-CHECK: VTX_READ_16 [[DST:T[0-9]\.[XYZW]]], [[DST]] +; R600-CHECK: LSHL {{[* ]*}}T{{[0-9]}}.[[LSHL_CHAN:[XYZW]]], [[DST]] +; R600-CHECK: 16 +; R600-CHECK: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[LSHL_CHAN]] +; R600-CHECK: 16 +; SI-CHECK: @load_i16_sext +; SI-CHECK: BUFFER_LOAD_SSHORT +define void @load_i16_sext(i32 addrspace(1)* %out, i16 addrspace(1)* %in) { +entry: + %0 = load i16 addrspace(1)* %in + %1 = sext i16 %0 to i32 + store i32 %1, i32 addrspace(1)* %out + ret void +} + ; load an i32 value from the global address space. ; R600-CHECK: @load_i32 ; R600-CHECK: VTX_READ_32 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0 diff --git a/llvm/test/CodeGen/R600/short-args.ll b/llvm/test/CodeGen/R600/short-args.ll index 69a8412c88e..20d0ae43c9d 100644 --- a/llvm/test/CodeGen/R600/short-args.ll +++ b/llvm/test/CodeGen/R600/short-args.ll @@ -1,8 +1,10 @@ -; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s -; RUN: llc < %s -march=r600 -mcpu=cayman | FileCheck %s +; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s --check-prefix=EG-CHECK +; RUN: llc < %s -march=r600 -mcpu=cayman | FileCheck %s --check-prefix=EG-CHECK +; RUN: llc < %s -march=r600 -mcpu=SI | FileCheck %s --check-prefix=SI-CHECK -; CHECK: @i8_arg -; CHECK: MOV {{[ *]*}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z +; EG-CHECK: @i8_arg +; EG-CHECK: MOV {{[ *]*}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z +; SI-CHECK: BUFFER_LOAD_UBYTE define void @i8_arg(i32 addrspace(1)* nocapture %out, i8 %in) nounwind { entry: @@ -11,8 +13,9 @@ entry: ret void } -; CHECK: @i8_zext_arg -; CHECK: MOV {{[ *]*}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z +; EG-CHECK: @i8_zext_arg +; EG-CHECK: MOV {{[ *]*}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z +; SI-CHECK: S_LOAD_DWORD SGPR{{[0-9]}}, SGPR0_SGPR1, 11 define void @i8_zext_arg(i32 addrspace(1)* nocapture %out, i8 zeroext %in) nounwind { entry: @@ -21,8 +24,10 @@ entry: ret void } -; CHECK: @i8_sext_arg -; CHECK: MOV {{[ *]*}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z +; EG-CHECK: @i8_sext_arg +; EG-CHECK: MOV {{[ *]*}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z +; SI-CHECK: S_LOAD_DWORD SGPR{{[0-9]}}, SGPR0_SGPR1, 11 + define void @i8_sext_arg(i32 addrspace(1)* nocapture %out, i8 signext %in) nounwind { entry: %0 = sext i8 %in to i32 @@ -30,8 +35,9 @@ entry: ret void } -; CHECK: @i16_arg -; CHECK: MOV {{[ *]*}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z +; EG-CHECK: @i16_arg +; EG-CHECK: MOV {{[ *]*}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z +; SI-CHECK: BUFFER_LOAD_USHORT define void @i16_arg(i32 addrspace(1)* nocapture %out, i16 %in) nounwind { entry: @@ -40,8 +46,9 @@ entry: ret void } -; CHECK: @i16_zext_arg -; CHECK: MOV {{[ *]*}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z +; EG-CHECK: @i16_zext_arg +; EG-CHECK: MOV {{[ *]*}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z +; SI-CHECK: S_LOAD_DWORD SGPR{{[0-9]}}, SGPR0_SGPR1, 11 define void @i16_zext_arg(i32 addrspace(1)* nocapture %out, i16 zeroext %in) nounwind { entry: @@ -50,8 +57,9 @@ entry: ret void } -; CHECK: @i16_sext_arg -; CHECK: MOV {{[ *]*}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z +; EG-CHECK: @i16_sext_arg +; EG-CHECK: MOV {{[ *]*}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z +; SI-CHECK: S_LOAD_DWORD SGPR{{[0-9]}}, SGPR0_SGPR1, 11 define void @i16_sext_arg(i32 addrspace(1)* nocapture %out, i16 signext %in) nounwind { entry: |

