diff options
| author | Matt Arsenault <Matthew.Arsenault@amd.com> | 2016-01-26 04:43:48 +0000 |
|---|---|---|
| committer | Matt Arsenault <Matthew.Arsenault@amd.com> | 2016-01-26 04:43:48 +0000 |
| commit | c5f6152911201e2dfea7d2ab6bf2a2489242c9a7 (patch) | |
| tree | 28c9d9482847d471f71cb030968021abc3ef04f7 /llvm/lib/Target/AMDGPU | |
| parent | 018179fc46c08afdd0f160093712c9f38f8326d9 (diff) | |
| download | bcm5719-llvm-c5f6152911201e2dfea7d2ab6bf2a2489242c9a7.tar.gz bcm5719-llvm-c5f6152911201e2dfea7d2ab6bf2a2489242c9a7.zip | |
AMDGPU: Make v32i8/v64i8 illegal types
Old intrinsics were forcing these, but they have now all
been removed. This fixes large i8 vector operations generally
being broken.
llvm-svn: 258788
Diffstat (limited to 'llvm/lib/Target/AMDGPU')
| -rw-r--r-- | llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 3 | ||||
| -rw-r--r-- | llvm/lib/Target/AMDGPU/SIInstrInfo.td | 2 | ||||
| -rw-r--r-- | llvm/lib/Target/AMDGPU/SIInstructions.td | 25 | ||||
| -rw-r--r-- | llvm/lib/Target/AMDGPU/SIRegisterInfo.td | 4 |
4 files changed, 13 insertions, 21 deletions
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp index 972a929c943..faecf3c1da9 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -43,9 +43,6 @@ SITargetLowering::SITargetLowering(TargetMachine &TM, addRegisterClass(MVT::i1, &AMDGPU::VReg_1RegClass); addRegisterClass(MVT::i64, &AMDGPU::SReg_64RegClass); - addRegisterClass(MVT::v32i8, &AMDGPU::SReg_256RegClass); - addRegisterClass(MVT::v64i8, &AMDGPU::SReg_512RegClass); - addRegisterClass(MVT::i32, &AMDGPU::SReg_32RegClass); addRegisterClass(MVT::f32, &AMDGPU::VGPR_32RegClass); diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.td b/llvm/lib/Target/AMDGPU/SIInstrInfo.td index 8735277149a..7de293f012b 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.td +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.td @@ -120,7 +120,7 @@ def SIload_input : SDNode<"AMDGPUISD::LOAD_INPUT", >; class SDSample<string opcode> : SDNode <opcode, - SDTypeProfile<1, 4, [SDTCisVT<0, v4f32>, SDTCisVT<2, v32i8>, + SDTypeProfile<1, 4, [SDTCisVT<0, v4f32>, SDTCisVT<2, v8i32>, SDTCisVT<3, v4i32>, SDTCisVT<4, i32>]> >; diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td index 7b4785bc819..9d7e6ca3fc8 100644 --- a/llvm/lib/Target/AMDGPU/SIInstructions.td +++ b/llvm/lib/Target/AMDGPU/SIInstructions.td @@ -2102,7 +2102,6 @@ let AddedComplexity = 100 in { defm : SMRD_Pattern <"S_LOAD_DWORD", i32>; defm : SMRD_Pattern <"S_LOAD_DWORDX2", v2i32>; defm : SMRD_Pattern <"S_LOAD_DWORDX4", v4i32>; -defm : SMRD_Pattern <"S_LOAD_DWORDX8", v32i8>; defm : SMRD_Pattern <"S_LOAD_DWORDX8", v8i32>; defm : SMRD_Pattern <"S_LOAD_DWORDX16", v16i32>; @@ -2343,34 +2342,34 @@ defm : ImagePatterns<int_SI_image_load_mip, "IMAGE_LOAD_MIP">; /* SIsample for simple 1D texture lookup */ def : Pat < - (SIsample i32:$addr, v32i8:$rsrc, v4i32:$sampler, imm), + (SIsample i32:$addr, v8i32:$rsrc, v4i32:$sampler, imm), (IMAGE_SAMPLE_V4_V1 0xf, 0, 0, 0, 0, 0, 0, 0, $addr, $rsrc, $sampler) >; class SamplePattern<SDNode name, MIMG opcode, ValueType vt> : Pat < - (name vt:$addr, v32i8:$rsrc, v4i32:$sampler, imm), + (name vt:$addr, v8i32:$rsrc, v4i32:$sampler, imm), (opcode 0xf, 0, 0, 0, 0, 0, 0, 0, $addr, $rsrc, $sampler) >; class SampleRectPattern<SDNode name, MIMG opcode, ValueType vt> : Pat < - (name vt:$addr, v32i8:$rsrc, v4i32:$sampler, TEX_RECT), + (name vt:$addr, v8i32:$rsrc, v4i32:$sampler, TEX_RECT), (opcode 0xf, 1, 0, 0, 0, 0, 0, 0, $addr, $rsrc, $sampler) >; class SampleArrayPattern<SDNode name, MIMG opcode, ValueType vt> : Pat < - (name vt:$addr, v32i8:$rsrc, v4i32:$sampler, TEX_ARRAY), + (name vt:$addr, v8i32:$rsrc, v4i32:$sampler, TEX_ARRAY), (opcode 0xf, 0, 0, 1, 0, 0, 0, 0, $addr, $rsrc, $sampler) >; class SampleShadowPattern<SDNode name, MIMG opcode, ValueType vt> : Pat < - (name vt:$addr, v32i8:$rsrc, v4i32:$sampler, TEX_SHADOW), + (name vt:$addr, v8i32:$rsrc, v4i32:$sampler, TEX_SHADOW), (opcode 0xf, 0, 0, 0, 0, 0, 0, 0, $addr, $rsrc, $sampler) >; class SampleShadowArrayPattern<SDNode name, MIMG opcode, ValueType vt> : Pat < - (name vt:$addr, v32i8:$rsrc, v4i32:$sampler, TEX_SHADOW_ARRAY), + (name vt:$addr, v8i32:$rsrc, v4i32:$sampler, TEX_SHADOW_ARRAY), (opcode 0xf, 0, 0, 1, 0, 0, 0, 0, $addr, $rsrc, $sampler) >; @@ -2423,22 +2422,22 @@ defm : SamplePatterns<IMAGE_SAMPLE_V4_V16, IMAGE_SAMPLE_C_V4_V16, /* int_SI_imageload for texture fetches consuming varying address parameters */ class ImageLoadPattern<Intrinsic name, MIMG opcode, ValueType addr_type> : Pat < - (name addr_type:$addr, v32i8:$rsrc, imm), + (name addr_type:$addr, v8i32:$rsrc, imm), (opcode 0xf, 0, 0, 0, 0, 0, 0, 0, $addr, $rsrc) >; class ImageLoadArrayPattern<Intrinsic name, MIMG opcode, ValueType addr_type> : Pat < - (name addr_type:$addr, v32i8:$rsrc, TEX_ARRAY), + (name addr_type:$addr, v8i32:$rsrc, TEX_ARRAY), (opcode 0xf, 0, 0, 1, 0, 0, 0, 0, $addr, $rsrc) >; class ImageLoadMSAAPattern<Intrinsic name, MIMG opcode, ValueType addr_type> : Pat < - (name addr_type:$addr, v32i8:$rsrc, TEX_MSAA), + (name addr_type:$addr, v8i32:$rsrc, TEX_MSAA), (opcode 0xf, 0, 0, 0, 0, 0, 0, 0, $addr, $rsrc) >; class ImageLoadArrayMSAAPattern<Intrinsic name, MIMG opcode, ValueType addr_type> : Pat < - (name addr_type:$addr, v32i8:$rsrc, TEX_ARRAY_MSAA), + (name addr_type:$addr, v8i32:$rsrc, TEX_ARRAY_MSAA), (opcode 0xf, 0, 0, 1, 0, 0, 0, 0, $addr, $rsrc) >; @@ -2552,12 +2551,8 @@ def : BitConvert <v4i32, v2f64, VReg_128>; def : BitConvert <v8f32, v8i32, SReg_256>; def : BitConvert <v8i32, v8f32, SReg_256>; -def : BitConvert <v8i32, v32i8, SReg_256>; -def : BitConvert <v32i8, v8i32, SReg_256>; -def : BitConvert <v8i32, v32i8, VReg_256>; def : BitConvert <v8i32, v8f32, VReg_256>; def : BitConvert <v8f32, v8i32, VReg_256>; -def : BitConvert <v32i8, v8i32, VReg_256>; def : BitConvert <v16i32, v16f32, VReg_512>; def : BitConvert <v16f32, v16i32, VReg_512>; diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.td b/llvm/lib/Target/AMDGPU/SIRegisterInfo.td index bfaf93709d8..5ab8830d1d9 100644 --- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.td +++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.td @@ -208,7 +208,7 @@ def SReg_128 : RegisterClass<"AMDGPU", [v4i32, v16i8, v2i64], 32, (add SGPR_128) let CopyCost = 2; } -def SReg_256 : RegisterClass<"AMDGPU", [v32i8, v8i32, v8f32], 32, (add SGPR_256)> { +def SReg_256 : RegisterClass<"AMDGPU", [v8i32, v8f32], 32, (add SGPR_256)> { // Requires 4 s_mov_b64 to copy let CopyCost = 4; } @@ -236,7 +236,7 @@ def VReg_128 : RegisterClass<"AMDGPU", [v4i32, v4f32, v2i64, v2f64], 32, (add VG let CopyCost = 4; } -def VReg_256 : RegisterClass<"AMDGPU", [v32i8, v8i32, v8f32], 32, (add VGPR_256)> { +def VReg_256 : RegisterClass<"AMDGPU", [v8i32, v8f32], 32, (add VGPR_256)> { let CopyCost = 8; } |

