summary | refs | log | tree | commit | diff | stats
path: root/llvm/lib/Target/AMDGPU
diff options
context:
space:
mode:
author: Matt Arsenault <Matthew.Arsenault@amd.com> 2016-01-26 04:43:48 +0000
committer: Matt Arsenault <Matthew.Arsenault@amd.com> 2016-01-26 04:43:48 +0000
commit: c5f6152911201e2dfea7d2ab6bf2a2489242c9a7 (patch)
tree: 28c9d9482847d471f71cb030968021abc3ef04f7 /llvm/lib/Target/AMDGPU
parent: 018179fc46c08afdd0f160093712c9f38f8326d9 (diff)
download: bcm5719-llvm-c5f6152911201e2dfea7d2ab6bf2a2489242c9a7.tar.gz
          bcm5719-llvm-c5f6152911201e2dfea7d2ab6bf2a2489242c9a7.zip
AMDGPU: Make v32i8/v64i8 illegal types
Old intrinsics were forcing these types to be legal, but those intrinsics have now all been removed. This fixes large i8 vector operations generally being broken.

llvm-svn: 258788
Diffstat (limited to 'llvm/lib/Target/AMDGPU')
-rw-r--r--  llvm/lib/Target/AMDGPU/SIISelLowering.cpp   3
-rw-r--r--  llvm/lib/Target/AMDGPU/SIInstrInfo.td       2
-rw-r--r--  llvm/lib/Target/AMDGPU/SIInstructions.td   25
-rw-r--r--  llvm/lib/Target/AMDGPU/SIRegisterInfo.td    4
4 files changed, 13 insertions, 21 deletions
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 972a929c943..faecf3c1da9 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -43,9 +43,6 @@ SITargetLowering::SITargetLowering(TargetMachine &TM,
addRegisterClass(MVT::i1, &AMDGPU::VReg_1RegClass);
addRegisterClass(MVT::i64, &AMDGPU::SReg_64RegClass);
- addRegisterClass(MVT::v32i8, &AMDGPU::SReg_256RegClass);
- addRegisterClass(MVT::v64i8, &AMDGPU::SReg_512RegClass);
-
addRegisterClass(MVT::i32, &AMDGPU::SReg_32RegClass);
addRegisterClass(MVT::f32, &AMDGPU::VGPR_32RegClass);
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.td b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
index 8735277149a..7de293f012b 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.td
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
@@ -120,7 +120,7 @@ def SIload_input : SDNode<"AMDGPUISD::LOAD_INPUT",
>;
class SDSample<string opcode> : SDNode <opcode,
- SDTypeProfile<1, 4, [SDTCisVT<0, v4f32>, SDTCisVT<2, v32i8>,
+ SDTypeProfile<1, 4, [SDTCisVT<0, v4f32>, SDTCisVT<2, v8i32>,
SDTCisVT<3, v4i32>, SDTCisVT<4, i32>]>
>;
diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td
index 7b4785bc819..9d7e6ca3fc8 100644
--- a/llvm/lib/Target/AMDGPU/SIInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SIInstructions.td
@@ -2102,7 +2102,6 @@ let AddedComplexity = 100 in {
defm : SMRD_Pattern <"S_LOAD_DWORD", i32>;
defm : SMRD_Pattern <"S_LOAD_DWORDX2", v2i32>;
defm : SMRD_Pattern <"S_LOAD_DWORDX4", v4i32>;
-defm : SMRD_Pattern <"S_LOAD_DWORDX8", v32i8>;
defm : SMRD_Pattern <"S_LOAD_DWORDX8", v8i32>;
defm : SMRD_Pattern <"S_LOAD_DWORDX16", v16i32>;
@@ -2343,34 +2342,34 @@ defm : ImagePatterns<int_SI_image_load_mip, "IMAGE_LOAD_MIP">;
/* SIsample for simple 1D texture lookup */
def : Pat <
- (SIsample i32:$addr, v32i8:$rsrc, v4i32:$sampler, imm),
+ (SIsample i32:$addr, v8i32:$rsrc, v4i32:$sampler, imm),
(IMAGE_SAMPLE_V4_V1 0xf, 0, 0, 0, 0, 0, 0, 0, $addr, $rsrc, $sampler)
>;
class SamplePattern<SDNode name, MIMG opcode, ValueType vt> : Pat <
- (name vt:$addr, v32i8:$rsrc, v4i32:$sampler, imm),
+ (name vt:$addr, v8i32:$rsrc, v4i32:$sampler, imm),
(opcode 0xf, 0, 0, 0, 0, 0, 0, 0, $addr, $rsrc, $sampler)
>;
class SampleRectPattern<SDNode name, MIMG opcode, ValueType vt> : Pat <
- (name vt:$addr, v32i8:$rsrc, v4i32:$sampler, TEX_RECT),
+ (name vt:$addr, v8i32:$rsrc, v4i32:$sampler, TEX_RECT),
(opcode 0xf, 1, 0, 0, 0, 0, 0, 0, $addr, $rsrc, $sampler)
>;
class SampleArrayPattern<SDNode name, MIMG opcode, ValueType vt> : Pat <
- (name vt:$addr, v32i8:$rsrc, v4i32:$sampler, TEX_ARRAY),
+ (name vt:$addr, v8i32:$rsrc, v4i32:$sampler, TEX_ARRAY),
(opcode 0xf, 0, 0, 1, 0, 0, 0, 0, $addr, $rsrc, $sampler)
>;
class SampleShadowPattern<SDNode name, MIMG opcode,
ValueType vt> : Pat <
- (name vt:$addr, v32i8:$rsrc, v4i32:$sampler, TEX_SHADOW),
+ (name vt:$addr, v8i32:$rsrc, v4i32:$sampler, TEX_SHADOW),
(opcode 0xf, 0, 0, 0, 0, 0, 0, 0, $addr, $rsrc, $sampler)
>;
class SampleShadowArrayPattern<SDNode name, MIMG opcode,
ValueType vt> : Pat <
- (name vt:$addr, v32i8:$rsrc, v4i32:$sampler, TEX_SHADOW_ARRAY),
+ (name vt:$addr, v8i32:$rsrc, v4i32:$sampler, TEX_SHADOW_ARRAY),
(opcode 0xf, 0, 0, 1, 0, 0, 0, 0, $addr, $rsrc, $sampler)
>;
@@ -2423,22 +2422,22 @@ defm : SamplePatterns<IMAGE_SAMPLE_V4_V16, IMAGE_SAMPLE_C_V4_V16,
/* int_SI_imageload for texture fetches consuming varying address parameters */
class ImageLoadPattern<Intrinsic name, MIMG opcode, ValueType addr_type> : Pat <
- (name addr_type:$addr, v32i8:$rsrc, imm),
+ (name addr_type:$addr, v8i32:$rsrc, imm),
(opcode 0xf, 0, 0, 0, 0, 0, 0, 0, $addr, $rsrc)
>;
class ImageLoadArrayPattern<Intrinsic name, MIMG opcode, ValueType addr_type> : Pat <
- (name addr_type:$addr, v32i8:$rsrc, TEX_ARRAY),
+ (name addr_type:$addr, v8i32:$rsrc, TEX_ARRAY),
(opcode 0xf, 0, 0, 1, 0, 0, 0, 0, $addr, $rsrc)
>;
class ImageLoadMSAAPattern<Intrinsic name, MIMG opcode, ValueType addr_type> : Pat <
- (name addr_type:$addr, v32i8:$rsrc, TEX_MSAA),
+ (name addr_type:$addr, v8i32:$rsrc, TEX_MSAA),
(opcode 0xf, 0, 0, 0, 0, 0, 0, 0, $addr, $rsrc)
>;
class ImageLoadArrayMSAAPattern<Intrinsic name, MIMG opcode, ValueType addr_type> : Pat <
- (name addr_type:$addr, v32i8:$rsrc, TEX_ARRAY_MSAA),
+ (name addr_type:$addr, v8i32:$rsrc, TEX_ARRAY_MSAA),
(opcode 0xf, 0, 0, 1, 0, 0, 0, 0, $addr, $rsrc)
>;
@@ -2552,12 +2551,8 @@ def : BitConvert <v4i32, v2f64, VReg_128>;
def : BitConvert <v8f32, v8i32, SReg_256>;
def : BitConvert <v8i32, v8f32, SReg_256>;
-def : BitConvert <v8i32, v32i8, SReg_256>;
-def : BitConvert <v32i8, v8i32, SReg_256>;
-def : BitConvert <v8i32, v32i8, VReg_256>;
def : BitConvert <v8i32, v8f32, VReg_256>;
def : BitConvert <v8f32, v8i32, VReg_256>;
-def : BitConvert <v32i8, v8i32, VReg_256>;
def : BitConvert <v16i32, v16f32, VReg_512>;
def : BitConvert <v16f32, v16i32, VReg_512>;
diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.td b/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
index bfaf93709d8..5ab8830d1d9 100644
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
@@ -208,7 +208,7 @@ def SReg_128 : RegisterClass<"AMDGPU", [v4i32, v16i8, v2i64], 32, (add SGPR_128)
let CopyCost = 2;
}
-def SReg_256 : RegisterClass<"AMDGPU", [v32i8, v8i32, v8f32], 32, (add SGPR_256)> {
+def SReg_256 : RegisterClass<"AMDGPU", [v8i32, v8f32], 32, (add SGPR_256)> {
// Requires 4 s_mov_b64 to copy
let CopyCost = 4;
}
@@ -236,7 +236,7 @@ def VReg_128 : RegisterClass<"AMDGPU", [v4i32, v4f32, v2i64, v2f64], 32, (add VG
let CopyCost = 4;
}
-def VReg_256 : RegisterClass<"AMDGPU", [v32i8, v8i32, v8f32], 32, (add VGPR_256)> {
+def VReg_256 : RegisterClass<"AMDGPU", [v8i32, v8f32], 32, (add VGPR_256)> {
let CopyCost = 8;
}
OpenPOWER on IntegriCloud