summaryrefslogtreecommitdiffstats
path: root/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.cpp
diff options
context:
space:
mode:
authorMatt Arsenault <Matthew.Arsenault@amd.com>2017-12-04 22:18:27 +0000
committerMatt Arsenault <Matthew.Arsenault@amd.com>2017-12-04 22:18:27 +0000
commit68f05052638e11164bc88cd4022ad78d8ad72306 (patch)
tree077e0797317387386a1088fcc6f984d3cc8c98da /llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.cpp
parente6667ded4d0bf664271b36dd98cc5ca69604ae9e (diff)
downloadbcm5719-llvm-68f05052638e11164bc88cd4022ad78d8ad72306.tar.gz
bcm5719-llvm-68f05052638e11164bc88cd4022ad78d8ad72306.zip
AMDGPU: Fix creating invalid copy when adjusting dmask
Move the entire optimization to one place. Before it was possible to adjust dmask without changing the register class of the output instruction, since they were done in separate places. Fix all lane sizes and move all of the optimization into the DAG folding. llvm-svn: 319705
Diffstat (limited to 'llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.cpp')
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.cpp56
1 files changed, 50 insertions, 6 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.cpp
index 1e23aa8411a..49447862b60 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.cpp
@@ -56,15 +56,59 @@ bool AMDGPUInstrInfo::shouldScheduleLoadsNear(SDNode *Load0, SDNode *Load1,
return (NumLoads <= 16 && (Offset1 - Offset0) < 64);
}
-int AMDGPUInstrInfo::getMaskedMIMGOp(uint16_t Opcode, unsigned Channels) const {
- switch (Channels) {
- default: return Opcode;
- case 1: return AMDGPU::getMaskedMIMGOp(Opcode, AMDGPU::Channels_1);
- case 2: return AMDGPU::getMaskedMIMGOp(Opcode, AMDGPU::Channels_2);
- case 3: return AMDGPU::getMaskedMIMGOp(Opcode, AMDGPU::Channels_3);
+static AMDGPU::Channels indexToChannel(unsigned Channel) {
+ switch (Channel) {
+ case 1:
+ return AMDGPU::Channels_1;
+ case 2:
+ return AMDGPU::Channels_2;
+ case 3:
+ return AMDGPU::Channels_3;
+ case 4:
+ return AMDGPU::Channels_4;
+ default:
+ llvm_unreachable("invalid MIMG channel");
}
}
+// FIXME: Need to handle d16 images correctly.
+static unsigned rcToChannels(unsigned RCID) {
+ switch (RCID) {
+ case AMDGPU::VGPR_32RegClassID:
+ return 1;
+ case AMDGPU::VReg_64RegClassID:
+ return 2;
+ case AMDGPU::VReg_96RegClassID:
+ return 3;
+ case AMDGPU::VReg_128RegClassID:
+ return 4;
+ default:
+ llvm_unreachable("invalid MIMG register class");
+ }
+}
+
+int AMDGPUInstrInfo::getMaskedMIMGOp(unsigned Opc,
+ unsigned NewChannels) const {
+ AMDGPU::Channels Channel = indexToChannel(NewChannels);
+ unsigned OrigChannels = rcToChannels(get(Opc).OpInfo[0].RegClass);
+ if (NewChannels == OrigChannels)
+ return Opc;
+
+ switch (OrigChannels) {
+ case 1:
+ return AMDGPU::getMaskedMIMGOp1(Opc, Channel);
+ case 2:
+ return AMDGPU::getMaskedMIMGOp2(Opc, Channel);
+ case 3:
+ return AMDGPU::getMaskedMIMGOp3(Opc, Channel);
+ case 4:
+ return AMDGPU::getMaskedMIMGOp4(Opc, Channel);
+ default:
+ llvm_unreachable("invalid MIMG channel");
+ }
+}
+
+
// This must be kept in sync with the SIEncodingFamily class in SIInstrInfo.td
enum SIEncodingFamily {
SI = 0,
OpenPOWER on IntegriCloud