summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Stanislav Mekhanoshin <Stanislav.Mekhanoshin@amd.com> 2019-10-22 11:07:15 -0700
committer Stanislav Mekhanoshin <Stanislav.Mekhanoshin@amd.com> 2019-10-22 11:27:36 -0700
commit 48f57138be55a939afc64d357f7b26220206127a (patch)
tree 2ca3176f861ebfc0ae3f34aa6552471db6dd0e2b
parent 19ca80ef0575b376e135271c7d44799803407941 (diff)
download bcm5719-llvm-48f57138be55a939afc64d357f7b26220206127a.tar.gz
bcm5719-llvm-48f57138be55a939afc64d357f7b26220206127a.zip
[AMDGPU] Allow tied operand subreg folding
It turns out that folding a subregister into a tied operand does make sense, contrary to what the removed comment said. Differential Revision: https://reviews.llvm.org/D69287
-rw-r--r-- llvm/lib/Target/AMDGPU/SIFoldOperands.cpp 12
-rw-r--r-- llvm/test/CodeGen/AMDGPU/fold-vgpr-copy.mir 16
2 files changed, 16 insertions, 12 deletions
diff --git a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
index 4eac0316876..69ac367f7ab 100644
--- a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
+++ b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
@@ -513,18 +513,6 @@ void SIFoldOperands::foldOperand(
if (UseOp.isReg() && OpToFold.isReg()) {
if (UseOp.isImplicit() || UseOp.getSubReg() != AMDGPU::NoSubRegister)
return;
-
- // Don't fold subregister extracts into tied operands, only if it is a full
- // copy since a subregister use tied to a full register def doesn't really
- // make sense. e.g. don't fold:
- //
- // %1 = COPY %0:sub1
- // %2<tied3> = V_MAC_{F16, F32} %3, %4, %1<tied0>
- //
- // into
- // %2<tied3> = V_MAC_{F16, F32} %3, %4, %0:sub1<tied0>
- if (UseOp.isTied() && OpToFold.getSubReg() != AMDGPU::NoSubRegister)
- return;
}
// Special case for REG_SEQUENCE: We can't fold literals into
diff --git a/llvm/test/CodeGen/AMDGPU/fold-vgpr-copy.mir b/llvm/test/CodeGen/AMDGPU/fold-vgpr-copy.mir
index 7d1c75c3a5b..db57d7727cd 100644
--- a/llvm/test/CodeGen/AMDGPU/fold-vgpr-copy.mir
+++ b/llvm/test/CodeGen/AMDGPU/fold-vgpr-copy.mir
@@ -25,3 +25,19 @@ body: |
DS_WRITE2_B32_gfx9 %2, killed %4, killed %3, 0, 1, 0, implicit $exec
...
+
+# GCN-LABEL: name: fma_sgpr_use
+# GCN: %0:sreg_64_xexec = IMPLICIT_DEF
+# GCN: %4:vgpr_32 = nnan ninf nsz arcp contract afn reassoc V_FMAC_F32_e64 2, %0.sub0, 0, 1073741824, 0, %3, 0, 0, implicit $exec
+
+---
+name: fma_sgpr_use
+body: |
+ bb.0:
+ %0:sreg_64_xexec = IMPLICIT_DEF
+ %1:sgpr_32 = COPY %0.sub0
+ %2:sgpr_32 = COPY %0.sub1
+ %3:vgpr_32 = COPY %2
+ %4:vgpr_32 = nnan ninf nsz arcp contract afn reassoc V_FMAC_F32_e64 2, %1, 0, 1073741824, 0, %3, 0, 0, implicit $exec
+ DS_WRITE2_B32_gfx9 undef %5:vgpr_32, killed %4, undef %6:vgpr_32, 0, 1, 0, implicit $exec
+...
OpenPOWER on IntegriCloud