summaryrefslogtreecommitdiffstats
path: root/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
diff options
context:
space:
mode:
authorSam Kolton <Sam.Kolton@amd.com>2017-03-31 11:42:43 +0000
committerSam Kolton <Sam.Kolton@amd.com>2017-03-31 11:42:43 +0000
commit27e0f8bc72b4fbda199bd190b755cb83440696b0 (patch)
treef4e75b5852d8c3799a81d1a6a7c20f537d561292 /llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
parent37b536e4b3bd2bbce3009820d861947d676625fb (diff)
downloadbcm5719-llvm-27e0f8bc72b4fbda199bd190b755cb83440696b0.tar.gz
bcm5719-llvm-27e0f8bc72b4fbda199bd190b755cb83440696b0.zip
[AMDGPU] SDWA Peephole: improve search for immediates in SDWA patterns
Previously compiler often extracted common immediates into specific register, e.g.: ``` %vreg0 = S_MOV_B32 0xff; %vreg2 = V_AND_B32_e32 %vreg0, %vreg1 %vreg4 = V_AND_B32_e32 %vreg0, %vreg3 ``` Because of this SDWA peephole failed to find SDWA convertible pattern. E.g. in previous example this could be converted into 2 SDWA src operands: ``` SDWA src: %vreg2 src_sel:BYTE_0 SDWA src: %vreg4 src_sel:BYTE_0 ``` With this change peephole check if operand is either immediate or register that is copy of immediate. llvm-svn: 299202
Diffstat (limited to 'llvm/lib/Target/AMDGPU/SIFoldOperands.cpp')
-rw-r--r--llvm/lib/Target/AMDGPU/SIFoldOperands.cpp23
1 files changed, 1 insertions, 22 deletions
diff --git a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
index 04922f652b9..d63414735b9 100644
--- a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
+++ b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
@@ -139,27 +139,6 @@ FunctionPass *llvm::createSIFoldOperandsPass() {
return new SIFoldOperands();
}
-static bool isFoldableCopy(const MachineInstr &MI) {
- switch (MI.getOpcode()) {
- case AMDGPU::V_MOV_B32_e32:
- case AMDGPU::V_MOV_B32_e64:
- case AMDGPU::V_MOV_B64_PSEUDO: {
- // If there are additional implicit register operands, this may be used for
- // register indexing so the source register operand isn't simply copied.
- unsigned NumOps = MI.getDesc().getNumOperands() +
- MI.getDesc().getNumImplicitUses();
-
- return MI.getNumOperands() == NumOps;
- }
- case AMDGPU::S_MOV_B32:
- case AMDGPU::S_MOV_B64:
- case AMDGPU::COPY:
- return true;
- default:
- return false;
- }
-}
-
static bool updateOperand(FoldCandidate &Fold,
const TargetRegisterInfo &TRI) {
MachineInstr *MI = Fold.UseMI;
@@ -936,7 +915,7 @@ bool SIFoldOperands::runOnMachineFunction(MachineFunction &MF) {
tryFoldInst(TII, &MI);
- if (!isFoldableCopy(MI)) {
+ if (!TII->isFoldableCopy(MI)) {
if (IsIEEEMode || !tryFoldOMod(MI))
tryFoldClamp(MI);
continue;
OpenPOWER on IntegriCloud