diff options
author | Sam Kolton <Sam.Kolton@amd.com> | 2017-03-31 11:42:43 +0000 |
---|---|---|
committer | Sam Kolton <Sam.Kolton@amd.com> | 2017-03-31 11:42:43 +0000 |
commit | 27e0f8bc72b4fbda199bd190b755cb83440696b0 (patch) | |
tree | f4e75b5852d8c3799a81d1a6a7c20f537d561292 /llvm/lib/Target/AMDGPU/SIFoldOperands.cpp | |
parent | 37b536e4b3bd2bbce3009820d861947d676625fb (diff) | |
download | bcm5719-llvm-27e0f8bc72b4fbda199bd190b755cb83440696b0.tar.gz bcm5719-llvm-27e0f8bc72b4fbda199bd190b755cb83440696b0.zip |
[AMDGPU] SDWA Peephole: improve search for immediates in SDWA patterns
Previously, the compiler often extracted common immediates into a specific register, e.g.:
```
%vreg0 = S_MOV_B32 0xff;
%vreg2 = V_AND_B32_e32 %vreg0, %vreg1
%vreg4 = V_AND_B32_e32 %vreg0, %vreg3
```
Because of this, the SDWA peephole failed to find SDWA-convertible patterns. E.g., in the previous example, this could be converted into 2 SDWA src operands:
```
SDWA src: %vreg2 src_sel:BYTE_0
SDWA src: %vreg4 src_sel:BYTE_0
```
With this change, the peephole checks whether an operand is either an immediate or a register that is a copy of an immediate.
llvm-svn: 299202
Diffstat (limited to 'llvm/lib/Target/AMDGPU/SIFoldOperands.cpp')
-rw-r--r-- | llvm/lib/Target/AMDGPU/SIFoldOperands.cpp | 23 |
1 files changed, 1 insertions, 22 deletions
diff --git a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp index 04922f652b9..d63414735b9 100644 --- a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp +++ b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp @@ -139,27 +139,6 @@ FunctionPass *llvm::createSIFoldOperandsPass() { return new SIFoldOperands(); } -static bool isFoldableCopy(const MachineInstr &MI) { - switch (MI.getOpcode()) { - case AMDGPU::V_MOV_B32_e32: - case AMDGPU::V_MOV_B32_e64: - case AMDGPU::V_MOV_B64_PSEUDO: { - // If there are additional implicit register operands, this may be used for - // register indexing so the source register operand isn't simply copied. - unsigned NumOps = MI.getDesc().getNumOperands() + - MI.getDesc().getNumImplicitUses(); - - return MI.getNumOperands() == NumOps; - } - case AMDGPU::S_MOV_B32: - case AMDGPU::S_MOV_B64: - case AMDGPU::COPY: - return true; - default: - return false; - } -} - static bool updateOperand(FoldCandidate &Fold, const TargetRegisterInfo &TRI) { MachineInstr *MI = Fold.UseMI; @@ -936,7 +915,7 @@ bool SIFoldOperands::runOnMachineFunction(MachineFunction &MF) { tryFoldInst(TII, &MI); - if (!isFoldableCopy(MI)) { + if (!TII->isFoldableCopy(MI)) { if (IsIEEEMode || !tryFoldOMod(MI)) tryFoldClamp(MI); continue; |