diff options
| author | Matt Arsenault <Matthew.Arsenault@amd.com> | 2019-06-18 12:23:46 +0000 | 
|---|---|---|
| committer | Matt Arsenault <Matthew.Arsenault@amd.com> | 2019-06-18 12:23:46 +0000 | 
| commit | bcb5ea0042847a25b58c407e57e0c9b1c289e72a (patch) | |
| tree | 9f1484f6e257fdd82ae4dccaf70ac8c71dc9c159 /llvm/lib | |
| parent | e75e197ad879177bc643d4c5d5596bbe173555e7 (diff) | |
| download | bcm5719-llvm-bcb5ea0042847a25b58c407e57e0c9b1c289e72a.tar.gz bcm5719-llvm-bcb5ea0042847a25b58c407e57e0c9b1c289e72a.zip  | |
AMDGPU: Fold readlane from copy of SGPR or imm
Such readlane/readfirstlane instructions may be inserted somewhere to assert that a value is uniform.
llvm-svn: 363670
Diffstat (limited to 'llvm/lib')
| -rw-r--r-- | llvm/lib/Target/AMDGPU/SIFoldOperands.cpp | 35 | ||||
| -rw-r--r-- | llvm/lib/Target/AMDGPU/SIInstructions.td | 7 | 
2 files changed, 42 insertions, 0 deletions
diff --git a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp index 61a4407cbcf..00698e2dd4e 100644 --- a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp +++ b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp @@ -506,6 +506,41 @@ void SIFoldOperands::foldOperand(        return;      } +    unsigned UseOpc = UseMI->getOpcode(); +    if (UseOpc == AMDGPU::V_READFIRSTLANE_B32 || +        (UseOpc == AMDGPU::V_READLANE_B32 && +         (int)UseOpIdx == +         AMDGPU::getNamedOperandIdx(UseOpc, AMDGPU::OpName::src0))) { +      // %vgpr = V_MOV_B32 imm +      // %sgpr = V_READFIRSTLANE_B32 %vgpr +      // => +      // %sgpr = S_MOV_B32 imm +      if (FoldingImm) { +        if (!isEXECMaskConstantBetweenDefAndUses( +              UseMI->getOperand(UseOpIdx).getReg(), *MRI)) +          return; + +        UseMI->setDesc(TII->get(AMDGPU::S_MOV_B32)); +        UseMI->getOperand(1).ChangeToImmediate(OpToFold.getImm()); +        UseMI->RemoveOperand(2); // Remove exec read (or src1 for readlane) +        return; +      } + +      if (OpToFold.isReg() && TRI->isSGPRReg(*MRI, OpToFold.getReg())) { +        if (!isEXECMaskConstantBetweenDefAndUses( +              UseMI->getOperand(UseOpIdx).getReg(), *MRI)) +          return; + +        // %vgpr = COPY %sgpr0 +        // %sgpr1 = V_READFIRSTLANE_B32 %vgpr +        // => +        // %sgpr1 = COPY %sgpr0 +        UseMI->setDesc(TII->get(AMDGPU::COPY)); +        UseMI->RemoveOperand(2); // Remove exec read (or src1 for readlane) +        return; +      } +    } +      const MCInstrDesc &UseDesc = UseMI->getDesc();      // Don't fold into target independent nodes.  
Target independent opcodes diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td index 202955a9626..2c68fdf2db3 100644 --- a/llvm/lib/Target/AMDGPU/SIInstructions.td +++ b/llvm/lib/Target/AMDGPU/SIInstructions.td @@ -1696,6 +1696,13 @@ def : GCNPat<    (S_SUB_I32 $src0, NegSubInlineConst32:$src1)  >; +// Avoid pointlessly materializing a constant in VGPR. +// FIXME: Should also do this for readlane, but tablegen crashes on +// the ignored src1. +def : GCNPat< +  (int_amdgcn_readfirstlane (i32 imm:$src)), +  (S_MOV_B32 $src) +>;  multiclass BFMPatterns <ValueType vt, InstSI BFM, InstSI MOV> {    def : GCNPat <  | 

