diff options
| -rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp | 7 | ||||
| -rw-r--r-- | llvm/test/CodeGen/AMDGPU/promote-alloca-volatile.ll | 12 | 
2 files changed, 11 insertions, 8 deletions
| diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp b/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp index 36dcc699d4e..e40f3955774 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp @@ -397,14 +397,17 @@ static Value* GEPToVectorIndex(GetElementPtrInst *GEP) {  // instructions.  static bool canVectorizeInst(Instruction *Inst, User *User) {    switch (Inst->getOpcode()) { -  case Instruction::Load: +  case Instruction::Load: { +    LoadInst *LI = cast<LoadInst>(Inst); +    return !LI->isVolatile(); +  }    case Instruction::BitCast:    case Instruction::AddrSpaceCast:      return true;    case Instruction::Store: {      // Must be the stored pointer operand, not a stored value.      StoreInst *SI = cast<StoreInst>(Inst); -    return SI->getPointerOperand() == User; +    return (SI->getPointerOperand() == User) && !SI->isVolatile();    }    default:      return false; diff --git a/llvm/test/CodeGen/AMDGPU/promote-alloca-volatile.ll b/llvm/test/CodeGen/AMDGPU/promote-alloca-volatile.ll index 9c43a6dc60f..d7655993a2d 100644 --- a/llvm/test/CodeGen/AMDGPU/promote-alloca-volatile.ll +++ b/llvm/test/CodeGen/AMDGPU/promote-alloca-volatile.ll @@ -1,26 +1,26 @@  ; RUN: opt -S -mtriple=amdgcn-unknown-amdhsa -amdgpu-promote-alloca < %s | FileCheck %s  ; CHECK-LABEL: @volatile_load( -; CHECK: alloca [5 x i32] +; CHECK: alloca [4 x i32]  ; CHECK: load volatile i32, i32*  define amdgpu_kernel void @volatile_load(i32 addrspace(1)* nocapture %out, i32 addrspace(1)* nocapture %in) {  entry: -  %stack = alloca [5 x i32], align 4 +  %stack = alloca [4 x i32], align 4    %tmp = load i32, i32 addrspace(1)* %in, align 4 -  %arrayidx1 = getelementptr inbounds [5 x i32], [5 x i32]* %stack, i32 0, i32 %tmp +  %arrayidx1 = getelementptr inbounds [4 x i32], [4 x i32]* %stack, i32 0, i32 %tmp    %load = load volatile i32, i32* %arrayidx1    store i32 %load, i32 addrspace(1)* %out   ret void  }  ; CHECK-LABEL: @volatile_store( -; CHECK: alloca [5 x i32] +; CHECK: alloca [4 x i32]  ; CHECK: store volatile i32 %tmp, i32*  define amdgpu_kernel void @volatile_store(i32 addrspace(1)* nocapture %out, i32 addrspace(1)* nocapture %in) {  entry: -  %stack = alloca [5 x i32], align 4 +  %stack = alloca [4 x i32], align 4    %tmp = load i32, i32 addrspace(1)* %in, align 4 -  %arrayidx1 = getelementptr inbounds [5 x i32], [5 x i32]* %stack, i32 0, i32 %tmp +  %arrayidx1 = getelementptr inbounds [4 x i32], [4 x i32]* %stack, i32 0, i32 %tmp    store volatile i32 %tmp, i32* %arrayidx1   ret void  } | 

