diff options
-rw-r--r-- | llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp | 9 | ||||
-rw-r--r-- | llvm/test/CodeGen/AMDGPU/extract_vector_dynelt.ll | 28 |
2 files changed, 37 insertions, 0 deletions
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp index 88abd84366a..899a7be5d6e 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -1934,6 +1934,15 @@ SDValue DAGTypeLegalizer::SplitVecOp_EXTRACT_VECTOR_ELT(SDNode *N) { // Load back the required element. StackPtr = TLI.getVectorElementPointer(DAG, StackPtr, VecVT, Idx); + + // FIXME: This is to handle i1 vectors with elements promoted to i8. + // i1 vector handling needs general improvement. + if (N->getValueType(0).bitsLT(EltVT)) { + SDValue Load = DAG.getLoad(EltVT, dl, Store, StackPtr, + MachinePointerInfo::getUnknownStack(DAG.getMachineFunction())); + return DAG.getZExtOrTrunc(Load, dl, N->getValueType(0)); + } + return DAG.getExtLoad( ISD::EXTLOAD, dl, N->getValueType(0), Store, StackPtr, MachinePointerInfo::getUnknownStack(DAG.getMachineFunction()), EltVT); diff --git a/llvm/test/CodeGen/AMDGPU/extract_vector_dynelt.ll b/llvm/test/CodeGen/AMDGPU/extract_vector_dynelt.ll new file mode 100644 index 00000000000..b09bcad446b --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/extract_vector_dynelt.ll @@ -0,0 +1,28 @@ +; RUN: llc -march=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN %s + +; GCN-LABEL: {{^}}bit4_extelt: +; GCN-DAG: v_mov_b32_e32 [[ZERO:v[0-9]+]], 0 +; GCN-DAG: v_mov_b32_e32 [[ONE:v[0-9]+]], 1 +; GCN-DAG: buffer_store_byte [[ZERO]], +; GCN-DAG: buffer_store_byte [[ONE]], +; GCN-DAG: buffer_store_byte [[ZERO]], +; GCN-DAG: buffer_store_byte [[ONE]], +; GCN: buffer_load_ubyte [[LOAD:v[0-9]+]], +; GCN: v_and_b32_e32 [[RES:v[0-9]+]], 1, [[LOAD]] +; GCN: flat_store_dword v[{{[0-9:]+}}], [[RES]] +define amdgpu_kernel void @bit4_extelt(i32 addrspace(1)* %out, i32 %sel) { +entry: + %ext = extractelement <4 x i1> <i1 0, i1 1, i1 0, i1 1>, i32 %sel + %zext = zext i1 %ext to i32 + store i32 %zext, i32 addrspace(1)* %out + ret void +} + +; GCN-LABEL: {{^}}bit128_extelt: +define amdgpu_kernel void @bit128_extelt(i32 addrspace(1)* %out, i32 %sel) { +entry: + %ext = extractelement <128 x i1> <i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0>, i32 %sel + %zext = zext i1 %ext to i32 + store i32 %zext, i32 addrspace(1)* %out + ret void +} |