diff options
author | Matt Arsenault <Matthew.Arsenault@amd.com> | 2019-10-01 15:51:37 -0400 |
---|---|---|
committer | Matt Arsenault <arsenm2@gmail.com> | 2019-10-25 13:55:07 -0700 |
commit | 1a276d1e8c5da57a0c83d1b1d1a02ec0bcdb77d7 (patch) | |
tree | 9d232cbb5f6d86541f1a40299d208b9334d68074 | |
parent | e8a0a0904b2b144929312ac424626b3e026bf9fb (diff) | |
download | bcm5719-llvm-1a276d1e8c5da57a0c83d1b1d1a02ec0bcdb77d7.tar.gz bcm5719-llvm-1a276d1e8c5da57a0c83d1b1d1a02ec0bcdb77d7.zip |
GlobalISel: Implement widenScalar for G_INSERT_VECTOR_ELT
-rw-r--r-- | llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp | 25 | ||||
-rw-r--r-- | llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-insert-vector-elt.mir | 48 |
2 files changed, 73 insertions, 0 deletions
diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp index 21512e54387..68c8304d79f 100644 --- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp +++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp @@ -1789,10 +1789,35 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) { if (TypeIdx != 2) return UnableToLegalize; Observer.changingInstr(MI); + // TODO: Probably should be zext widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT); Observer.changedInstr(MI); return Legalized; } + case TargetOpcode::G_INSERT_VECTOR_ELT: { + if (TypeIdx == 1) { + Observer.changingInstr(MI); + + Register VecReg = MI.getOperand(1).getReg(); + LLT VecTy = MRI.getType(VecReg); + LLT WideVecTy = LLT::vector(VecTy.getNumElements(), WideTy); + + widenScalarSrc(MI, WideVecTy, 1, TargetOpcode::G_ANYEXT); + widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT); + widenScalarDst(MI, WideVecTy, 0); + Observer.changedInstr(MI); + return Legalized; + } + + if (TypeIdx == 2) { + Observer.changingInstr(MI); + // TODO: Probably should be zext + widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_SEXT); + Observer.changedInstr(MI); + } + + return Legalized; + } case TargetOpcode::G_FADD: case TargetOpcode::G_FMUL: case TargetOpcode::G_FSUB: diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-insert-vector-elt.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-insert-vector-elt.mir index df1cef761d2..0d35ec8c503 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-insert-vector-elt.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-insert-vector-elt.mir @@ -115,3 +115,51 @@ body: | %3:_(<16 x s64>) = G_INSERT_VECTOR_ELT %1, %0, %2 S_ENDPGM 0, implicit %3 ... + +--- +name: insert_vector_elt_0_v2s32_s8 + +body: | + bb.0: + liveins: $vgpr0_vgpr1, $vgpr2 + + ; CHECK-LABEL: name: insert_vector_elt_0_v2s32_s8 + ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 + ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY2]], [[C1]](s32) + ; CHECK: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SHL]], [[C1]](s32) + ; CHECK: [[IVEC:%[0-9]+]]:_(<2 x s32>) = G_INSERT_VECTOR_ELT [[COPY]], [[COPY1]](s32), [[ASHR]](s32) + ; CHECK: $vgpr0_vgpr1 = COPY [[IVEC]](<2 x s32>) + %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 + %1:_(s32) = COPY $vgpr2 + %2:_(s8) = G_CONSTANT i8 0 + %3:_(<2 x s32>) = G_INSERT_VECTOR_ELT %0, %1, %2 + $vgpr0_vgpr1 = COPY %3 +... + +--- +name: insert_vector_elt_0_v2i8_i32 + +body: | + bb.0: + liveins: $vgpr0 + + ; CHECK-LABEL: name: insert_vector_elt_0_v2i8_i32 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK: [[DEF:%[0-9]+]]:_(<2 x s32>) = G_IMPLICIT_DEF + ; CHECK: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY [[DEF]](<2 x s32>) + ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; CHECK: [[INSERT:%[0-9]+]]:_(<2 x s32>) = G_INSERT [[COPY1]], [[COPY2]](s32), 0 + ; CHECK: [[COPY3:%[0-9]+]]:_(<2 x s32>) = COPY [[INSERT]](<2 x s32>) + ; CHECK: $vgpr0_vgpr1 = COPY [[COPY3]](<2 x s32>) + %0:_(s32) = COPY $vgpr0 + %1:_(s8) = G_TRUNC %0 + %2:_(<2 x s8>) = G_IMPLICIT_DEF + %3:_(s32) = G_CONSTANT i32 0 + %4:_(<2 x s8>) = G_INSERT_VECTOR_ELT %2, %1, %3 + %5:_(<2 x s32>) = G_ANYEXT %4 + $vgpr0_vgpr1 = COPY %5 +... |