summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorMatt Arsenault <Matthew.Arsenault@amd.com>2019-10-01 15:51:37 -0400
committerMatt Arsenault <arsenm2@gmail.com>2019-10-25 13:55:07 -0700
commit1a276d1e8c5da57a0c83d1b1d1a02ec0bcdb77d7 (patch)
tree9d232cbb5f6d86541f1a40299d208b9334d68074
parente8a0a0904b2b144929312ac424626b3e026bf9fb (diff)
downloadbcm5719-llvm-1a276d1e8c5da57a0c83d1b1d1a02ec0bcdb77d7.tar.gz
bcm5719-llvm-1a276d1e8c5da57a0c83d1b1d1a02ec0bcdb77d7.zip
GlobalISel: Implement widenScalar for G_INSERT_VECTOR_ELT
-rw-r--r--llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp25
-rw-r--r--llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-insert-vector-elt.mir48
2 files changed, 73 insertions, 0 deletions
diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
index 21512e54387..68c8304d79f 100644
--- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -1789,10 +1789,35 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) {
if (TypeIdx != 2)
return UnableToLegalize;
Observer.changingInstr(MI);
+ // TODO: Probably should be zext
widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
Observer.changedInstr(MI);
return Legalized;
}
+ case TargetOpcode::G_INSERT_VECTOR_ELT: {
+ if (TypeIdx == 1) {
+ Observer.changingInstr(MI);
+
+ Register VecReg = MI.getOperand(1).getReg();
+ LLT VecTy = MRI.getType(VecReg);
+ LLT WideVecTy = LLT::vector(VecTy.getNumElements(), WideTy);
+
+ widenScalarSrc(MI, WideVecTy, 1, TargetOpcode::G_ANYEXT);
+ widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
+ widenScalarDst(MI, WideVecTy, 0);
+ Observer.changedInstr(MI);
+ return Legalized;
+ }
+
+ if (TypeIdx == 2) {
+ Observer.changingInstr(MI);
+ // TODO: Probably should be zext
+ widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_SEXT);
+ Observer.changedInstr(MI);
+ }
+
+ return Legalized;
+ }
case TargetOpcode::G_FADD:
case TargetOpcode::G_FMUL:
case TargetOpcode::G_FSUB:
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-insert-vector-elt.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-insert-vector-elt.mir
index df1cef761d2..0d35ec8c503 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-insert-vector-elt.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-insert-vector-elt.mir
@@ -115,3 +115,51 @@ body: |
%3:_(<16 x s64>) = G_INSERT_VECTOR_ELT %1, %0, %2
S_ENDPGM 0, implicit %3
...
+
+---
+name: insert_vector_elt_0_v2s32_s8
+
+body: |
+ bb.0:
+ liveins: $vgpr0_vgpr1, $vgpr2
+
+ ; CHECK-LABEL: name: insert_vector_elt_0_v2s32_s8
+ ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1
+ ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2
+ ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C]](s32)
+ ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+ ; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY2]], [[C1]](s32)
+ ; CHECK: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SHL]], [[C1]](s32)
+ ; CHECK: [[IVEC:%[0-9]+]]:_(<2 x s32>) = G_INSERT_VECTOR_ELT [[COPY]], [[COPY1]](s32), [[ASHR]](s32)
+ ; CHECK: $vgpr0_vgpr1 = COPY [[IVEC]](<2 x s32>)
+ %0:_(<2 x s32>) = COPY $vgpr0_vgpr1
+ %1:_(s32) = COPY $vgpr2
+ %2:_(s8) = G_CONSTANT i8 0
+ %3:_(<2 x s32>) = G_INSERT_VECTOR_ELT %0, %1, %2
+ $vgpr0_vgpr1 = COPY %3
+...
+
+---
+name: insert_vector_elt_0_v2i8_i32
+
+body: |
+ bb.0:
+ liveins: $vgpr0
+
+ ; CHECK-LABEL: name: insert_vector_elt_0_v2i8_i32
+ ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; CHECK: [[DEF:%[0-9]+]]:_(<2 x s32>) = G_IMPLICIT_DEF
+ ; CHECK: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY [[DEF]](<2 x s32>)
+ ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+ ; CHECK: [[INSERT:%[0-9]+]]:_(<2 x s32>) = G_INSERT [[COPY1]], [[COPY2]](s32), 0
+ ; CHECK: [[COPY3:%[0-9]+]]:_(<2 x s32>) = COPY [[INSERT]](<2 x s32>)
+ ; CHECK: $vgpr0_vgpr1 = COPY [[COPY3]](<2 x s32>)
+ %0:_(s32) = COPY $vgpr0
+ %1:_(s8) = G_TRUNC %0
+ %2:_(<2 x s8>) = G_IMPLICIT_DEF
+ %3:_(s32) = G_CONSTANT i32 0
+ %4:_(<2 x s8>) = G_INSERT_VECTOR_ELT %2, %1, %3
+ %5:_(<2 x s32>) = G_ANYEXT %4
+ $vgpr0_vgpr1 = COPY %5
+...
OpenPOWER on IntegriCloud