summaryrefslogtreecommitdiffstats
path: root/llvm
diff options
context:
space:
mode:
authorMatt Arsenault <Matthew.Arsenault@amd.com>2019-02-20 16:42:52 +0000
committerMatt Arsenault <Matthew.Arsenault@amd.com>2019-02-20 16:42:52 +0000
commit75e30c4d5d5b83dc3fec0dd976085ec94a3d5005 (patch)
tree0152859865fe0f948b05c60ec72017b48f565304 /llvm
parent8061acd501f1cb6c00a886f4ee5cb9adc6cda39a (diff)
downloadbcm5719-llvm-75e30c4d5d5b83dc3fec0dd976085ec94a3d5005.tar.gz
bcm5719-llvm-75e30c4d5d5b83dc3fec0dd976085ec94a3d5005.zip
GlobalISel: Fix fewerElementsVector for ctlz with different result type
Also complete the set of related operations. llvm-svn: 354480
Diffstat (limited to 'llvm')
-rw-r--r--llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp6
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp4
-rw-r--r--llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ctlz-zero-undef.mir64
-rw-r--r--llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ctlz.mir64
-rw-r--r--llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ctpop.mir60
-rw-r--r--llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-cttz-zero-undef.mir60
-rw-r--r--llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-cttz.mir64
7 files changed, 319 insertions, 3 deletions
diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
index 82ba0b8d09c..7abce5a3fa8 100644
--- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -2129,11 +2129,15 @@ LegalizerHelper::fewerElementsVector(MachineInstr &MI, unsigned TypeIdx,
case G_FSIN:
case G_FSQRT:
case G_BSWAP:
- case G_CTLZ:
return fewerElementsVectorBasic(MI, TypeIdx, NarrowTy);
case G_SHL:
case G_LSHR:
case G_ASHR:
+ case G_CTLZ:
+ case G_CTLZ_ZERO_UNDEF:
+ case G_CTTZ:
+ case G_CTTZ_ZERO_UNDEF:
+ case G_CTPOP:
return fewerElementsVectorMultiEltType(MI, TypeIdx, NarrowTy);
case G_ZEXT:
case G_SEXT:
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
index 300f5c18870..0d0496eaa03 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
@@ -306,8 +306,8 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST,
G_CTPOP})
.legalFor({{S32, S32}, {S32, S64}})
.clampScalar(0, S32, S32)
- .clampScalar(1, S32, S64);
- // TODO: Scalarize
+ .clampScalar(1, S32, S64)
+ .scalarize(0);
// TODO: Expand for > s32
getActionDefinitionsBuilder(G_BSWAP)
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ctlz-zero-undef.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ctlz-zero-undef.mir
index aa01879f7bf..ff93788bc86 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ctlz-zero-undef.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ctlz-zero-undef.mir
@@ -91,3 +91,67 @@ body: |
%3:_(s32) = G_ZEXT %2
$vgpr0 = COPY %3
...
+
+---
+name: ctlz_zero_undef_v2s32_v2s32
+
+body: |
+ bb.0:
+ liveins: $vgpr0_vgpr1
+ ; CHECK-LABEL: name: ctlz_zero_undef_v2s32_v2s32
+ ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1
+ ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>)
+ ; CHECK: [[CTLZ_ZERO_UNDEF:%[0-9]+]]:_(s32) = G_CTLZ_ZERO_UNDEF [[UV]](s32)
+ ; CHECK: [[CTLZ_ZERO_UNDEF1:%[0-9]+]]:_(s32) = G_CTLZ_ZERO_UNDEF [[UV1]](s32)
+ ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[CTLZ_ZERO_UNDEF]](s32), [[CTLZ_ZERO_UNDEF1]](s32)
+ ; CHECK: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
+ %0:_(<2 x s32>) = COPY $vgpr0_vgpr1
+ %1:_(<2 x s32>) = G_CTLZ_ZERO_UNDEF %0
+ $vgpr0_vgpr1 = COPY %1
+...
+
+---
+name: ctlz_zero_undef_v2s32_v2s64
+
+body: |
+ bb.0:
+ liveins: $vgpr0_vgpr1_vgpr2_vgpr3
+ ; CHECK-LABEL: name: ctlz_zero_undef_v2s32_v2s64
+ ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
+ ; CHECK: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>)
+ ; CHECK: [[CTLZ_ZERO_UNDEF:%[0-9]+]]:_(s32) = G_CTLZ_ZERO_UNDEF [[UV]](s64)
+ ; CHECK: [[CTLZ_ZERO_UNDEF1:%[0-9]+]]:_(s32) = G_CTLZ_ZERO_UNDEF [[UV1]](s64)
+ ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[CTLZ_ZERO_UNDEF]](s32), [[CTLZ_ZERO_UNDEF1]](s32)
+ ; CHECK: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
+ %0:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
+ %1:_(<2 x s32>) = G_CTLZ_ZERO_UNDEF %0
+ $vgpr0_vgpr1 = COPY %1
+...
+
+---
+name: ctlz_zero_undef_v2s16_v2s16
+
+body: |
+ bb.0:
+ liveins: $vgpr0
+ ; CHECK-LABEL: name: ctlz_zero_undef_v2s16_v2s16
+ ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
+ ; CHECK: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY]](<2 x s16>)
+ ; CHECK: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UV]](s16)
+ ; CHECK: [[CTLZ_ZERO_UNDEF:%[0-9]+]]:_(s32) = G_CTLZ_ZERO_UNDEF [[ZEXT]](s32)
+ ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; CHECK: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[CTLZ_ZERO_UNDEF]], [[C]]
+ ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY [[SUB]](s32)
+ ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
+ ; CHECK: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UV1]](s16)
+ ; CHECK: [[CTLZ_ZERO_UNDEF1:%[0-9]+]]:_(s32) = G_CTLZ_ZERO_UNDEF [[ZEXT1]](s32)
+ ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; CHECK: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[CTLZ_ZERO_UNDEF1]], [[C1]]
+ ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[SUB1]](s32)
+ ; CHECK: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32)
+ ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
+ ; CHECK: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>)
+ %0:_(<2 x s16>) = COPY $vgpr0
+ %1:_(<2 x s16>) = G_CTLZ_ZERO_UNDEF %0
+ $vgpr0 = COPY %1
+...
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ctlz.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ctlz.mir
index acdfc3abd57..5cbdea9d83d 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ctlz.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ctlz.mir
@@ -91,3 +91,67 @@ body: |
%3:_(s32) = G_ZEXT %2
$vgpr0 = COPY %3
...
+
+---
+name: ctlz_v2s32_v2s32
+
+body: |
+ bb.0:
+ liveins: $vgpr0_vgpr1
+ ; CHECK-LABEL: name: ctlz_v2s32_v2s32
+ ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1
+ ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>)
+ ; CHECK: [[CTLZ:%[0-9]+]]:_(s32) = G_CTLZ [[UV]](s32)
+ ; CHECK: [[CTLZ1:%[0-9]+]]:_(s32) = G_CTLZ [[UV1]](s32)
+ ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[CTLZ]](s32), [[CTLZ1]](s32)
+ ; CHECK: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
+ %0:_(<2 x s32>) = COPY $vgpr0_vgpr1
+ %1:_(<2 x s32>) = G_CTLZ %0
+ $vgpr0_vgpr1 = COPY %1
+...
+
+---
+name: ctlz_v2s32_v2s64
+
+body: |
+ bb.0:
+ liveins: $vgpr0_vgpr1_vgpr2_vgpr3
+ ; CHECK-LABEL: name: ctlz_v2s32_v2s64
+ ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
+ ; CHECK: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>)
+ ; CHECK: [[CTLZ:%[0-9]+]]:_(s32) = G_CTLZ [[UV]](s64)
+ ; CHECK: [[CTLZ1:%[0-9]+]]:_(s32) = G_CTLZ [[UV1]](s64)
+ ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[CTLZ]](s32), [[CTLZ1]](s32)
+ ; CHECK: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
+ %0:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
+ %1:_(<2 x s32>) = G_CTLZ %0
+ $vgpr0_vgpr1 = COPY %1
+...
+
+---
+name: ctlz_v2s16_v2s16
+
+body: |
+ bb.0:
+ liveins: $vgpr0
+ ; CHECK-LABEL: name: ctlz_v2s16_v2s16
+ ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
+ ; CHECK: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY]](<2 x s16>)
+ ; CHECK: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UV]](s16)
+ ; CHECK: [[CTLZ:%[0-9]+]]:_(s32) = G_CTLZ [[ZEXT]](s32)
+ ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; CHECK: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[CTLZ]], [[C]]
+ ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY [[SUB]](s32)
+ ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
+ ; CHECK: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UV1]](s16)
+ ; CHECK: [[CTLZ1:%[0-9]+]]:_(s32) = G_CTLZ [[ZEXT1]](s32)
+ ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; CHECK: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[CTLZ1]], [[C1]]
+ ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[SUB1]](s32)
+ ; CHECK: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32)
+ ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
+ ; CHECK: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>)
+ %0:_(<2 x s16>) = COPY $vgpr0
+ %1:_(<2 x s16>) = G_CTLZ %0
+ $vgpr0 = COPY %1
+...
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ctpop.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ctpop.mir
index d0a272ea233..ebb59c1bd0d 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ctpop.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ctpop.mir
@@ -89,3 +89,63 @@ body: |
%3:_(s32) = G_ZEXT %2
$vgpr0 = COPY %3
...
+
+---
+name: ctpop_v2s32_v2s32
+
+body: |
+ bb.0:
+ liveins: $vgpr0_vgpr1
+ ; CHECK-LABEL: name: ctpop_v2s32_v2s32
+ ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1
+ ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>)
+ ; CHECK: [[CTPOP:%[0-9]+]]:_(s32) = G_CTPOP [[UV]](s32)
+ ; CHECK: [[CTPOP1:%[0-9]+]]:_(s32) = G_CTPOP [[UV1]](s32)
+ ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[CTPOP]](s32), [[CTPOP1]](s32)
+ ; CHECK: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
+ %0:_(<2 x s32>) = COPY $vgpr0_vgpr1
+ %1:_(<2 x s32>) = G_CTPOP %0
+ $vgpr0_vgpr1 = COPY %1
+...
+
+---
+name: ctpop_v2s32_v2s64
+
+body: |
+ bb.0:
+ liveins: $vgpr0_vgpr1_vgpr2_vgpr3
+ ; CHECK-LABEL: name: ctpop_v2s32_v2s64
+ ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
+ ; CHECK: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>)
+ ; CHECK: [[CTPOP:%[0-9]+]]:_(s32) = G_CTPOP [[UV]](s64)
+ ; CHECK: [[CTPOP1:%[0-9]+]]:_(s32) = G_CTPOP [[UV1]](s64)
+ ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[CTPOP]](s32), [[CTPOP1]](s32)
+ ; CHECK: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
+ %0:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
+ %1:_(<2 x s32>) = G_CTPOP %0
+ $vgpr0_vgpr1 = COPY %1
+...
+
+---
+name: ctpop_v2s16_v2s16
+
+body: |
+ bb.0:
+ liveins: $vgpr0
+ ; CHECK-LABEL: name: ctpop_v2s16_v2s16
+ ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
+ ; CHECK: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY]](<2 x s16>)
+ ; CHECK: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UV]](s16)
+ ; CHECK: [[CTPOP:%[0-9]+]]:_(s32) = G_CTPOP [[ZEXT]](s32)
+ ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY [[CTPOP]](s32)
+ ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
+ ; CHECK: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UV1]](s16)
+ ; CHECK: [[CTPOP1:%[0-9]+]]:_(s32) = G_CTPOP [[ZEXT1]](s32)
+ ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[CTPOP1]](s32)
+ ; CHECK: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32)
+ ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
+ ; CHECK: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>)
+ %0:_(<2 x s16>) = COPY $vgpr0
+ %1:_(<2 x s16>) = G_CTPOP %0
+ $vgpr0 = COPY %1
+...
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-cttz-zero-undef.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-cttz-zero-undef.mir
index cb474c59ee1..ba3516be384 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-cttz-zero-undef.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-cttz-zero-undef.mir
@@ -89,3 +89,63 @@ body: |
%3:_(s32) = G_ZEXT %2
$vgpr0 = COPY %3
...
+
+---
+name: cttz_zero_undef_v2s32_v2s32
+
+body: |
+ bb.0:
+ liveins: $vgpr0_vgpr1
+ ; CHECK-LABEL: name: cttz_zero_undef_v2s32_v2s32
+ ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1
+ ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>)
+ ; CHECK: [[CTTZ_ZERO_UNDEF:%[0-9]+]]:_(s32) = G_CTTZ_ZERO_UNDEF [[UV]](s32)
+ ; CHECK: [[CTTZ_ZERO_UNDEF1:%[0-9]+]]:_(s32) = G_CTTZ_ZERO_UNDEF [[UV1]](s32)
+ ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[CTTZ_ZERO_UNDEF]](s32), [[CTTZ_ZERO_UNDEF1]](s32)
+ ; CHECK: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
+ %0:_(<2 x s32>) = COPY $vgpr0_vgpr1
+ %1:_(<2 x s32>) = G_CTTZ_ZERO_UNDEF %0
+ $vgpr0_vgpr1 = COPY %1
+...
+
+---
+name: cttz_zero_undef_v2s32_v2s64
+
+body: |
+ bb.0:
+ liveins: $vgpr0_vgpr1_vgpr2_vgpr3
+ ; CHECK-LABEL: name: cttz_zero_undef_v2s32_v2s64
+ ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
+ ; CHECK: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>)
+ ; CHECK: [[CTTZ_ZERO_UNDEF:%[0-9]+]]:_(s32) = G_CTTZ_ZERO_UNDEF [[UV]](s64)
+ ; CHECK: [[CTTZ_ZERO_UNDEF1:%[0-9]+]]:_(s32) = G_CTTZ_ZERO_UNDEF [[UV1]](s64)
+ ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[CTTZ_ZERO_UNDEF]](s32), [[CTTZ_ZERO_UNDEF1]](s32)
+ ; CHECK: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
+ %0:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
+ %1:_(<2 x s32>) = G_CTTZ_ZERO_UNDEF %0
+ $vgpr0_vgpr1 = COPY %1
+...
+
+---
+name: cttz_zero_undef_v2s16_v2s16
+
+body: |
+ bb.0:
+ liveins: $vgpr0
+ ; CHECK-LABEL: name: cttz_zero_undef_v2s16_v2s16
+ ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
+ ; CHECK: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY]](<2 x s16>)
+ ; CHECK: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UV]](s16)
+ ; CHECK: [[CTTZ_ZERO_UNDEF:%[0-9]+]]:_(s32) = G_CTTZ_ZERO_UNDEF [[ZEXT]](s32)
+ ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY [[CTTZ_ZERO_UNDEF]](s32)
+ ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
+ ; CHECK: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UV1]](s16)
+ ; CHECK: [[CTTZ_ZERO_UNDEF1:%[0-9]+]]:_(s32) = G_CTTZ_ZERO_UNDEF [[ZEXT1]](s32)
+ ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[CTTZ_ZERO_UNDEF1]](s32)
+ ; CHECK: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32)
+ ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
+ ; CHECK: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>)
+ %0:_(<2 x s16>) = COPY $vgpr0
+ %1:_(<2 x s16>) = G_CTTZ_ZERO_UNDEF %0
+ $vgpr0 = COPY %1
+...
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-cttz.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-cttz.mir
index 66e5951198f..a73f5afcd3d 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-cttz.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-cttz.mir
@@ -91,3 +91,67 @@ body: |
%3:_(s32) = G_ZEXT %2
$vgpr0 = COPY %3
...
+
+---
+name: cttz_v2s32_v2s32
+
+body: |
+ bb.0:
+ liveins: $vgpr0_vgpr1
+ ; CHECK-LABEL: name: cttz_v2s32_v2s32
+ ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1
+ ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>)
+ ; CHECK: [[CTTZ:%[0-9]+]]:_(s32) = G_CTTZ [[UV]](s32)
+ ; CHECK: [[CTTZ1:%[0-9]+]]:_(s32) = G_CTTZ [[UV1]](s32)
+ ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[CTTZ]](s32), [[CTTZ1]](s32)
+ ; CHECK: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
+ %0:_(<2 x s32>) = COPY $vgpr0_vgpr1
+ %1:_(<2 x s32>) = G_CTTZ %0
+ $vgpr0_vgpr1 = COPY %1
+...
+
+---
+name: cttz_v2s32_v2s64
+
+body: |
+ bb.0:
+ liveins: $vgpr0_vgpr1_vgpr2_vgpr3
+ ; CHECK-LABEL: name: cttz_v2s32_v2s64
+ ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
+ ; CHECK: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>)
+ ; CHECK: [[CTTZ:%[0-9]+]]:_(s32) = G_CTTZ [[UV]](s64)
+ ; CHECK: [[CTTZ1:%[0-9]+]]:_(s32) = G_CTTZ [[UV1]](s64)
+ ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[CTTZ]](s32), [[CTTZ1]](s32)
+ ; CHECK: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
+ %0:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
+ %1:_(<2 x s32>) = G_CTTZ %0
+ $vgpr0_vgpr1 = COPY %1
+...
+
+---
+name: cttz_v2s16_v2s16
+
+body: |
+ bb.0:
+ liveins: $vgpr0
+ ; CHECK-LABEL: name: cttz_v2s16_v2s16
+ ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
+ ; CHECK: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY]](<2 x s16>)
+ ; CHECK: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UV]](s16)
+ ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65536
+ ; CHECK: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[C]]
+ ; CHECK: [[CTTZ:%[0-9]+]]:_(s32) = G_CTTZ [[OR]](s32)
+ ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY [[CTTZ]](s32)
+ ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
+ ; CHECK: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UV1]](s16)
+ ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65536
+ ; CHECK: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT1]], [[C1]]
+ ; CHECK: [[CTTZ1:%[0-9]+]]:_(s32) = G_CTTZ [[OR1]](s32)
+ ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[CTTZ1]](s32)
+ ; CHECK: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32)
+ ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
+ ; CHECK: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>)
+ %0:_(<2 x s16>) = COPY $vgpr0
+ %1:_(<2 x s16>) = G_CTTZ %0
+ $vgpr0 = COPY %1
+...
OpenPOWER on IntegriCloud