diff options
6 files changed, 273 insertions, 1 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp index 0d0496eaa03..dd05534dc3a 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp @@ -307,7 +307,9 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST, .legalFor({{S32, S32}, {S32, S64}}) .clampScalar(0, S32, S32) .clampScalar(1, S32, S64) - .scalarize(0); + .scalarize(0) + .widenScalarToNextPow2(0, 32) + .widenScalarToNextPow2(1, 32); // TODO: Expand for > s32 getActionDefinitionsBuilder(G_BSWAP) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ctlz-zero-undef.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ctlz-zero-undef.mir index ff93788bc86..a1322c950bf 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ctlz-zero-undef.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ctlz-zero-undef.mir @@ -155,3 +155,61 @@ body: | %1:_(<2 x s16>) = G_CTLZ_ZERO_UNDEF %0 $vgpr0 = COPY %1 ... + +--- +name: ctlz_zero_undef_s7_s7 + +body: | + bb.0: + liveins: $vgpr0 + + ; CHECK-LABEL: name: ctlz_zero_undef_s7_s7 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 127 + ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] + ; CHECK: [[CTLZ_ZERO_UNDEF:%[0-9]+]]:_(s32) = G_CTLZ_ZERO_UNDEF [[AND]](s32) + ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 25 + ; CHECK: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[CTLZ_ZERO_UNDEF]], [[C1]] + ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[SUB]](s32) + ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 127 + ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) + ; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C2]] + ; CHECK: $vgpr0 = COPY [[AND1]](s32) + %0:_(s32) = COPY $vgpr0 + %1:_(s7) = G_TRUNC %0 + %2:_(s7) = G_CTLZ_ZERO_UNDEF %1 + %3:_(s32) = G_ZEXT %2 + $vgpr0 = COPY %3 +... + +--- +name: ctlz_zero_undef_s33_s33 + +body: | + bb.0: + liveins: $vgpr0_vgpr1 + + ; CHECK-LABEL: name: ctlz_zero_undef_s33_s33 + ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 + ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 8589934591 + ; CHECK: [[COPY1:%[0-9]+]]:_(s64) = COPY [[COPY]](s64) + ; CHECK: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY1]], [[C]] + ; CHECK: [[CTLZ_ZERO_UNDEF:%[0-9]+]]:_(s32) = G_CTLZ_ZERO_UNDEF [[AND]](s64) + ; CHECK: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[CTLZ_ZERO_UNDEF]](s32) + ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 31 + ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ZEXT]](s64) + ; CHECK: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C1]](s64) + ; CHECK: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV]], [[UV2]] + ; CHECK: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV1]], [[UV3]], [[USUBO1]] + ; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO]](s32), [[USUBE]](s32) + ; CHECK: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4294967295 + ; CHECK: [[COPY2:%[0-9]+]]:_(s64) = COPY [[MV]](s64) + ; CHECK: [[AND1:%[0-9]+]]:_(s64) = G_AND [[COPY2]], [[C2]] + ; CHECK: $vgpr0_vgpr1 = COPY [[AND1]](s64) + %0:_(s64) = COPY $vgpr0_vgpr1 + %1:_(s33) = G_TRUNC %0 + %2:_(s33) = G_CTLZ_ZERO_UNDEF %1 + %3:_(s64) = G_ANYEXT %2 + $vgpr0_vgpr1 = COPY %3 +... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ctlz.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ctlz.mir index 5cbdea9d83d..41a5daa87e0 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ctlz.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ctlz.mir @@ -155,3 +155,61 @@ body: | %1:_(<2 x s16>) = G_CTLZ %0 $vgpr0 = COPY %1 ... + +--- +name: ctlz_s7_s7 + +body: | + bb.0: + liveins: $vgpr0 + + ; CHECK-LABEL: name: ctlz_s7_s7 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 127 + ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] + ; CHECK: [[CTLZ:%[0-9]+]]:_(s32) = G_CTLZ [[AND]](s32) + ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 25 + ; CHECK: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[CTLZ]], [[C1]] + ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[SUB]](s32) + ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 127 + ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) + ; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C2]] + ; CHECK: $vgpr0 = COPY [[AND1]](s32) + %0:_(s32) = COPY $vgpr0 + %1:_(s7) = G_TRUNC %0 + %2:_(s7) = G_CTLZ %1 + %3:_(s32) = G_ZEXT %2 + $vgpr0 = COPY %3 +... + +--- +name: ctlz_s33_s33 + +body: | + bb.0: + liveins: $vgpr0_vgpr1 + + ; CHECK-LABEL: name: ctlz_s33_s33 + ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 + ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 8589934591 + ; CHECK: [[COPY1:%[0-9]+]]:_(s64) = COPY [[COPY]](s64) + ; CHECK: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY1]], [[C]] + ; CHECK: [[CTLZ:%[0-9]+]]:_(s32) = G_CTLZ [[AND]](s64) + ; CHECK: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[CTLZ]](s32) + ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 31 + ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ZEXT]](s64) + ; CHECK: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C1]](s64) + ; CHECK: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV]], [[UV2]] + ; CHECK: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV1]], [[UV3]], [[USUBO1]] + ; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO]](s32), [[USUBE]](s32) + ; CHECK: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4294967295 + ; CHECK: [[COPY2:%[0-9]+]]:_(s64) = COPY [[MV]](s64) + ; CHECK: [[AND1:%[0-9]+]]:_(s64) = G_AND [[COPY2]], [[C2]] + ; CHECK: $vgpr0_vgpr1 = COPY [[AND1]](s64) + %0:_(s64) = COPY $vgpr0_vgpr1 + %1:_(s33) = G_TRUNC %0 + %2:_(s33) = G_CTLZ %1 + %3:_(s64) = G_ANYEXT %2 + $vgpr0_vgpr1 = COPY %3 +... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ctpop.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ctpop.mir index ebb59c1bd0d..71662350767 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ctpop.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ctpop.mir @@ -149,3 +149,53 @@ body: | %1:_(<2 x s16>) = G_CTPOP %0 $vgpr0 = COPY %1 ... + +--- +name: ctpop_s7_s7 + +body: | + bb.0: + liveins: $vgpr0 + + ; CHECK-LABEL: name: ctpop_s7_s7 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 127 + ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] + ; CHECK: [[CTPOP:%[0-9]+]]:_(s32) = G_CTPOP [[AND]](s32) + ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[CTPOP]](s32) + ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 127 + ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) + ; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]] + ; CHECK: $vgpr0 = COPY [[AND1]](s32) + %0:_(s32) = COPY $vgpr0 + %1:_(s7) = G_TRUNC %0 + %2:_(s7) = G_CTPOP %1 + %3:_(s32) = G_ZEXT %2 + $vgpr0 = COPY %3 +... + +--- +name: ctpop_s33_s33 + +body: | + bb.0: + liveins: $vgpr0_vgpr1 + + ; CHECK-LABEL: name: ctpop_s33_s33 + ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 + ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 8589934591 + ; CHECK: [[COPY1:%[0-9]+]]:_(s64) = COPY [[COPY]](s64) + ; CHECK: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY1]], [[C]] + ; CHECK: [[CTPOP:%[0-9]+]]:_(s32) = G_CTPOP [[AND]](s64) + ; CHECK: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[CTPOP]](s32) + ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4294967295 + ; CHECK: [[COPY2:%[0-9]+]]:_(s64) = COPY [[ZEXT]](s64) + ; CHECK: [[AND1:%[0-9]+]]:_(s64) = G_AND [[COPY2]], [[C1]] + ; CHECK: $vgpr0_vgpr1 = COPY [[AND1]](s64) + %0:_(s64) = COPY $vgpr0_vgpr1 + %1:_(s33) = G_TRUNC %0 + %2:_(s33) = G_CTPOP %1 + %3:_(s64) = G_ANYEXT %2 + $vgpr0_vgpr1 = COPY %3 +... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-cttz-zero-undef.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-cttz-zero-undef.mir index ba3516be384..800a64e5cb6 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-cttz-zero-undef.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-cttz-zero-undef.mir @@ -149,3 +149,53 @@ body: | %1:_(<2 x s16>) = G_CTTZ_ZERO_UNDEF %0 $vgpr0 = COPY %1 ... + +--- +name: cttz_zero_undef_s7_s7 + +body: | + bb.0: + liveins: $vgpr0 + + ; CHECK-LABEL: name: cttz_zero_undef_s7_s7 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 127 + ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] + ; CHECK: [[CTTZ_ZERO_UNDEF:%[0-9]+]]:_(s32) = G_CTTZ_ZERO_UNDEF [[AND]](s32) + ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[CTTZ_ZERO_UNDEF]](s32) + ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 127 + ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) + ; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]] + ; CHECK: $vgpr0 = COPY [[AND1]](s32) + %0:_(s32) = COPY $vgpr0 + %1:_(s7) = G_TRUNC %0 + %2:_(s7) = G_CTTZ_ZERO_UNDEF %1 + %3:_(s32) = G_ZEXT %2 + $vgpr0 = COPY %3 +... + +--- +name: cttz_zero_undef_s33_s33 + +body: | + bb.0: + liveins: $vgpr0_vgpr1 + + ; CHECK-LABEL: name: cttz_zero_undef_s33_s33 + ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 + ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 8589934591 + ; CHECK: [[COPY1:%[0-9]+]]:_(s64) = COPY [[COPY]](s64) + ; CHECK: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY1]], [[C]] + ; CHECK: [[CTTZ_ZERO_UNDEF:%[0-9]+]]:_(s32) = G_CTTZ_ZERO_UNDEF [[AND]](s64) + ; CHECK: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[CTTZ_ZERO_UNDEF]](s32) + ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4294967295 + ; CHECK: [[COPY2:%[0-9]+]]:_(s64) = COPY [[ZEXT]](s64) + ; CHECK: [[AND1:%[0-9]+]]:_(s64) = G_AND [[COPY2]], [[C1]] + ; CHECK: $vgpr0_vgpr1 = COPY [[AND1]](s64) + %0:_(s64) = COPY $vgpr0_vgpr1 + %1:_(s33) = G_TRUNC %0 + %2:_(s33) = G_CTTZ_ZERO_UNDEF %1 + %3:_(s64) = G_ANYEXT %2 + $vgpr0_vgpr1 = COPY %3 +... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-cttz.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-cttz.mir index a73f5afcd3d..2e9a40a8893 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-cttz.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-cttz.mir @@ -155,3 +155,57 @@ body: | %1:_(<2 x s16>) = G_CTTZ %0 $vgpr0 = COPY %1 ... + +--- +name: cttz_s7_s7 + +body: | + bb.0: + liveins: $vgpr0 + + ; CHECK-LABEL: name: cttz_s7_s7 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 127 + ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] + ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 128 + ; CHECK: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[C1]] + ; CHECK: [[CTTZ:%[0-9]+]]:_(s32) = G_CTTZ [[OR]](s32) + ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[CTTZ]](s32) + ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 127 + ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) + ; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C2]] + ; CHECK: $vgpr0 = COPY [[AND1]](s32) + %0:_(s32) = COPY $vgpr0 + %1:_(s7) = G_TRUNC %0 + %2:_(s7) = G_CTTZ %1 + %3:_(s32) = G_ZEXT %2 + $vgpr0 = COPY %3 +... + +--- +name: cttz_s33_s33 + +body: | + bb.0: + liveins: $vgpr0_vgpr1 + + ; CHECK-LABEL: name: cttz_s33_s33 + ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 + ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 8589934591 + ; CHECK: [[COPY1:%[0-9]+]]:_(s64) = COPY [[COPY]](s64) + ; CHECK: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY1]], [[C]] + ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8589934592 + ; CHECK: [[OR:%[0-9]+]]:_(s64) = G_OR [[AND]], [[C1]] + ; CHECK: [[CTTZ:%[0-9]+]]:_(s32) = G_CTTZ [[OR]](s64) + ; CHECK: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[CTTZ]](s32) + ; CHECK: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4294967295 + ; CHECK: [[COPY2:%[0-9]+]]:_(s64) = COPY [[ZEXT]](s64) + ; CHECK: [[AND1:%[0-9]+]]:_(s64) = G_AND [[COPY2]], [[C2]] + ; CHECK: $vgpr0_vgpr1 = COPY [[AND1]](s64) + %0:_(s64) = COPY $vgpr0_vgpr1 + %1:_(s33) = G_TRUNC %0 + %2:_(s33) = G_CTTZ %1 + %3:_(s64) = G_ANYEXT %2 + $vgpr0_vgpr1 = COPY %3 +... |