summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorMatt Arsenault <Matthew.Arsenault@amd.com>2019-02-04 22:26:33 +0000
committerMatt Arsenault <Matthew.Arsenault@amd.com>2019-02-04 22:26:33 +0000
commit3d6a49b0b91016653259a7281d6a13abe2d3450e (patch)
tree3bd79f1284bfff39d5a0ea012088b9a77a947053
parentcba0c6d0c9d2c610c566e4b46b8f0315a2cfebbc (diff)
downloadbcm5719-llvm-3d6a49b0b91016653259a7281d6a13abe2d3450e.tar.gz
bcm5719-llvm-3d6a49b0b91016653259a7281d6a13abe2d3450e.zip
GlobalISel: Fix not calling observer when legalizing bitcount ops
This was hiding bugs from never legalizing the source type. llvm-svn: 353102
-rw-r--r--llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp23
-rw-r--r--llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ctlz-zero-undef.mir13
-rw-r--r--llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ctlz.mir13
-rw-r--r--llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ctpop.mir11
-rw-r--r--llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-cttz-zero-undef.mir11
-rw-r--r--llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-cttz.mir13
-rw-r--r--llvm/unittests/CodeGen/GlobalISel/LegalizerHelperTest.cpp65
7 files changed, 118 insertions, 31 deletions
diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
index 28c103a00b9..cd8c8f8fd1d 100644
--- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -1083,23 +1083,27 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) {
case TargetOpcode::G_CTLZ_ZERO_UNDEF:
case TargetOpcode::G_CTPOP: {
if (TypeIdx == 0) {
+ Observer.changingInstr(MI);
widenScalarDst(MI, WideTy, 0);
+ Observer.changedInstr(MI);
return Legalized;
}
+ unsigned SrcReg = MI.getOperand(1).getReg();
+
// First ZEXT the input.
- auto MIBSrc = MIRBuilder.buildZExt(WideTy, MI.getOperand(1).getReg());
- LLT CurTy = MRI.getType(MI.getOperand(0).getReg());
+ auto MIBSrc = MIRBuilder.buildZExt(WideTy, SrcReg);
+ LLT CurTy = MRI.getType(SrcReg);
if (MI.getOpcode() == TargetOpcode::G_CTTZ) {
// The count is the same in the larger type except if the original
// value was zero. This can be handled by setting the bit just off
// the top of the original type.
auto TopBit =
APInt::getOneBitSet(WideTy.getSizeInBits(), CurTy.getSizeInBits());
- MIBSrc = MIRBuilder.buildInstr(
- TargetOpcode::G_OR, {WideTy},
- {MIBSrc, MIRBuilder.buildConstant(WideTy, TopBit.getSExtValue())});
+ MIBSrc = MIRBuilder.buildOr(
+ WideTy, MIBSrc, MIRBuilder.buildConstant(WideTy, TopBit));
}
+
// Perform the operation at the larger size.
auto MIBNewOp = MIRBuilder.buildInstr(MI.getOpcode(), {WideTy}, {MIBSrc});
// This is already the correct result for CTPOP and CTTZs
@@ -1111,12 +1115,9 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) {
TargetOpcode::G_SUB, {WideTy},
{MIBNewOp, MIRBuilder.buildConstant(WideTy, SizeDiff)});
}
- auto &TII = *MI.getMF()->getSubtarget().getInstrInfo();
- // Make the original instruction a trunc now, and update its source.
- Observer.changingInstr(MI);
- MI.setDesc(TII.get(TargetOpcode::G_TRUNC));
- MI.getOperand(1).setReg(MIBNewOp->getOperand(0).getReg());
- Observer.changedInstr(MI);
+
+ MIRBuilder.buildZExtOrTrunc(MI.getOperand(0), MIBNewOp);
+ MI.eraseFromParent();
return Legalized;
}
case TargetOpcode::G_BSWAP: {
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ctlz-zero-undef.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ctlz-zero-undef.mir
index a53a1920eb0..aa01879f7bf 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ctlz-zero-undef.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ctlz-zero-undef.mir
@@ -74,12 +74,17 @@ body: |
liveins: $vgpr0
; CHECK-LABEL: name: ctlz_zero_undef_s16_s16
; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
- ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
- ; CHECK: [[CTLZ_ZERO_UNDEF:%[0-9]+]]:_(s32) = G_CTLZ_ZERO_UNDEF [[TRUNC]](s16)
; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
- ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY [[CTLZ_ZERO_UNDEF]](s32)
+ ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]]
- ; CHECK: $vgpr0 = COPY [[AND]](s32)
+ ; CHECK: [[CTLZ_ZERO_UNDEF:%[0-9]+]]:_(s32) = G_CTLZ_ZERO_UNDEF [[AND]](s32)
+ ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; CHECK: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[CTLZ_ZERO_UNDEF]], [[C1]]
+ ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[SUB]](s32)
+ ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+ ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+ ; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C2]]
+ ; CHECK: $vgpr0 = COPY [[AND1]](s32)
%0:_(s32) = COPY $vgpr0
%1:_(s16) = G_TRUNC %0
%2:_(s16) = G_CTLZ_ZERO_UNDEF %1
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ctlz.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ctlz.mir
index e81da56526f..acdfc3abd57 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ctlz.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ctlz.mir
@@ -74,12 +74,17 @@ body: |
liveins: $vgpr0
; CHECK-LABEL: name: ctlz_s16_s16
; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
- ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
- ; CHECK: [[CTLZ:%[0-9]+]]:_(s32) = G_CTLZ [[TRUNC]](s16)
; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
- ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY [[CTLZ]](s32)
+ ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]]
- ; CHECK: $vgpr0 = COPY [[AND]](s32)
+ ; CHECK: [[CTLZ:%[0-9]+]]:_(s32) = G_CTLZ [[AND]](s32)
+ ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; CHECK: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[CTLZ]], [[C1]]
+ ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[SUB]](s32)
+ ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+ ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+ ; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C2]]
+ ; CHECK: $vgpr0 = COPY [[AND1]](s32)
%0:_(s32) = COPY $vgpr0
%1:_(s16) = G_TRUNC %0
%2:_(s16) = G_CTLZ %1
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ctpop.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ctpop.mir
index 190ad3ab38f..d0a272ea233 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ctpop.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ctpop.mir
@@ -74,12 +74,15 @@ body: |
liveins: $vgpr0
; CHECK-LABEL: name: ctpop_s16_s16
; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
- ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
- ; CHECK: [[CTPOP:%[0-9]+]]:_(s32) = G_CTPOP [[TRUNC]](s16)
; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
- ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY [[CTPOP]](s32)
+ ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]]
- ; CHECK: $vgpr0 = COPY [[AND]](s32)
+ ; CHECK: [[CTPOP:%[0-9]+]]:_(s32) = G_CTPOP [[AND]](s32)
+ ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[CTPOP]](s32)
+ ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+ ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+ ; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]]
+ ; CHECK: $vgpr0 = COPY [[AND1]](s32)
%0:_(s32) = COPY $vgpr0
%1:_(s16) = G_TRUNC %0
%2:_(s16) = G_CTPOP %1
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-cttz-zero-undef.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-cttz-zero-undef.mir
index 91ea7ce7204..cb474c59ee1 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-cttz-zero-undef.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-cttz-zero-undef.mir
@@ -74,12 +74,15 @@ body: |
liveins: $vgpr0
; CHECK-LABEL: name: cttz_zero_undef_s16_s16
; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
- ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
- ; CHECK: [[CTTZ_ZERO_UNDEF:%[0-9]+]]:_(s32) = G_CTTZ_ZERO_UNDEF [[TRUNC]](s16)
; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
- ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY [[CTTZ_ZERO_UNDEF]](s32)
+ ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]]
- ; CHECK: $vgpr0 = COPY [[AND]](s32)
+ ; CHECK: [[CTTZ_ZERO_UNDEF:%[0-9]+]]:_(s32) = G_CTTZ_ZERO_UNDEF [[AND]](s32)
+ ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[CTTZ_ZERO_UNDEF]](s32)
+ ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+ ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+ ; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]]
+ ; CHECK: $vgpr0 = COPY [[AND1]](s32)
%0:_(s32) = COPY $vgpr0
%1:_(s16) = G_TRUNC %0
%2:_(s16) = G_CTTZ_ZERO_UNDEF %1
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-cttz.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-cttz.mir
index 4bfce8cdaf3..66e5951198f 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-cttz.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-cttz.mir
@@ -74,12 +74,17 @@ body: |
liveins: $vgpr0
; CHECK-LABEL: name: cttz_s16_s16
; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
- ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
- ; CHECK: [[CTTZ:%[0-9]+]]:_(s32) = G_CTTZ [[TRUNC]](s16)
; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
- ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY [[CTTZ]](s32)
+ ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]]
- ; CHECK: $vgpr0 = COPY [[AND]](s32)
+ ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65536
+ ; CHECK: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[C1]]
+ ; CHECK: [[CTTZ:%[0-9]+]]:_(s32) = G_CTTZ [[OR]](s32)
+ ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[CTTZ]](s32)
+ ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+ ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+ ; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C2]]
+ ; CHECK: $vgpr0 = COPY [[AND1]](s32)
%0:_(s32) = COPY $vgpr0
%1:_(s16) = G_TRUNC %0
%2:_(s16) = G_CTTZ %1
diff --git a/llvm/unittests/CodeGen/GlobalISel/LegalizerHelperTest.cpp b/llvm/unittests/CodeGen/GlobalISel/LegalizerHelperTest.cpp
index 815f1e3d321..a9c9a938db5 100644
--- a/llvm/unittests/CodeGen/GlobalISel/LegalizerHelperTest.cpp
+++ b/llvm/unittests/CodeGen/GlobalISel/LegalizerHelperTest.cpp
@@ -114,6 +114,71 @@ TEST_F(GISelMITest, LowerBitCountingCTTZ2) {
EXPECT_TRUE(CheckMachineFunction(*MF, CheckStr)) << *MF;
}
+// CTPOP widening.
+TEST_F(GISelMITest, WidenBitCountingCTPOP1) {
+ if (!TM)
+ return;
+
+ // Declare your legalization info
+ DefineLegalizerInfo(A, {
+ getActionDefinitionsBuilder(G_CTPOP).legalFor({{s16, s16}});
+ });
+
+ // Build
+ // Trunc it to s8.
+ LLT s8{LLT::scalar(8)};
+ LLT s16{LLT::scalar(16)};
+ auto MIBTrunc = B.buildTrunc(s8, Copies[0]);
+ auto MIBCTPOP = B.buildInstr(TargetOpcode::G_CTPOP, {s16}, {MIBTrunc});
+ AInfo Info(MF->getSubtarget());
+ DummyGISelObserver Observer;
+ LegalizerHelper Helper(*MF, Info, Observer, B);
+ EXPECT_EQ(LegalizerHelper::LegalizeResult::Legalized,
+ Helper.widenScalar(*MIBCTPOP, 1, s16));
+
+ auto CheckStr = R"(
+ CHECK: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC %0:_(s64)
+ CHECK: [[ZEXT:%[0-9]+]]:_(s16) = G_ZEXT [[TRUNC]]:_(s8)
+ CHECK: [[CTPOP:%[0-9]+]]:_(s16) = G_CTPOP [[ZEXT]]
+ CHECK: [[COPY:%[0-9]+]]:_(s16) = COPY [[CTPOP]]:_(s16)
+ )";
+
+ EXPECT_TRUE(CheckMachineFunction(*MF, CheckStr)) << *MF;
+}
+
+// Test a strange case where the result is wider than the source
+TEST_F(GISelMITest, WidenBitCountingCTPOP2) {
+ if (!TM)
+ return;
+
+ // Declare your legalization info
+ DefineLegalizerInfo(A, {
+ getActionDefinitionsBuilder(G_CTPOP).legalFor({{s32, s16}});
+ });
+
+ // Build
+ // Trunc it to s8.
+ LLT s8{LLT::scalar(8)};
+ LLT s16{LLT::scalar(16)};
+ LLT s32{LLT::scalar(32)};
+ auto MIBTrunc = B.buildTrunc(s8, Copies[0]);
+ auto MIBCTPOP = B.buildInstr(TargetOpcode::G_CTPOP, {s32}, {MIBTrunc});
+ AInfo Info(MF->getSubtarget());
+ DummyGISelObserver Observer;
+ LegalizerHelper Helper(*MF, Info, Observer, B);
+ EXPECT_EQ(LegalizerHelper::LegalizeResult::Legalized,
+ Helper.widenScalar(*MIBCTPOP, 1, s16));
+
+ auto CheckStr = R"(
+ CHECK: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC %0:_(s64)
+ CHECK: [[ZEXT:%[0-9]+]]:_(s16) = G_ZEXT [[TRUNC]]:_(s8)
+ CHECK: [[CTPOP:%[0-9]+]]:_(s16) = G_CTPOP [[ZEXT]]
+ CHECK: [[COPY:%[0-9]+]]:_(s32) = G_ZEXT [[CTPOP]]:_(s16)
+ )";
+
+ EXPECT_TRUE(CheckMachineFunction(*MF, CheckStr)) << *MF;
+}
+
// CTTZ_ZERO_UNDEF expansion in terms of CTTZ
TEST_F(GISelMITest, LowerBitCountingCTTZ3) {
if (!TM)
OpenPOWER on IntegriCloud