summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Matt Arsenault <Matthew.Arsenault@amd.com> 2019-06-17 23:48:43 +0000
committer Matt Arsenault <Matthew.Arsenault@amd.com> 2019-06-17 23:48:43 +0000
commit 5a321b899e7a10b45c3ee4003fb2a048f35074a9 (patch)
tree ef7f04552525c95db5ad398c5956c71ddb8b833d
parent d57f7cc15e22888773d3ad663160ccc66fc7656b (diff)
download bcm5719-llvm-5a321b899e7a10b45c3ee4003fb2a048f35074a9.tar.gz
bcm5719-llvm-5a321b899e7a10b45c3ee4003fb2a048f35074a9.zip
GlobalISel: Use the original flags when lowering fneg to fsub
This was ignoring the flag on fneg, and using the source instruction's flags. Also fixes tests missing from r358702. Note the expansion itself isn't correct without nnan, but that should be fixed separately. llvm-svn: 363637
-rw-r--r-- llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp | 3
-rw-r--r-- llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-fast-math-flags.ll | 31
-rw-r--r-- llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fsub.mir | 30
-rw-r--r-- llvm/unittests/CodeGen/GlobalISel/LegalizerHelperTest.cpp | 46
4 files changed, 108 insertions, 2 deletions
diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
index bb10291b6e7..cdb139910dc 100644
--- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -1463,9 +1463,8 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
auto Zero = MIRBuilder.buildFConstant(Ty, ZeroForNegation);
unsigned SubByReg = MI.getOperand(1).getReg();
unsigned ZeroReg = Zero->getOperand(0).getReg();
- MachineInstr *SrcMI = MRI.getVRegDef(SubByReg);
MIRBuilder.buildInstr(TargetOpcode::G_FSUB, {Res}, {ZeroReg, SubByReg},
- SrcMI->getFlags());
+ MI.getFlags());
MI.eraseFromParent();
return Legalized;
}
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-fast-math-flags.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-fast-math-flags.ll
new file mode 100644
index 00000000000..dc660bfca29
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-fast-math-flags.ll
@@ -0,0 +1,31 @@
+; RUN: llc -march=amdgcn -mcpu=fiji -O0 -stop-after=irtranslator -global-isel %s -o - | FileCheck %s
+
+; Check flags are preserved for a regular instruction.
+; CHECK-LABEL: name: fadd_nnan
+; CHECK: nnan G_FADD
+define amdgpu_kernel void @fadd_nnan(float %arg0, float %arg1) {
+ %res = fadd nnan float %arg0, %arg1
+ store float %res, float addrspace(1)* undef
+ ret void
+}
+
+; Check flags are preserved for a specially handled intrinsic
+; CHECK-LABEL: name: fma_fast
+; CHECK: nnan ninf nsz arcp contract afn reassoc G_FMA
+define amdgpu_kernel void @fma_fast(float %arg0, float %arg1, float %arg2) {
+ %res = call fast float @llvm.fma.f32(float %arg0, float %arg1, float %arg2)
+ store float %res, float addrspace(1)* undef
+ ret void
+}
+
+; Check flags are preserved for an arbitrary target intrinsic
+; CHECK-LABEL: name: rcp_nsz
+; CHECK: = nsz G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), %8(s32)
+define amdgpu_kernel void @rcp_nsz(float %arg0) {
+ %res = call nsz float @llvm.amdgcn.rcp.f32 (float %arg0)
+ store float %res, float addrspace(1)* undef
+ ret void
+}
+
+declare float @llvm.fma.f32(float, float, float)
+declare float @llvm.amdgcn.rcp.f32(float)
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fsub.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fsub.mir
index bdba3d6926d..4c7902368c2 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fsub.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fsub.mir
@@ -60,6 +60,36 @@ body: |
...
---
+name: test_fsub_s64_fmf
+body: |
+ bb.0:
+ liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
+
+ ; SI-LABEL: name: test_fsub_s64_fmf
+ ; SI: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1
+ ; SI: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3
+ ; SI: [[FNEG:%[0-9]+]]:_(s64) = G_FNEG [[COPY1]]
+ ; SI: %2:_(s64) = nnan nsz G_FADD [[COPY]], [[FNEG]]
+ ; SI: $vgpr0_vgpr1 = COPY %2(s64)
+ ; VI-LABEL: name: test_fsub_s64_fmf
+ ; VI: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1
+ ; VI: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3
+ ; VI: [[FNEG:%[0-9]+]]:_(s64) = G_FNEG [[COPY1]]
+ ; VI: %2:_(s64) = nnan nsz G_FADD [[COPY]], [[FNEG]]
+ ; VI: $vgpr0_vgpr1 = COPY %2(s64)
+ ; GFX9-LABEL: name: test_fsub_s64_fmf
+ ; GFX9: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1
+ ; GFX9: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3
+ ; GFX9: [[FNEG:%[0-9]+]]:_(s64) = G_FNEG [[COPY1]]
+ ; GFX9: %2:_(s64) = nnan nsz G_FADD [[COPY]], [[FNEG]]
+ ; GFX9: $vgpr0_vgpr1 = COPY %2(s64)
+ %0:_(s64) = COPY $vgpr0_vgpr1
+ %1:_(s64) = COPY $vgpr2_vgpr3
+ %2:_(s64) = nnan nsz G_FSUB %0, %1
+ $vgpr0_vgpr1 = COPY %2
+...
+
+---
name: test_fsub_s16
body: |
bb.0:
diff --git a/llvm/unittests/CodeGen/GlobalISel/LegalizerHelperTest.cpp b/llvm/unittests/CodeGen/GlobalISel/LegalizerHelperTest.cpp
index 67b897fb6a3..09082b20bc9 100644
--- a/llvm/unittests/CodeGen/GlobalISel/LegalizerHelperTest.cpp
+++ b/llvm/unittests/CodeGen/GlobalISel/LegalizerHelperTest.cpp
@@ -729,4 +729,50 @@ TEST_F(GISelMITest, FewerElementsPhi) {
EXPECT_TRUE(CheckMachineFunction(*MF, CheckStr)) << *MF;
}
+
+// Lowering G_FNEG to G_FSUB must reuse the G_FNEG's own flags, not its operand's.
+TEST_F(GISelMITest, LowerFNEG) {
+ if (!TM)
+ return;
+
+ // Legalizer rules for this test: only s64 G_FSUB is marked legal.
+ DefineLegalizerInfo(A, {
+ getActionDefinitionsBuilder(G_FSUB).legalFor({s64});
+ });
+
+ // Source instruction carrying its own flag (nsz); it must stay on the G_FADD.
+ auto FAdd =
+ B.buildInstr(TargetOpcode::G_FADD, {LLT::scalar(64)}, {Copies[0], Copies[1]},
+ MachineInstr::MIFlag::FmNsz);
+
+ // G_FNEG of the G_FADD result, flagged arcp: the G_FADD's nsz must not leak in.
+ auto FNeg0 =
+ B.buildInstr(TargetOpcode::G_FNEG, {LLT::scalar(64)}, {FAdd.getReg(0)},
+ {MachineInstr::MIFlag::FmArcp});
+
+ // G_FNEG of a plain copy, flagged ninf: that single flag must survive lowering.
+ auto FNeg1 =
+ B.buildInstr(TargetOpcode::G_FNEG, {LLT::scalar(64)}, {Copies[0]},
+ MachineInstr::MIFlag::FmNoInfs);
+
+ AInfo Info(MF->getSubtarget());
+ DummyGISelObserver Observer;
+ LegalizerHelper Helper(*MF, Info, Observer, B);
+ // Lower both G_FNEGs; each call is expected to report Legalized.
+ EXPECT_EQ(LegalizerHelper::LegalizeResult::Legalized,
+ Helper.lower(*FNeg0, 0, LLT::scalar(64)));
+ EXPECT_EQ(LegalizerHelper::LegalizeResult::Legalized,
+ Helper.lower(*FNeg1, 0, LLT::scalar(64)));
+
+ auto CheckStr = R"(
+ CHECK: [[FADD:%[0-9]+]]:_(s64) = nsz G_FADD %0:_, %1:_
+ CHECK: [[CONST0:%[0-9]+]]:_(s64) = G_FCONSTANT double -0.000000e+00
+ CHECK: [[FSUB0:%[0-9]+]]:_(s64) = arcp G_FSUB [[CONST0]]:_, [[FADD]]:_
+ CHECK: [[CONST1:%[0-9]+]]:_(s64) = G_FCONSTANT double -0.000000e+00
+ CHECK: [[FSUB1:%[0-9]+]]:_(s64) = ninf G_FSUB [[CONST1]]:_, %0:_
+ )";
+
+ // Match the resulting MIR against CheckStr; *MF is streamed into the message.
+ EXPECT_TRUE(CheckMachineFunction(*MF, CheckStr)) << *MF;
+}
} // namespace
OpenPOWER on IntegriCloud