summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp3
-rw-r--r--llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-fast-math-flags.ll31
-rw-r--r--llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fsub.mir30
-rw-r--r--llvm/unittests/CodeGen/GlobalISel/LegalizerHelperTest.cpp46
4 files changed, 108 insertions, 2 deletions
diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
index bb10291b6e7..cdb139910dc 100644
--- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -1463,9 +1463,8 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
auto Zero = MIRBuilder.buildFConstant(Ty, ZeroForNegation);
unsigned SubByReg = MI.getOperand(1).getReg();
unsigned ZeroReg = Zero->getOperand(0).getReg();
- MachineInstr *SrcMI = MRI.getVRegDef(SubByReg);
MIRBuilder.buildInstr(TargetOpcode::G_FSUB, {Res}, {ZeroReg, SubByReg},
- SrcMI->getFlags());
+ MI.getFlags());
MI.eraseFromParent();
return Legalized;
}
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-fast-math-flags.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-fast-math-flags.ll
new file mode 100644
index 00000000000..dc660bfca29
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-fast-math-flags.ll
@@ -0,0 +1,31 @@
+; RUN: llc -march=amdgcn -mcpu=fiji -O0 -stop-after=irtranslator -global-isel %s -o - | FileCheck %s
+
+; Check flags are preserved for a regular instruction.
+; CHECK-LABEL: name: fadd_nnan
+; CHECK: nnan G_FADD
+define amdgpu_kernel void @fadd_nnan(float %arg0, float %arg1) {
+ %res = fadd nnan float %arg0, %arg1
+ store float %res, float addrspace(1)* undef
+ ret void
+}
+
+; Check flags are preserved for a specially handled intrinsic
+; CHECK-LABEL: name: fma_fast
+; CHECK: nnan ninf nsz arcp contract afn reassoc G_FMA
+define amdgpu_kernel void @fma_fast(float %arg0, float %arg1, float %arg2) {
+ %res = call fast float @llvm.fma.f32(float %arg0, float %arg1, float %arg2)
+ store float %res, float addrspace(1)* undef
+ ret void
+}
+
+; Check flags are preserved for an arbitrary target intrinsic
+; CHECK-LABEL: name: rcp_nsz
+; CHECK: = nsz G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), %8(s32)
+define amdgpu_kernel void @rcp_nsz(float %arg0) {
+ %res = call nsz float @llvm.amdgcn.rcp.f32 (float %arg0)
+ store float %res, float addrspace(1)* undef
+ ret void
+}
+
+declare float @llvm.fma.f32(float, float, float)
+declare float @llvm.amdgcn.rcp.f32(float)
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fsub.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fsub.mir
index bdba3d6926d..4c7902368c2 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fsub.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fsub.mir
@@ -60,6 +60,36 @@ body: |
...
---
+name: test_fsub_s64_fmf
+body: |
+ bb.0:
+ liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
+
+ ; SI-LABEL: name: test_fsub_s64_fmf
+ ; SI: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1
+ ; SI: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3
+ ; SI: [[FNEG:%[0-9]+]]:_(s64) = G_FNEG [[COPY1]]
+ ; SI: %2:_(s64) = nnan nsz G_FADD [[COPY]], [[FNEG]]
+ ; SI: $vgpr0_vgpr1 = COPY %2(s64)
+ ; VI-LABEL: name: test_fsub_s64_fmf
+ ; VI: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1
+ ; VI: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3
+ ; VI: [[FNEG:%[0-9]+]]:_(s64) = G_FNEG [[COPY1]]
+ ; VI: %2:_(s64) = nnan nsz G_FADD [[COPY]], [[FNEG]]
+ ; VI: $vgpr0_vgpr1 = COPY %2(s64)
+ ; GFX9-LABEL: name: test_fsub_s64_fmf
+ ; GFX9: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1
+ ; GFX9: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3
+ ; GFX9: [[FNEG:%[0-9]+]]:_(s64) = G_FNEG [[COPY1]]
+ ; GFX9: %2:_(s64) = nnan nsz G_FADD [[COPY]], [[FNEG]]
+ ; GFX9: $vgpr0_vgpr1 = COPY %2(s64)
+ %0:_(s64) = COPY $vgpr0_vgpr1
+ %1:_(s64) = COPY $vgpr2_vgpr3
+ %2:_(s64) = nnan nsz G_FSUB %0, %1
+ $vgpr0_vgpr1 = COPY %2
+...
+
+---
name: test_fsub_s16
body: |
bb.0:
diff --git a/llvm/unittests/CodeGen/GlobalISel/LegalizerHelperTest.cpp b/llvm/unittests/CodeGen/GlobalISel/LegalizerHelperTest.cpp
index 67b897fb6a3..09082b20bc9 100644
--- a/llvm/unittests/CodeGen/GlobalISel/LegalizerHelperTest.cpp
+++ b/llvm/unittests/CodeGen/GlobalISel/LegalizerHelperTest.cpp
@@ -729,4 +729,50 @@ TEST_F(GISelMITest, FewerElementsPhi) {
EXPECT_TRUE(CheckMachineFunction(*MF, CheckStr)) << *MF;
}
+
+// FNEG expansion in terms of FSUB
+TEST_F(GISelMITest, LowerFNEG) {
+ if (!TM)
+ return;
+
+ // Declare the legalization info: G_FSUB is legal for s64.
+ DefineLegalizerInfo(A, {
+ getActionDefinitionsBuilder(G_FSUB).legalFor({s64});
+ });
+
+ // Build a G_FADD carrying the nsz flag; its result feeds the first G_FNEG.
+ auto FAdd =
+ B.buildInstr(TargetOpcode::G_FADD, {LLT::scalar(64)}, {Copies[0], Copies[1]},
+ MachineInstr::MIFlag::FmNsz);
+
+ // G_FNEG flagged arcp: the source G_FADD's nsz must not propagate to the G_FSUB.
+ auto FNeg0 =
+ B.buildInstr(TargetOpcode::G_FNEG, {LLT::scalar(64)}, {FAdd.getReg(0)},
+ {MachineInstr::MIFlag::FmArcp});
+
+ // G_FNEG flagged ninf: that single flag must be preserved on the G_FSUB.
+ auto FNeg1 =
+ B.buildInstr(TargetOpcode::G_FNEG, {LLT::scalar(64)}, {Copies[0]},
+ MachineInstr::MIFlag::FmNoInfs);
+
+ AInfo Info(MF->getSubtarget());
+ DummyGISelObserver Observer;
+ LegalizerHelper Helper(*MF, Info, Observer, B);
+ // Lower both G_FNEG instructions; each call is expected to report Legalized.
+ EXPECT_EQ(LegalizerHelper::LegalizeResult::Legalized,
+ Helper.lower(*FNeg0, 0, LLT::scalar(64)));
+ EXPECT_EQ(LegalizerHelper::LegalizeResult::Legalized,
+ Helper.lower(*FNeg1, 0, LLT::scalar(64)));
+
+ auto CheckStr = R"(
+ CHECK: [[FADD:%[0-9]+]]:_(s64) = nsz G_FADD %0:_, %1:_
+ CHECK: [[CONST0:%[0-9]+]]:_(s64) = G_FCONSTANT double -0.000000e+00
+ CHECK: [[FSUB0:%[0-9]+]]:_(s64) = arcp G_FSUB [[CONST0]]:_, [[FADD]]:_
+ CHECK: [[CONST1:%[0-9]+]]:_(s64) = G_FCONSTANT double -0.000000e+00
+ CHECK: [[FSUB1:%[0-9]+]]:_(s64) = ninf G_FSUB [[CONST1]]:_, %0:_
+ )";
+
+ // Check the resulting machine function against the expected pattern above.
+ EXPECT_TRUE(CheckMachineFunction(*MF, CheckStr)) << *MF;
+}
} // namespace
OpenPOWER on IntegriCloud