summaryrefslogtreecommitdiffstats
path: root/llvm/lib
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib')
-rw-r--r--llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp50
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp5
2 files changed, 52 insertions, 3 deletions
diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
index daea9a9b5fe..ddcdd9debdd 100644
--- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -1694,6 +1694,8 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
case G_UMIN:
case G_UMAX:
return lowerMinMax(MI, TypeIdx, Ty);
+ case G_FCOPYSIGN:
+ return lowerFCopySign(MI, TypeIdx, Ty);
}
}
@@ -3223,3 +3225,51 @@ LegalizerHelper::lowerMinMax(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
MI.eraseFromParent();
return Legalized;
}
+
+LegalizerHelper::LegalizeResult
+LegalizerHelper::lowerFCopySign(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
+ Register Dst = MI.getOperand(0).getReg();
+ Register Src0 = MI.getOperand(1).getReg();
+ Register Src1 = MI.getOperand(2).getReg();
+
+ const LLT Src0Ty = MRI.getType(Src0);
+ const LLT Src1Ty = MRI.getType(Src1);
+
+ const int Src0Size = Src0Ty.getScalarSizeInBits();
+ const int Src1Size = Src1Ty.getScalarSizeInBits();
+
+ auto SignBitMask = MIRBuilder.buildConstant(
+ Src0Ty, APInt::getSignMask(Src0Size));
+
+ auto NotSignBitMask = MIRBuilder.buildConstant(
+ Src0Ty, APInt::getLowBitsSet(Src0Size, Src0Size - 1));
+
+ auto And0 = MIRBuilder.buildAnd(Src0Ty, Src0, NotSignBitMask);
+ MachineInstr *Or;
+
+ if (Src0Ty == Src1Ty) {
+ auto And1 = MIRBuilder.buildAnd(Src1Ty, Src0, SignBitMask);
+ Or = MIRBuilder.buildOr(Dst, And0, And1);
+ } else if (Src0Size > Src1Size) {
+ auto ShiftAmt = MIRBuilder.buildConstant(Src0Ty, Src0Size - Src1Size);
+ auto Zext = MIRBuilder.buildZExt(Src0Ty, Src1);
+ auto Shift = MIRBuilder.buildShl(Src0Ty, Zext, ShiftAmt);
+ auto And1 = MIRBuilder.buildAnd(Src0Ty, Shift, SignBitMask);
+ Or = MIRBuilder.buildOr(Dst, And0, And1);
+ } else {
+ auto ShiftAmt = MIRBuilder.buildConstant(Src1Ty, Src1Size - Src0Size);
+ auto Shift = MIRBuilder.buildLShr(Src1Ty, Src1, ShiftAmt);
+ auto Trunc = MIRBuilder.buildTrunc(Src0Ty, Shift);
+ auto And1 = MIRBuilder.buildAnd(Src0Ty, Trunc, SignBitMask);
+ Or = MIRBuilder.buildOr(Dst, And0, And1);
+ }
+
+ // Be careful about setting nsz/nnan/ninf on every instruction, since the
+ // constants are a nan and -0.0, but the final result should preserve
+ // everything.
+ if (unsigned Flags = MI.getFlags())
+ Or->setFlags(Flags);
+
+ MI.eraseFromParent();
+ return Legalized;
+}
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
index 410d038fc65..f33fe2e128d 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
@@ -298,9 +298,8 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
.lowerFor({{S64, S16}}) // FIXME: Implement
.scalarize(0);
- getActionDefinitionsBuilder(G_FCOPYSIGN)
- .legalForCartesianProduct({S16, S32, S64}, {S16, S32, S64})
- .scalarize(0);
+ // TODO: Verify V_BFI_B32 is generated from expanded bit ops.
+ getActionDefinitionsBuilder(G_FCOPYSIGN).lower();
getActionDefinitionsBuilder(G_FSUB)
// Use actual fsub instruction
OpenPOWER on IntegriCloud