summaryrefslogtreecommitdiffstats
path: root/llvm/lib
diff options
context:
space:
mode:
authorKristof Beyls <kristof.beyls@arm.com>2019-10-08 08:25:42 +0000
committerKristof Beyls <kristof.beyls@arm.com>2019-10-08 08:25:42 +0000
commit78bfe3ab9475776ae72ca7c9446066f6eb816cc0 (patch)
treeed7b721d56d9495847a30e05500b877f49a52128 /llvm/lib
parentc9ddda84052659698b921e6c3a5bf7df9df599ce (diff)
downloadbcm5719-llvm-78bfe3ab9475776ae72ca7c9446066f6eb816cc0.tar.gz
bcm5719-llvm-78bfe3ab9475776ae72ca7c9446066f6eb816cc0.zip
[ARM] Generate vcmp instead of vcmpe
Based on the discussion in http://lists.llvm.org/pipermail/llvm-dev/2019-October/135574.html, the conclusion was reached that the ARM backend should produce vcmp instead of vcmpe instructions by default, i.e. not be producing an Invalid Operation exception when either arguments in a floating point compare are quiet NaNs. In the future, after constrained floating point intrinsics for floating point compare have been introduced, vcmpe instructions probably should be produced for those intrinsics - depending on the exact semantics they'll be defined to have. This patch logically consists of the following parts: - Revert http://llvm.org/viewvc/llvm-project?rev=294945&view=rev and http://llvm.org/viewvc/llvm-project?rev=294968&view=rev, which implemented fine-tuning for when to produce vcmpe (i.e. not do it for equality comparisons). The complexity introduced by those patches isn't needed anymore if we just always produce vcmp instead. Maybe these patches need to be reintroduced again once support is needed to map potential LLVM-IR constrained floating point compare intrinsics to the ARM instruction set. - Simply select vcmp, instead of vcmpe, see simple changes in lib/Target/ARM/ARMInstrVFP.td - Adapt lots of tests that tested for vcmpe (instead of vcmp). For all of these test, the intent of what is tested for isn't related to whether the vcmp should produce an Invalid Operation exception or not. Fixes PR43374. Differential Revision: https://reviews.llvm.org/D68463 llvm-svn: 374025
Diffstat (limited to 'llvm/lib')
-rw-r--r--llvm/lib/Target/ARM/ARMFastISel.cpp22
-rw-r--r--llvm/lib/Target/ARM/ARMISelLowering.cpp51
-rw-r--r--llvm/lib/Target/ARM/ARMISelLowering.h2
-rw-r--r--llvm/lib/Target/ARM/ARMInstrInfo.td2
-rw-r--r--llvm/lib/Target/ARM/ARMInstrVFP.td28
5 files changed, 36 insertions, 69 deletions
diff --git a/llvm/lib/Target/ARM/ARMFastISel.cpp b/llvm/lib/Target/ARM/ARMFastISel.cpp
index 2fd11426c5a..3e3745f129c 100644
--- a/llvm/lib/Target/ARM/ARMFastISel.cpp
+++ b/llvm/lib/Target/ARM/ARMFastISel.cpp
@@ -191,7 +191,7 @@ class ARMFastISel final : public FastISel {
bool isTypeLegal(Type *Ty, MVT &VT);
bool isLoadTypeLegal(Type *Ty, MVT &VT);
bool ARMEmitCmp(const Value *Src1Value, const Value *Src2Value,
- bool isZExt, bool isEquality);
+ bool isZExt);
bool ARMEmitLoad(MVT VT, Register &ResultReg, Address &Addr,
unsigned Alignment = 0, bool isZExt = true,
bool allocReg = true);
@@ -1259,8 +1259,7 @@ bool ARMFastISel::SelectBranch(const Instruction *I) {
if (ARMPred == ARMCC::AL) return false;
// Emit the compare.
- if (!ARMEmitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned(),
- CI->isEquality()))
+ if (!ARMEmitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned()))
return false;
unsigned BrOpc = isThumb2 ? ARM::t2Bcc : ARM::Bcc;
@@ -1349,7 +1348,7 @@ bool ARMFastISel::SelectIndirectBr(const Instruction *I) {
}
bool ARMFastISel::ARMEmitCmp(const Value *Src1Value, const Value *Src2Value,
- bool isZExt, bool isEquality) {
+ bool isZExt) {
Type *Ty = Src1Value->getType();
EVT SrcEVT = TLI.getValueType(DL, Ty, true);
if (!SrcEVT.isSimple()) return false;
@@ -1397,19 +1396,11 @@ bool ARMFastISel::ARMEmitCmp(const Value *Src1Value, const Value *Src2Value,
// TODO: Verify compares.
case MVT::f32:
isICmp = false;
- // Equality comparisons shouldn't raise Invalid on uordered inputs.
- if (isEquality)
- CmpOpc = UseImm ? ARM::VCMPZS : ARM::VCMPS;
- else
- CmpOpc = UseImm ? ARM::VCMPEZS : ARM::VCMPES;
+ CmpOpc = UseImm ? ARM::VCMPZS : ARM::VCMPS;
break;
case MVT::f64:
isICmp = false;
- // Equality comparisons shouldn't raise Invalid on uordered inputs.
- if (isEquality)
- CmpOpc = UseImm ? ARM::VCMPZD : ARM::VCMPD;
- else
- CmpOpc = UseImm ? ARM::VCMPEZD : ARM::VCMPED;
+ CmpOpc = UseImm ? ARM::VCMPZD : ARM::VCMPD;
break;
case MVT::i1:
case MVT::i8:
@@ -1485,8 +1476,7 @@ bool ARMFastISel::SelectCmp(const Instruction *I) {
if (ARMPred == ARMCC::AL) return false;
// Emit the compare.
- if (!ARMEmitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned(),
- CI->isEquality()))
+ if (!ARMEmitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned()))
return false;
// Now set a register based on the comparison. Explicitly set the predicates
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp
index 45bf6763382..ec553708798 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -1793,34 +1793,22 @@ static ARMCC::CondCodes IntCCToARMCC(ISD::CondCode CC) {
/// FPCCToARMCC - Convert a DAG fp condition code to an ARM CC.
static void FPCCToARMCC(ISD::CondCode CC, ARMCC::CondCodes &CondCode,
- ARMCC::CondCodes &CondCode2, bool &InvalidOnQNaN) {
+ ARMCC::CondCodes &CondCode2) {
CondCode2 = ARMCC::AL;
- InvalidOnQNaN = true;
switch (CC) {
default: llvm_unreachable("Unknown FP condition!");
case ISD::SETEQ:
- case ISD::SETOEQ:
- CondCode = ARMCC::EQ;
- InvalidOnQNaN = false;
- break;
+ case ISD::SETOEQ: CondCode = ARMCC::EQ; break;
case ISD::SETGT:
case ISD::SETOGT: CondCode = ARMCC::GT; break;
case ISD::SETGE:
case ISD::SETOGE: CondCode = ARMCC::GE; break;
case ISD::SETOLT: CondCode = ARMCC::MI; break;
case ISD::SETOLE: CondCode = ARMCC::LS; break;
- case ISD::SETONE:
- CondCode = ARMCC::MI;
- CondCode2 = ARMCC::GT;
- InvalidOnQNaN = false;
- break;
+ case ISD::SETONE: CondCode = ARMCC::MI; CondCode2 = ARMCC::GT; break;
case ISD::SETO: CondCode = ARMCC::VC; break;
case ISD::SETUO: CondCode = ARMCC::VS; break;
- case ISD::SETUEQ:
- CondCode = ARMCC::EQ;
- CondCode2 = ARMCC::VS;
- InvalidOnQNaN = false;
- break;
+ case ISD::SETUEQ: CondCode = ARMCC::EQ; CondCode2 = ARMCC::VS; break;
case ISD::SETUGT: CondCode = ARMCC::HI; break;
case ISD::SETUGE: CondCode = ARMCC::PL; break;
case ISD::SETLT:
@@ -1828,10 +1816,7 @@ static void FPCCToARMCC(ISD::CondCode CC, ARMCC::CondCodes &CondCode,
case ISD::SETLE:
case ISD::SETULE: CondCode = ARMCC::LE; break;
case ISD::SETNE:
- case ISD::SETUNE:
- CondCode = ARMCC::NE;
- InvalidOnQNaN = false;
- break;
+ case ISD::SETUNE: CondCode = ARMCC::NE; break;
}
}
@@ -4259,15 +4244,13 @@ SDValue ARMTargetLowering::getARMCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
/// Returns a appropriate VFP CMP (fcmp{s|d}+fmstat) for the given operands.
SDValue ARMTargetLowering::getVFPCmp(SDValue LHS, SDValue RHS,
- SelectionDAG &DAG, const SDLoc &dl,
- bool InvalidOnQNaN) const {
+ SelectionDAG &DAG, const SDLoc &dl) const {
assert(Subtarget->hasFP64() || RHS.getValueType() != MVT::f64);
SDValue Cmp;
- SDValue C = DAG.getConstant(InvalidOnQNaN, dl, MVT::i32);
if (!isFloatingPointZero(RHS))
- Cmp = DAG.getNode(ARMISD::CMPFP, dl, MVT::Glue, LHS, RHS, C);
+ Cmp = DAG.getNode(ARMISD::CMPFP, dl, MVT::Glue, LHS, RHS);
else
- Cmp = DAG.getNode(ARMISD::CMPFPw0, dl, MVT::Glue, LHS, C);
+ Cmp = DAG.getNode(ARMISD::CMPFPw0, dl, MVT::Glue, LHS);
return DAG.getNode(ARMISD::FMSTAT, dl, MVT::Glue, Cmp);
}
@@ -4284,12 +4267,10 @@ ARMTargetLowering::duplicateCmp(SDValue Cmp, SelectionDAG &DAG) const {
Cmp = Cmp.getOperand(0);
Opc = Cmp.getOpcode();
if (Opc == ARMISD::CMPFP)
- Cmp = DAG.getNode(Opc, DL, MVT::Glue, Cmp.getOperand(0),
- Cmp.getOperand(1), Cmp.getOperand(2));
+ Cmp = DAG.getNode(Opc, DL, MVT::Glue, Cmp.getOperand(0),Cmp.getOperand(1));
else {
assert(Opc == ARMISD::CMPFPw0 && "unexpected operand of FMSTAT");
- Cmp = DAG.getNode(Opc, DL, MVT::Glue, Cmp.getOperand(0),
- Cmp.getOperand(1));
+ Cmp = DAG.getNode(Opc, DL, MVT::Glue, Cmp.getOperand(0));
}
return DAG.getNode(ARMISD::FMSTAT, DL, MVT::Glue, Cmp);
}
@@ -4929,8 +4910,7 @@ SDValue ARMTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
}
ARMCC::CondCodes CondCode, CondCode2;
- bool InvalidOnQNaN;
- FPCCToARMCC(CC, CondCode, CondCode2, InvalidOnQNaN);
+ FPCCToARMCC(CC, CondCode, CondCode2);
// Normalize the fp compare. If RHS is zero we prefer to keep it there so we
// match CMPFPw0 instead of CMPFP, though we don't do this for f16 because we
@@ -4955,13 +4935,13 @@ SDValue ARMTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
}
SDValue ARMcc = DAG.getConstant(CondCode, dl, MVT::i32);
- SDValue Cmp = getVFPCmp(LHS, RHS, DAG, dl, InvalidOnQNaN);
+ SDValue Cmp = getVFPCmp(LHS, RHS, DAG, dl);
SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
SDValue Result = getCMOV(dl, VT, FalseVal, TrueVal, ARMcc, CCR, Cmp, DAG);
if (CondCode2 != ARMCC::AL) {
SDValue ARMcc2 = DAG.getConstant(CondCode2, dl, MVT::i32);
// FIXME: Needs another CMP because flag can have but one use.
- SDValue Cmp2 = getVFPCmp(LHS, RHS, DAG, dl, InvalidOnQNaN);
+ SDValue Cmp2 = getVFPCmp(LHS, RHS, DAG, dl);
Result = getCMOV(dl, VT, Result, TrueVal, ARMcc2, CCR, Cmp2, DAG);
}
return Result;
@@ -5188,11 +5168,10 @@ SDValue ARMTargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
}
ARMCC::CondCodes CondCode, CondCode2;
- bool InvalidOnQNaN;
- FPCCToARMCC(CC, CondCode, CondCode2, InvalidOnQNaN);
+ FPCCToARMCC(CC, CondCode, CondCode2);
SDValue ARMcc = DAG.getConstant(CondCode, dl, MVT::i32);
- SDValue Cmp = getVFPCmp(LHS, RHS, DAG, dl, InvalidOnQNaN);
+ SDValue Cmp = getVFPCmp(LHS, RHS, DAG, dl);
SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
SDVTList VTList = DAG.getVTList(MVT::Other, MVT::Glue);
SDValue Ops[] = { Chain, Dest, ARMcc, CCR, Cmp };
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.h b/llvm/lib/Target/ARM/ARMISelLowering.h
index b8ce4d65f75..a89ef250c0e 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.h
+++ b/llvm/lib/Target/ARM/ARMISelLowering.h
@@ -818,7 +818,7 @@ class VectorType;
SDValue getARMCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
SDValue &ARMcc, SelectionDAG &DAG, const SDLoc &dl) const;
SDValue getVFPCmp(SDValue LHS, SDValue RHS, SelectionDAG &DAG,
- const SDLoc &dl, bool InvalidOnQNaN) const;
+ const SDLoc &dl) const;
SDValue duplicateCmp(SDValue Cmp, SelectionDAG &DAG) const;
SDValue OptimizeVFPBrcond(SDValue Op, SelectionDAG &DAG) const;
diff --git a/llvm/lib/Target/ARM/ARMInstrInfo.td b/llvm/lib/Target/ARM/ARMInstrInfo.td
index e25260d8b47..f75343675da 100644
--- a/llvm/lib/Target/ARM/ARMInstrInfo.td
+++ b/llvm/lib/Target/ARM/ARMInstrInfo.td
@@ -51,8 +51,6 @@ def SDT_ARMAnd : SDTypeProfile<1, 2,
SDTCisVT<2, i32>]>;
def SDT_ARMCmp : SDTypeProfile<0, 2, [SDTCisSameAs<0, 1>]>;
-def SDT_ARMFCmp : SDTypeProfile<0, 3, [SDTCisSameAs<0, 1>,
- SDTCisVT<2, i32>]>;
def SDT_ARMPICAdd : SDTypeProfile<1, 2, [SDTCisSameAs<0, 1>,
SDTCisPtrTy<1>, SDTCisVT<2, i32>]>;
diff --git a/llvm/lib/Target/ARM/ARMInstrVFP.td b/llvm/lib/Target/ARM/ARMInstrVFP.td
index d3380ab7cef..fdd961bfbb2 100644
--- a/llvm/lib/Target/ARM/ARMInstrVFP.td
+++ b/llvm/lib/Target/ARM/ARMInstrVFP.td
@@ -10,7 +10,7 @@
//
//===----------------------------------------------------------------------===//
-def SDT_CMPFP0 : SDTypeProfile<0, 2, [SDTCisFP<0>, SDTCisVT<1, i32>]>;
+def SDT_CMPFP0 : SDTypeProfile<0, 1, [SDTCisFP<0>]>;
def SDT_VMOVDRR : SDTypeProfile<1, 2, [SDTCisVT<0, f64>, SDTCisVT<1, i32>,
SDTCisSameAs<1, 2>]>;
def SDT_VMOVRRD : SDTypeProfile<2, 1, [SDTCisVT<0, i32>, SDTCisSameAs<0, 1>,
@@ -19,7 +19,7 @@ def SDT_VMOVRRD : SDTypeProfile<2, 1, [SDTCisVT<0, i32>, SDTCisSameAs<0, 1>,
def SDT_VMOVSR : SDTypeProfile<1, 1, [SDTCisVT<0, f32>, SDTCisVT<1, i32>]>;
def arm_fmstat : SDNode<"ARMISD::FMSTAT", SDTNone, [SDNPInGlue, SDNPOutGlue]>;
-def arm_cmpfp : SDNode<"ARMISD::CMPFP", SDT_ARMFCmp, [SDNPOutGlue]>;
+def arm_cmpfp : SDNode<"ARMISD::CMPFP", SDT_ARMCmp, [SDNPOutGlue]>;
def arm_cmpfp0 : SDNode<"ARMISD::CMPFPw0", SDT_CMPFP0, [SDNPOutGlue]>;
def arm_fmdrr : SDNode<"ARMISD::VMOVDRR", SDT_VMOVDRR>;
def arm_fmrrd : SDNode<"ARMISD::VMOVRRD", SDT_VMOVRRD>;
@@ -548,12 +548,12 @@ let Defs = [FPSCR_NZCV] in {
def VCMPED : ADuI<0b11101, 0b11, 0b0100, 0b11, 0,
(outs), (ins DPR:$Dd, DPR:$Dm),
IIC_fpCMP64, "vcmpe", ".f64\t$Dd, $Dm",
- [(arm_cmpfp DPR:$Dd, (f64 DPR:$Dm), (i32 1))]>;
+ [/* For disassembly only; pattern left blank */]>;
def VCMPES : ASuI<0b11101, 0b11, 0b0100, 0b11, 0,
(outs), (ins SPR:$Sd, SPR:$Sm),
IIC_fpCMP32, "vcmpe", ".f32\t$Sd, $Sm",
- [(arm_cmpfp SPR:$Sd, SPR:$Sm, (i32 1))]> {
+ [/* For disassembly only; pattern left blank */]> {
// Some single precision VFP instructions may be executed on both NEON and
// VFP pipelines on A8.
let D = VFPNeonA8Domain;
@@ -562,17 +562,17 @@ def VCMPES : ASuI<0b11101, 0b11, 0b0100, 0b11, 0,
def VCMPEH : AHuI<0b11101, 0b11, 0b0100, 0b11, 0,
(outs), (ins HPR:$Sd, HPR:$Sm),
IIC_fpCMP16, "vcmpe", ".f16\t$Sd, $Sm",
- [(arm_cmpfp HPR:$Sd, HPR:$Sm, (i32 1))]>;
+ [/* For disassembly only; pattern left blank */]>;
def VCMPD : ADuI<0b11101, 0b11, 0b0100, 0b01, 0,
(outs), (ins DPR:$Dd, DPR:$Dm),
IIC_fpCMP64, "vcmp", ".f64\t$Dd, $Dm",
- [(arm_cmpfp DPR:$Dd, (f64 DPR:$Dm), (i32 0))]>;
+ [(arm_cmpfp DPR:$Dd, (f64 DPR:$Dm))]>;
def VCMPS : ASuI<0b11101, 0b11, 0b0100, 0b01, 0,
(outs), (ins SPR:$Sd, SPR:$Sm),
IIC_fpCMP32, "vcmp", ".f32\t$Sd, $Sm",
- [(arm_cmpfp SPR:$Sd, SPR:$Sm, (i32 0))]> {
+ [(arm_cmpfp SPR:$Sd, SPR:$Sm)]> {
// Some single precision VFP instructions may be executed on both NEON and
// VFP pipelines on A8.
let D = VFPNeonA8Domain;
@@ -581,7 +581,7 @@ def VCMPS : ASuI<0b11101, 0b11, 0b0100, 0b01, 0,
def VCMPH : AHuI<0b11101, 0b11, 0b0100, 0b01, 0,
(outs), (ins HPR:$Sd, HPR:$Sm),
IIC_fpCMP16, "vcmp", ".f16\t$Sd, $Sm",
- [(arm_cmpfp HPR:$Sd, HPR:$Sm, (i32 0))]>;
+ [(arm_cmpfp HPR:$Sd, HPR:$Sm)]>;
} // Defs = [FPSCR_NZCV]
//===----------------------------------------------------------------------===//
@@ -611,7 +611,7 @@ let Defs = [FPSCR_NZCV] in {
def VCMPEZD : ADuI<0b11101, 0b11, 0b0101, 0b11, 0,
(outs), (ins DPR:$Dd),
IIC_fpCMP64, "vcmpe", ".f64\t$Dd, #0",
- [(arm_cmpfp0 (f64 DPR:$Dd), (i32 1))]> {
+ [/* For disassembly only; pattern left blank */]> {
let Inst{3-0} = 0b0000;
let Inst{5} = 0;
}
@@ -619,7 +619,7 @@ def VCMPEZD : ADuI<0b11101, 0b11, 0b0101, 0b11, 0,
def VCMPEZS : ASuI<0b11101, 0b11, 0b0101, 0b11, 0,
(outs), (ins SPR:$Sd),
IIC_fpCMP32, "vcmpe", ".f32\t$Sd, #0",
- [(arm_cmpfp0 SPR:$Sd, (i32 1))]> {
+ [/* For disassembly only; pattern left blank */]> {
let Inst{3-0} = 0b0000;
let Inst{5} = 0;
@@ -631,7 +631,7 @@ def VCMPEZS : ASuI<0b11101, 0b11, 0b0101, 0b11, 0,
def VCMPEZH : AHuI<0b11101, 0b11, 0b0101, 0b11, 0,
(outs), (ins HPR:$Sd),
IIC_fpCMP16, "vcmpe", ".f16\t$Sd, #0",
- [(arm_cmpfp0 HPR:$Sd, (i32 1))]> {
+ [/* For disassembly only; pattern left blank */]> {
let Inst{3-0} = 0b0000;
let Inst{5} = 0;
}
@@ -639,7 +639,7 @@ def VCMPEZH : AHuI<0b11101, 0b11, 0b0101, 0b11, 0,
def VCMPZD : ADuI<0b11101, 0b11, 0b0101, 0b01, 0,
(outs), (ins DPR:$Dd),
IIC_fpCMP64, "vcmp", ".f64\t$Dd, #0",
- [(arm_cmpfp0 (f64 DPR:$Dd), (i32 0))]> {
+ [(arm_cmpfp0 (f64 DPR:$Dd))]> {
let Inst{3-0} = 0b0000;
let Inst{5} = 0;
}
@@ -647,7 +647,7 @@ def VCMPZD : ADuI<0b11101, 0b11, 0b0101, 0b01, 0,
def VCMPZS : ASuI<0b11101, 0b11, 0b0101, 0b01, 0,
(outs), (ins SPR:$Sd),
IIC_fpCMP32, "vcmp", ".f32\t$Sd, #0",
- [(arm_cmpfp0 SPR:$Sd, (i32 0))]> {
+ [(arm_cmpfp0 SPR:$Sd)]> {
let Inst{3-0} = 0b0000;
let Inst{5} = 0;
@@ -659,7 +659,7 @@ def VCMPZS : ASuI<0b11101, 0b11, 0b0101, 0b01, 0,
def VCMPZH : AHuI<0b11101, 0b11, 0b0101, 0b01, 0,
(outs), (ins HPR:$Sd),
IIC_fpCMP16, "vcmp", ".f16\t$Sd, #0",
- [(arm_cmpfp0 HPR:$Sd, (i32 0))]> {
+ [(arm_cmpfp0 HPR:$Sd)]> {
let Inst{3-0} = 0b0000;
let Inst{5} = 0;
}
OpenPOWER on IntegriCloud