Diffstat (limited to 'llvm/lib')
-rw-r--r--  llvm/lib/Target/X86/X86ISelLowering.cpp      | 50
-rw-r--r--  llvm/lib/Target/X86/X86InstrFragmentsSIMD.td |  2
-rw-r--r--  llvm/lib/Target/X86/X86InstrSSE.td           | 24
3 files changed, 53 insertions, 23 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 6c0a3af2351..30c3d531fb5 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -15168,32 +15168,57 @@ static SDValue LowerVSETCC(SDValue Op, const X86Subtarget &Subtarget,
     assert(EltVT == MVT::f32 || EltVT == MVT::f64);
 #endif
 
-    unsigned SSECC = translateX86FSETCC(SetCCOpcode, Op0, Op1);
-    unsigned Opc = X86ISD::CMPP;
+    unsigned Opc;
     if (Subtarget.hasAVX512() && VT.getVectorElementType() == MVT::i1) {
       assert(VT.getVectorNumElements() <= 16);
       Opc = X86ISD::CMPM;
-    }
-    // In the two special cases we can't handle, emit two comparisons.
+    } else {
+      Opc = X86ISD::CMPP;
+      // The SSE/AVX packed FP comparison nodes are defined with a
+      // floating-point vector result that matches the operand type. This allows
+      // them to work with an SSE1 target (integer vector types are not legal).
+      VT = Op0.getSimpleValueType();
+    }
+
+    // In the two cases not handled by SSE compare predicates (SETUEQ/SETONE),
+    // emit two comparisons and a logic op to tie them together.
+    // TODO: This can be avoided if Intel (and only Intel as of 2016) AVX is
+    // available.
+    SDValue Cmp;
+    unsigned SSECC = translateX86FSETCC(SetCCOpcode, Op0, Op1);
     if (SSECC == 8) {
+      // LLVM predicate is SETUEQ or SETONE.
       unsigned CC0, CC1;
       unsigned CombineOpc;
       if (SetCCOpcode == ISD::SETUEQ) {
-        CC0 = 3; CC1 = 0; CombineOpc = ISD::OR;
+        CC0 = 3; // UNORD
+        CC1 = 0; // EQ
+        CombineOpc = Opc == X86ISD::CMPP ? X86ISD::FOR : ISD::OR;
       } else {
         assert(SetCCOpcode == ISD::SETONE);
-        CC0 = 7; CC1 = 4; CombineOpc = ISD::AND;
+        CC0 = 7; // ORD
+        CC1 = 4; // NEQ
+        CombineOpc = Opc == X86ISD::CMPP ? X86ISD::FAND : ISD::AND;
       }
 
       SDValue Cmp0 = DAG.getNode(Opc, dl, VT, Op0, Op1,
                                  DAG.getConstant(CC0, dl, MVT::i8));
       SDValue Cmp1 = DAG.getNode(Opc, dl, VT, Op0, Op1,
                                  DAG.getConstant(CC1, dl, MVT::i8));
-      return DAG.getNode(CombineOpc, dl, VT, Cmp0, Cmp1);
+      Cmp = DAG.getNode(CombineOpc, dl, VT, Cmp0, Cmp1);
+    } else {
+      // Handle all other FP comparisons here.
+      Cmp = DAG.getNode(Opc, dl, VT, Op0, Op1,
+                        DAG.getConstant(SSECC, dl, MVT::i8));
     }
-    // Handle all other FP comparisons here.
-    return DAG.getNode(Opc, dl, VT, Op0, Op1,
-                       DAG.getConstant(SSECC, dl, MVT::i8));
+
+    // If this is SSE/AVX CMPP, bitcast the result back to integer to match the
+    // result type of SETCC. The bitcast is expected to be optimized away
+    // during combining/isel.
+    if (Opc == X86ISD::CMPP)
+      Cmp = DAG.getBitcast(Op.getSimpleValueType(), Cmp);
+
+    return Cmp;
   }
 
   MVT VTOp0 = Op0.getSimpleValueType();
@@ -29647,6 +29672,11 @@ static SDValue combineSetCC(SDNode *N, SelectionDAG &DAG,
     }
   }
 
+  // For an SSE1-only target, lower to X86ISD::CMPP early to avoid scalarization
+  // via legalization because v4i32 is not a legal type.
+  if (Subtarget.hasSSE1() && !Subtarget.hasSSE2() && VT == MVT::v4i32)
+    return LowerVSETCC(SDValue(N, 0), Subtarget, DAG);
+
   return SDValue();
 }
 
diff --git a/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td b/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td
index 958bb822a06..e2155972cc5 100644
--- a/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td
+++ b/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td
@@ -35,7 +35,7 @@ def bc_mmx  : PatFrag<(ops node:$in), (x86mmx  (bitconvert node:$in))>;
 // SSE specific DAG Nodes.
 //===----------------------------------------------------------------------===//
 
-def SDTX86VFCMP : SDTypeProfile<1, 3, [SDTCisInt<0>, SDTCisSameAs<1, 2>,
+def SDTX86VFCMP : SDTypeProfile<1, 3, [SDTCisFP<0>, SDTCisSameAs<1, 2>,
                                        SDTCisFP<1>, SDTCisVT<3, i8>,
                                        SDTCisVec<1>]>;
 def SDTX86CmpTestSae : SDTypeProfile<1, 3, [SDTCisVT<0, i32>,
diff --git a/llvm/lib/Target/X86/X86InstrSSE.td b/llvm/lib/Target/X86/X86InstrSSE.td
index d588fba1dad..661f733a1b9 100644
--- a/llvm/lib/Target/X86/X86InstrSSE.td
+++ b/llvm/lib/Target/X86/X86InstrSSE.td
@@ -2498,36 +2498,36 @@ let Constraints = "$src1 = $dst" in {
 }
 
 let Predicates = [HasAVX] in {
-def : Pat<(v4i32 (X86cmpp (v4f32 VR128:$src1), VR128:$src2, imm:$cc)),
+def : Pat<(v4f32 (X86cmpp (v4f32 VR128:$src1), VR128:$src2, imm:$cc)),
           (VCMPPSrri (v4f32 VR128:$src1), (v4f32 VR128:$src2), imm:$cc)>;
-def : Pat<(v4i32 (X86cmpp (v4f32 VR128:$src1), (loadv4f32 addr:$src2), imm:$cc)),
+def : Pat<(v4f32 (X86cmpp (v4f32 VR128:$src1), (loadv4f32 addr:$src2), imm:$cc)),
           (VCMPPSrmi (v4f32 VR128:$src1), addr:$src2, imm:$cc)>;
-def : Pat<(v2i64 (X86cmpp (v2f64 VR128:$src1), VR128:$src2, imm:$cc)),
+def : Pat<(v2f64 (X86cmpp (v2f64 VR128:$src1), VR128:$src2, imm:$cc)),
          (VCMPPDrri VR128:$src1, VR128:$src2, imm:$cc)>;
-def : Pat<(v2i64 (X86cmpp (v2f64 VR128:$src1), (loadv2f64 addr:$src2), imm:$cc)),
+def : Pat<(v2f64 (X86cmpp (v2f64 VR128:$src1), (loadv2f64 addr:$src2), imm:$cc)),
           (VCMPPDrmi VR128:$src1, addr:$src2, imm:$cc)>;
-def : Pat<(v8i32 (X86cmpp (v8f32 VR256:$src1), VR256:$src2, imm:$cc)),
+def : Pat<(v8f32 (X86cmpp (v8f32 VR256:$src1), VR256:$src2, imm:$cc)),
           (VCMPPSYrri (v8f32 VR256:$src1), (v8f32 VR256:$src2), imm:$cc)>;
-def : Pat<(v8i32 (X86cmpp (v8f32 VR256:$src1), (loadv8f32 addr:$src2), imm:$cc)),
+def : Pat<(v8f32 (X86cmpp (v8f32 VR256:$src1), (loadv8f32 addr:$src2), imm:$cc)),
           (VCMPPSYrmi (v8f32 VR256:$src1), addr:$src2, imm:$cc)>;
-def : Pat<(v4i64 (X86cmpp (v4f64 VR256:$src1), VR256:$src2, imm:$cc)),
+def : Pat<(v4f64 (X86cmpp (v4f64 VR256:$src1), VR256:$src2, imm:$cc)),
          (VCMPPDYrri VR256:$src1, VR256:$src2, imm:$cc)>;
-def : Pat<(v4i64 (X86cmpp (v4f64 VR256:$src1), (loadv4f64 addr:$src2), imm:$cc)),
+def : Pat<(v4f64 (X86cmpp (v4f64 VR256:$src1), (loadv4f64 addr:$src2), imm:$cc)),
          (VCMPPDYrmi VR256:$src1, addr:$src2, imm:$cc)>;
 }
 
 let Predicates = [UseSSE1] in {
-def : Pat<(v4i32 (X86cmpp (v4f32 VR128:$src1), VR128:$src2, imm:$cc)),
+def : Pat<(v4f32 (X86cmpp (v4f32 VR128:$src1), VR128:$src2, imm:$cc)),
           (CMPPSrri (v4f32 VR128:$src1), (v4f32 VR128:$src2), imm:$cc)>;
-def : Pat<(v4i32 (X86cmpp (v4f32 VR128:$src1), (memopv4f32 addr:$src2), imm:$cc)),
+def : Pat<(v4f32 (X86cmpp (v4f32 VR128:$src1), (memopv4f32 addr:$src2), imm:$cc)),
           (CMPPSrmi (v4f32 VR128:$src1), addr:$src2, imm:$cc)>;
 }
 
 let Predicates = [UseSSE2] in {
-def : Pat<(v2i64 (X86cmpp (v2f64 VR128:$src1), VR128:$src2, imm:$cc)),
+def : Pat<(v2f64 (X86cmpp (v2f64 VR128:$src1), VR128:$src2, imm:$cc)),
          (CMPPDrri VR128:$src1, VR128:$src2, imm:$cc)>;
-def : Pat<(v2i64 (X86cmpp (v2f64 VR128:$src1), (memopv2f64 addr:$src2), imm:$cc)),
+def : Pat<(v2f64 (X86cmpp (v2f64 VR128:$src1), (memopv2f64 addr:$src2), imm:$cc)),
          (CMPPDrmi VR128:$src1, addr:$src2, imm:$cc)>;
 }
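Note (not part of the patch): the CC0/CC1/CombineOpc choices above encode the fact that SETUEQ and SETONE have no single SSE compare immediate (imm8 0-7), so the lowering emits two CMPP nodes and ties them together with FOR/FAND. Below is a minimal scalar C++ sketch of that split, assuming per-lane semantics; the helper names ueq/one are illustrative only and do not appear in LLVM.

// Scalar model of the SETUEQ/SETONE decomposition used by LowerVSETCC.
#include <cassert>
#include <cmath>
#include <limits>

// SETUEQ: unordered (either operand is NaN) or equal.
// Mirrors CMPP(UNORD, imm 3) OR CMPP(EQ, imm 0), applied per lane.
static bool ueq(float A, float B) {
  bool Unord = std::isunordered(A, B); // CMPP predicate 3 (UNORD)
  bool Eq = A == B;                    // CMPP predicate 0 (EQ), false for NaN
  return Unord || Eq;
}

// SETONE: ordered (neither operand is NaN) and not equal.
// Mirrors CMPP(ORD, imm 7) AND CMPP(NEQ, imm 4), applied per lane.
static bool one(float A, float B) {
  bool Ord = !std::isunordered(A, B);  // CMPP predicate 7 (ORD)
  bool Neq = A != B;                   // CMPP predicate 4 (NEQ), true for NaN
  return Ord && Neq;
}

int main() {
  float NaN = std::numeric_limits<float>::quiet_NaN();
  assert(ueq(1.0f, 1.0f) && ueq(NaN, 2.0f) && !ueq(1.0f, 2.0f));
  assert(one(1.0f, 2.0f) && !one(NaN, 2.0f) && !one(1.0f, 1.0f));
  return 0;
}

Each helper combines two predicates that CMPPS/CMPPD can express directly (UNORD, EQ, ORD, NEQ), which is what the patch does on packed results, using X86ISD::FOR/FAND now that the compare nodes produce an FP vector type.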

