-rw-r--r--  llvm/lib/Target/X86/X86ISelLowering.cpp   171
-rw-r--r--  llvm/lib/Target/X86/X86ISelLowering.h        6
-rw-r--r--  llvm/test/CodeGen/X86/fp128-cast.ll         20
-rw-r--r--  llvm/test/CodeGen/X86/fp128-compare.ll      11
-rw-r--r--  llvm/test/CodeGen/X86/fp128-i128.ll         11
5 files changed, 177 insertions, 42 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 1e3127f1d12..8cc4b5f56fe 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -337,6 +337,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::FREM , MVT::f32 , Expand);
setOperationAction(ISD::FREM , MVT::f64 , Expand);
setOperationAction(ISD::FREM , MVT::f80 , Expand);
+ setOperationAction(ISD::FREM , MVT::f128 , Expand);
setOperationAction(ISD::FLT_ROUNDS_ , MVT::i32 , Custom);
// Promote the i8 variants and force them on up to i32 which has a shorter
@@ -383,15 +384,19 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
// There's never any support for operations beyond MVT::f32.
setOperationAction(ISD::FP16_TO_FP, MVT::f64, Expand);
setOperationAction(ISD::FP16_TO_FP, MVT::f80, Expand);
+ setOperationAction(ISD::FP16_TO_FP, MVT::f128, Expand);
setOperationAction(ISD::FP_TO_FP16, MVT::f64, Expand);
setOperationAction(ISD::FP_TO_FP16, MVT::f80, Expand);
+ setOperationAction(ISD::FP_TO_FP16, MVT::f128, Expand);
setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand);
setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand);
setLoadExtAction(ISD::EXTLOAD, MVT::f80, MVT::f16, Expand);
+ setLoadExtAction(ISD::EXTLOAD, MVT::f128, MVT::f16, Expand);
setTruncStoreAction(MVT::f32, MVT::f16, Expand);
setTruncStoreAction(MVT::f64, MVT::f16, Expand);
setTruncStoreAction(MVT::f80, MVT::f16, Expand);
+ setTruncStoreAction(MVT::f128, MVT::f16, Expand);
if (Subtarget.hasPOPCNT()) {
setOperationPromotedToType(ISD::CTPOP, MVT::i8, MVT::i32);
@@ -625,19 +630,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::FMA, MVT::f64, Expand);
setOperationAction(ISD::FMA, MVT::f32, Expand);
- // Long double always uses X87, except f128 in SSE.
+ // f80 always uses X87.
if (UseX87) {
- if (Subtarget.is64Bit() && Subtarget.hasSSE1()) {
- addRegisterClass(MVT::f128, Subtarget.hasVLX() ? &X86::VR128XRegClass
- : &X86::VR128RegClass);
- ValueTypeActions.setTypeAction(MVT::f128, TypeSoftenFloat);
- setOperationAction(ISD::FABS , MVT::f128, Custom);
- setOperationAction(ISD::FNEG , MVT::f128, Custom);
- setOperationAction(ISD::FCOPYSIGN, MVT::f128, Custom);
-
- addLegalFPImmediate(APFloat::getZero(APFloat::IEEEquad())); // xorps
- }
-
addRegisterClass(MVT::f80, &X86::RFP80RegClass);
setOperationAction(ISD::UNDEF, MVT::f80, Expand);
setOperationAction(ISD::FCOPYSIGN, MVT::f80, Expand);
@@ -673,10 +667,60 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::LLRINT, MVT::f80, Expand);
}
+ // f128 uses xmm registers, but most operations require libcalls.
+ if (!Subtarget.useSoftFloat() && Subtarget.is64Bit() && Subtarget.hasSSE1()) {
+ addRegisterClass(MVT::f128, Subtarget.hasVLX() ? &X86::VR128XRegClass
+ : &X86::VR128RegClass);
+
+ addLegalFPImmediate(APFloat::getZero(APFloat::IEEEquad())); // xorps
+
+ setOperationAction(ISD::FADD, MVT::f128, Custom);
+ setOperationAction(ISD::FSUB, MVT::f128, Custom);
+ setOperationAction(ISD::FDIV, MVT::f128, Custom);
+ setOperationAction(ISD::FMUL, MVT::f128, Custom);
+ setOperationAction(ISD::FMA, MVT::f128, Expand);
+
+ setOperationAction(ISD::FABS, MVT::f128, Custom);
+ setOperationAction(ISD::FNEG, MVT::f128, Custom);
+ setOperationAction(ISD::FCOPYSIGN, MVT::f128, Custom);
+
+ setOperationAction(ISD::FSIN, MVT::f128, Expand);
+ setOperationAction(ISD::FCOS, MVT::f128, Expand);
+ setOperationAction(ISD::FSINCOS, MVT::f128, Expand);
+ setOperationAction(ISD::FSQRT, MVT::f128, Expand);
+
+ setOperationAction(ISD::FP_EXTEND, MVT::f128, Custom);
+ // We need to custom handle any FP_ROUND with an f128 input, but
+ // LegalizeDAG uses the result type to know when to run a custom handler.
+ // So we have to list all legal floating point result types here.
+ if (isTypeLegal(MVT::f32)) {
+ setOperationAction(ISD::FP_ROUND, MVT::f32, Custom);
+ setOperationAction(ISD::STRICT_FP_ROUND, MVT::f32, Custom);
+ }
+ if (isTypeLegal(MVT::f64)) {
+ setOperationAction(ISD::FP_ROUND, MVT::f64, Custom);
+ setOperationAction(ISD::STRICT_FP_ROUND, MVT::f64, Custom);
+ }
+ if (isTypeLegal(MVT::f80)) {
+ setOperationAction(ISD::FP_ROUND, MVT::f80, Custom);
+ setOperationAction(ISD::STRICT_FP_ROUND, MVT::f80, Custom);
+ }
+
+ setOperationAction(ISD::SETCC, MVT::f128, Custom);
+
+ setLoadExtAction(ISD::EXTLOAD, MVT::f128, MVT::f32, Expand);
+ setLoadExtAction(ISD::EXTLOAD, MVT::f128, MVT::f64, Expand);
+ setLoadExtAction(ISD::EXTLOAD, MVT::f128, MVT::f80, Expand);
+ setTruncStoreAction(MVT::f128, MVT::f32, Expand);
+ setTruncStoreAction(MVT::f128, MVT::f64, Expand);
+ setTruncStoreAction(MVT::f128, MVT::f80, Expand);
+ }
+
// Always use a library call for pow.
setOperationAction(ISD::FPOW , MVT::f32 , Expand);
setOperationAction(ISD::FPOW , MVT::f64 , Expand);
setOperationAction(ISD::FPOW , MVT::f80 , Expand);
+ setOperationAction(ISD::FPOW , MVT::f128 , Expand);
setOperationAction(ISD::FLOG, MVT::f80, Expand);
setOperationAction(ISD::FLOG2, MVT::f80, Expand);
@@ -786,6 +830,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::LOAD, MVT::v2f32, Custom);
setOperationAction(ISD::STORE, MVT::v2f32, Custom);
+
+ setOperationAction(ISD::STRICT_FP_ROUND, MVT::v4f32, Custom);
}
if (!Subtarget.useSoftFloat() && Subtarget.hasSSE2()) {
@@ -1139,6 +1185,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::SINT_TO_FP, MVT::v8i32, Legal);
+ setOperationAction(ISD::STRICT_FP_ROUND, MVT::v8f32, Custom);
+
if (!Subtarget.hasAVX512())
setOperationAction(ISD::BITCAST, MVT::v32i1, Custom);
@@ -1400,6 +1448,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::SINT_TO_FP, MVT::v16i32, Legal);
setOperationAction(ISD::UINT_TO_FP, MVT::v16i32, Legal);
+ setOperationAction(ISD::STRICT_FP_ROUND, MVT::v16f32, Custom);
+
setTruncStoreAction(MVT::v8i64, MVT::v8i8, Legal);
setTruncStoreAction(MVT::v8i64, MVT::v8i16, Legal);
setTruncStoreAction(MVT::v8i64, MVT::v8i32, Legal);
@@ -4661,6 +4711,10 @@ static X86::CondCode TranslateX86CC(ISD::CondCode SetCCOpcode, const SDLoc &DL,
// X < 0 -> X == 0, jump on sign.
return X86::COND_S;
}
+ if (SetCCOpcode == ISD::SETGE && RHSC->isNullValue()) {
+ // X >= 0 -> X == 0, jump on !sign.
+ return X86::COND_NS;
+ }
if (SetCCOpcode == ISD::SETLT && RHSC->getAPIntValue() == 1) {
// X < 1 -> X <= 0
RHS = DAG.getConstant(0, DL, RHS.getValueType());
@@ -18275,6 +18329,9 @@ SDValue X86TargetLowering::LowerSINT_TO_FP(SDValue Op,
MVT VT = Op.getSimpleValueType();
SDLoc dl(Op);
+ if (VT == MVT::f128)
+ return LowerF128Call(Op, DAG, RTLIB::getSINTTOFP(SrcVT, VT));
+
if (SDValue Extract = vectorizeExtractedCast(Op, DAG, Subtarget))
return Extract;
@@ -18634,16 +18691,18 @@ SDValue X86TargetLowering::LowerUINT_TO_FP(SDValue Op,
SDValue N0 = Op.getOperand(0);
SDLoc dl(Op);
auto PtrVT = getPointerTy(DAG.getDataLayout());
+ MVT SrcVT = N0.getSimpleValueType();
+ MVT DstVT = Op.getSimpleValueType();
+
+ if (DstVT == MVT::f128)
+ return LowerF128Call(Op, DAG, RTLIB::getUINTTOFP(SrcVT, DstVT));
- if (Op.getSimpleValueType().isVector())
+ if (DstVT.isVector())
return lowerUINT_TO_FP_vec(Op, DAG, Subtarget);
if (SDValue Extract = vectorizeExtractedCast(Op, DAG, Subtarget))
return Extract;
- MVT SrcVT = N0.getSimpleValueType();
- MVT DstVT = Op.getSimpleValueType();
-
if (Subtarget.hasAVX512() && isScalarFPTypeInSSEReg(DstVT) &&
(SrcVT == MVT::i32 || (SrcVT == MVT::i64 && Subtarget.is64Bit()))) {
// Conversions from unsigned i32 to f32/f64 are legal,
@@ -19371,6 +19430,17 @@ SDValue X86TargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const {
MVT SrcVT = Src.getSimpleValueType();
SDLoc dl(Op);
+ if (SrcVT == MVT::f128) {
+ RTLIB::Libcall LC;
+ if (Op.getOpcode() == ISD::FP_TO_SINT)
+ LC = RTLIB::getFPTOSINT(SrcVT, VT);
+ else
+ LC = RTLIB::getFPTOUINT(SrcVT, VT);
+
+ MakeLibCallOptions CallOptions;
+ return makeLibCall(DAG, LC, VT, Src, CallOptions, SDLoc(Op)).first;
+ }
+
if (VT.isVector()) {
if (VT == MVT::v2i1 && SrcVT == MVT::v2f64) {
MVT ResVT = MVT::v4i32;
@@ -19446,12 +19516,17 @@ SDValue X86TargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const {
llvm_unreachable("Expected FP_TO_INTHelper to handle all remaining cases.");
}
-static SDValue LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) {
+SDValue X86TargetLowering::LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const {
SDLoc DL(Op);
MVT VT = Op.getSimpleValueType();
SDValue In = Op.getOperand(0);
MVT SVT = In.getSimpleValueType();
+ if (VT == MVT::f128) {
+ RTLIB::Libcall LC = RTLIB::getFPEXT(SVT, VT);
+ return LowerF128Call(Op, DAG, LC);
+ }
+
assert(SVT == MVT::v2f32 && "Only customize MVT::v2f32 type legalization!");
return DAG.getNode(X86ISD::VFPEXT, DL, VT,
@@ -19459,6 +19534,33 @@ static SDValue LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) {
In, DAG.getUNDEF(SVT)));
}
+SDValue X86TargetLowering::LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const {
+ MVT VT = Op.getSimpleValueType();
+ SDValue In = Op.getOperand(0);
+ MVT SVT = In.getSimpleValueType();
+
+ // It's legal except when f128 is involved
+ if (SVT != MVT::f128)
+ return Op;
+
+ RTLIB::Libcall LC = RTLIB::getFPROUND(SVT, VT);
+
+ // FP_ROUND node has a second operand indicating whether it is known to be
+ // precise. That doesn't take part in the LibCall so we can't directly use
+ // LowerF128Call.
+ MakeLibCallOptions CallOptions;
+ return makeLibCall(DAG, LC, VT, In, CallOptions, SDLoc(Op)).first;
+}
+
+// FIXME: This is a hack to allow FP_ROUND to be marked Custom without breaking
+// the default expansion of STRICT_FP_ROUND.
+static SDValue LowerSTRICT_FP_ROUND(SDValue Op, SelectionDAG &DAG) {
+ // FIXME: Need to form a libcall with an input chain for f128.
+ assert(Op.getOperand(0).getValueType() != MVT::f128 &&
+ "Don't know how to handle f128 yet!");
+ return Op;
+}
+
/// Horizontal vector math instructions may be slower than normal math with
/// shuffles. Limit horizontal op codegen based on size/speed trade-offs, uarch
/// implementation, and likely shuffle complexity of the alternate sequence.
@@ -19543,8 +19645,13 @@ static SDValue lowerAddSubToHorizontalOp(SDValue Op, SelectionDAG &DAG,
/// Depending on uarch and/or optimizing for size, we might prefer to use a
/// vector operation in place of the typical scalar operation.
-static SDValue lowerFaddFsub(SDValue Op, SelectionDAG &DAG,
- const X86Subtarget &Subtarget) {
+SDValue X86TargetLowering::lowerFaddFsub(SDValue Op, SelectionDAG &DAG) const {
+ if (Op.getValueType() == MVT::f128) {
+ RTLIB::Libcall LC = Op.getOpcode() == ISD::FADD ? RTLIB::ADD_F128
+ : RTLIB::SUB_F128;
+ return LowerF128Call(Op, DAG, LC);
+ }
+
assert((Op.getValueType() == MVT::f32 || Op.getValueType() == MVT::f64) &&
"Only expecting float/double");
return lowerAddSubToHorizontalOp(Op, DAG, Subtarget);
@@ -20874,6 +20981,19 @@ SDValue X86TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
SDLoc dl(Op);
ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
+ // Handle f128 first, since one possible outcome is a normal integer
+ // comparison which gets handled by emitFlagsForSetcc.
+ if (Op0.getValueType() == MVT::f128) {
+ softenSetCCOperands(DAG, MVT::f128, Op0, Op1, CC, dl, Op0, Op1);
+
+ // If softenSetCCOperands returned a scalar, use it.
+ if (!Op1.getNode()) {
+ assert(Op0.getValueType() == Op.getValueType() &&
+ "Unexpected setcc expansion!");
+ return Op0;
+ }
+ }
+
SDValue X86CC;
SDValue EFLAGS = emitFlagsForSetcc(Op0, Op1, CC, dl, DAG, X86CC);
if (!EFLAGS)
@@ -27579,6 +27699,13 @@ SDValue X86TargetLowering::LowerGC_TRANSITION_END(SDValue Op,
return NOOP;
}
+SDValue X86TargetLowering::LowerF128Call(SDValue Op, SelectionDAG &DAG,
+ RTLIB::Libcall Call) const {
+ SmallVector<SDValue, 2> Ops(Op->op_begin(), Op->op_end());
+ MakeLibCallOptions CallOptions;
+ return makeLibCall(DAG, Call, MVT::f128, Ops, CallOptions, SDLoc(Op)).first;
+}
+
/// Provide custom lowering hooks for some operations.
SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
switch (Op.getOpcode()) {
@@ -27625,10 +27752,14 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::FP_TO_SINT:
case ISD::FP_TO_UINT: return LowerFP_TO_INT(Op, DAG);
case ISD::FP_EXTEND: return LowerFP_EXTEND(Op, DAG);
+ case ISD::FP_ROUND: return LowerFP_ROUND(Op, DAG);
+ case ISD::STRICT_FP_ROUND: return LowerSTRICT_FP_ROUND(Op, DAG);
case ISD::LOAD: return LowerLoad(Op, Subtarget, DAG);
case ISD::STORE: return LowerStore(Op, Subtarget, DAG);
case ISD::FADD:
- case ISD::FSUB: return lowerFaddFsub(Op, DAG, Subtarget);
+ case ISD::FSUB: return lowerFaddFsub(Op, DAG);
+ case ISD::FMUL: return LowerF128Call(Op, DAG, RTLIB::MUL_F128);
+ case ISD::FDIV: return LowerF128Call(Op, DAG, RTLIB::DIV_F128);
case ISD::FABS:
case ISD::FNEG: return LowerFABSorFNEG(Op, DAG);
case ISD::FCOPYSIGN: return LowerFCOPYSIGN(Op, DAG);
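For orientation, the net effect of the lowering changes above shows up on plain fp128 arithmetic. The following is a minimal sketch, not taken from the patch's test files; the __multf3/__addtf3 names are the standard compiler-rt/libgcc fp128 routines that RTLIB::MUL_F128 and RTLIB::ADD_F128 are assumed to resolve to:

; Hypothetical input for llc on an SSE-capable x86-64 target. With this patch,
; f128 values live in XMM registers, but FMUL/FADD are marked Custom and
; routed through LowerF128Call, so each operation below becomes a libcall.
define fp128 @f128_mul_add(fp128 %a, fp128 %b, fp128 %c) nounwind {
entry:
  %mul = fmul fp128 %a, %b      ; expected to lower to a call to __multf3
  %add = fadd fp128 %mul, %c    ; expected to lower to a call to __addtf3
  ret fp128 %add
}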
diff --git a/llvm/lib/Target/X86/X86ISelLowering.h b/llvm/lib/Target/X86/X86ISelLowering.h
index 82e3b989ec5..6f2903aedd4 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.h
+++ b/llvm/lib/Target/X86/X86ISelLowering.h
@@ -1342,6 +1342,12 @@ namespace llvm {
SDValue LowerGC_TRANSITION_START(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerGC_TRANSITION_END(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerFaddFsub(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const;
+
+ SDValue LowerF128Call(SDValue Op, SelectionDAG &DAG,
+ RTLIB::Libcall Call) const;
SDValue
LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
diff --git a/llvm/test/CodeGen/X86/fp128-cast.ll b/llvm/test/CodeGen/X86/fp128-cast.ll
index 7dde098c0c4..3089add4229 100644
--- a/llvm/test/CodeGen/X86/fp128-cast.ll
+++ b/llvm/test/CodeGen/X86/fp128-cast.ll
@@ -505,15 +505,11 @@ entry:
define void @TestFPTruncF128_F80() nounwind {
; X64-SSE-LABEL: TestFPTruncF128_F80:
; X64-SSE: # %bb.0: # %entry
-; X64-SSE-NEXT: subq $24, %rsp
+; X64-SSE-NEXT: pushq %rax
; X64-SSE-NEXT: movaps {{.*}}(%rip), %xmm0
; X64-SSE-NEXT: callq __trunctfxf2
-; X64-SSE-NEXT: fstpt (%rsp)
-; X64-SSE-NEXT: movq (%rsp), %rax
-; X64-SSE-NEXT: movq %rax, {{.*}}(%rip)
-; X64-SSE-NEXT: movl {{[0-9]+}}(%rsp), %eax
-; X64-SSE-NEXT: movw %ax, vf80+{{.*}}(%rip)
-; X64-SSE-NEXT: addq $24, %rsp
+; X64-SSE-NEXT: fstpt {{.*}}(%rip)
+; X64-SSE-NEXT: popq %rax
; X64-SSE-NEXT: retq
;
; X32-LABEL: TestFPTruncF128_F80:
@@ -531,15 +527,11 @@ define void @TestFPTruncF128_F80() nounwind {
;
; X64-AVX-LABEL: TestFPTruncF128_F80:
; X64-AVX: # %bb.0: # %entry
-; X64-AVX-NEXT: subq $24, %rsp
+; X64-AVX-NEXT: pushq %rax
; X64-AVX-NEXT: vmovaps {{.*}}(%rip), %xmm0
; X64-AVX-NEXT: callq __trunctfxf2
-; X64-AVX-NEXT: fstpt (%rsp)
-; X64-AVX-NEXT: movq (%rsp), %rax
-; X64-AVX-NEXT: movq %rax, {{.*}}(%rip)
-; X64-AVX-NEXT: movl {{[0-9]+}}(%rsp), %eax
-; X64-AVX-NEXT: movw %ax, vf80+{{.*}}(%rip)
-; X64-AVX-NEXT: addq $24, %rsp
+; X64-AVX-NEXT: fstpt {{.*}}(%rip)
+; X64-AVX-NEXT: popq %rax
; X64-AVX-NEXT: retq
entry:
%0 = load fp128, fp128* @vf128, align 16
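The shorter sequence above comes from FP_ROUND with an f128 input now going straight to a libcall whose x87 result can be stored directly. A minimal sketch of the pattern being exercised (assumed, not copied from the test file):

; fptrunc from fp128 to x86_fp80 becomes a call to __trunctfxf2; the x87
; result is then stored with a single fstpt instead of being spilled through
; a temporary stack slot as in the old output.
define void @trunc_to_f80(fp128 %x, x86_fp80* %p) nounwind {
entry:
  %t = fptrunc fp128 %x to x86_fp80
  store x86_fp80 %t, x86_fp80* %p
  ret void
}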
diff --git a/llvm/test/CodeGen/X86/fp128-compare.ll b/llvm/test/CodeGen/X86/fp128-compare.ll
index 6f2b0c514a8..f7dd5a45363 100644
--- a/llvm/test/CodeGen/X86/fp128-compare.ll
+++ b/llvm/test/CodeGen/X86/fp128-compare.ll
@@ -48,7 +48,10 @@ define i32 @TestComp128LT(fp128 %d1, fp128 %d2) {
; CHECK-NEXT: pushq %rax
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: callq __lttf2
-; CHECK-NEXT: shrl $31, %eax
+; CHECK-NEXT: xorl %ecx, %ecx
+; CHECK-NEXT: testl %eax, %eax
+; CHECK-NEXT: sets %cl
+; CHECK-NEXT: movl %ecx, %eax
; CHECK-NEXT: popq %rcx
; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: retq
@@ -56,9 +59,9 @@ entry:
%cmp = fcmp olt fp128 %d1, %d2
%conv = zext i1 %cmp to i32
ret i32 %conv
-; The 'shrl' is a special optimization in llvm to combine
-; the effect of 'fcmp olt' and 'zext'. The main purpose is
-; to test soften call to __lttf2.
+; FIXME: This used to generate a shrl to move the sign bit of eax into bit 0.
+; This no longer happens with fp128 compares being expanded by LegalizeDAG.
+; We can add a new DAG combine for X86ISD::CMP/SETCC to restore this.
}
define i32 @TestComp128LE(fp128 %d1, fp128 %d2) {
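The FIXME above refers to compares of the following shape; this is a minimal sketch, not the test's exact function:

; An fcmp on fp128 is softened by softenSetCCOperands into a call to __lttf2
; followed by an integer "result < 0" check, which TranslateX86CC's
; compare-with-zero handling folds into a sign-flag test (testl + sets).
define i1 @f128_olt(fp128 %x, fp128 %y) nounwind {
entry:
  %cmp = fcmp olt fp128 %x, %y
  ret i1 %cmp
}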
diff --git a/llvm/test/CodeGen/X86/fp128-i128.ll b/llvm/test/CodeGen/X86/fp128-i128.ll
index f18b3e46e7a..57776af4a57 100644
--- a/llvm/test/CodeGen/X86/fp128-i128.ll
+++ b/llvm/test/CodeGen/X86/fp128-i128.ll
@@ -160,11 +160,14 @@ define fp128 @TestI128_1(fp128 %x) #0 {
; AVX-NEXT: vmovaps (%rsp), %xmm0
; AVX-NEXT: vmovaps {{.*}}(%rip), %xmm1
; AVX-NEXT: callq __lttf2
-; AVX-NEXT: xorl %ecx, %ecx
; AVX-NEXT: testl %eax, %eax
-; AVX-NEXT: sets %cl
-; AVX-NEXT: shlq $4, %rcx
-; AVX-NEXT: vmovaps {{\.LCPI.*}}(%rcx), %xmm0
+; AVX-NEXT: js .LBB2_1
+; AVX-NEXT: # %bb.2: # %entry
+; AVX-NEXT: vmovaps {{.*}}(%rip), %xmm0
+; AVX-NEXT: addq $40, %rsp
+; AVX-NEXT: retq
+; AVX-NEXT: .LBB2_1:
+; AVX-NEXT: vmovaps {{.*}}(%rip), %xmm0
; AVX-NEXT: addq $40, %rsp
; AVX-NEXT: retq
entry:
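Finally, not every f128 operation becomes a libcall: FABS, FNEG and FCOPYSIGN remain Custom and are handled with bitwise operations on the XMM value. A minimal sketch under that assumption (not taken from the test file):

; |x| on fp128 keeps the value in an XMM register; the Custom FABS handler
; (LowerFABSorFNEG) lowers it to an AND with a sign-bit-clearing mask, so no
; libcall is emitted for this function.
declare fp128 @llvm.fabs.f128(fp128)

define fp128 @f128_abs(fp128 %x) nounwind {
entry:
  %r = call fp128 @llvm.fabs.f128(fp128 %x)
  ret fp128 %r
}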