summaryrefslogtreecommitdiffstats
path: root/llvm/lib/Target
diff options
context:
space:
mode:
authorCraig Topper <craig.topper@intel.com>2020-01-08 09:52:37 -0800
committerCraig Topper <craig.topper@intel.com>2020-01-08 10:06:01 -0800
commit3811417f39a7d0a370fac2923060f5ef8dacd8d7 (patch)
treea6e162aaa21ca7ca568700250ac808f96a9e678c /llvm/lib/Target
parentd60b3b4817cb9346b682bb75371c41642c273b13 (diff)
downloadbcm5719-llvm-3811417f39a7d0a370fac2923060f5ef8dacd8d7.tar.gz
bcm5719-llvm-3811417f39a7d0a370fac2923060f5ef8dacd8d7.zip
[X86] Custom type legalize v4i64->v4f32 uint_to_fp on sse4.1 targets in 64-bit mode
For v4i64->v4f32 uint_to_fp on pre-avx targets where v4i64 isn't legal, we create two v2i64->v2f32 uint_to_fp nodes that need to be shuffled together. Our codegen for v2i64->v2f32 involves detecting if the number is larger than (2^31 - 1); if so, we do a special division by 2 so we can do a signed conversion, which we need to scalarize, and then do a multiply by 2 at the end if we divided earlier. When v4i64 isn't legal we need to split the check for a larger number and the division by 2 into two v2i64 vectors. The scalar part can extract the 4 i64 values from those 4 splits. But we can reassemble the 4 scalar f32 results directly into a single v4f32 vector. Then we just need to combine the fixup indications from the 2 halves, and we can do the final multiply by 2 fixup on all 4 values at once, if needed, using a single v4f32 blend and v4f32 fadd. Differential Revision: https://reviews.llvm.org/D72368
Diffstat (limited to 'llvm/lib/Target')
-rw-r--r--llvm/lib/Target/X86/X86ISelLowering.cpp20
1 files changed, 11 insertions, 9 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index c2d76b1be8a..5cff861221a 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -1116,6 +1116,17 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
// i8 vectors are custom because the source register and source
// source memory operand types are not the same width.
setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v16i8, Custom);
+
+ if (Subtarget.is64Bit() && !Subtarget.hasAVX512()) {
+ // We need to scalarize v4i64->v4f32 uint_to_fp using cvtsi2ss, but we can
+ // do the pre and post work in the vector domain.
+ setOperationAction(ISD::UINT_TO_FP, MVT::v4i64, Custom);
+ setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v4i64, Custom);
+ // We need to mark SINT_TO_FP as Custom even though we want to expand it
+ // so that DAG combine doesn't try to turn it into uint_to_fp.
+ setOperationAction(ISD::SINT_TO_FP, MVT::v4i64, Custom);
+ setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v4i64, Custom);
+ }
}
if (!Subtarget.useSoftFloat() && Subtarget.hasXOP()) {
@@ -1176,15 +1187,6 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::SINT_TO_FP, MVT::v8i32, Legal);
setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v8i32, Legal);
- if (Subtarget.is64Bit() && !Subtarget.hasAVX512()) {
- // We need to mark SINT_TO_FP as Custom even though we want to expand it
- // so that DAG combine doesn't try to turn it into uint_to_fp.
- setOperationAction(ISD::SINT_TO_FP, MVT::v4i64, Custom);
- setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v4i64, Custom);
- setOperationAction(ISD::UINT_TO_FP, MVT::v4i64, Custom);
- setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v4i64, Custom);
- }
-
setOperationAction(ISD::STRICT_FP_ROUND, MVT::v4f32, Legal);
setOperationAction(ISD::STRICT_FADD, MVT::v8f32, Legal);
setOperationAction(ISD::STRICT_FADD, MVT::v4f64, Legal);
OpenPOWER on IntegriCloud