[X86] Add custom type legalization for extending v4i8/v4i16->v4i64.

Pre-SSE4.1 sext_invec for v2i64 is complicated because we don't have a v2i64 sra instruction. So instead we sign extend to i32 using unpack and sra, then copy the elements and do a v4i32 sra to fill with sign bits, then interleave the i32 sign extend and the sign bits. So really we're doing two sign extends but only using half of the v4i32 intermediate result. When the result is more than 128 bits, default type legalization would prefer to split the destination type all the way down to v2i64 with shuffles followed by v16i8/v8i16->v2i64 sext_inreg operations. This results in more instructions than necessary because we are only utilizing the lower 2 elements of the v4i32 intermediate result. Instead we can custom split a v4i8/v4i16->v4i64 sign_extend. Then we can sign extend v4i8/v4i16->v4i32 invec producing a full v4i32 result. Create the sign bit vector as a v4i32 then split and interleave with the sign bits using a punpckldq and punpckhdq. llvm-svn: 347176
author: Craig Topper <craig.topper@intel.com> 2018-11-18 21:28:50 +0000
committer: Craig Topper <craig.topper@intel.com> 2018-11-18 21:28:50 +0000
commit: 0468c860b7b078bdd47d645e113481a4ab646178 (patch)
tree: 11f2c24ce93fb6ceace2da2d0a2df8bc56ef418f /llvm/lib
parent: 950f3842ccaf0ccc2e52928fb49a9d3b3164e96e (diff)
download: bcm5719-llvm-0468c860b7b078bdd47d645e113481a4ab646178.tar.gz
bcm5719-llvm-0468c860b7b078bdd47d645e113481a4ab646178.zip
1 files changed, 34 insertions, 8 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 2bfa4461a39..9247a72a63d 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -949,12 +949,14 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
     setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v8i16, Custom);
 
     if (ExperimentalVectorWideningLegalization) {
-      setOperationAction(ISD::TRUNCATE, MVT::v2i8,  Custom);
-      setOperationAction(ISD::TRUNCATE, MVT::v2i16, Custom);
-      setOperationAction(ISD::TRUNCATE, MVT::v2i32, Custom);
-      setOperationAction(ISD::TRUNCATE, MVT::v4i8,  Custom);
-      setOperationAction(ISD::TRUNCATE, MVT::v4i16, Custom);
-      setOperationAction(ISD::TRUNCATE, MVT::v8i8,  Custom);
+      setOperationAction(ISD::SIGN_EXTEND, MVT::v4i64, Custom);
+
+      setOperationAction(ISD::TRUNCATE,    MVT::v2i8,  Custom);
+      setOperationAction(ISD::TRUNCATE,    MVT::v2i16, Custom);
+      setOperationAction(ISD::TRUNCATE,    MVT::v2i32, Custom);
+      setOperationAction(ISD::TRUNCATE,    MVT::v4i8,  Custom);
+      setOperationAction(ISD::TRUNCATE,    MVT::v4i16, Custom);
+      setOperationAction(ISD::TRUNCATE,    MVT::v8i8,  Custom);
     }
 
     // In the customized shift lowering, the legal v4i32/v2i64 cases
@@ -26349,10 +26351,34 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
       return;
 
     EVT VT = N->getValueType(0);
-    assert((VT == MVT::v16i32 || VT == MVT::v8i64) && "Unexpected VT!");
     SDValue In = N->getOperand(0);
     EVT InVT = In.getValueType();
-    if (InVT.is128BitVector()) {
+    if (!Subtarget.hasSSE41() && VT == MVT::v4i64 &&
+        (InVT == MVT::v4i16 || InVT == MVT::v4i8)) {
+      // Custom split this so we can extend i8/i16->i32 invec. This is better
+      // since sign_extend_inreg i8/i16->i64 requires two sra operations. So
+      // this allows the first to be shared.
+      In = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v4i32, In);
+
+      // Fill a vector with sign bits for each element.
+      SDValue SignBits = DAG.getNode(ISD::SRA, dl, MVT::v4i32, In,
+                                     DAG.getConstant(31, dl, MVT::v4i32));
+
+      // Create an unpackl and unpackh to interleave the sign bits then bitcast
+      // to v2i64.
+      SDValue Lo = DAG.getVectorShuffle(MVT::v4i32, dl, In, SignBits,
+                                        {0, 4, 1, 5});
+      Lo = DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, Lo);
+      SDValue Hi = DAG.getVectorShuffle(MVT::v4i32, dl, In, SignBits,
+                                        {2, 6, 3, 7});
+      Hi = DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, Hi);
+
+      SDValue Res = DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, Lo, Hi);
+      Results.push_back(Res);
+      return;
+    }
+
+    if ((VT == MVT::v16i32 || VT == MVT::v8i64) && InVT.is128BitVector()) {
       // Perform custom splitting instead of the two stage extend we would get
       // by default.
       EVT LoVT, HiVT;
author	Craig Topper <craig.topper@intel.com>	2018-11-18 21:28:50 +0000
committer	Craig Topper <craig.topper@intel.com>	2018-11-18 21:28:50 +0000
commit	0468c860b7b078bdd47d645e113481a4ab646178 (patch)
tree	11f2c24ce93fb6ceace2da2d0a2df8bc56ef418f /llvm/lib
parent	950f3842ccaf0ccc2e52928fb49a9d3b3164e96e (diff)
download	bcm5719-llvm-0468c860b7b078bdd47d645e113481a4ab646178.tar.gz bcm5719-llvm-0468c860b7b078bdd47d645e113481a4ab646178.zip