Make the BUILD_VECTOR lowering code much more aggressive w.r.t constant vectors.

Remove some done items from the todo list. llvm-svn: 27729
author: Chris Lattner <sabre@nondot.org> 2006-04-16 01:01:29 +0000
committer: Chris Lattner <sabre@nondot.org> 2006-04-16 01:01:29 +0000
commit: fa5aa396c251c42ce127407c075e296bff0f88b1 (patch)
tree: d02f471ea511c333c0c318acfe9a6ee7baa47e2d
parent: 9095186deb4ad286b764d1baa85c6acf653e4763 (diff)
download: bcm5719-llvm-fa5aa396c251c42ce127407c075e296bff0f88b1.tar.gz
bcm5719-llvm-fa5aa396c251c42ce127407c075e296bff0f88b1.zip
2 files changed, 106 insertions, 67 deletions
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index 826ff4ea819..4cd0a476dd9 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -936,8 +936,6 @@ static SDOperand LowerSRA(SDOperand Op, SelectionDAG &DAG) {
 // ISD::UNDEF value.  For undefs, the corresponding VectorBits values are
 // zero.   Return true if this is not an array of constants, false if it is.
 //
-// Note that VectorBits/UndefBits are returned in 'little endian' form, so
-// elements 0,1 go in VectorBits[0] and 2,3 go in VectorBits[1] for a v4i32.
 static bool GetConstantBuildVectorBits(SDNode *BV, uint64_t VectorBits[2],
                                        uint64_t UndefBits[2]) {
   // Start with zero'd results.
@@ -948,7 +946,7 @@ static bool GetConstantBuildVectorBits(SDNode *BV, uint64_t VectorBits[2],
     SDOperand OpVal = BV->getOperand(i);
     
     unsigned PartNo = i >= e/2;     // In the upper 128 bits?
-    unsigned SlotNo = i & (e/2-1);  // Which subpiece of the uint64_t it is.
+    unsigned SlotNo = e/2 - (i & (e/2-1))-1;  // Which subpiece of the uint64_t.
 
     uint64_t EltBits = 0;
     if (OpVal.getOpcode() == ISD::UNDEF) {
@@ -974,6 +972,59 @@ static bool GetConstantBuildVectorBits(SDNode *BV, uint64_t VectorBits[2],
   return false;
 }
 
+// If this is a splat (repetition) of a value across the whole vector, return
+// the smallest size that splats it.  For example, "0x01010101010101..." is a
+// splat of 0x01, 0x0101, and 0x01010101.  We return SplatBits = 0x01 and 
+// SplatSize = 1 byte.
+static bool isConstantSplat(const uint64_t Bits128[2], 
+                            const uint64_t Undef128[2],
+                            unsigned &SplatBits, unsigned &SplatUndef,
+                            unsigned &SplatSize) {
+  
+  // Don't let undefs prevent splats from matching.  See if the top 64-bits are
+  // the same as the lower 64-bits, ignoring undefs.
+  if ((Bits128[0] & ~Undef128[1]) != (Bits128[1] & ~Undef128[0]))
+    return false;  // Can't be a splat if two pieces don't match.
+  
+  uint64_t Bits64  = Bits128[0] | Bits128[1];
+  uint64_t Undef64 = Undef128[0] & Undef128[1];
+  
+  // Check that the top 32-bits are the same as the lower 32-bits, ignoring
+  // undefs.
+  if ((Bits64 & (~Undef64 >> 32)) != ((Bits64 >> 32) & ~Undef64))
+    return false;  // Can't be a splat if two pieces don't match.
+
+  uint32_t Bits32  = uint32_t(Bits64) | uint32_t(Bits64 >> 32);
+  uint32_t Undef32 = uint32_t(Undef64) & uint32_t(Undef64 >> 32);
+
+  // If the top 16-bits are different than the lower 16-bits, ignoring
+  // undefs, we have an i32 splat.
+  if ((Bits32 & (~Undef32 >> 16)) != ((Bits32 >> 16) & ~Undef32)) {
+    SplatBits = Bits32;
+    SplatUndef = Undef32;
+    SplatSize = 4;
+    return true;
+  }
+  
+  uint16_t Bits16  = uint16_t(Bits32)  | uint16_t(Bits32 >> 16);
+  uint16_t Undef16 = uint16_t(Undef32) & uint16_t(Undef32 >> 16);
+
+  // If the top 8-bits are different than the lower 8-bits, ignoring
+  // undefs, we have an i16 splat.
+  if ((Bits16 & (uint16_t(~Undef16) >> 8)) != ((Bits16 >> 8) & ~Undef16)) {
+    SplatBits = Bits16;
+    SplatUndef = Undef16;
+    SplatSize = 2;
+    return true;
+  }
+  
+  // Otherwise, we have an 8-bit splat.
+  SplatBits  = uint8_t(Bits16)  | uint8_t(Bits16 >> 8);
+  SplatUndef = uint8_t(Undef16) & uint8_t(Undef16 >> 8);
+  SplatSize = 1;
+  return true;
+}
+
 // If this is a case we can't handle, return null and let the default
 // expansion code take care of it.  If we CAN select this case, and if it
 // selects to a single instruction, return Op.  Otherwise, if we can codegen
@@ -989,54 +1040,52 @@ static SDOperand LowerBUILD_VECTOR(SDOperand Op, SelectionDAG &DAG) {
   if (GetConstantBuildVectorBits(Op.Val, VectorBits, UndefBits))
     return SDOperand();   // Not a constant vector.
   
-  // See if this is all zeros.
-  if ((VectorBits[0] | VectorBits[1]) == 0) {
-    // Canonicalize all zero vectors to be v4i32.
-    if (Op.getValueType() != MVT::v4i32) {
-      SDOperand Z = DAG.getConstant(0, MVT::i32);
-      Z = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, Z, Z, Z, Z);
-      Op = DAG.getNode(ISD::BIT_CONVERT, Op.getValueType(), Z);
+  // If this is a splat (repetition) of a value across the whole vector, return
+  // the smallest size that splats it.  For example, "0x01010101010101..." is a
+  // splat of 0x01, 0x0101, and 0x01010101.  We return SplatBits = 0x01 and 
+  // SplatSize = 1 byte.
+  unsigned SplatBits, SplatUndef, SplatSize;
+  if (isConstantSplat(VectorBits, UndefBits, SplatBits, SplatUndef, SplatSize)){
+    bool HasAnyUndefs = (UndefBits[0] | UndefBits[1]) != 0;
+    
+    // First, handle single instruction cases.
+    
+    // All zeros?
+    if (SplatBits == 0) {
+      // Canonicalize all zero vectors to be v4i32.
+      if (Op.getValueType() != MVT::v4i32 || HasAnyUndefs) {
+        SDOperand Z = DAG.getConstant(0, MVT::i32);
+        Z = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, Z, Z, Z, Z);
+        Op = DAG.getNode(ISD::BIT_CONVERT, Op.getValueType(), Z);
+      }
+      return Op;
     }
-    return Op;
-  }
-  
-  // Check to see if this is something we can use VSPLTI* to form.
-  MVT::ValueType CanonicalVT = MVT::Other;
-  SDNode *CST = 0;
-  
-  if ((CST = PPC::get_VSPLTI_elt(Op.Val, 4, DAG).Val))       // vspltisw
-    CanonicalVT = MVT::v4i32;
-  else if ((CST = PPC::get_VSPLTI_elt(Op.Val, 2, DAG).Val))  // vspltish
-    CanonicalVT = MVT::v8i16;
-  else if ((CST = PPC::get_VSPLTI_elt(Op.Val, 1, DAG).Val))  // vspltisb
-    CanonicalVT = MVT::v16i8;
-  
-  // If this matches one of the vsplti* patterns, force it to the canonical
-  // type for the pattern.
-  if (CST) {
-    if (Op.getValueType() != CanonicalVT) {
-      // Convert the splatted element to the right element type.
-      SDOperand Elt = DAG.getNode(ISD::TRUNCATE, 
-                                  MVT::getVectorBaseType(CanonicalVT), 
-                                  SDOperand(CST, 0));
-      std::vector<SDOperand> Ops(MVT::getVectorNumElements(CanonicalVT), Elt);
-      SDOperand Res = DAG.getNode(ISD::BUILD_VECTOR, CanonicalVT, Ops);
-      Op = DAG.getNode(ISD::BIT_CONVERT, Op.getValueType(), Res);
+
+    // If the sign extended value is in the range [-16,15], use VSPLTI[bhw].
+    int32_t SextVal= int32_t(SplatBits << (32-8*SplatSize)) >> (32-8*SplatSize);
+    if (SextVal >= -16 && SextVal <= 15) {
+      const MVT::ValueType VTys[] = { // canonical VT to use for each size.
+        MVT::v16i8, MVT::v8i16, MVT::Other, MVT::v4i32
+      };
+      MVT::ValueType CanonicalVT = VTys[SplatSize-1];
+   
+      // If this is a non-canonical splat for this value, 
+      if (Op.getValueType() != CanonicalVT || HasAnyUndefs) {
+        SDOperand Elt = DAG.getConstant(SplatBits, 
+                                        MVT::getVectorBaseType(CanonicalVT));
+        std::vector<SDOperand> Ops(MVT::getVectorNumElements(CanonicalVT), Elt);
+        SDOperand Res = DAG.getNode(ISD::BUILD_VECTOR, CanonicalVT, Ops);
+        Op = DAG.getNode(ISD::BIT_CONVERT, Op.getValueType(), Res);
+      }
+      return Op;
     }
-    return Op;
-  }
-  
-  // If this is some other splat of 4-byte elements, see if we can handle it
-  // in another way.
-  // FIXME: Make this more undef happy and work with other widths (1,2 bytes).
-  if (VectorBits[0] == VectorBits[1] &&
-      unsigned(VectorBits[0]) == unsigned(VectorBits[0] >> 32)) {
-    unsigned Bits = unsigned(VectorBits[0]);
+    
     
     // If this is 0x8000_0000 x 4, turn into vspltisw + vslw.  If it is 
     // 0x7FFF_FFFF x 4, turn it into not(0x8000_0000).  These are important
     // for fneg/fabs.
-    if (Bits == 0x80000000 || Bits == 0x7FFFFFFF) {
+    if (SplatSize == 4 &&
+        SplatBits == 0x80000000 || SplatBits == (0x7FFFFFFF&~SplatUndef)) {
       // Make -1 and vspltisw -1:
       SDOperand OnesI = DAG.getConstant(~0U, MVT::i32);
       SDOperand OnesV = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
@@ -1049,13 +1098,13 @@ static SDOperand LowerBUILD_VECTOR(SDOperand Op, SelectionDAG &DAG) {
                       OnesV, OnesV);
       
       // If this is 0x7FFF_FFFF, xor by OnesV to invert it.
-      if (Bits == 0x7FFFFFFF)
+      if (SplatBits == 0x80000000)
         Res = DAG.getNode(ISD::XOR, MVT::v4i32, Res, OnesV);
       
       return DAG.getNode(ISD::BIT_CONVERT, Op.getValueType(), Res);
     }
   }
-  
+    
   return SDOperand();
 }
 
diff --git a/llvm/lib/Target/PowerPC/README_ALTIVEC.txt b/llvm/lib/Target/PowerPC/README_ALTIVEC.txt
index 33e3e2a2b04..3c928ad6bbd 100644
--- a/llvm/lib/Target/PowerPC/README_ALTIVEC.txt
+++ b/llvm/lib/Target/PowerPC/README_ALTIVEC.txt
@@ -40,8 +40,16 @@ a load/store/lve*x sequence.
 //===----------------------------------------------------------------------===//
 
 There are a wide range of vector constants we can generate with combinations of
-altivec instructions.  Examples
- GCC does: "t=vsplti*, r = t+t"  for constants it can't generate with one vsplti
+altivec instructions.
+
+Examples, these work with all widths:
+  Splat(+/- 16,18,20,22,24,28,30):  t = vspliti I/2,  r = t+t
+  Splat(+/- 17,19,21,23,25,29):     t = vsplti +/-15, t2 = vsplti I-15, r=t + t2
+  Splat(31):                        t = vsplti FB,  r = srl t,t
+  Splat(256):  t = vsplti 1, r = vsldoi t, t, 1
+
+Lots more are listed here:
+http://www.informatik.uni-bremen.de/~hobold/AltiVec.html
 
 This should be added to the ISD::BUILD_VECTOR case in 
 PPCTargetLowering::LowerOperation.
@@ -52,19 +60,6 @@ FABS/FNEG can be codegen'd with the appropriate and/xor of -0.0.
 
 //===----------------------------------------------------------------------===//
 
-Codegen the constant here with something better than a constant pool load.
-
-void %test_f(<4 x float>* %P, <4 x float>* %Q, float %X) {
-        %tmp = load <4 x float>* %Q
-        %tmp = cast <4 x float> %tmp to <4 x int>
-        %tmp1 = and <4 x int> %tmp, < int 2147483647, int 2147483647, int 2147483647, int 2147483647 > 
-        %tmp2 = cast <4 x int> %tmp1 to <4 x float>
-        store <4 x float> %tmp2, <4 x float>* %P
-        ret void
-}
-
-//===----------------------------------------------------------------------===//
-
 For functions that use altivec AND have calls, we are VRSAVE'ing all call
 clobbered regs.
 
@@ -92,11 +87,6 @@ cond code on CR6.
 
 //===----------------------------------------------------------------------===//
 
-SROA should turn "vector unions" into the appropriate insert/extract element
-instructions.
- 
-//===----------------------------------------------------------------------===//
-
 We need a way to teach tblgen that some operands of an intrinsic are required to
 be constants.  The verifier should enforce this constraint.
author	Chris Lattner <sabre@nondot.org>	2006-04-16 01:01:29 +0000
committer	Chris Lattner <sabre@nondot.org>	2006-04-16 01:01:29 +0000
commit	fa5aa396c251c42ce127407c075e296bff0f88b1 (patch)
tree	d02f471ea511c333c0c318acfe9a6ee7baa47e2d
parent	9095186deb4ad286b764d1baa85c6acf653e4763 (diff)
download	bcm5719-llvm-fa5aa396c251c42ce127407c075e296bff0f88b1.tar.gz bcm5719-llvm-fa5aa396c251c42ce127407c075e296bff0f88b1.zip