4 files changed, 123 insertions, 2 deletions
diff --git a/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp b/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
index 8b3ae47b4b2..7645a875109 100644
--- a/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
+++ b/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
@@ -102,6 +102,11 @@ namespace {
                                unsigned AsmVariant, const char *ExtraCode);
     
     
+    /// printS5ImmOperand - Print operand OpNo as a signed 5-bit immediate.
+    void printS5ImmOperand(const MachineInstr *MI, unsigned OpNo) {
+      int value = (int)MI->getOperand(OpNo).getImmedValue() & 31; // low 5 bits
+      O << ((value ^ 16) - 16);  // sign-extend in 'int'; 'char' may be unsigned
+    }
     void printU5ImmOperand(const MachineInstr *MI, unsigned OpNo) {
       unsigned char value = MI->getOperand(OpNo).getImmedValue();
       assert(value <= 31 && "Invalid u5imm argument!");
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index 09494a596cc..ae48574055f 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -301,6 +301,63 @@ bool PPC::isZeroVector(SDNode *N) {
   return true;
 }
 
+/// isVecSplatImm - Return true if this is a build_vector of constants which
+/// can be formed by using a vspltis[bhw] instruction (ByteSize is the element
+/// size in bytes, [124] -> [bhw]).  On success *Val, if non-null, is the imm.
+bool PPC::isVecSplatImm(SDNode *N, unsigned ByteSize, char *Val) {
+  SDOperand OpVal(0, 0);
+  // Check to see if this buildvec has a single non-undef value in its elements.
+  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
+    if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue;
+    if (OpVal.Val == 0)
+      OpVal = N->getOperand(i);
+    else if (OpVal != N->getOperand(i))
+      return false;
+  }
+  
+  if (OpVal.Val == 0) return false;  // All UNDEF: use implicit def.
+  // These stay zero for a non-constant operand, which the size check rejects.
+  unsigned ValSizeInBytes = 0;
+  uint64_t Value = 0;
+  if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
+    Value = CN->getValue();
+    ValSizeInBytes = MVT::getSizeInBits(CN->getValueType(0))/8;
+  } else if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(OpVal)) {
+    assert(CN->getValueType(0) == MVT::f32 && "Only one legal FP vector type!");
+    Value = FloatToBits(CN->getValue());
+    ValSizeInBytes = 4;
+  }
+
+  // If the splat value is larger than the element value, then we can never do
+  // this splat.  The only case that we could fit the replicated bits into our
+  // immediate field for would be zero, and we prefer to use vxor for it.
+  if (ValSizeInBytes < ByteSize) return false;
+  
+  // If the element value is larger than the splat value, cut it in half and
+  // check to see if the two halves are equal.  Continue doing this until we
+  // get to ByteSize.  This allows us to handle 0x01010101 as 0x01.
+  while (ValSizeInBytes > ByteSize) {
+    ValSizeInBytes >>= 1;
+    // Mask covering every bit of one half (ValSizeInBytes*8 bits wide).
+    const uint64_t HalfMask = (uint64_t(1) << (ValSizeInBytes*8)) - 1;
+    // If the top half equals the bottom half, we're still ok.
+    if (((Value >> (ValSizeInBytes*8)) & HalfMask) != (Value & HalfMask))
+      return false;
+  }
+
+  // Properly sign extend the value.
+  int ShAmt = (4-ByteSize)*8;
+  int MaskVal = ((int)Value << ShAmt) >> ShAmt;
+  
+  // If this is zero, don't match, zero matches isZeroVector.
+  if (MaskVal == 0) return false;
+
+  if (Val) *Val = MaskVal;
+
+  // Finally, if this value fits in a 5 bit sext field, return true.
+  return MaskVal >= -16 && MaskVal <= 15;
+}
+
 
 /// LowerOperation - Provide custom lowering hooks for some operations.
 ///
@@ -668,6 +725,12 @@ SDOperand PPCTargetLowering::LowerOperation(SDOperand Op, SelectionDAG &DAG) {
     // FIXME: We should handle splat(-0.0), and other cases here.
     if (PPC::isZeroVector(Op.Val))
       return Op;
+    
+    if (PPC::isVecSplatImm(Op.Val, 1) ||    // vspltisb
+        PPC::isVecSplatImm(Op.Val, 2) ||    // vspltish
+        PPC::isVecSplatImm(Op.Val, 4))      // vspltisw
+      return Op;
+      
     return SDOperand();
     
   case ISD::VECTOR_SHUFFLE: {
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.h b/llvm/lib/Target/PowerPC/PPCISelLowering.h
index 5482e9aa5c0..eeab53df009 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.h
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.h
@@ -105,6 +105,11 @@ namespace llvm {
     /// isZeroVector - Return true if this build_vector is an all-zero vector.
     ///
     bool isZeroVector(SDNode *N);
+    
+    /// isVecSplatImm - Return true if this is a build_vector of constants which
+    /// can be formed by using a vspltis[bhw] instruction.  ByteSize is the element
+    /// size in bytes [124] -> [bhw]; Val, if non-null, receives the immediate.
+    bool isVecSplatImm(SDNode *N, unsigned ByteSize, char *Val = 0);
   }
   
   class PPCTargetLowering : public TargetLowering {
diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.td b/llvm/lib/Target/PowerPC/PPCInstrInfo.td
index 5078d392505..4ac06a6f76e 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrInfo.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.td
@@ -140,6 +140,37 @@ def vecimm0 : PatLeaf<(build_vector), [{
 }]>;
 
 
+// VSPLTISB_get_imm xform function: convert build_vector to VSPLTISB imm.
+def VSPLTISB_get_imm : SDNodeXForm<build_vector, [{
+  char Val = 0;  // Stays defined even if the splat check were to fail.
+  PPC::isVecSplatImm(N, 1, &Val);
+  return getI32Imm((signed char)Val);  // Plain 'char' may be unsigned.
+}]>;
+def vecspltisb : PatLeaf<(build_vector), [{  // Leaf for 1-byte element splats.
+  return PPC::isVecSplatImm(N, 1);
+}], VSPLTISB_get_imm>;
+
+// VSPLTISH_get_imm xform function: convert build_vector to VSPLTISH imm.
+def VSPLTISH_get_imm : SDNodeXForm<build_vector, [{
+  char Val = 0;  // Stays defined even if the splat check were to fail.
+  PPC::isVecSplatImm(N, 2, &Val);
+  return getI32Imm((signed char)Val);  // Plain 'char' may be unsigned.
+}]>;
+def vecspltish : PatLeaf<(build_vector), [{  // Leaf for 2-byte element splats.
+  return PPC::isVecSplatImm(N, 2);
+}], VSPLTISH_get_imm>;
+
+// VSPLTISW_get_imm xform function: convert build_vector to VSPLTISW imm.
+def VSPLTISW_get_imm : SDNodeXForm<build_vector, [{
+  char Val = 0;  // Stays defined even if the splat check were to fail.
+  PPC::isVecSplatImm(N, 4, &Val);
+  return getI32Imm((signed char)Val);  // Plain 'char' may be unsigned.
+}]>;
+def vecspltisw : PatLeaf<(build_vector), [{  // Leaf for 4-byte element splats.
+  return PPC::isVecSplatImm(N, 4);
+}], VSPLTISW_get_imm>;
+
+
 //===----------------------------------------------------------------------===//
 // PowerPC Flag Definitions.
 
@@ -155,6 +186,9 @@ class isDOT   {
 //===----------------------------------------------------------------------===//
 // PowerPC Operand Definitions.
 
+def s5imm   : Operand<i32> {  // i32 operand used for $SIMM of vspltis[bhw].
+  let PrintMethod = "printS5ImmOperand";
+}
 def u5imm   : Operand<i32> {
   let PrintMethod = "printU5ImmOperand";
 }
@@ -1055,12 +1089,21 @@ def VSPLTB : VXForm_1<524, (ops VRRC:$vD, u5imm:$UIMM, VRRC:$vB),
 def VSPLTH : VXForm_1<588, (ops VRRC:$vD, u5imm:$UIMM, VRRC:$vB),
                       "vsplth $vD, $vB, $UIMM", VecPerm,
                       []>;
-                      
 def VSPLTW : VXForm_1<652, (ops VRRC:$vD, u5imm:$UIMM, VRRC:$vB),
                       "vspltw $vD, $vB, $UIMM", VecPerm,
                       [(set VRRC:$vD, (vector_shuffle (v4f32 VRRC:$vB), (undef),
                                       VSPLT_shuffle_mask:$UIMM))]>;
-                      // FIXME: ALSO ADD SUPPORT FOR v4i32!
+// Splat-immediate forms: each matches the v4f32 build_vector its leaf accepts.
+def VSPLTISB : VXForm_1<780, (ops VRRC:$vD, s5imm:$SIMM),  // NOTE(review):
+                      "vspltisb $vD, $SIMM", VecPerm,       // vsplt[bhw] pass 3
+                      [(set VRRC:$vD, (v4f32 vecspltisb:$SIMM))]>;  // ops here.
+def VSPLTISH : VXForm_1<844, (ops VRRC:$vD, s5imm:$SIMM),  // Confirm VXForm_1
+                      "vspltish $vD, $SIMM", VecPerm,       // encodes (vD, SIMM).
+                      [(set VRRC:$vD, (v4f32 vecspltish:$SIMM))]>;
+def VSPLTISW : VXForm_1<908, (ops VRRC:$vD, s5imm:$SIMM),
+                      "vspltisw $vD, $SIMM", VecPerm,
+                      [(set VRRC:$vD, (v4f32 vecspltisw:$SIMM))]>;
+
                       
 // VX-Form Pseudo Instructions
 
@@ -1216,6 +1259,11 @@ def : Pat<(v4i32 (PPClve_x xoaddr:$src)),
 def : Pat<(v4i32 (undef)), (v4i32 (IMPLICIT_DEF_VRRC))>;
 def : Pat<(v4i32 vecimm0), (v4i32 (V_SET0))>;
 
+def : Pat<(v4i32 vecspltisb:$invec), (v4i32 (VSPLTISB vecspltisb:$invec))>;
+def : Pat<(v4i32 vecspltish:$invec), (v4i32 (VSPLTISH vecspltish:$invec))>;
+def : Pat<(v4i32 vecspltisw:$invec), (v4i32 (VSPLTISW vecspltisw:$invec))>;
+// The VSPLTIS* defs match v4f32; the patterns above select them for v4i32 too.
+
 // bit_convert
 def : Pat<(v4i32 (bitconvert (v4f32 VRRC:$src))), (v4i32 VRRC:$src)>;
 def : Pat<(v4f32 (bitconvert (v4i32 VRRC:$src))), (v4f32 VRRC:$src)>;