summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorNadav Rotem <nadav.rotem@intel.com>2012-04-09 07:45:58 +0000
committerNadav Rotem <nadav.rotem@intel.com>2012-04-09 07:45:58 +0000
commitb801ca397682a3d24eefb5951a002864ef40c555 (patch)
treeb9ed2b66883e4a888723dc405726fe767a28e10e
parent9c3da316ec6483f5358eeae8683f14b467a3acd1 (diff)
downloadbcm5719-llvm-b801ca397682a3d24eefb5951a002864ef40c555.tar.gz
bcm5719-llvm-b801ca397682a3d24eefb5951a002864ef40c555.zip
Fix a bug in the lowering of broadcasts: ConstantPools need to use the target pointer type.
Move NormalizeVectorShuffle and LowerVectorBroadcast into X86TargetLowering. llvm-svn: 154310
-rw-r--r--llvm/lib/Target/X86/X86ISelLowering.cpp30
-rw-r--r--llvm/lib/Target/X86/X86ISelLowering.h2
-rw-r--r--llvm/test/CodeGen/X86/avx2-vbroadcast.ll14
3 files changed, 27 insertions, 19 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index f1a5e925dea..615cb67f2b6 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -4859,12 +4859,13 @@ static SDValue EltsFromConsecutiveLoads(EVT VT, SmallVectorImpl<SDValue> &Elts,
/// a scalar load, or a constant.
/// The VBROADCAST node is returned when a pattern is found,
/// or SDValue() otherwise.
-static SDValue LowerVectorBroadcast(SDValue &Op, const X86Subtarget *Subtarget,
- DebugLoc &dl, SelectionDAG &DAG) {
+SDValue
+X86TargetLowering::LowerVectorBroadcast(SDValue &Op, SelectionDAG &DAG) const {
if (!Subtarget->hasAVX())
return SDValue();
EVT VT = Op.getValueType();
+ DebugLoc dl = Op.getDebugLoc();
SDValue Ld;
bool ConstSplatVal;
@@ -4905,7 +4906,7 @@ static SDValue LowerVectorBroadcast(SDValue &Op, const X86Subtarget *Subtarget,
Ld = Sc.getOperand(0);
ConstSplatVal = (Ld.getOpcode() == ISD::Constant ||
- Ld.getOpcode() == ISD::ConstantFP);
+ Ld.getOpcode() == ISD::ConstantFP);
// The scalar_to_vector node and the suspected
// load node must have exactly one user.
@@ -4930,11 +4931,6 @@ static SDValue LowerVectorBroadcast(SDValue &Op, const X86Subtarget *Subtarget,
if ((Is256 && (ScalarSize == 32 || ScalarSize == 64)) ||
(Is128 && (ScalarSize == 32))) {
- // This is the type of the load operation for the constant that we save
- // in the constant pool. We can't load float values from the constant pool
- // because the DAG has to be legal at this stage.
- MVT LdTy = (ScalarSize == 32 ? MVT::i32 : MVT::i64);
-
const Constant *C = 0;
if (ConstantSDNode *CI = dyn_cast<ConstantSDNode>(Ld))
C = CI->getConstantIntValue();
@@ -4943,14 +4939,12 @@ static SDValue LowerVectorBroadcast(SDValue &Op, const X86Subtarget *Subtarget,
assert(C && "Invalid constant type");
- SDValue CP = DAG.getConstantPool(C, LdTy);
+ SDValue CP = DAG.getConstantPool(C, getPointerTy());
unsigned Alignment = cast<ConstantPoolSDNode>(CP)->getAlignment();
- Ld = DAG.getLoad(LdTy, dl, DAG.getEntryNode(), CP,
+ Ld = DAG.getLoad(CVT, dl, DAG.getEntryNode(), CP,
MachinePointerInfo::getConstantPool(),
false, false, false, Alignment);
- // Bitcast the loaded constant back to the requested type.
- Ld = DAG.getNode(ISD::BITCAST, dl, CVT, Ld);
return DAG.getNode(X86ISD::VBROADCAST, dl, VT, Ld);
}
}
@@ -5017,7 +5011,7 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
return getOnesVector(VT, Subtarget->hasAVX2(), DAG, dl);
}
- SDValue Broadcast = LowerVectorBroadcast(Op, Subtarget, dl, DAG);
+ SDValue Broadcast = LowerVectorBroadcast(Op, DAG);
if (Broadcast.getNode())
return Broadcast;
@@ -6226,10 +6220,8 @@ SDValue getMOVLP(SDValue &Op, DebugLoc &dl, SelectionDAG &DAG, bool HasSSE2) {
getShuffleSHUFImmediate(SVOp), DAG);
}
-static
-SDValue NormalizeVectorShuffle(SDValue Op, SelectionDAG &DAG,
- const TargetLowering &TLI,
- const X86Subtarget *Subtarget) {
+SDValue
+X86TargetLowering::NormalizeVectorShuffle(SDValue Op, SelectionDAG &DAG) const {
ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
EVT VT = Op.getValueType();
DebugLoc dl = Op.getDebugLoc();
@@ -6245,7 +6237,7 @@ SDValue NormalizeVectorShuffle(SDValue Op, SelectionDAG &DAG,
int Size = VT.getSizeInBits();
// Use vbroadcast whenever the splat comes from a foldable load
- SDValue Broadcast = LowerVectorBroadcast(Op, Subtarget, dl, DAG);
+ SDValue Broadcast = LowerVectorBroadcast(Op, DAG);
if (Broadcast.getNode())
return Broadcast;
@@ -6332,7 +6324,7 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
// Normalize the input vectors. Here splats, zeroed vectors, profitable
// narrowing and commutation of operands should be handled. The actual code
// doesn't include all of those, work in progress...
- SDValue NewOp = NormalizeVectorShuffle(Op, DAG, *this, Subtarget);
+ SDValue NewOp = NormalizeVectorShuffle(Op, DAG);
if (NewOp.getNode())
return NewOp;
diff --git a/llvm/lib/Target/X86/X86ISelLowering.h b/llvm/lib/Target/X86/X86ISelLowering.h
index d11b4334a95..6e5eda6b7f0 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.h
+++ b/llvm/lib/Target/X86/X86ISelLowering.h
@@ -780,6 +780,8 @@ namespace llvm {
// Utility functions to help LowerVECTOR_SHUFFLE
SDValue LowerVECTOR_SHUFFLEv8i16(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerVectorBroadcast(SDValue &Op, SelectionDAG &DAG) const;
+ SDValue NormalizeVectorShuffle(SDValue Op, SelectionDAG &DAG) const;
virtual SDValue
LowerFormalArguments(SDValue Chain,
diff --git a/llvm/test/CodeGen/X86/avx2-vbroadcast.ll b/llvm/test/CodeGen/X86/avx2-vbroadcast.ll
index 314466175e1..1a78414761c 100644
--- a/llvm/test/CodeGen/X86/avx2-vbroadcast.ll
+++ b/llvm/test/CodeGen/X86/avx2-vbroadcast.ll
@@ -171,3 +171,17 @@ define <4 x float> @_e2(float* %ptr) nounwind uwtable readnone ssp {
ret <4 x float> %vecinit6.i
}
+; CHECK: _e4
+; CHECK-NOT: broadcast
+; CHECK: ret
+define <8 x i8> @_e4(i8* %ptr) nounwind uwtable readnone ssp {
+ %vecinit0.i = insertelement <8 x i8> undef, i8 52, i32 0
+ %vecinit1.i = insertelement <8 x i8> %vecinit0.i, i8 52, i32 1
+ %vecinit2.i = insertelement <8 x i8> %vecinit1.i, i8 52, i32 2
+ %vecinit3.i = insertelement <8 x i8> %vecinit2.i, i8 52, i32 3
+ %vecinit4.i = insertelement <8 x i8> %vecinit3.i, i8 52, i32 3
+ %vecinit5.i = insertelement <8 x i8> %vecinit4.i, i8 52, i32 3
+ %vecinit6.i = insertelement <8 x i8> %vecinit5.i, i8 52, i32 3
+ %vecinit7.i = insertelement <8 x i8> %vecinit6.i, i8 52, i32 3
+ ret <8 x i8> %vecinit7.i
+}
OpenPOWER on IntegriCloud