summaryrefslogtreecommitdiffstats
path: root/llvm/lib
diff options
context:
space:
mode:
authorBruno Cardoso Lopes <bruno.cardoso@gmail.com>2015-02-05 13:23:07 +0000
committerBruno Cardoso Lopes <bruno.cardoso@gmail.com>2015-02-05 13:23:07 +0000
commitab9ae87623ac6bae1fe28a8c740063cfcd5fcdb9 (patch)
tree302157d151aa34244ce5a1755caedbc46de7d8b2 /llvm/lib
parentcc6089d2e04339b8db4784582295b283c6b46398 (diff)
downloadbcm5719-llvm-ab9ae87623ac6bae1fe28a8c740063cfcd5fcdb9.tar.gz
bcm5719-llvm-ab9ae87623ac6bae1fe28a8c740063cfcd5fcdb9.zip
[X86][MMX] Handle i32->mmx conversion using movd
Implement a BITCAST dag combine to transform i32->mmx conversion patterns into a X86 specific node (MMX_MOVW2D) and guarantee that moves between i32 and x86mmx are better handled, i.e., don't use store-load to do the conversion.. llvm-svn: 228293
Diffstat (limited to 'llvm/lib')
-rw-r--r--llvm/lib/Target/X86/X86ISelLowering.cpp21
-rw-r--r--llvm/lib/Target/X86/X86ISelLowering.h4
-rw-r--r--llvm/lib/Target/X86/X86InstrFragmentsSIMD.td3
-rw-r--r--llvm/lib/Target/X86/X86InstrMMX.td10
4 files changed, 38 insertions, 0 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 17d0fce54f8..eff03f030c5 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -1675,6 +1675,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
// We have target-specific dag combine patterns for the following nodes:
setTargetDAGCombine(ISD::VECTOR_SHUFFLE);
setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT);
+ setTargetDAGCombine(ISD::BITCAST);
setTargetDAGCombine(ISD::VSELECT);
setTargetDAGCombine(ISD::SELECT);
setTargetDAGCombine(ISD::SHL);
@@ -22986,6 +22987,25 @@ static SDValue XFormVExtractWithShuffleIntoLoad(SDNode *N, SelectionDAG &DAG,
EltNo);
}
+/// \brief Detect bitcasts between i32 to x86mmx low word. Since MMX types are
+/// special and don't usually play with other vector types, it's better to
+/// handle them early to be sure we emit efficient code by avoiding
+/// store-load conversions.
+static SDValue PerformBITCASTCombine(SDNode *N, SelectionDAG &DAG) {
+ if (N->getValueType(0) != MVT::x86mmx ||
+ N->getOperand(0)->getOpcode() != ISD::BUILD_VECTOR ||
+ N->getOperand(0)->getValueType(0) != MVT::v2i32)
+ return SDValue();
+
+ SDValue V = N->getOperand(0);
+ ConstantSDNode *C = dyn_cast<ConstantSDNode>(V.getOperand(1));
+ if (C && C->getZExtValue() == 0 && V.getOperand(0).getValueType() == MVT::i32)
+ return DAG.getNode(X86ISD::MMX_MOVW2D, SDLoc(V.getOperand(0)),
+ N->getValueType(0), V.getOperand(0));
+
+ return SDValue();
+}
+
/// PerformEXTRACT_VECTOR_ELTCombine - Detect vector gather/scatter index
/// generation and convert it from being a bunch of shuffles and extracts
/// into a somewhat faster sequence. For i686, the best sequence is apparently
@@ -26129,6 +26149,7 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
case ISD::SELECT:
case X86ISD::SHRUNKBLEND:
return PerformSELECTCombine(N, DAG, DCI, Subtarget);
+ case ISD::BITCAST: return PerformBITCASTCombine(N, DAG);
case X86ISD::CMOV: return PerformCMOVCombine(N, DAG, DCI, Subtarget);
case ISD::ADD: return PerformAddCombine(N, DAG, Subtarget);
case ISD::SUB: return PerformSubCombine(N, DAG, Subtarget);
diff --git a/llvm/lib/Target/X86/X86ISelLowering.h b/llvm/lib/Target/X86/X86ISelLowering.h
index 346f98e6bca..d90dcc71451 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.h
+++ b/llvm/lib/Target/X86/X86ISelLowering.h
@@ -158,6 +158,10 @@ namespace llvm {
/// vector to a GPR.
MMX_MOVD2W,
+ /// MMX_MOVW2D - Copies a GPR into the low 32-bit word of a MMX vector
+ /// and zero out the high word.
+ MMX_MOVW2D,
+
/// PEXTRB - Extract an 8-bit value from a vector and zero extend it to
/// i32, corresponds to X86::PEXTRB.
PEXTRB,
diff --git a/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td b/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td
index f65292e7602..fd0a4ba28e8 100644
--- a/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td
+++ b/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td
@@ -18,6 +18,9 @@
// Low word of MMX to GPR.
def MMX_X86movd2w : SDNode<"X86ISD::MMX_MOVD2W", SDTypeProfile<1, 1,
[SDTCisVT<0, i32>, SDTCisVT<1, x86mmx>]>>;
+// GPR to low word of MMX.
+def MMX_X86movw2d : SDNode<"X86ISD::MMX_MOVW2D", SDTypeProfile<1, 1,
+ [SDTCisVT<0, x86mmx>, SDTCisVT<1, i32>]>>;
//===----------------------------------------------------------------------===//
// MMX Pattern Fragments
diff --git a/llvm/lib/Target/X86/X86InstrMMX.td b/llvm/lib/Target/X86/X86InstrMMX.td
index b5262c5cdac..fbec0625418 100644
--- a/llvm/lib/Target/X86/X86InstrMMX.td
+++ b/llvm/lib/Target/X86/X86InstrMMX.td
@@ -229,6 +229,16 @@ def MMX_MOVD64rm : MMXI<0x6E, MRMSrcMem, (outs VR64:$dst), (ins i32mem:$src),
[(set VR64:$dst,
(x86mmx (scalar_to_vector (loadi32 addr:$src))))],
IIC_MMX_MOV_MM_RM>, Sched<[WriteLoad]>;
+
+let Predicates = [HasMMX] in {
+ let AddedComplexity = 15 in
+ def : Pat<(x86mmx (MMX_X86movw2d GR32:$src)),
+ (MMX_MOVD64rr GR32:$src)>;
+ let AddedComplexity = 20 in
+ def : Pat<(x86mmx (MMX_X86movw2d (loadi32 addr:$src))),
+ (MMX_MOVD64rm addr:$src)>;
+}
+
let mayStore = 1 in
def MMX_MOVD64mr : MMXI<0x7E, MRMDestMem, (outs), (ins i32mem:$dst, VR64:$src),
"movd\t{$src, $dst|$dst, $src}", [], IIC_MMX_MOV_MM_RM>,
OpenPOWER on IntegriCloud