diff options
author | Bruno Cardoso Lopes <bruno.cardoso@gmail.com> | 2015-02-05 13:23:07 +0000 |
---|---|---|
committer | Bruno Cardoso Lopes <bruno.cardoso@gmail.com> | 2015-02-05 13:23:07 +0000 |
commit | ab9ae87623ac6bae1fe28a8c740063cfcd5fcdb9 (patch) | |
tree | 302157d151aa34244ce5a1755caedbc46de7d8b2 /llvm/lib | |
parent | cc6089d2e04339b8db4784582295b283c6b46398 (diff) | |
download | bcm5719-llvm-ab9ae87623ac6bae1fe28a8c740063cfcd5fcdb9.tar.gz bcm5719-llvm-ab9ae87623ac6bae1fe28a8c740063cfcd5fcdb9.zip |
[X86][MMX] Handle i32->mmx conversion using movd
Implement a BITCAST dag combine to transform i32->mmx conversion patterns
into a X86 specific node (MMX_MOVW2D) and guarantee that moves between
i32 and x86mmx are better handled, i.e., don't use store-load to do the
conversion..
llvm-svn: 228293
Diffstat (limited to 'llvm/lib')
-rw-r--r-- | llvm/lib/Target/X86/X86ISelLowering.cpp | 21 | ||||
-rw-r--r-- | llvm/lib/Target/X86/X86ISelLowering.h | 4 | ||||
-rw-r--r-- | llvm/lib/Target/X86/X86InstrFragmentsSIMD.td | 3 | ||||
-rw-r--r-- | llvm/lib/Target/X86/X86InstrMMX.td | 10 |
4 files changed, 38 insertions, 0 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 17d0fce54f8..eff03f030c5 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -1675,6 +1675,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, // We have target-specific dag combine patterns for the following nodes: setTargetDAGCombine(ISD::VECTOR_SHUFFLE); setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT); + setTargetDAGCombine(ISD::BITCAST); setTargetDAGCombine(ISD::VSELECT); setTargetDAGCombine(ISD::SELECT); setTargetDAGCombine(ISD::SHL); @@ -22986,6 +22987,25 @@ static SDValue XFormVExtractWithShuffleIntoLoad(SDNode *N, SelectionDAG &DAG, EltNo); } +/// \brief Detect bitcasts between i32 to x86mmx low word. Since MMX types are +/// special and don't usually play with other vector types, it's better to +/// handle them early to be sure we emit efficient code by avoiding +/// store-load conversions. +static SDValue PerformBITCASTCombine(SDNode *N, SelectionDAG &DAG) { + if (N->getValueType(0) != MVT::x86mmx || + N->getOperand(0)->getOpcode() != ISD::BUILD_VECTOR || + N->getOperand(0)->getValueType(0) != MVT::v2i32) + return SDValue(); + + SDValue V = N->getOperand(0); + ConstantSDNode *C = dyn_cast<ConstantSDNode>(V.getOperand(1)); + if (C && C->getZExtValue() == 0 && V.getOperand(0).getValueType() == MVT::i32) + return DAG.getNode(X86ISD::MMX_MOVW2D, SDLoc(V.getOperand(0)), + N->getValueType(0), V.getOperand(0)); + + return SDValue(); +} + /// PerformEXTRACT_VECTOR_ELTCombine - Detect vector gather/scatter index /// generation and convert it from being a bunch of shuffles and extracts /// into a somewhat faster sequence. For i686, the best sequence is apparently @@ -26129,6 +26149,7 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N, case ISD::SELECT: case X86ISD::SHRUNKBLEND: return PerformSELECTCombine(N, DAG, DCI, Subtarget); + case ISD::BITCAST: return PerformBITCASTCombine(N, DAG); case X86ISD::CMOV: return PerformCMOVCombine(N, DAG, DCI, Subtarget); case ISD::ADD: return PerformAddCombine(N, DAG, Subtarget); case ISD::SUB: return PerformSubCombine(N, DAG, Subtarget); diff --git a/llvm/lib/Target/X86/X86ISelLowering.h b/llvm/lib/Target/X86/X86ISelLowering.h index 346f98e6bca..d90dcc71451 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.h +++ b/llvm/lib/Target/X86/X86ISelLowering.h @@ -158,6 +158,10 @@ namespace llvm { /// vector to a GPR. MMX_MOVD2W, + /// MMX_MOVW2D - Copies a GPR into the low 32-bit word of a MMX vector + /// and zero out the high word. + MMX_MOVW2D, + /// PEXTRB - Extract an 8-bit value from a vector and zero extend it to /// i32, corresponds to X86::PEXTRB. PEXTRB, diff --git a/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td b/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td index f65292e7602..fd0a4ba28e8 100644 --- a/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td +++ b/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td @@ -18,6 +18,9 @@ // Low word of MMX to GPR. def MMX_X86movd2w : SDNode<"X86ISD::MMX_MOVD2W", SDTypeProfile<1, 1, [SDTCisVT<0, i32>, SDTCisVT<1, x86mmx>]>>; +// GPR to low word of MMX. +def MMX_X86movw2d : SDNode<"X86ISD::MMX_MOVW2D", SDTypeProfile<1, 1, + [SDTCisVT<0, x86mmx>, SDTCisVT<1, i32>]>>; //===----------------------------------------------------------------------===// // MMX Pattern Fragments diff --git a/llvm/lib/Target/X86/X86InstrMMX.td b/llvm/lib/Target/X86/X86InstrMMX.td index b5262c5cdac..fbec0625418 100644 --- a/llvm/lib/Target/X86/X86InstrMMX.td +++ b/llvm/lib/Target/X86/X86InstrMMX.td @@ -229,6 +229,16 @@ def MMX_MOVD64rm : MMXI<0x6E, MRMSrcMem, (outs VR64:$dst), (ins i32mem:$src), [(set VR64:$dst, (x86mmx (scalar_to_vector (loadi32 addr:$src))))], IIC_MMX_MOV_MM_RM>, Sched<[WriteLoad]>; + +let Predicates = [HasMMX] in { + let AddedComplexity = 15 in + def : Pat<(x86mmx (MMX_X86movw2d GR32:$src)), + (MMX_MOVD64rr GR32:$src)>; + let AddedComplexity = 20 in + def : Pat<(x86mmx (MMX_X86movw2d (loadi32 addr:$src))), + (MMX_MOVD64rm addr:$src)>; +} + let mayStore = 1 in def MMX_MOVD64mr : MMXI<0x7E, MRMDestMem, (outs), (ins i32mem:$dst, VR64:$src), "movd\t{$src, $dst|$dst, $src}", [], IIC_MMX_MOV_MM_RM>, |