diff options
author | Matt Arsenault <Matthew.Arsenault@amd.com> | 2016-03-01 18:01:37 +0000 |
---|---|---|
committer | Matt Arsenault <Matthew.Arsenault@amd.com> | 2016-03-01 18:01:37 +0000 |
commit | 03dac8d8e48c2acd3517c028ebbbdeea0628a706 (patch) | |
tree | ae32e6ef19d67b295465178f615f02ffe67bae73 | |
parent | e55c1658ea4b2945942ab3d3646258101c75e2e2 (diff) | |
download | bcm5719-llvm-03dac8d8e48c2acd3517c028ebbbdeea0628a706.tar.gz bcm5719-llvm-03dac8d8e48c2acd3517c028ebbbdeea0628a706.zip |
DAGCombiner: Turn extract of bitcasted integer into truncate
This reduces the number of bitcast nodes and generally cleans up the
DAG when bitcasting between integers and vectors everywhere.
llvm-svn: 262358
-rw-r--r-- | llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 8 |
-rw-r--r-- | llvm/lib/Target/X86/X86ISelLowering.cpp | 19 |
-rw-r--r-- | llvm/test/CodeGen/AMDGPU/extractelt-to-trunc.ll | 17 |
3 files changed, 37 insertions, 7 deletions
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 3346ea7fb54..d2d48ea936b 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -12180,6 +12180,14 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) { // converts. } + // extract_vector_elt (v2i32 (bitcast i64:x)), 0 -> i32 (trunc i64:x) + if (ConstEltNo && InVec.getOpcode() == ISD::BITCAST && InVec.hasOneUse() && + ConstEltNo->isNullValue()) { + SDValue BCSrc = InVec.getOperand(0); + if (BCSrc.getValueType().isScalarInteger()) + return DAG.getNode(ISD::TRUNCATE, SDLoc(N), NVT, BCSrc); + } + // Transform: (EXTRACT_VECTOR_ELT( VECTOR_SHUFFLE )) -> EXTRACT_VECTOR_ELT. // We only perform this optimization before the op legalization phase because // we may introduce new vector instructions which are not backed by TD diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 58de94c9ef7..5fa7f73f57c 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -24850,13 +24850,7 @@ static SDValue PerformEXTRACT_VECTOR_ELTCombine(SDNode *N, SelectionDAG &DAG, if (InputVector.getOpcode() == ISD::BITCAST && InputVector.hasOneUse() && N->getValueType(0) == MVT::i32 && InputVector.getValueType() == MVT::v2i32) { - - // The bitcast source is a direct mmx result. SDValue MMXSrc = InputVector.getNode()->getOperand(0); - if (MMXSrc.getValueType() == MVT::x86mmx) - return DAG.getNode(X86ISD::MMX_MOVD2W, SDLoc(InputVector), - N->getValueType(0), - InputVector.getNode()->getOperand(0)); // The mmx is indirect: (i64 extract_elt (v1i64 bitcast (x86mmx ...))). 
if (MMXSrc.getOpcode() == ISD::EXTRACT_VECTOR_ELT && MMXSrc.hasOneUse() && @@ -27940,11 +27934,22 @@ static SDValue combineVectorTruncation(SDNode *N, SelectionDAG &DAG, static SDValue PerformTRUNCATECombine(SDNode *N, SelectionDAG &DAG, const X86Subtarget &Subtarget) { + + SDValue Src = N->getOperand(0); + // Try to detect AVG pattern first. - if (SDValue Avg = detectAVGPattern(N->getOperand(0), N->getValueType(0), DAG, + if (SDValue Avg = detectAVGPattern(Src, N->getValueType(0), DAG, Subtarget, SDLoc(N))) return Avg; + // The bitcast source is a direct mmx result. + // Detect bitcasts between i32 to x86mmx + if (Src.getOpcode() == ISD::BITCAST && N->getValueType(0) == MVT::i32) { + SDValue BCSrc = Src.getOperand(0); + if (BCSrc.getValueType() == MVT::x86mmx) + return DAG.getNode(X86ISD::MMX_MOVD2W, SDLoc(N), MVT::i32, BCSrc); + } + return combineVectorTruncation(N, DAG, Subtarget); } diff --git a/llvm/test/CodeGen/AMDGPU/extractelt-to-trunc.ll b/llvm/test/CodeGen/AMDGPU/extractelt-to-trunc.ll new file mode 100644 index 00000000000..57a7c5254bb --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/extractelt-to-trunc.ll @@ -0,0 +1,17 @@ +; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s + +; Make sure the add and load are reduced to 32-bits even with the +; bitcast to vector. +; GCN-LABEL: {{^}}bitcast_int_to_vector_extract_0: +; GCN-DAG: s_load_dword [[B:s[0-9]+]] +; GCN-DAG: buffer_load_dword [[A:v[0-9]+]] +; GCN: v_add_i32_e32 [[ADD:v[0-9]+]], vcc, [[B]], [[A]] +; GCN: buffer_store_dword [[ADD]] +define void @bitcast_int_to_vector_extract_0(i32 addrspace(1)* %out, i64 addrspace(1)* %in, i64 %b) { + %a = load i64, i64 addrspace(1)* %in + %add = add i64 %a, %b + %val.bc = bitcast i64 %add to <2 x i32> + %extract = extractelement <2 x i32> %val.bc, i32 0 + store i32 %extract, i32 addrspace(1)* %out + ret void +} |