diff options
author | Matt Arsenault <Matthew.Arsenault@amd.com> | 2016-04-12 18:24:38 +0000 |
---|---|---|
committer | Matt Arsenault <Matthew.Arsenault@amd.com> | 2016-04-12 18:24:38 +0000 |
commit | 3b08238f7878988a8e2737cdab42fcc334c8547f (patch) | |
tree | 207ef8f31d957df8494a1546b119aadb761263e6 /llvm/lib/Target | |
parent | 15d1b4e2aab9159a8cc56b49eb68f1a82e4ebe90 (diff) | |
download | bcm5719-llvm-3b08238f7878988a8e2737cdab42fcc334c8547f.tar.gz bcm5719-llvm-3b08238f7878988a8e2737cdab42fcc334c8547f.zip |
AMDGPU: Eliminate half of i64 or if one operand is zero_extend from i32
This helps clean up some of the mess when expanding unaligned 64-bit
loads once they are changed to be promoted to v2i32, and fixes situations
where `or x, 0` was emitted after splitting 64-bit ors during moveToVALU.
I think this could be a generic combine, but I'm not sure.
llvm-svn: 266104
Diffstat (limited to 'llvm/lib/Target')
-rw-r--r-- | llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 30 |
1 files changed, 30 insertions, 0 deletions
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp index cb490921395..64975cf1809 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -2332,6 +2332,36 @@ SDValue SITargetLowering::performOrCombine(SDNode *N, SDValue LHS = N->getOperand(0); SDValue RHS = N->getOperand(1); + EVT VT = N->getValueType(0); + if (VT == MVT::i64) { + // TODO: This could be a generic combine with a predicate for extracting the + // high half of an integer being free. + + // (or i64:x, (zero_extend i32:y)) -> + // i64 (bitcast (v2i32 build_vector (or i32:y, lo_32(x)), hi_32(x))) + if (LHS.getOpcode() == ISD::ZERO_EXTEND && + RHS.getOpcode() != ISD::ZERO_EXTEND) + std::swap(LHS, RHS); + + if (RHS.getOpcode() == ISD::ZERO_EXTEND) { + SDValue ExtSrc = RHS.getOperand(0); + EVT SrcVT = ExtSrc.getValueType(); + if (SrcVT == MVT::i32) { + SDLoc SL(N); + SDValue LowLHS, HiBits; + std::tie(LowLHS, HiBits) = split64BitValue(LHS, DAG); + SDValue LowOr = DAG.getNode(ISD::OR, SL, MVT::i32, LowLHS, ExtSrc); + + DCI.AddToWorklist(LowOr.getNode()); + DCI.AddToWorklist(HiBits.getNode()); + + SDValue Vec = DAG.getNode(ISD::BUILD_VECTOR, SL, MVT::v2i32, + LowOr, HiBits); + return DAG.getNode(ISD::BITCAST, SL, MVT::i64, Vec); + } + } + } + // or (fp_class x, c1), (fp_class x, c2) -> fp_class x, (c1 | c2) if (LHS.getOpcode() == AMDGPUISD::FP_CLASS && RHS.getOpcode() == AMDGPUISD::FP_CLASS) { |