diff options
| author | Matt Arsenault <Matthew.Arsenault@amd.com> | 2017-07-15 05:52:59 +0000 |
|---|---|---|
| committer | Matt Arsenault <Matthew.Arsenault@amd.com> | 2017-07-15 05:52:59 +0000 |
| commit | b34635550a2d0fd3cc559023ce39374c5f99f0d5 (patch) | |
| tree | dd69cbb4bf5be328d54804c04409f506c5dbd48c /llvm | |
| parent | 3c64077c8277b87ee1fa242780676e2d99d255c4 (diff) | |
| download | bcm5719-llvm-b34635550a2d0fd3cc559023ce39374c5f99f0d5.tar.gz bcm5719-llvm-b34635550a2d0fd3cc559023ce39374c5f99f0d5.zip | |
AMDGPU: Return correct type during argument lowering
The type needs to be casted back to the original argument type.
Fixes an assert that for some reason is only run when
using -debug.
Includes an additional combine to avoid test regressions
from having conversions mixed with multiple Assert[SZ]ext
nodes. On subtargets where i16 is legal, this was producing an i32
register with an i16 AssertZExt, truncated to i16 with another i8
AssertZExt.
t2: i32,ch = CopyFromReg t0, Register:i32 %vreg0
t3: i16 = truncate t2
t5: i16 = AssertZext t3, ValueType:ch:i8
t6: i8 = truncate t5
t7: i32 = zero_extend t6
llvm-svn: 308082
Diffstat (limited to 'llvm')
| -rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp | 30 | ||||
| -rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h | 1 | ||||
| -rw-r--r-- | llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 27 | ||||
| -rw-r--r-- | llvm/test/CodeGen/AMDGPU/function-args.ll | 16 |
4 files changed, 74 insertions, 0 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp index 2553cf4da0f..631420589fc 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp @@ -573,6 +573,8 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(const TargetMachine &TM, setTargetDAGCombine(ISD::FSUB); setTargetDAGCombine(ISD::FNEG); setTargetDAGCombine(ISD::FABS); + setTargetDAGCombine(ISD::AssertZext); + setTargetDAGCombine(ISD::AssertSext); } //===----------------------------------------------------------------------===// @@ -2591,6 +2593,31 @@ SDValue AMDGPUTargetLowering::performClampCombine(SDNode *N, return SDValue(CSrc, 0); } +// FIXME: This should go in generic DAG combiner with an isTruncateFree check, +// but isTruncateFree is inaccurate for i16 now because of SALU vs. VALU +// issues. +SDValue AMDGPUTargetLowering::performAssertSZExtCombine(SDNode *N, + DAGCombinerInfo &DCI) const { + SelectionDAG &DAG = DCI.DAG; + SDValue N0 = N->getOperand(0); + + // (vt2 (assertzext (truncate vt0:x), vt1)) -> + // (vt2 (truncate (assertzext vt0:x, vt1))) + if (N0.getOpcode() == ISD::TRUNCATE) { + SDValue N1 = N->getOperand(1); + EVT ExtVT = cast<VTSDNode>(N1)->getVT(); + SDLoc SL(N); + + SDValue Src = N0.getOperand(0); + EVT SrcVT = Src.getValueType(); + if (SrcVT.bitsGE(ExtVT)) { + SDValue NewInReg = DAG.getNode(N->getOpcode(), SL, SrcVT, Src, N1); + return DAG.getNode(ISD::TRUNCATE, SL, N->getValueType(0), NewInReg); + } + } + + return SDValue(); +} /// Split the 64-bit value \p LHS into two 32-bit components, and perform the /// binary operation \p Opc to it with the corresponding constant operands. SDValue AMDGPUTargetLowering::splitBinaryBitConstantOpImpl( @@ -3521,6 +3548,9 @@ SDValue AMDGPUTargetLowering::PerformDAGCombine(SDNode *N, break; } + case ISD::AssertZext: + case ISD::AssertSext: + return performAssertSZExtCombine(N, DCI); } return SDValue(); } diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h index a45234e2b39..d85aada6053 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h @@ -76,6 +76,7 @@ protected: SDValue performLoadCombine(SDNode *N, DAGCombinerInfo &DCI) const; SDValue performStoreCombine(SDNode *N, DAGCombinerInfo &DCI) const; SDValue performClampCombine(SDNode *N, DAGCombinerInfo &DCI) const; + SDValue performAssertSZExtCombine(SDNode *N, DAGCombinerInfo &DCI) const; SDValue splitBinaryBitConstantOpImpl(DAGCombinerInfo &DCI, const SDLoc &SL, unsigned Opc, SDValue LHS, diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp index af4a2a9c679..36d4732be6d 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -1380,10 +1380,37 @@ SDValue SITargetLowering::LowerFormalArguments( unsigned Reg = VA.getLocReg(); const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg, VT); + EVT ValVT = VA.getValVT(); Reg = MF.addLiveIn(Reg, RC); SDValue Val = DAG.getCopyFromReg(Chain, DL, Reg, VT); + // If this is an 8 or 16-bit value, it is really passed promoted + // to 32 bits. Insert an assert[sz]ext to capture this, then + // truncate to the right size. + switch (VA.getLocInfo()) { + case CCValAssign::Full: + break; + case CCValAssign::BCvt: + Val = DAG.getNode(ISD::BITCAST, DL, ValVT, Val); + break; + case CCValAssign::SExt: + Val = DAG.getNode(ISD::AssertSext, DL, VT, Val, + DAG.getValueType(ValVT)); + Val = DAG.getNode(ISD::TRUNCATE, DL, ValVT, Val); + break; + case CCValAssign::ZExt: + Val = DAG.getNode(ISD::AssertZext, DL, VT, Val, + DAG.getValueType(ValVT)); + Val = DAG.getNode(ISD::TRUNCATE, DL, ValVT, Val); + break; + case CCValAssign::AExt: + Val = DAG.getNode(ISD::TRUNCATE, DL, ValVT, Val); + break; + default: + llvm_unreachable("Unknown loc info!"); + } + if (IsShader && Arg.VT.isVector()) { // Build a vector from the registers Type *ParamType = FType->getParamType(Arg.getOrigArgIndex()); diff --git a/llvm/test/CodeGen/AMDGPU/function-args.ll b/llvm/test/CodeGen/AMDGPU/function-args.ll index 9b1368493ba..6b22cb0b7e2 100644 --- a/llvm/test/CodeGen/AMDGPU/function-args.ll +++ b/llvm/test/CodeGen/AMDGPU/function-args.ll @@ -34,6 +34,22 @@ define void @void_func_i1_signext(i1 signext %arg0) #0 { ret void } +; GCN-LABEL: {{^}}i1_arg_i1_use: +; GCN: v_and_b32_e32 v0, 1, v0 +; GCN: v_cmp_eq_u32_e32 vcc, 1, v0 +; GCN: s_xor_b64 s{{\[[0-9]+:[0-9]+\]}}, vcc, -1 +define void @i1_arg_i1_use(i1 %arg) #0 { +bb: + br i1 %arg, label %bb2, label %bb1 + +bb1: + store volatile i32 0, i32 addrspace(1)* undef + br label %bb2 + +bb2: + ret void +} + ; GCN-LABEL: {{^}}void_func_i8: ; GCN-NOT: v0 ; GCN: buffer_store_byte v0, off |

