summary | refs | log | tree | commit | diff | stats
path: root/llvm
diff options
context:
space:
mode:
author: Matt Arsenault <Matthew.Arsenault@amd.com> 2017-07-15 05:52:59 +0000
committer: Matt Arsenault <Matthew.Arsenault@amd.com> 2017-07-15 05:52:59 +0000
commit: b34635550a2d0fd3cc559023ce39374c5f99f0d5 (patch)
tree: dd69cbb4bf5be328d54804c04409f506c5dbd48c /llvm
parent: 3c64077c8277b87ee1fa242780676e2d99d255c4 (diff)
download: bcm5719-llvm-b34635550a2d0fd3cc559023ce39374c5f99f0d5.tar.gz
download: bcm5719-llvm-b34635550a2d0fd3cc559023ce39374c5f99f0d5.zip
AMDGPU: Return correct type during argument lowering
The type needs to be cast back to the original argument type. Fixes an assert that for some reason is only run when using -debug.

Includes an additional combine to avoid test regressions from having conversions mixed with multiple Assert[SZ]ext nodes. On subtargets where i16 is legal, this was producing an i32 register with an i16 AssertZExt, truncated to i16 with another i8 AssertZExt:

  t2: i32,ch = CopyFromReg t0, Register:i32 %vreg0
  t3: i16 = truncate t2
  t5: i16 = AssertZext t3, ValueType:ch:i8
  t6: i8 = truncate t5
  t7: i32 = zero_extend t6

llvm-svn: 308082
Diffstat (limited to 'llvm')
-rw-r--r-- llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp | 30
-rw-r--r-- llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h | 1
-rw-r--r-- llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 27
-rw-r--r-- llvm/test/CodeGen/AMDGPU/function-args.ll | 16
4 files changed, 74 insertions, 0 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
index 2553cf4da0f..631420589fc 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -573,6 +573,8 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(const TargetMachine &TM,
setTargetDAGCombine(ISD::FSUB);
setTargetDAGCombine(ISD::FNEG);
setTargetDAGCombine(ISD::FABS);
+ setTargetDAGCombine(ISD::AssertZext);
+ setTargetDAGCombine(ISD::AssertSext);
}
//===----------------------------------------------------------------------===//
@@ -2591,6 +2593,31 @@ SDValue AMDGPUTargetLowering::performClampCombine(SDNode *N,
return SDValue(CSrc, 0);
}
+// Rewrites Assert[SZ]ext-of-TRUNCATE as TRUNCATE-of-Assert[SZ]ext on the truncate's source.
+// FIXME: This should go in generic DAG combiner with an isTruncateFree check, but
+// isTruncateFree is inaccurate for i16 now because of SALU vs. VALU issues.
+SDValue AMDGPUTargetLowering::performAssertSZExtCombine(SDNode *N,
+ DAGCombinerInfo &DCI) const {
+ SelectionDAG &DAG = DCI.DAG;
+ SDValue N0 = N->getOperand(0); // Value the assert node is applied to.
+
+ // (vt2 (assert[sz]ext (truncate vt0:x), vt1)) ->
+ // (vt2 (truncate (assert[sz]ext vt0:x, vt1))); N's own opcode is reused.
+ if (N0.getOpcode() == ISD::TRUNCATE) {
+ SDValue N1 = N->getOperand(1); // VTSDNode operand carrying the asserted type vt1.
+ EVT ExtVT = cast<VTSDNode>(N1)->getVT();
+ SDLoc SL(N);
+
+ SDValue Src = N0.getOperand(0); // The untruncated value vt0:x.
+ EVT SrcVT = Src.getValueType();
+ if (SrcVT.bitsGE(ExtVT)) { // Only when the source type is at least as wide as vt1.
+ SDValue NewInReg = DAG.getNode(N->getOpcode(), SL, SrcVT, Src, N1); // Same assert, now on the source.
+ return DAG.getNode(ISD::TRUNCATE, SL, N->getValueType(0), NewInReg); // Back to N's result type.
+ }
+ }
+
+ return SDValue(); // Pattern not matched: return an empty SDValue.
+}
/// Split the 64-bit value \p LHS into two 32-bit components, and perform the
/// binary operation \p Opc to it with the corresponding constant operands.
SDValue AMDGPUTargetLowering::splitBinaryBitConstantOpImpl(
@@ -3521,6 +3548,9 @@ SDValue AMDGPUTargetLowering::PerformDAGCombine(SDNode *N,
break;
}
+ case ISD::AssertZext:
+ case ISD::AssertSext:
+ return performAssertSZExtCombine(N, DCI);
}
return SDValue();
}
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
index a45234e2b39..d85aada6053 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
@@ -76,6 +76,7 @@ protected:
SDValue performLoadCombine(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue performStoreCombine(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue performClampCombine(SDNode *N, DAGCombinerInfo &DCI) const;
+ SDValue performAssertSZExtCombine(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue splitBinaryBitConstantOpImpl(DAGCombinerInfo &DCI, const SDLoc &SL,
unsigned Opc, SDValue LHS,
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index af4a2a9c679..36d4732be6d 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -1380,10 +1380,37 @@ SDValue SITargetLowering::LowerFormalArguments(
unsigned Reg = VA.getLocReg();
const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg, VT);
+ EVT ValVT = VA.getValVT();
Reg = MF.addLiveIn(Reg, RC);
SDValue Val = DAG.getCopyFromReg(Chain, DL, Reg, VT);
+ // If this is an 8 or 16-bit value, it is really passed promoted
+ // to 32 bits. Insert an assert[sz]ext to capture this, then
+ // truncate to the right size.
+ switch (VA.getLocInfo()) {
+ case CCValAssign::Full:
+ break;
+ case CCValAssign::BCvt:
+ Val = DAG.getNode(ISD::BITCAST, DL, ValVT, Val);
+ break;
+ case CCValAssign::SExt:
+ Val = DAG.getNode(ISD::AssertSext, DL, VT, Val,
+ DAG.getValueType(ValVT));
+ Val = DAG.getNode(ISD::TRUNCATE, DL, ValVT, Val);
+ break;
+ case CCValAssign::ZExt:
+ Val = DAG.getNode(ISD::AssertZext, DL, VT, Val,
+ DAG.getValueType(ValVT));
+ Val = DAG.getNode(ISD::TRUNCATE, DL, ValVT, Val);
+ break;
+ case CCValAssign::AExt:
+ Val = DAG.getNode(ISD::TRUNCATE, DL, ValVT, Val);
+ break;
+ default:
+ llvm_unreachable("Unknown loc info!");
+ }
+
if (IsShader && Arg.VT.isVector()) {
// Build a vector from the registers
Type *ParamType = FType->getParamType(Arg.getOrigArgIndex());
diff --git a/llvm/test/CodeGen/AMDGPU/function-args.ll b/llvm/test/CodeGen/AMDGPU/function-args.ll
index 9b1368493ba..6b22cb0b7e2 100644
--- a/llvm/test/CodeGen/AMDGPU/function-args.ll
+++ b/llvm/test/CodeGen/AMDGPU/function-args.ll
@@ -34,6 +34,22 @@ define void @void_func_i1_signext(i1 signext %arg0) #0 {
ret void
}
+; GCN-LABEL: {{^}}i1_arg_i1_use:
+; GCN: v_and_b32_e32 v0, 1, v0
+; GCN: v_cmp_eq_u32_e32 vcc, 1, v0
+; GCN: s_xor_b64 s{{\[[0-9]+:[0-9]+\]}}, vcc, -1
+define void @i1_arg_i1_use(i1 %arg) #0 {
+bb:
+ br i1 %arg, label %bb2, label %bb1
+
+bb1:
+ store volatile i32 0, i32 addrspace(1)* undef
+ br label %bb2
+
+bb2:
+ ret void
+}
+
; GCN-LABEL: {{^}}void_func_i8:
; GCN-NOT: v0
; GCN: buffer_store_byte v0, off
OpenPOWER on IntegriCloud