diff options
| author | Matt Arsenault <Matthew.Arsenault@amd.com> | 2014-06-11 17:40:32 +0000 |
|---|---|---|
| committer | Matt Arsenault <Matthew.Arsenault@amd.com> | 2014-06-11 17:40:32 +0000 |
| commit | 064c206d235b1db75809759f4902091268c174be (patch) | |
| tree | 7aa961ccd731099a8fe6bd7239ff22b8652c6f69 /llvm/lib/Target | |
| parent | 4dc9eaa6ba9c7bfa181a7355131e7be93afebb2b (diff) | |
| download | bcm5719-llvm-064c206d235b1db75809759f4902091268c174be.tar.gz bcm5719-llvm-064c206d235b1db75809759f4902091268c174be.zip | |
R600/SI: Fix selection failure on scalar_to_vector
There seem to be only 2 places that produce these,
and it's kind of tricky to hit them.
Also fixes failure to bitcast between i64 and v2f32,
although this for some reason wasn't actually broken in the
simple bitcast testcase, but did in the scalar_to_vector one.
llvm-svn: 210664
Diffstat (limited to 'llvm/lib/Target')
| -rw-r--r-- | llvm/lib/Target/R600/AMDGPUISelDAGToDAG.cpp | 26 | ||||
| -rw-r--r-- | llvm/lib/Target/R600/SIInstructions.td | 3 |
2 files changed, 23 insertions, 6 deletions
diff --git a/llvm/lib/Target/R600/AMDGPUISelDAGToDAG.cpp b/llvm/lib/Target/R600/AMDGPUISelDAGToDAG.cpp index f1f0bfa89ac..381fa8eb94f 100644 --- a/llvm/lib/Target/R600/AMDGPUISelDAGToDAG.cpp +++ b/llvm/lib/Target/R600/AMDGPUISelDAGToDAG.cpp @@ -256,6 +256,7 @@ SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) { }; return CurDAG->SelectNodeTo(N, AMDGPU::REG_SEQUENCE, MVT::i64, Args); } + case ISD::SCALAR_TO_VECTOR: case ISD::BUILD_VECTOR: { unsigned RegClassID; const AMDGPURegisterInfo *TRI = @@ -264,7 +265,8 @@ SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) { static_cast<const SIRegisterInfo*>(TM.getRegisterInfo()); EVT VT = N->getValueType(0); unsigned NumVectorElts = VT.getVectorNumElements(); - assert(VT.getVectorElementType().bitsEq(MVT::i32)); + EVT EltVT = VT.getVectorElementType(); + assert(EltVT.bitsEq(MVT::i32)); if (ST.getGeneration() >= AMDGPUSubtarget::SOUTHERN_ISLANDS) { bool UseVReg = true; for (SDNode::use_iterator U = N->use_begin(), E = SDNode::use_end(); @@ -313,8 +315,7 @@ SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) { SDValue RegClass = CurDAG->getTargetConstant(RegClassID, MVT::i32); if (NumVectorElts == 1) { - return CurDAG->SelectNodeTo(N, AMDGPU::COPY_TO_REGCLASS, - VT.getVectorElementType(), + return CurDAG->SelectNodeTo(N, AMDGPU::COPY_TO_REGCLASS, EltVT, N->getOperand(0), RegClass); } @@ -323,11 +324,12 @@ SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) { // 16 = Max Num Vector Elements // 2 = 2 REG_SEQUENCE operands per element (value, subreg index) // 1 = Vector Register Class - SmallVector<SDValue, 16 * 2 + 1> RegSeqArgs(N->getNumOperands() * 2 + 1); + SmallVector<SDValue, 16 * 2 + 1> RegSeqArgs(NumVectorElts * 2 + 1); RegSeqArgs[0] = CurDAG->getTargetConstant(RegClassID, MVT::i32); bool IsRegSeq = true; - for (unsigned i = 0; i < N->getNumOperands(); i++) { + unsigned NOps = N->getNumOperands(); + for (unsigned i = 0; i < NOps; i++) { // XXX: Why is this here? if (dyn_cast<RegisterSDNode>(N->getOperand(i))) { IsRegSeq = false; @@ -337,6 +339,20 @@ SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) { RegSeqArgs[1 + (2 * i) + 1] = CurDAG->getTargetConstant(TRI->getSubRegFromChannel(i), MVT::i32); } + + if (NOps != NumVectorElts) { + // Fill in the missing undef elements if this was a scalar_to_vector. + assert(Opc == ISD::SCALAR_TO_VECTOR && NOps < NumVectorElts); + + MachineSDNode *ImpDef = CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, + SDLoc(N), EltVT); + for (unsigned i = NOps; i < NumVectorElts; ++i) { + RegSeqArgs[1 + (2 * i)] = SDValue(ImpDef, 0); + RegSeqArgs[1 + (2 * i) + 1] = + CurDAG->getTargetConstant(TRI->getSubRegFromChannel(i), MVT::i32); + } + } + if (!IsRegSeq) break; return CurDAG->SelectNodeTo(N, AMDGPU::REG_SEQUENCE, N->getVTList(), diff --git a/llvm/lib/Target/R600/SIInstructions.td b/llvm/lib/Target/R600/SIInstructions.td index 475f28e699b..7d37d105672 100644 --- a/llvm/lib/Target/R600/SIInstructions.td +++ b/llvm/lib/Target/R600/SIInstructions.td @@ -1874,7 +1874,8 @@ def : BitConvert <v2f32, v2i32, VReg_64>; def : BitConvert <v2i32, v2f32, VReg_64>; def : BitConvert <v2i32, i64, VReg_64>; def : BitConvert <i64, v2i32, VReg_64>; - +def : BitConvert <v2f32, i64, VReg_64>; +def : BitConvert <i64, v2f32, VReg_64>; def : BitConvert <v4f32, v4i32, VReg_128>; def : BitConvert <v4i32, v4f32, VReg_128>; |

