diff options
| author | Simon Pilgrim <llvm-dev@redking.me.uk> | 2016-06-11 12:54:37 +0000 |
|---|---|---|
| committer | Simon Pilgrim <llvm-dev@redking.me.uk> | 2016-06-11 12:54:37 +0000 |
| commit | 255fdd0666665ee02dd4a621b9bda04f0d00c209 (patch) | |
| tree | b04dc7e896023c5703619ae8c8c9db6947b49bd5 | |
| parent | 7cc9263ec2fe37a2c673eb02254bb3124036273e (diff) | |
| download | bcm5719-llvm-255fdd0666665ee02dd4a621b9bda04f0d00c209.tar.gz bcm5719-llvm-255fdd0666665ee02dd4a621b9bda04f0d00c209.zip | |
[X86][SSE] Use vXi8 return type for PSLLDQ/PSRLDQ instructions
These are byte shift instructions, and shuffle combining becomes a lot more straightforward if we can assume a vXi8 vector of bytes, so that decoded shuffle masks match the return type's number of elements.
llvm-svn: 272468
| -rw-r--r-- | llvm/lib/Target/X86/X86ISelLowering.cpp | 21 | ||||
| -rw-r--r-- | llvm/lib/Target/X86/X86InstrAVX512.td | 9 | ||||
| -rw-r--r-- | llvm/lib/Target/X86/X86InstrSSE.td | 12 |
3 files changed, 22 insertions, 20 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index b360cb8ccba..1356d278d6c 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -5444,7 +5444,7 @@ static SDValue getVShift(bool isLeft, EVT VT, SDValue SrcOp, unsigned NumBits, SelectionDAG &DAG, const TargetLowering &TLI, SDLoc dl) { assert(VT.is128BitVector() && "Unknown type for VShift"); - MVT ShVT = MVT::v2i64; + MVT ShVT = MVT::v16i8; unsigned Opc = isLeft ? X86ISD::VSHLDQ : X86ISD::VSRLDQ; SrcOp = DAG.getBitcast(ShVT, SrcOp); MVT ScalarShiftTy = TLI.getScalarShiftAmountTy(DAG.getDataLayout(), VT); @@ -7731,16 +7731,16 @@ static SDValue lowerVectorShuffleAsByteRotate(SDLoc DL, MVT VT, SDValue V1, int LoByteShift = 16 - Rotation * Scale; int HiByteShift = Rotation * Scale; - // Cast the inputs to v2i64 to match PSLLDQ/PSRLDQ. - Lo = DAG.getBitcast(MVT::v2i64, Lo); - Hi = DAG.getBitcast(MVT::v2i64, Hi); + // Cast the inputs to v16i8 to match PSLLDQ/PSRLDQ. + Lo = DAG.getBitcast(MVT::v16i8, Lo); + Hi = DAG.getBitcast(MVT::v16i8, Hi); - SDValue LoShift = DAG.getNode(X86ISD::VSHLDQ, DL, MVT::v2i64, Lo, + SDValue LoShift = DAG.getNode(X86ISD::VSHLDQ, DL, MVT::v16i8, Lo, DAG.getConstant(LoByteShift, DL, MVT::i8)); - SDValue HiShift = DAG.getNode(X86ISD::VSRLDQ, DL, MVT::v2i64, Hi, + SDValue HiShift = DAG.getNode(X86ISD::VSRLDQ, DL, MVT::v16i8, Hi, DAG.getConstant(HiByteShift, DL, MVT::i8)); return DAG.getBitcast(VT, - DAG.getNode(ISD::OR, DL, MVT::v2i64, LoShift, HiShift)); + DAG.getNode(ISD::OR, DL, MVT::v16i8, LoShift, HiShift)); } /// \brief Try to lower a vector shuffle as a bit shift (shifts in zeros). @@ -7806,7 +7806,8 @@ static SDValue lowerVectorShuffleAsShift(SDLoc DL, MVT VT, SDValue V1, // We need to round trip through the appropriate type for the shift. MVT ShiftSVT = MVT::getIntegerVT(VT.getScalarSizeInBits() * Scale); - MVT ShiftVT = MVT::getVectorVT(ShiftSVT, Size / Scale); + MVT ShiftVT = ByteShift ? 
MVT::getVectorVT(MVT::i8, VT.getSizeInBits() / 8) + : MVT::getVectorVT(ShiftSVT, Size / Scale); assert(DAG.getTargetLoweringInfo().isTypeLegal(ShiftVT) && "Illegal integer vector type"); V = DAG.getBitcast(ShiftVT, V); @@ -8382,9 +8383,9 @@ static SDValue lowerVectorShuffleAsElementInsertion( V2Shuffle[V2Index] = 0; V2 = DAG.getVectorShuffle(VT, DL, V2, DAG.getUNDEF(VT), V2Shuffle); } else { - V2 = DAG.getBitcast(MVT::v2i64, V2); + V2 = DAG.getBitcast(MVT::v16i8, V2); V2 = DAG.getNode( - X86ISD::VSHLDQ, DL, MVT::v2i64, V2, + X86ISD::VSHLDQ, DL, MVT::v16i8, V2, DAG.getConstant(V2Index * EltVT.getSizeInBits() / 8, DL, DAG.getTargetLoweringInfo().getScalarShiftAmountTy( DAG.getDataLayout(), VT))); diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td index 83bfa36cfb4..610cbf0c37a 100644 --- a/llvm/lib/Target/X86/X86InstrAVX512.td +++ b/llvm/lib/Target/X86/X86InstrAVX512.td @@ -7561,19 +7561,20 @@ multiclass avx512_shift_packed<bits<8> opc, SDNode OpNode, Format MRMr, (outs _.RC:$dst), (ins _.MemOp:$src1, u8imm:$src2), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), [(set _.RC:$dst,(_.VT (OpNode - (_.LdFrag addr:$src1), (i8 imm:$src2))))]>; + (_.VT (bitconvert (_.LdFrag addr:$src1))), + (i8 imm:$src2))))]>; } multiclass avx512_shift_packed_all<bits<8> opc, SDNode OpNode, Format MRMr, Format MRMm, string OpcodeStr, Predicate prd>{ let Predicates = [prd] in defm Z512 : avx512_shift_packed<opc, OpNode, MRMr, MRMm, - OpcodeStr, v8i64_info>, EVEX_V512; + OpcodeStr, v64i8_info>, EVEX_V512; let Predicates = [prd, HasVLX] in { defm Z256 : avx512_shift_packed<opc, OpNode, MRMr, MRMm, - OpcodeStr, v4i64x_info>, EVEX_V256; + OpcodeStr, v32i8x_info>, EVEX_V256; defm Z128 : avx512_shift_packed<opc, OpNode, MRMr, MRMm, - OpcodeStr, v2i64x_info>, EVEX_V128; + OpcodeStr, v16i8x_info>, EVEX_V128; } } defm VPSLLDQ : avx512_shift_packed_all<0x73, X86vshldq, MRM7r, MRM7m, "vpslldq", diff --git a/llvm/lib/Target/X86/X86InstrSSE.td 
b/llvm/lib/Target/X86/X86InstrSSE.td index 3f1f4e71349..d588fba1dad 100644 --- a/llvm/lib/Target/X86/X86InstrSSE.td +++ b/llvm/lib/Target/X86/X86InstrSSE.td @@ -4081,13 +4081,13 @@ let ExeDomain = SSEPackedInt, SchedRW = [WriteVecShift] , (outs VR128:$dst), (ins VR128:$src1, u8imm:$src2), "vpslldq\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set VR128:$dst, - (v2i64 (X86vshldq VR128:$src1, (i8 imm:$src2))))]>, + (v16i8 (X86vshldq VR128:$src1, (i8 imm:$src2))))]>, VEX_4V; def VPSRLDQri : PDIi8<0x73, MRM3r, (outs VR128:$dst), (ins VR128:$src1, u8imm:$src2), "vpsrldq\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set VR128:$dst, - (v2i64 (X86vshrdq VR128:$src1, (i8 imm:$src2))))]>, + (v16i8 (X86vshrdq VR128:$src1, (i8 imm:$src2))))]>, VEX_4V; // PSRADQri doesn't exist in SSE[1-3]. } // Predicates = [HasAVX, NoVLX_Or_NoBWI] @@ -4131,13 +4131,13 @@ let ExeDomain = SSEPackedInt, SchedRW = [WriteVecShift], hasSideEffects = 0 , (outs VR256:$dst), (ins VR256:$src1, u8imm:$src2), "vpslldq\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set VR256:$dst, - (v4i64 (X86vshldq VR256:$src1, (i8 imm:$src2))))]>, + (v32i8 (X86vshldq VR256:$src1, (i8 imm:$src2))))]>, VEX_4V, VEX_L; def VPSRLDQYri : PDIi8<0x73, MRM3r, (outs VR256:$dst), (ins VR256:$src1, u8imm:$src2), "vpsrldq\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set VR256:$dst, - (v4i64 (X86vshrdq VR256:$src1, (i8 imm:$src2))))]>, + (v32i8 (X86vshrdq VR256:$src1, (i8 imm:$src2))))]>, VEX_4V, VEX_L; // PSRADQYri doesn't exist in SSE[1-3]. 
} // Predicates = [HasAVX2, NoVLX_Or_NoBWI] @@ -4176,13 +4176,13 @@ let ExeDomain = SSEPackedInt, SchedRW = [WriteVecShift], hasSideEffects = 0 in { (outs VR128:$dst), (ins VR128:$src1, u8imm:$src2), "pslldq\t{$src2, $dst|$dst, $src2}", [(set VR128:$dst, - (v2i64 (X86vshldq VR128:$src1, (i8 imm:$src2))))], + (v16i8 (X86vshldq VR128:$src1, (i8 imm:$src2))))], IIC_SSE_INTSHDQ_P_RI>; def PSRLDQri : PDIi8<0x73, MRM3r, (outs VR128:$dst), (ins VR128:$src1, u8imm:$src2), "psrldq\t{$src2, $dst|$dst, $src2}", [(set VR128:$dst, - (v2i64 (X86vshrdq VR128:$src1, (i8 imm:$src2))))], + (v16i8 (X86vshrdq VR128:$src1, (i8 imm:$src2))))], IIC_SSE_INTSHDQ_P_RI>; // PSRADQri doesn't exist in SSE[1-3]. } |

