summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--llvm/lib/Target/X86/X86ISelLowering.cpp46
-rw-r--r--llvm/test/CodeGen/X86/vector-shuffle-combining-ssse3.ll10
-rw-r--r--llvm/test/CodeGen/X86/vector-shuffle-combining-xop.ll6
3 files changed, 29 insertions, 33 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 2ec27bde222..966af04275c 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -4678,7 +4678,22 @@ static bool getTargetShuffleMaskIndices(SDValue MaskNode,
MVT VT = MaskNode.getSimpleValueType();
assert(VT.isVector() && "Can't produce a non-vector with a build_vector!");
+ // Split an APInt element into MaskEltSizeInBits sized pieces and
+ // insert into the shuffle mask.
+ auto SplitElementToMask = [&](APInt Element) {
+ // Note that this is x86 and so always little endian: the low byte is
+ // the first byte of the mask.
+ int Split = VT.getScalarSizeInBits() / MaskEltSizeInBits;
+ for (int i = 0; i < Split; ++i) {
+ APInt RawElt = Element.getLoBits(MaskEltSizeInBits);
+ Element = Element.lshr(MaskEltSizeInBits);
+ RawMask.push_back(RawElt.getZExtValue());
+ }
+ };
+
if (MaskNode.getOpcode() == X86ISD::VBROADCAST) {
+ // TODO: Handle (MaskEltSizeInBits % VT.getScalarSizeInBits()) == 0
+ // TODO: Handle (VT.getScalarSizeInBits() % MaskEltSizeInBits) == 0
if (VT.getScalarSizeInBits() != MaskEltSizeInBits)
return false;
if (auto *CN = dyn_cast<ConstantSDNode>(MaskNode.getOperand(0))) {
@@ -4693,13 +4708,16 @@ static bool getTargetShuffleMaskIndices(SDValue MaskNode,
if (MaskNode.getOpcode() == X86ISD::VZEXT_MOVL &&
MaskNode.getOperand(0).getOpcode() == ISD::SCALAR_TO_VECTOR) {
- if (VT.getScalarSizeInBits() != MaskEltSizeInBits)
+
+ // TODO: Handle (MaskEltSizeInBits % VT.getScalarSizeInBits()) == 0
+ if ((VT.getScalarSizeInBits() % MaskEltSizeInBits) != 0)
return false;
- SDValue MaskElement = MaskNode.getOperand(0).getOperand(0);
- if (auto *CN = dyn_cast<ConstantSDNode>(MaskElement)) {
- APInt RawElt = CN->getAPIntValue().getLoBits(MaskEltSizeInBits);
- RawMask.push_back(RawElt.getZExtValue());
- RawMask.append(VT.getVectorNumElements() - 1, 0);
+ unsigned ElementSplit = VT.getScalarSizeInBits() / MaskEltSizeInBits;
+
+ SDValue MaskOp = MaskNode.getOperand(0).getOperand(0);
+ if (auto *CN = dyn_cast<ConstantSDNode>(MaskOp)) {
+ SplitElementToMask(CN->getAPIntValue());
+ RawMask.append((VT.getVectorNumElements() - 1) * ElementSplit, 0);
return true;
}
return false;
@@ -4711,7 +4729,6 @@ static bool getTargetShuffleMaskIndices(SDValue MaskNode,
// TODO: Handle (MaskEltSizeInBits % VT.getScalarSizeInBits()) == 0
if ((VT.getScalarSizeInBits() % MaskEltSizeInBits) != 0)
return false;
- unsigned ElementSplit = VT.getScalarSizeInBits() / MaskEltSizeInBits;
for (int i = 0, e = MaskNode.getNumOperands(); i < e; ++i) {
SDValue Op = MaskNode.getOperand(i);
@@ -4720,23 +4737,12 @@ static bool getTargetShuffleMaskIndices(SDValue MaskNode,
continue;
}
- APInt MaskElement;
if (auto *CN = dyn_cast<ConstantSDNode>(Op.getNode()))
- MaskElement = CN->getAPIntValue();
+ SplitElementToMask(CN->getAPIntValue());
else if (auto *CFN = dyn_cast<ConstantFPSDNode>(Op.getNode()))
- MaskElement = CFN->getValueAPF().bitcastToAPInt();
+ SplitElementToMask(CFN->getValueAPF().bitcastToAPInt());
else
return false;
-
- // We now have to decode the element which could be any integer size and
- // extract each byte of it.
- for (unsigned j = 0; j < ElementSplit; ++j) {
- // Note that this is x86 and so always little endian: the low byte is
- // the first byte of the mask.
- APInt RawElt = MaskElement.getLoBits(MaskEltSizeInBits);
- RawMask.push_back(RawElt.getZExtValue());
- MaskElement = MaskElement.lshr(MaskEltSizeInBits);
- }
}
return true;
diff --git a/llvm/test/CodeGen/X86/vector-shuffle-combining-ssse3.ll b/llvm/test/CodeGen/X86/vector-shuffle-combining-ssse3.ll
index cee102cb6c8..0f1cae42d10 100644
--- a/llvm/test/CodeGen/X86/vector-shuffle-combining-ssse3.ll
+++ b/llvm/test/CodeGen/X86/vector-shuffle-combining-ssse3.ll
@@ -12,18 +12,12 @@ declare <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8>, <16 x i8>)
define <16 x i8> @combine_vpshufb_zero(<16 x i8> %a0) {
; SSE-LABEL: combine_vpshufb_zero:
; SSE: # BB#0:
-; SSE-NEXT: movl $128, %eax
-; SSE-NEXT: movd %eax, %xmm1
-; SSE-NEXT: pshufb %xmm1, %xmm0
-; SSE-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; SSE-NEXT: xorps %xmm0, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: combine_vpshufb_zero:
; AVX: # BB#0:
-; AVX-NEXT: movl $128, %eax
-; AVX-NEXT: vmovd %eax, %xmm1
-; AVX-NEXT: vpshufb %xmm1, %xmm0, %xmm0
-; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; AVX-NEXT: vxorps %xmm0, %xmm0, %xmm0
; AVX-NEXT: retq
%res0 = call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %a0, <16 x i8> <i8 128, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>)
%res1 = call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %res0, <16 x i8> <i8 0, i8 128, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>)
diff --git a/llvm/test/CodeGen/X86/vector-shuffle-combining-xop.ll b/llvm/test/CodeGen/X86/vector-shuffle-combining-xop.ll
index a6376dc48cf..0fba8319b59 100644
--- a/llvm/test/CodeGen/X86/vector-shuffle-combining-xop.ll
+++ b/llvm/test/CodeGen/X86/vector-shuffle-combining-xop.ll
@@ -23,11 +23,7 @@ define <16 x i8> @combine_vpperm_identity(<16 x i8> %a0, <16 x i8> %a1) {
define <16 x i8> @combine_vpperm_zero(<16 x i8> %a0, <16 x i8> %a1) {
; CHECK-LABEL: combine_vpperm_zero:
; CHECK: # BB#0:
-; CHECK-NEXT: movl $128, %eax
-; CHECK-NEXT: vmovd %eax, %xmm2
-; CHECK-NEXT: vpperm %xmm2, %xmm1, %xmm0, %xmm0
-; CHECK-NEXT: vpperm {{.*#+}} xmm0 = xmm0[0],zero,xmm0[0,0,0,0,0,0,0,0,0,0,0,0,0,0]
-; CHECK-NEXT: vpperm {{.*#+}} xmm0 = xmm0[0,1],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; CHECK-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; CHECK-NEXT: retq
%res0 = call <16 x i8> @llvm.x86.xop.vpperm(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> <i8 128, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>)
%res1 = call <16 x i8> @llvm.x86.xop.vpperm(<16 x i8> %res0, <16 x i8> undef, <16 x i8> <i8 0, i8 128, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>)
OpenPOWER on IntegriCloud