summaryrefslogtreecommitdiffstats
path: root/llvm/lib/Target/X86/X86ISelLowering.cpp
diff options
context:
space:
mode:
authorChandler Carruth <chandlerc@gmail.com>2014-09-28 23:23:55 +0000
committerChandler Carruth <chandlerc@gmail.com>2014-09-28 23:23:55 +0000
commitabe742e8fb7238b902aa8a97f382f5d601c5064d (patch)
tree41411894b3d048d3d2fe2cb30cb3c86fd99d1ab7 /llvm/lib/Target/X86/X86ISelLowering.cpp
parentf8a678d2fdc280b46c7be0422f84abafc345b842 (diff)
downloadbcm5719-llvm-abe742e8fb7238b902aa8a97f382f5d601c5064d.tar.gz
bcm5719-llvm-abe742e8fb7238b902aa8a97f382f5d601c5064d.zip
[x86] Fix the new vector shuffle lowering's use of VSELECT for AVX2
lowerings. This was hopelessly broken. First, the x86 backend wants '-1' to be the element value representing true in a boolean vector, and second the operand order for VSELECT is backwards from the actual x86 instructions. To make matters worse, the backend is just using '-1' as the true value to get the high bit to be set. It doesn't actually symbolically map the '-1' to anything. But on x86 this isn't quite how it works: there, *only* the high bit is relevant. As a consequence, weird non-'-1' values like 0x80 actually "work" once you flip the operands to be backwards. Anyway, thanks to Hal for helping me sort out what these *should* be. llvm-svn: 218582
Diffstat (limited to 'llvm/lib/Target/X86/X86ISelLowering.cpp')
-rw-r--r--llvm/lib/Target/X86/X86ISelLowering.cpp20
1 file changed, 16 insertions, 4 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index c74a84665fb..0cda9103337 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -7379,22 +7379,34 @@ static SDValue lowerVectorShuffleAsBlend(SDLoc DL, MVT VT, SDValue V1,
// FALLTHROUGH
case MVT::v32i8: {
assert(Subtarget->hasAVX2() && "256-bit integer blends require AVX2!");
- SDValue PBLENDVMask[32];
// Scale the blend by the number of bytes per element.
int Scale = VT.getScalarSizeInBits() / 8;
assert(Mask.size() * Scale == 32 && "Not a 256-bit vector!");
+
+ // Compute the VSELECT mask. Note that VSELECT is really confusing in the
+ // mix of LLVM's code generator and the x86 backend. We tell the code
+ // generator that boolean values in the elements of an x86 vector register
+ // are -1 for true and 0 for false. We then use the LLVM semantics of 'true'
+ // mapping a select to operand #1, and 'false' mapping to operand #2. The
+ // reality in x86 is that vector masks (pre-AVX-512) use only the high bit
+ // of the element (the remaining are ignored) and 0 in that high bit would
+ // mean operand #1 while 1 in the high bit would mean operand #2. So while
+ // the LLVM model for boolean values in vector elements gets the relevant
+ // bit set, it is set backwards and over constrained relative to x86's
+ // actual model.
+ SDValue VSELECTMask[32];
for (int i = 0, Size = Mask.size(); i < Size; ++i)
for (int j = 0; j < Scale; ++j)
- PBLENDVMask[Scale * i + j] =
+ VSELECTMask[Scale * i + j] =
Mask[i] < 0 ? DAG.getUNDEF(MVT::i8)
- : DAG.getConstant(Mask[i] < Size ? 0 : 0x80, MVT::i8);
+ : DAG.getConstant(Mask[i] < Size ? -1 : 0, MVT::i8);
V1 = DAG.getNode(ISD::BITCAST, DL, MVT::v32i8, V1);
V2 = DAG.getNode(ISD::BITCAST, DL, MVT::v32i8, V2);
return DAG.getNode(
ISD::BITCAST, DL, VT,
DAG.getNode(ISD::VSELECT, DL, MVT::v32i8,
- DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v32i8, PBLENDVMask),
+ DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v32i8, VSELECTMask),
V1, V2));
}
OpenPOWER on IntegriCloud