author    Craig Topper <craig.topper@gmail.com>  2011-11-28 10:14:51 +0000
committer Craig Topper <craig.topper@gmail.com>  2011-11-28 10:14:51 +0000
commit    818a983e93c1847f21744cb0b07f2b32d5acddc9 (patch)
tree      a124bb2e73323acc3fba0414cbee43a481e7d7f6 /llvm/lib
parent    a3932023774c074ddfe8bac108179740a5758fa6 (diff)
Add X86 instruction selection for VPERM2I128 when AVX2 is enabled. Merge VPERMILPS/VPERMILPD detection since they are pretty similar.
llvm-svn: 145238
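As context for the patterns below, here is a minimal standalone model of what both instructions compute, assuming the standard VPERM2F128/VPERM2I128 control-byte layout (bits [1:0] of each nibble select one 128-bit half across the two sources; the per-nibble zeroing bit 3 is omitted). This is an illustrative sketch, not code from this commit:

    // Scalar model of VPERM2F128/VPERM2I128 (not LLVM code): each nibble of
    // the 8-bit immediate selects one 128-bit half of the two source vectors.
    #include <array>
    #include <cstdint>

    using V256 = std::array<uint64_t, 4>; // four 64-bit lanes = 256 bits

    V256 vperm2x128(const V256 &V1, const V256 &V2, uint8_t Imm) {
      auto Half = [&](unsigned Sel) -> std::array<uint64_t, 2> {
        const V256 &Src = (Sel & 2) ? V2 : V1; // selector bit 1: V1 vs V2
        unsigned Base = (Sel & 1) ? 2 : 0;     // selector bit 0: low vs high half
        return {Src[Base], Src[Base + 1]};
      };
      auto Lo = Half(Imm & 0x3);        // low nibble -> result bits [127:0]
      auto Hi = Half((Imm >> 4) & 0x3); // high nibble -> result bits [255:128]
      return {Lo[0], Lo[1], Hi[0], Hi[1]};
    }

Under this model, the example shuffle used in the comments below, vector_shuffle <4, 5, 6, 7, 12, 13, 14, 15>, encodes as Imm = 0x31: V1's high half into the low result half, V2's high half into the high result half.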
Diffstat (limited to 'llvm/lib')
-rw-r--r--  llvm/lib/Target/X86/X86ISelLowering.cpp      | 178
-rw-r--r--  llvm/lib/Target/X86/X86ISelLowering.h        |   1
-rw-r--r--  llvm/lib/Target/X86/X86InstrFragmentsSIMD.td |   1
-rw-r--r--  llvm/lib/Target/X86/X86InstrSSE.td           |  47
4 files changed, 117 insertions, 110 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 6099ba93260..cde20c3cb37 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -2850,6 +2850,7 @@ static bool isTargetShuffle(unsigned Opcode) {
case X86ISD::VPERMILPS:
case X86ISD::VPERMILPD:
case X86ISD::VPERM2F128:
+ case X86ISD::VPERM2I128:
return true;
}
return false;
@@ -2891,6 +2892,7 @@ static SDValue getTargetShuffleNode(unsigned Opc, DebugLoc dl, EVT VT,
case X86ISD::SHUFPD:
case X86ISD::SHUFPS:
case X86ISD::VPERM2F128:
+ case X86ISD::VPERM2I128:
return DAG.getNode(Opc, dl, VT, V1, V2,
DAG.getConstant(TargetMask, MVT::i8));
}
@@ -3283,8 +3285,8 @@ static bool isVSHUFPYMask(const SmallVectorImpl<int> &Mask, EVT VT,
return false;
// For VSHUFPSY, the mask of the second half must be the same as the first
- // but with // the appropriate offsets. This works in the same way as
- // VPERMILPS // works with masks.
+ // but with the appropriate offsets. This works in the same way as
+ // VPERMILPS works with masks.
for (int i = QuarterSize*2; i < QuarterSize*3; ++i) {
if (!isUndefOrInRange(Mask[i], HalfSize, NumElems))
return false;
@@ -3358,8 +3360,8 @@ static bool isCommutedVSHUFPY(ShuffleVectorSDNode *N, bool HasAVX) {
return false;
// For VSHUFPSY, the mask of the second half must be the same as the first
- // but with // the appropriate offsets. This works in the same way as
- // VPERMILPS // works with masks.
+ // but with the appropriate offsets. This works in the same way as
+ // VPERMILPS works with masks.
for (int i = QuarterSize*2; i < QuarterSize*3; ++i) {
if (!isUndefOrInRange(Mask[i], NumElems+HalfSize, NumElems*2))
return false;
@@ -3753,15 +3755,15 @@ bool X86::isMOVLMask(ShuffleVectorSDNode *N) {
return ::isMOVLMask(M, N->getValueType(0));
}
-/// isVPERM2F128Mask - Match 256-bit shuffles where the elements are considered
+/// isVPERM2X128Mask - Match 256-bit shuffles where the elements are considered
/// as permutations between 128-bit chunks or halves. As an example: this
/// shuffle below:
/// vector_shuffle <4, 5, 6, 7, 12, 13, 14, 15>
/// The first half comes from the second half of V1 and the second half from
/// the second half of V2.
-static bool isVPERM2F128Mask(const SmallVectorImpl<int> &Mask, EVT VT,
- const X86Subtarget *Subtarget) {
- if (!Subtarget->hasAVX() || VT.getSizeInBits() != 256)
+static bool isVPERM2X128Mask(const SmallVectorImpl<int> &Mask, EVT VT,
+ bool HasAVX) {
+ if (!HasAVX || VT.getSizeInBits() != 256)
return false;
// The shuffle result is divided into half A and half B. In total the two
@@ -3789,9 +3791,9 @@ static bool isVPERM2F128Mask(const SmallVectorImpl<int> &Mask, EVT VT,
return MatchA && MatchB;
}
-/// getShuffleVPERM2F128Immediate - Return the appropriate immediate to shuffle
-/// the specified VECTOR_MASK mask with VPERM2F128 instructions.
-static unsigned getShuffleVPERM2F128Immediate(SDNode *N) {
+/// getShuffleVPERM2X128Immediate - Return the appropriate immediate to shuffle
+/// the specified VECTOR_SHUFFLE mask with VPERM2F128/VPERM2I128 instructions.
+static unsigned getShuffleVPERM2X128Immediate(SDNode *N) {
ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
EVT VT = SVOp->getValueType(0);
@@ -3814,80 +3816,47 @@ static unsigned getShuffleVPERM2F128Immediate(SDNode *N) {
return (FstHalf | (SndHalf << 4));
}
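A quick hand check of the FstHalf | (SndHalf << 4) encoding against the example mask from the comment above (illustrative arithmetic only, not part of the patch):

    #include <cassert>

    int main() {
      // vector_shuffle <4, 5, 6, 7, 12, 13, 14, 15> on a v8 type:
      // HalfSize = 4, and each result half is summarized by Mask[i] / HalfSize.
      const int HalfSize = 4;
      unsigned FstHalf = 4 / HalfSize;  // 1: low result half reads V1's high half
      unsigned SndHalf = 12 / HalfSize; // 3: high result half reads V2's high half
      assert((FstHalf | (SndHalf << 4)) == 0x31);
    }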
-/// isVPERMILPDMask - Return true if the specified VECTOR_SHUFFLE operand
+/// isVPERMILPMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to VPERMILPD*.
/// Note that VPERMIL mask matching is different depending on whether the
/// underlying type is 32 or 64 bits. In VPERMILPS the high half of the mask
/// should point to the same elements as the low half, but into the higher
/// half of the source.
/// In VPERMILPD the two lanes could be shuffled independently of each other
/// with the same restriction that lanes can't be crossed.
-static bool isVPERMILPDMask(const SmallVectorImpl<int> &Mask, EVT VT,
- const X86Subtarget *Subtarget) {
+static bool isVPERMILPMask(const SmallVectorImpl<int> &Mask, EVT VT,
+ bool HasAVX) {
int NumElts = VT.getVectorNumElements();
int NumLanes = VT.getSizeInBits()/128;
- if (!Subtarget->hasAVX())
+ if (!HasAVX)
return false;
- // Only match 256-bit with 64-bit types
- if (VT.getSizeInBits() != 256 || NumElts != 4)
+ // Only match 256-bit with 32/64-bit types
+ if (VT.getSizeInBits() != 256 || (NumElts != 4 && NumElts != 8))
return false;
- // The mask on the high lane is independent of the low. Both can match
- // any element in inside its own lane, but can't cross.
int LaneSize = NumElts/NumLanes;
- for (int l = 0; l < NumLanes; ++l)
- for (int i = l*LaneSize; i < LaneSize*(l+1); ++i) {
- int LaneStart = l*LaneSize;
- if (!isUndefOrInRange(Mask[i], LaneStart, LaneStart+LaneSize))
+ for (int l = 0; l != NumLanes; ++l) {
+ int LaneStart = l*LaneSize;
+ for (int i = 0; i != LaneSize; ++i) {
+ if (!isUndefOrInRange(Mask[i+LaneStart], LaneStart, LaneStart+LaneSize))
+ return false;
+ if (NumElts == 4 || l == 0)
+ continue;
+ // VPERMILPS handling
+ if (Mask[i] < 0)
+ continue;
+ if (!isUndefOrEqual(Mask[i+LaneStart], Mask[i]+LaneSize))
return false;
}
-
- return true;
-}
-
-/// isVPERMILPSMask - Return true if the specified VECTOR_SHUFFLE operand
-/// specifies a shuffle of elements that is suitable for input to VPERMILPS*.
-/// Note that VPERMIL mask matching is different depending whether the underlying
-/// type is 32 or 64. In the VPERMILPS the high half of the mask should point
-/// to the same elements of the low, but to the higher half of the source.
-/// In VPERMILPD the two lanes could be shuffled independently of each other
-/// with the same restriction that lanes can't be crossed.
-static bool isVPERMILPSMask(const SmallVectorImpl<int> &Mask, EVT VT,
- const X86Subtarget *Subtarget) {
- unsigned NumElts = VT.getVectorNumElements();
- unsigned NumLanes = VT.getSizeInBits()/128;
-
- if (!Subtarget->hasAVX())
- return false;
-
- // Only match 256-bit with 32-bit types
- if (VT.getSizeInBits() != 256 || NumElts != 8)
- return false;
-
- // The mask on the high lane should be the same as the low. Actually,
- // they can differ if any of the corresponding index in a lane is undef
- // and the other stays in range.
- int LaneSize = NumElts/NumLanes;
- for (int i = 0; i < LaneSize; ++i) {
- int HighElt = i+LaneSize;
- bool HighValid = isUndefOrInRange(Mask[HighElt], LaneSize, NumElts);
- bool LowValid = isUndefOrInRange(Mask[i], 0, LaneSize);
-
- if (!HighValid || !LowValid)
- return false;
- if (Mask[i] < 0 || Mask[HighElt] < 0)
- continue;
- if (Mask[HighElt]-Mask[i] != LaneSize)
- return false;
}
return true;
}
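As a hand-written restatement of what the merged check accepts in the v8f32 case (this mirrors the loop above, assuming -1 encodes an undef mask element; it is not the LLVM helper itself):

    #include <array>
    #include <cassert>

    // v8f32: two 128-bit lanes of four elements. Every element must stay in
    // its own lane, and lane 1 must repeat lane 0's pattern offset by 4.
    static bool isVPermilPSMaskV8(const std::array<int, 8> &Mask) {
      for (int l = 0; l != 2; ++l)
        for (int i = 0; i != 4; ++i) {
          int M = Mask[i + l * 4];
          if (M < 0)
            continue;                        // undef always fits
          if (M < l * 4 || M >= l * 4 + 4)
            return false;                    // no lane crossing
          if (l == 1 && Mask[i] >= 0 && M != Mask[i] + 4)
            return false;                    // the two lanes must agree
        }
      return true;
    }

    int main() {
      const std::array<int, 8> Swap   = {1, 0, 3, 2, 5, 4, 7, 6}; // in-lane pair swap
      const std::array<int, 8> Differ = {1, 0, 3, 2, 4, 4, 6, 6}; // lane patterns differ
      const std::array<int, 8> Cross  = {0, 1, 2, 7, 4, 5, 6, 3}; // element 7 leaves lane 0
      assert(isVPermilPSMaskV8(Swap));
      assert(!isVPermilPSMaskV8(Differ));
      assert(!isVPermilPSMaskV8(Cross));
    }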
-/// getShuffleVPERMILPSImmediate - Return the appropriate immediate to shuffle
-/// the specified VECTOR_MASK mask with VPERMILPS* instructions.
-static unsigned getShuffleVPERMILPSImmediate(SDNode *N) {
+/// getShuffleVPERMILPImmediate - Return the appropriate immediate to shuffle
+/// the specified VECTOR_SHUFFLE mask with VPERMILPS/D* instructions.
+static unsigned getShuffleVPERMILPImmediate(SDNode *N) {
ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
EVT VT = SVOp->getValueType(0);
@@ -3899,43 +3868,22 @@ static unsigned getShuffleVPERMILPSImmediate(SDNode *N) {
// where a mask will match because the same mask element is undef on the
// first half but valid on the second. This would get pathological cases
// such as: shuffle <u, 0, 1, 2, 4, 4, 5, 6>, which is completely valid.
+ unsigned Shift = (LaneSize == 4) ? 2 : 1;
unsigned Mask = 0;
- for (int l = 0; l < NumLanes; ++l) {
- for (int i = 0; i < LaneSize; ++i) {
- int MaskElt = SVOp->getMaskElt(i+(l*LaneSize));
- if (MaskElt < 0)
- continue;
- if (MaskElt >= LaneSize)
- MaskElt -= LaneSize;
- Mask |= MaskElt << (i*2);
- }
+ for (int i = 0; i != NumElts; ++i) {
+ int MaskElt = SVOp->getMaskElt(i);
+ if (MaskElt < 0)
+ continue;
+ MaskElt %= LaneSize;
+ unsigned Shamt = i;
+ // For VPERMILPSY, the mask of the first half must be equal to the second one
+ if (NumElts == 8) Shamt %= LaneSize;
+ Mask |= MaskElt << (Shamt*Shift);
}
return Mask;
}
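And a matching hand check of the merged immediate computation for a v8f32 shuffle, where LaneSize = 4, so Shift = 2 and the two lanes OR into the same four 2-bit fields (illustrative only):

    #include <cassert>

    int main() {
      // v8f32 mask <1, 0, 3, 2, 5, 4, 7, 6>: swap adjacent pairs in each lane.
      const int Mask[8] = {1, 0, 3, 2, 5, 4, 7, 6};
      unsigned Imm = 0;
      for (int i = 0; i != 8; ++i)
        Imm |= (Mask[i] % 4) << ((i % 4) * 2); // MaskElt %= LaneSize; Shamt %= LaneSize
      assert(Imm == 0xB1); // the familiar "swap pairs" vpermilps control byte
    }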
-/// getShuffleVPERMILPDImmediate - Return the appropriate immediate to shuffle
-/// the specified VECTOR_MASK mask with VPERMILPD* instructions.
-static unsigned getShuffleVPERMILPDImmediate(SDNode *N) {
- ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
- EVT VT = SVOp->getValueType(0);
-
- int NumElts = VT.getVectorNumElements();
- int NumLanes = VT.getSizeInBits()/128;
-
- unsigned Mask = 0;
- int LaneSize = NumElts/NumLanes;
- for (int l = 0; l < NumLanes; ++l)
- for (int i = l*LaneSize; i < LaneSize*(l+1); ++i) {
- int MaskElt = SVOp->getMaskElt(i);
- if (MaskElt < 0)
- continue;
- Mask |= (MaskElt-l*LaneSize) << i;
- }
-
- return Mask;
-}
-
/// isCommutedMOVL - Returns true if the shuffle mask is the reverse of what
/// x86 movss wants: the lowest element must be the lowest element of
/// vector 2, with the other elements coming from vector 1 in order.
@@ -4677,6 +4625,7 @@ static SDValue getShuffleScalarElt(SDNode *N, int Index, SelectionDAG &DAG,
ShuffleMask);
break;
case X86ISD::VPERM2F128:
+ case X86ISD::VPERM2I128:
ImmN = N->getOperand(N->getNumOperands()-1);
DecodeVPERM2F128Mask(VT, cast<ConstantSDNode>(ImmN)->getZExtValue(),
ShuffleMask);
@@ -6596,6 +6545,22 @@ static inline unsigned getVPERMILOpcode(EVT VT) {
return 0;
}
+static inline unsigned getVPERM2X128Opcode(EVT VT, bool HasAVX2) {
+ switch(VT.getSimpleVT().SimpleTy) {
+ case MVT::v32i8:
+ case MVT::v16i16:
+ case MVT::v8i32:
+ case MVT::v4i64:
+ if (HasAVX2) return X86ISD::VPERM2I128;
+ // else use fp unit for int vperm
+ case MVT::v8f32:
+ case MVT::v4f64: return X86ISD::VPERM2F128;
+ default:
+ llvm_unreachable("Unknown type for vperm2x128");
+ }
+ return 0;
+}
+
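Note the deliberate fall-through in the integer cases above: without AVX2 there is no 256-bit integer cross-lane permute, so the integer vector types reuse X86ISD::VPERM2F128 and accept a potential floating-point/integer domain crossing, as the inline comment says.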
static
SDValue NormalizeVectorShuffle(SDValue Op, SelectionDAG &DAG,
const TargetLowering &TLI,
@@ -6910,22 +6875,17 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
if (isMOVDDUPYMask(SVOp, Subtarget))
return getTargetShuffleNode(X86ISD::MOVDDUP, dl, VT, V1, DAG);
- // Handle VPERMILPS* permutations
- if (isVPERMILPSMask(M, VT, Subtarget))
- return getTargetShuffleNode(getVPERMILOpcode(VT), dl, VT, V1,
- getShuffleVPERMILPSImmediate(SVOp), DAG);
-
- // Handle VPERMILPD* permutations
- if (isVPERMILPDMask(M, VT, Subtarget))
+ // Handle VPERMILPS/D* permutations
+ if (isVPERMILPMask(M, VT, Subtarget->hasAVX()))
return getTargetShuffleNode(getVPERMILOpcode(VT), dl, VT, V1,
- getShuffleVPERMILPDImmediate(SVOp), DAG);
+ getShuffleVPERMILPImmediate(SVOp), DAG);
- // Handle VPERM2F128 permutations
- if (isVPERM2F128Mask(M, VT, Subtarget))
- return getTargetShuffleNode(X86ISD::VPERM2F128, dl, VT, V1, V2,
- getShuffleVPERM2F128Immediate(SVOp), DAG);
+ // Handle VPERM2F128/VPERM2I128 permutations
+ if (isVPERM2X128Mask(M, VT, Subtarget->hasAVX()))
+ return getTargetShuffleNode(getVPERM2X128Opcode(VT, HasAVX2), dl, VT, V1,
+ V2, getShuffleVPERM2X128Immediate(SVOp), DAG);
- // Handle VSHUFPSY permutations
+ // Handle VSHUFPS/DY permutations
if (isVSHUFPYMask(M, VT, Subtarget->hasAVX()))
return getTargetShuffleNode(getSHUFPOpcode(VT), dl, VT, V1, V2,
getShuffleVSHUFPYImmediate(SVOp), DAG);
@@ -11223,6 +11183,7 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
case X86ISD::VPERMILPS: return "X86ISD::VPERMILPS";
case X86ISD::VPERMILPD: return "X86ISD::VPERMILPD";
case X86ISD::VPERM2F128: return "X86ISD::VPERM2F128";
+ case X86ISD::VPERM2I128: return "X86ISD::VPERM2I128";
case X86ISD::VASTART_SAVE_XMM_REGS: return "X86ISD::VASTART_SAVE_XMM_REGS";
case X86ISD::VAARG_64: return "X86ISD::VAARG_64";
case X86ISD::WIN_ALLOCA: return "X86ISD::WIN_ALLOCA";
@@ -14810,6 +14771,7 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
case X86ISD::VPERMILPS:
case X86ISD::VPERMILPD:
case X86ISD::VPERM2F128:
+ case X86ISD::VPERM2I128:
case ISD::VECTOR_SHUFFLE: return PerformShuffleCombine(N, DAG, DCI, Subtarget);
}
diff --git a/llvm/lib/Target/X86/X86ISelLowering.h b/llvm/lib/Target/X86/X86ISelLowering.h
index 638f80c6e00..e0a0e295783 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.h
+++ b/llvm/lib/Target/X86/X86ISelLowering.h
@@ -280,6 +280,7 @@ namespace llvm {
VPERMILPS,
VPERMILPD,
VPERM2F128,
+ VPERM2I128,
VBROADCAST,
// VASTART_SAVE_XMM_REGS - Save xmm argument registers to the stack,
diff --git a/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td b/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td
index db2b652c290..ff9c1433fad 100644
--- a/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td
+++ b/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td
@@ -140,6 +140,7 @@ def X86VPermilps : SDNode<"X86ISD::VPERMILPS", SDTShuff2OpI>;
def X86VPermilpd : SDNode<"X86ISD::VPERMILPD", SDTShuff2OpI>;
def X86VPerm2f128 : SDNode<"X86ISD::VPERM2F128", SDTShuff3OpI>;
+def X86VPerm2i128 : SDNode<"X86ISD::VPERM2I128", SDTShuff3OpI>;
def X86VBroadcast : SDNode<"X86ISD::VBROADCAST", SDTVBroadcast>;
diff --git a/llvm/lib/Target/X86/X86InstrSSE.td b/llvm/lib/Target/X86/X86InstrSSE.td
index 2b240fc03a4..28515ba1d6b 100644
--- a/llvm/lib/Target/X86/X86InstrSSE.td
+++ b/llvm/lib/Target/X86/X86InstrSSE.td
@@ -7359,7 +7359,7 @@ def : Pat<(int_x86_avx_vperm2f128_pd_256
VR256:$src1, (memopv4f64 addr:$src2), imm:$src3),
(VPERM2F128rm VR256:$src1, addr:$src2, imm:$src3)>;
def : Pat<(int_x86_avx_vperm2f128_si_256
- VR256:$src1, (memopv8i32 addr:$src2), imm:$src3),
+ VR256:$src1, (bc_v8i32 (memopv4i64 addr:$src2)), imm:$src3),
(VPERM2F128rm VR256:$src1, addr:$src2, imm:$src3)>;
def : Pat<(v8f32 (X86VPerm2f128 VR256:$src1, VR256:$src2, (i8 imm:$imm))),
@@ -7375,6 +7375,25 @@ def : Pat<(v32i8 (X86VPerm2f128 VR256:$src1, VR256:$src2, (i8 imm:$imm))),
def : Pat<(v16i16 (X86VPerm2f128 VR256:$src1, VR256:$src2, (i8 imm:$imm))),
(VPERM2F128rr VR256:$src1, VR256:$src2, imm:$imm)>;
+def : Pat<(v8f32 (X86VPerm2f128 VR256:$src1,
+ (memopv8f32 addr:$src2), (i8 imm:$imm))),
+ (VPERM2F128rm VR256:$src1, addr:$src2, imm:$imm)>;
+def : Pat<(v8i32 (X86VPerm2f128 VR256:$src1,
+ (bc_v8i32 (memopv4i64 addr:$src2)), (i8 imm:$imm))),
+ (VPERM2F128rm VR256:$src1, addr:$src2, imm:$imm)>;
+def : Pat<(v4i64 (X86VPerm2f128 VR256:$src1,
+ (memopv4i64 addr:$src2), (i8 imm:$imm))),
+ (VPERM2F128rm VR256:$src1, addr:$src2, imm:$imm)>;
+def : Pat<(v4f64 (X86VPerm2f128 VR256:$src1,
+ (memopv4f64 addr:$src2), (i8 imm:$imm))),
+ (VPERM2F128rm VR256:$src1, addr:$src2, imm:$imm)>;
+def : Pat<(v32i8 (X86VPerm2f128 VR256:$src1,
+ (bc_v32i8 (memopv4i64 addr:$src2)), (i8 imm:$imm))),
+ (VPERM2F128rm VR256:$src1, addr:$src2, imm:$imm)>;
+def : Pat<(v16i16 (X86VPerm2f128 VR256:$src1,
+ (bc_v16i16 (memopv4i64 addr:$src2)), (i8 imm:$imm))),
+ (VPERM2F128rm VR256:$src1, addr:$src2, imm:$imm)>;
+
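The memory-operand patterns above (like the reworked _si_256 intrinsic pattern earlier in this file) match the integer types through bc_v8i32/bc_v16i16/bc_v32i8 of a memopv4i64 rather than through typed integer loads: 256-bit integer loads typically reach instruction selection as a v4i64 load plus a bitcast, so matching through the bitcast is the form that actually fires.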
//===----------------------------------------------------------------------===//
// VZERO - Zero YMM registers
//
@@ -7571,7 +7590,7 @@ defm VPERMPD : avx2_perm_imm<0x01, "vpermpd", memopv4f64, int_x86_avx2_permpd>,
VEX_W;
//===----------------------------------------------------------------------===//
-// VPERM2F128 - Permute Floating-Point Values in 128-bit chunks
+// VPERM2I128 - Permute Integer Values in 128-bit chunks
//
def VPERM2I128rr : AVX2AIi8<0x46, MRMSrcReg, (outs VR256:$dst),
(ins VR256:$src1, VR256:$src2, i8imm:$src3),
@@ -7587,6 +7606,30 @@ def VPERM2I128rm : AVX2AIi8<0x46, MRMSrcMem, (outs VR256:$dst),
imm:$src3))]>,
VEX_4V;
+let Predicates = [HasAVX2] in {
+def : Pat<(v8i32 (X86VPerm2i128 VR256:$src1, VR256:$src2, (i8 imm:$imm))),
+ (VPERM2I128rr VR256:$src1, VR256:$src2, imm:$imm)>;
+def : Pat<(v4i64 (X86VPerm2i128 VR256:$src1, VR256:$src2, (i8 imm:$imm))),
+ (VPERM2I128rr VR256:$src1, VR256:$src2, imm:$imm)>;
+def : Pat<(v32i8 (X86VPerm2i128 VR256:$src1, VR256:$src2, (i8 imm:$imm))),
+ (VPERM2I128rr VR256:$src1, VR256:$src2, imm:$imm)>;
+def : Pat<(v16i16 (X86VPerm2i128 VR256:$src1, VR256:$src2, (i8 imm:$imm))),
+ (VPERM2I128rr VR256:$src1, VR256:$src2, imm:$imm)>;
+
+def : Pat<(v32i8 (X86VPerm2i128 VR256:$src1, (bc_v32i8 (memopv4i64 addr:$src2)),
+ (i8 imm:$imm))),
+ (VPERM2I128rm VR256:$src1, addr:$src2, imm:$imm)>;
+def : Pat<(v16i16 (X86VPerm2i128 VR256:$src1,
+ (bc_v16i16 (memopv4i64 addr:$src2)), (i8 imm:$imm))),
+ (VPERM2I128rm VR256:$src1, addr:$src2, imm:$imm)>;
+def : Pat<(v8i32 (X86VPerm2i128 VR256:$src1, (bc_v8i32 (memopv4i64 addr:$src2)),
+ (i8 imm:$imm))),
+ (VPERM2I128rm VR256:$src1, addr:$src2, imm:$imm)>;
+def : Pat<(v4i64 (X86VPerm2i128 VR256:$src1, (memopv4i64 addr:$src2),
+ (i8 imm:$imm))),
+ (VPERM2I128rm VR256:$src1, addr:$src2, imm:$imm)>;
+}
+
//===----------------------------------------------------------------------===//
// VINSERTI128 - Insert packed integer values
//