summary | refs | log | tree | commit | diff | stats
path: root/llvm/lib
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib')
-rw-r--r-- llvm/lib/IR/AutoUpgrade.cpp 35
-rw-r--r-- llvm/lib/Target/X86/X86ISelLowering.cpp 1
-rw-r--r-- llvm/lib/Target/X86/X86ISelLowering.h 2
-rw-r--r-- llvm/lib/Target/X86/X86InstrFragmentsSIMD.td 7
-rw-r--r-- llvm/lib/Target/X86/X86InstrXOP.td 42
-rw-r--r-- llvm/lib/Target/X86/X86IntrinsicsInfo.h 4
6 files changed, 74 insertions, 17 deletions
diff --git a/llvm/lib/IR/AutoUpgrade.cpp b/llvm/lib/IR/AutoUpgrade.cpp
index 689859d4f67..99aa2376fed 100644
--- a/llvm/lib/IR/AutoUpgrade.cpp
+++ b/llvm/lib/IR/AutoUpgrade.cpp
@@ -282,6 +282,27 @@ static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) {
NewFn = F;
return true;
}
+ // Upgrade any XOP PERMIL2 index operand still using a float/double vector.
+ if (Name.startswith("x86.xop.vpermil2")) {
+ auto Params = F->getFunctionType()->params();
+ auto Idx = Params[2];
+ if (Idx->getScalarType()->isFloatingPointTy()) {
+ F->setName(Name + ".old");
+ unsigned IdxSize = Idx->getPrimitiveSizeInBits();
+ unsigned EltSize = Idx->getScalarSizeInBits();
+ Intrinsic::ID Permil2ID;
+ if (EltSize == 64 && IdxSize == 128)
+ Permil2ID = Intrinsic::x86_xop_vpermil2pd;
+ else if (EltSize == 32 && IdxSize == 128)
+ Permil2ID = Intrinsic::x86_xop_vpermil2ps;
+ else if (EltSize == 64 && IdxSize == 256)
+ Permil2ID = Intrinsic::x86_xop_vpermil2pd_256;
+ else
+ Permil2ID = Intrinsic::x86_xop_vpermil2ps_256;
+ NewFn = Intrinsic::getDeclaration(F->getParent(), Permil2ID);
+ return true;
+ }
+ }
break;
}
}
@@ -911,6 +932,20 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
CI->eraseFromParent();
return;
+ case Intrinsic::x86_xop_vpermil2pd:
+ case Intrinsic::x86_xop_vpermil2ps:
+ case Intrinsic::x86_xop_vpermil2pd_256:
+ case Intrinsic::x86_xop_vpermil2ps_256: {
+ SmallVector<Value *, 4> Args(CI->arg_operands().begin(),
+ CI->arg_operands().end());
+ VectorType *FltIdxTy = cast<VectorType>(Args[2]->getType());
+ VectorType *IntIdxTy = VectorType::getInteger(FltIdxTy);
+ Args[2] = Builder.CreateBitCast(Args[2], IntIdxTy);
+ CI->replaceAllUsesWith(Builder.CreateCall(NewFn, Args, Name));
+ CI->eraseFromParent();
+ return;
+ }
+
case Intrinsic::x86_sse41_ptestc:
case Intrinsic::x86_sse41_ptestz:
case Intrinsic::x86_sse41_ptestnzc: {
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 5babe6e8515..021e1767da8 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -21947,6 +21947,7 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
case X86ISD::VPSHL: return "X86ISD::VPSHL";
case X86ISD::VPCOM: return "X86ISD::VPCOM";
case X86ISD::VPCOMU: return "X86ISD::VPCOMU";
+ case X86ISD::VPERMIL2: return "X86ISD::VPERMIL2";
case X86ISD::FMADD: return "X86ISD::FMADD";
case X86ISD::FMSUB: return "X86ISD::FMSUB";
case X86ISD::FNMADD: return "X86ISD::FNMADD";
diff --git a/llvm/lib/Target/X86/X86ISelLowering.h b/llvm/lib/Target/X86/X86ISelLowering.h
index ab7cf955cce..f532cddeaa3 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.h
+++ b/llvm/lib/Target/X86/X86ISelLowering.h
@@ -451,6 +451,8 @@ namespace llvm {
VPCOM, VPCOMU,
// XOP packed permute bytes.
VPPERM,
+ // XOP two source permutation.
+ VPERMIL2,
// Vector multiply packed unsigned doubleword integers.
PMULUDQ,
diff --git a/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td b/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td
index 858fb4f2f10..958bb822a06 100644
--- a/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td
+++ b/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td
@@ -245,7 +245,12 @@ def X86vpcomu : SDNode<"X86ISD::VPCOMU",
SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0,1>,
SDTCisSameAs<0,2>,
SDTCisVT<3, i8>]>>;
-
+def X86vpermil2 : SDNode<"X86ISD::VPERMIL2",
+ SDTypeProfile<1, 4, [SDTCisVec<0>, SDTCisSameAs<0,1>,
+ SDTCisSameAs<0,2>,
+ SDTCisSameSizeAs<0,3>,
+ SDTCisSameNumEltsAs<0, 3>,
+ SDTCisVT<4, i8>]>>;
def X86vpperm : SDNode<"X86ISD::VPPERM",
SDTypeProfile<1, 3, [SDTCisVT<0, v16i8>, SDTCisSameAs<0,1>,
SDTCisSameAs<0,2>]>>;
diff --git a/llvm/lib/Target/X86/X86InstrXOP.td b/llvm/lib/Target/X86/X86InstrXOP.td
index 57e6c1aec64..f49917b80f3 100644
--- a/llvm/lib/Target/X86/X86InstrXOP.td
+++ b/llvm/lib/Target/X86/X86InstrXOP.td
@@ -342,27 +342,34 @@ let Predicates = [HasXOP] in {
(VPCMOVrrrY VR256:$src1, VR256:$src2, VR256:$src3)>;
}
-multiclass xop5op<bits<8> opc, string OpcodeStr, Intrinsic Int128,
- Intrinsic Int256, PatFrag ld_128, PatFrag ld_256> {
+multiclass xop5op<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ ValueType vt128, ValueType vt256,
+ ValueType id128, ValueType id256,
+ PatFrag ld_128, PatFrag ld_256> {
def rr : IXOP5<opc, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src1, VR128:$src2, VR128:$src3, u8imm:$src4),
!strconcat(OpcodeStr,
"\t{$src4, $src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3, $src4}"),
[(set VR128:$dst,
- (Int128 VR128:$src1, VR128:$src2, VR128:$src3, imm:$src4))]>;
+ (vt128 (OpNode (vt128 VR128:$src1), (vt128 VR128:$src2),
+ (id128 VR128:$src3), (i8 imm:$src4))))]>;
def rm : IXOP5<opc, MRMSrcMem, (outs VR128:$dst),
- (ins VR128:$src1, VR128:$src2, f128mem:$src3, u8imm:$src4),
+ (ins VR128:$src1, VR128:$src2, i128mem:$src3, u8imm:$src4),
!strconcat(OpcodeStr,
"\t{$src4, $src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3, $src4}"),
[(set VR128:$dst,
- (Int128 VR128:$src1, VR128:$src2, (ld_128 addr:$src3), imm:$src4))]>,
+ (vt128 (OpNode (vt128 VR128:$src1), (vt128 VR128:$src2),
+ (id128 (bitconvert (loadv2i64 addr:$src3))),
+ (i8 imm:$src4))))]>,
VEX_W, MemOp4;
def mr : IXOP5<opc, MRMSrcMem, (outs VR128:$dst),
(ins VR128:$src1, f128mem:$src2, VR128:$src3, u8imm:$src4),
!strconcat(OpcodeStr,
"\t{$src4, $src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3, $src4}"),
[(set VR128:$dst,
- (Int128 VR128:$src1, (ld_128 addr:$src2), VR128:$src3, imm:$src4))]>;
+ (vt128 (OpNode (vt128 VR128:$src1),
+ (vt128 (bitconvert (ld_128 addr:$src2))),
+ (id128 VR128:$src3), (i8 imm:$src4))))]>;
// For disassembler
let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 in
def rr_REV : IXOP5<opc, MRMSrcReg, (outs VR128:$dst),
@@ -376,21 +383,24 @@ multiclass xop5op<bits<8> opc, string OpcodeStr, Intrinsic Int128,
!strconcat(OpcodeStr,
"\t{$src4, $src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3, $src4}"),
[(set VR256:$dst,
- (Int256 VR256:$src1, VR256:$src2, VR256:$src3, imm:$src4))]>, VEX_L;
+ (vt256 (OpNode (vt256 VR256:$src1), (vt256 VR256:$src2),
+ (id256 VR256:$src3), (i8 imm:$src4))))]>, VEX_L;
def rmY : IXOP5<opc, MRMSrcMem, (outs VR256:$dst),
- (ins VR256:$src1, VR256:$src2, f256mem:$src3, u8imm:$src4),
+ (ins VR256:$src1, VR256:$src2, i256mem:$src3, u8imm:$src4),
!strconcat(OpcodeStr,
"\t{$src4, $src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3, $src4}"),
[(set VR256:$dst,
- (Int256 VR256:$src1, VR256:$src2, (ld_256 addr:$src3), imm:$src4))]>,
- VEX_W, MemOp4, VEX_L;
+ (vt256 (OpNode (vt256 VR256:$src1), (vt256 VR256:$src2),
+ (id256 (bitconvert (loadv4i64 addr:$src3))),
+ (i8 imm:$src4))))]>, VEX_W, MemOp4, VEX_L;
def mrY : IXOP5<opc, MRMSrcMem, (outs VR256:$dst),
(ins VR256:$src1, f256mem:$src2, VR256:$src3, u8imm:$src4),
!strconcat(OpcodeStr,
"\t{$src4, $src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3, $src4}"),
[(set VR256:$dst,
- (Int256 VR256:$src1, (ld_256 addr:$src2), VR256:$src3, imm:$src4))]>,
- VEX_L;
+ (vt256 (OpNode (vt256 VR256:$src1),
+ (vt256 (bitconvert (ld_256 addr:$src2))),
+ (id256 VR256:$src3), (i8 imm:$src4))))]>, VEX_L;
// For disassembler
let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 in
def rrY_REV : IXOP5<opc, MRMSrcReg, (outs VR256:$dst),
@@ -401,10 +411,10 @@ multiclass xop5op<bits<8> opc, string OpcodeStr, Intrinsic Int128,
}
let ExeDomain = SSEPackedDouble in
- defm VPERMIL2PD : xop5op<0x49, "vpermil2pd", int_x86_xop_vpermil2pd,
- int_x86_xop_vpermil2pd_256, loadv2f64, loadv4f64>;
+ defm VPERMIL2PD : xop5op<0x49, "vpermil2pd", X86vpermil2, v2f64, v4f64,
+ v2i64, v4i64, loadv2f64, loadv4f64>;
let ExeDomain = SSEPackedSingle in
- defm VPERMIL2PS : xop5op<0x48, "vpermil2ps", int_x86_xop_vpermil2ps,
- int_x86_xop_vpermil2ps_256, loadv4f32, loadv8f32>;
+ defm VPERMIL2PS : xop5op<0x48, "vpermil2ps", X86vpermil2, v4f32, v8f32,
+ v4i32, v8i32, loadv4f32, loadv8f32>;
diff --git a/llvm/lib/Target/X86/X86IntrinsicsInfo.h b/llvm/lib/Target/X86/X86IntrinsicsInfo.h
index ed8da4e24fe..b0eea57fbb3 100644
--- a/llvm/lib/Target/X86/X86IntrinsicsInfo.h
+++ b/llvm/lib/Target/X86/X86IntrinsicsInfo.h
@@ -2234,6 +2234,10 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86_INTRINSIC_DATA(xop_vpcomuq, INTR_TYPE_3OP, X86ISD::VPCOMU, 0),
X86_INTRINSIC_DATA(xop_vpcomuw, INTR_TYPE_3OP, X86ISD::VPCOMU, 0),
X86_INTRINSIC_DATA(xop_vpcomw, INTR_TYPE_3OP, X86ISD::VPCOM, 0),
+ X86_INTRINSIC_DATA(xop_vpermil2pd, INTR_TYPE_4OP, X86ISD::VPERMIL2, 0),
+ X86_INTRINSIC_DATA(xop_vpermil2pd_256, INTR_TYPE_4OP, X86ISD::VPERMIL2, 0),
+ X86_INTRINSIC_DATA(xop_vpermil2ps, INTR_TYPE_4OP, X86ISD::VPERMIL2, 0),
+ X86_INTRINSIC_DATA(xop_vpermil2ps_256, INTR_TYPE_4OP, X86ISD::VPERMIL2, 0),
X86_INTRINSIC_DATA(xop_vpperm, INTR_TYPE_3OP, X86ISD::VPPERM, 0),
X86_INTRINSIC_DATA(xop_vprotb, INTR_TYPE_2OP, X86ISD::VPROT, 0),
X86_INTRINSIC_DATA(xop_vprotbi, INTR_TYPE_2OP, X86ISD::VPROTI, 0),
OpenPOWER on IntegriCloud