summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--llvm/include/llvm/IR/IntrinsicsX86.td28
-rw-r--r--llvm/lib/IR/AutoUpgrade.cpp89
-rw-r--r--llvm/lib/Target/X86/AsmParser/X86AsmParserCommon.h4
-rw-r--r--llvm/lib/Target/X86/AsmParser/X86Operand.h14
-rw-r--r--llvm/lib/Target/X86/X86InstrAVX512.td8
-rw-r--r--llvm/lib/Target/X86/X86InstrFragmentsSIMD.td4
-rw-r--r--llvm/lib/Target/X86/X86InstrInfo.td17
-rw-r--r--llvm/lib/Target/X86/X86InstrSSE.td26
-rw-r--r--llvm/test/CodeGen/X86/avx-intrinsics-x86-upgrade.ll26
-rw-r--r--llvm/test/CodeGen/X86/avx-intrinsics-x86.ll40
-rw-r--r--llvm/test/CodeGen/X86/avx.ll2
-rw-r--r--llvm/test/CodeGen/X86/avx2-intrinsics-x86-upgrade.ll33
-rw-r--r--llvm/test/CodeGen/X86/avx2-intrinsics-x86.ll16
-rw-r--r--llvm/test/CodeGen/X86/sse41-intrinsics-x86-upgrade.ll61
-rw-r--r--llvm/test/CodeGen/X86/sse41-intrinsics-x86.ll28
-rw-r--r--llvm/test/CodeGen/X86/sse41.ll2
-rw-r--r--llvm/test/MC/X86/x86-32-coverage.s18
-rw-r--r--llvm/utils/TableGen/X86RecognizableInstr.cpp2
18 files changed, 303 insertions, 115 deletions
diff --git a/llvm/include/llvm/IR/IntrinsicsX86.td b/llvm/include/llvm/IR/IntrinsicsX86.td
index 881c0a3c883..e9e6730427a 100644
--- a/llvm/include/llvm/IR/IntrinsicsX86.td
+++ b/llvm/include/llvm/IR/IntrinsicsX86.td
@@ -886,7 +886,7 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
// Vector insert
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
def int_x86_sse41_insertps : GCCBuiltin<"__builtin_ia32_insertps128">,
- Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty,llvm_i32_ty],
+ Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_i8_ty],
[IntrNoMem]>;
}
@@ -896,13 +896,13 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty,llvm_v16i8_ty],
[IntrNoMem]>;
def int_x86_sse41_pblendw : GCCBuiltin<"__builtin_ia32_pblendw128">,
- Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty, llvm_i32_ty],
+ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty, llvm_i8_ty],
[IntrNoMem]>;
def int_x86_sse41_blendpd : GCCBuiltin<"__builtin_ia32_blendpd">,
- Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty, llvm_i32_ty],
+ Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty, llvm_i8_ty],
[IntrNoMem]>;
def int_x86_sse41_blendps : GCCBuiltin<"__builtin_ia32_blendps">,
- Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_i32_ty],
+ Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_i8_ty],
[IntrNoMem]>;
def int_x86_sse41_blendvpd : GCCBuiltin<"__builtin_ia32_blendvpd">,
Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty,llvm_v2f64_ty],
@@ -915,17 +915,17 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
// Vector dot product
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
def int_x86_sse41_dppd : GCCBuiltin<"__builtin_ia32_dppd">,
- Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty,llvm_i32_ty],
+ Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty, llvm_i8_ty],
[IntrNoMem, Commutative]>;
def int_x86_sse41_dpps : GCCBuiltin<"__builtin_ia32_dpps">,
- Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty,llvm_i32_ty],
+ Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_i8_ty],
[IntrNoMem, Commutative]>;
}
// Vector sum of absolute differences
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
def int_x86_sse41_mpsadbw : GCCBuiltin<"__builtin_ia32_mpsadbw128">,
- Intrinsic<[llvm_v8i16_ty], [llvm_v16i8_ty, llvm_v16i8_ty,llvm_i32_ty],
+ Intrinsic<[llvm_v8i16_ty], [llvm_v16i8_ty, llvm_v16i8_ty,llvm_i8_ty],
[IntrNoMem, Commutative]>;
}
@@ -1171,10 +1171,10 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
def int_x86_avx_blend_pd_256 : GCCBuiltin<"__builtin_ia32_blendpd256">,
Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty,
- llvm_v4f64_ty, llvm_i32_ty], [IntrNoMem]>;
+ llvm_v4f64_ty, llvm_i8_ty], [IntrNoMem]>;
def int_x86_avx_blend_ps_256 : GCCBuiltin<"__builtin_ia32_blendps256">,
Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty,
- llvm_v8f32_ty, llvm_i32_ty], [IntrNoMem]>;
+ llvm_v8f32_ty, llvm_i8_ty], [IntrNoMem]>;
def int_x86_avx_blendv_pd_256 : GCCBuiltin<"__builtin_ia32_blendvpd256">,
Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty,
llvm_v4f64_ty, llvm_v4f64_ty], [IntrNoMem]>;
@@ -1187,7 +1187,7 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
def int_x86_avx_dp_ps_256 : GCCBuiltin<"__builtin_ia32_dpps256">,
Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty,
- llvm_v8f32_ty, llvm_i32_ty], [IntrNoMem]>;
+ llvm_v8f32_ty, llvm_i8_ty], [IntrNoMem]>;
}
// Vector compare
@@ -1710,13 +1710,13 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
llvm_v32i8_ty], [IntrNoMem]>;
def int_x86_avx2_pblendw : GCCBuiltin<"__builtin_ia32_pblendw256">,
Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty,
- llvm_i32_ty], [IntrNoMem]>;
+ llvm_i8_ty], [IntrNoMem]>;
def int_x86_avx2_pblendd_128 : GCCBuiltin<"__builtin_ia32_pblendd128">,
Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty,
- llvm_i32_ty], [IntrNoMem]>;
+ llvm_i8_ty], [IntrNoMem]>;
def int_x86_avx2_pblendd_256 : GCCBuiltin<"__builtin_ia32_pblendd256">,
Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty,
- llvm_i32_ty], [IntrNoMem]>;
+ llvm_i8_ty], [IntrNoMem]>;
}
// Vector load with broadcast
@@ -1955,7 +1955,7 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
llvm_v32i8_ty], [IntrNoMem]>;
def int_x86_avx2_mpsadbw : GCCBuiltin<"__builtin_ia32_mpsadbw256">,
Intrinsic<[llvm_v16i16_ty], [llvm_v32i8_ty, llvm_v32i8_ty,
- llvm_i32_ty], [IntrNoMem, Commutative]>;
+ llvm_i8_ty], [IntrNoMem, Commutative]>;
def int_x86_avx2_movntdqa : GCCBuiltin<"__builtin_ia32_movntdqa256">,
Intrinsic<[llvm_v4i64_ty], [llvm_ptr_ty], [IntrReadMem]>;
}
diff --git a/llvm/lib/IR/AutoUpgrade.cpp b/llvm/lib/IR/AutoUpgrade.cpp
index 2b62ea24cec..bbb8462652c 100644
--- a/llvm/lib/IR/AutoUpgrade.cpp
+++ b/llvm/lib/IR/AutoUpgrade.cpp
@@ -43,6 +43,22 @@ static bool UpgradeSSE41Function(Function* F, Intrinsic::ID IID,
return true;
}
+// Upgrade the declarations of intrinsic functions whose 8-bit immediate mask
+// arguments have changed their type from i32 to i8.
+static bool UpgradeX86IntrinsicsWith8BitMask(Function *F, Intrinsic::ID IID,
+ Function *&NewFn) {
+ // Check that the last argument is an i32.
+ Type *LastArgType = F->getFunctionType()->getParamType(
+ F->getFunctionType()->getNumParams() - 1);
+ if (!LastArgType->isIntegerTy(32))
+ return false;
+
+ // Move this function aside and map down.
+ F->setName(F->getName() + ".old");
+ NewFn = Intrinsic::getDeclaration(F->getParent(), IID);
+ return true;
+}
+
static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) {
assert(F && "Illegal to upgrade a non-existent Function.");
@@ -130,6 +146,51 @@ static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) {
if (Name == "x86.sse41.ptestnzc")
return UpgradeSSE41Function(F, Intrinsic::x86_sse41_ptestnzc, NewFn);
}
+ // Several blend and other instructions with maskes used the wrong number of
+ // bits.
+ if (Name == "x86.sse41.pblendw")
+ return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_pblendw,
+ NewFn);
+ if (Name == "x86.sse41.blendpd")
+ return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_blendpd,
+ NewFn);
+ if (Name == "x86.sse41.blendps")
+ return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_blendps,
+ NewFn);
+ if (Name == "x86.sse41.insertps")
+ return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_insertps,
+ NewFn);
+ if (Name == "x86.sse41.dppd")
+ return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_dppd,
+ NewFn);
+ if (Name == "x86.sse41.dpps")
+ return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_dpps,
+ NewFn);
+ if (Name == "x86.sse41.mpsadbw")
+ return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_mpsadbw,
+ NewFn);
+ if (Name == "x86.avx.blend.pd.256")
+ return UpgradeX86IntrinsicsWith8BitMask(
+ F, Intrinsic::x86_avx_blend_pd_256, NewFn);
+ if (Name == "x86.avx.blend.ps.256")
+ return UpgradeX86IntrinsicsWith8BitMask(
+ F, Intrinsic::x86_avx_blend_ps_256, NewFn);
+ if (Name == "x86.avx.dp.ps.256")
+ return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_avx_dp_ps_256,
+ NewFn);
+ if (Name == "x86.avx2.pblendw")
+ return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_avx2_pblendw,
+ NewFn);
+ if (Name == "x86.avx2.pblendd.128")
+ return UpgradeX86IntrinsicsWith8BitMask(
+ F, Intrinsic::x86_avx2_pblendd_128, NewFn);
+ if (Name == "x86.avx2.pblendd.256")
+ return UpgradeX86IntrinsicsWith8BitMask(
+ F, Intrinsic::x86_avx2_pblendd_256, NewFn);
+ if (Name == "x86.avx2.mpsadbw")
+ return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_avx2_mpsadbw,
+ NewFn);
+
// frcz.ss/sd may need to have an argument dropped
if (Name.startswith("x86.xop.vfrcz.ss") && F->arg_size() == 2) {
F->setName(Name + ".old");
@@ -413,6 +474,34 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
CI->eraseFromParent();
return;
}
+
+ case Intrinsic::x86_sse41_pblendw:
+ case Intrinsic::x86_sse41_blendpd:
+ case Intrinsic::x86_sse41_blendps:
+ case Intrinsic::x86_sse41_insertps:
+ case Intrinsic::x86_sse41_dppd:
+ case Intrinsic::x86_sse41_dpps:
+ case Intrinsic::x86_sse41_mpsadbw:
+ case Intrinsic::x86_avx_blend_pd_256:
+ case Intrinsic::x86_avx_blend_ps_256:
+ case Intrinsic::x86_avx_dp_ps_256:
+ case Intrinsic::x86_avx2_pblendw:
+ case Intrinsic::x86_avx2_pblendd_128:
+ case Intrinsic::x86_avx2_pblendd_256:
+ case Intrinsic::x86_avx2_mpsadbw: {
+ // Need to truncate the last argument from i32 to i8 -- this argument models
+ // an inherently 8-bit immediate operand to these x86 instructions.
+ SmallVector<Value *, 4> Args(CI->arg_operands().begin(),
+ CI->arg_operands().end());
+
+ // Replace the last argument with a trunc.
+ Args.back() = Builder.CreateTrunc(Args.back(), Type::getInt8Ty(C), "trunc");
+
+ CallInst *NewCall = Builder.CreateCall(NewFn, Args);
+ CI->replaceAllUsesWith(NewCall);
+ CI->eraseFromParent();
+ return;
+ }
}
}
diff --git a/llvm/lib/Target/X86/AsmParser/X86AsmParserCommon.h b/llvm/lib/Target/X86/AsmParser/X86AsmParserCommon.h
index 5d9b6ab5b05..72aeeaac163 100644
--- a/llvm/lib/Target/X86/AsmParser/X86AsmParserCommon.h
+++ b/llvm/lib/Target/X86/AsmParser/X86AsmParserCommon.h
@@ -24,10 +24,6 @@ inline bool isImmSExti32i8Value(uint64_t Value) {
(0xFFFFFFFFFFFFFF80ULL <= Value && Value <= 0xFFFFFFFFFFFFFFFFULL));
}
-inline bool isImmZExtu32u8Value(uint64_t Value) {
- return (Value <= 0x00000000000000FFULL);
-}
-
inline bool isImmSExti64i8Value(uint64_t Value) {
return (( Value <= 0x000000000000007FULL)||
(0xFFFFFFFFFFFFFF80ULL <= Value && Value <= 0xFFFFFFFFFFFFFFFFULL));
diff --git a/llvm/lib/Target/X86/AsmParser/X86Operand.h b/llvm/lib/Target/X86/AsmParser/X86Operand.h
index 11a84157f58..e0fab8dcaf3 100644
--- a/llvm/lib/Target/X86/AsmParser/X86Operand.h
+++ b/llvm/lib/Target/X86/AsmParser/X86Operand.h
@@ -153,20 +153,6 @@ struct X86Operand : public MCParsedAsmOperand {
// extension.
return isImmSExti32i8Value(CE->getValue());
}
- bool isImmZExtu32u8() const {
- if (!isImm())
- return false;
-
- // If this isn't a constant expr, just assume it fits and let relaxation
- // handle it.
- const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
- if (!CE)
- return true;
-
- // Otherwise, check the value is in a range that makes sense for this
- // extension.
- return isImmZExtu32u8Value(CE->getValue());
- }
bool isImmSExti64i8() const {
if (!isImm())
return false;
diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td
index 10bbfda6fe1..dbb58bb8ba8 100644
--- a/llvm/lib/Target/X86/X86InstrAVX512.td
+++ b/llvm/lib/Target/X86/X86InstrAVX512.td
@@ -393,12 +393,12 @@ def : Pat<(vinsert256_insert:$ins (v16i32 VR512:$src1),
// vinsertps - insert f32 to XMM
def VINSERTPSzrr : AVX512AIi8<0x21, MRMSrcReg, (outs VR128X:$dst),
- (ins VR128X:$src1, VR128X:$src2, u32u8imm:$src3),
+ (ins VR128X:$src1, VR128X:$src2, i8imm:$src3),
"vinsertps\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
[(set VR128X:$dst, (X86insertps VR128X:$src1, VR128X:$src2, imm:$src3))]>,
EVEX_4V;
def VINSERTPSzrm: AVX512AIi8<0x21, MRMSrcMem, (outs VR128X:$dst),
- (ins VR128X:$src1, f32mem:$src2, u32u8imm:$src3),
+ (ins VR128X:$src1, f32mem:$src2, i8imm:$src3),
"vinsertps\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
[(set VR128X:$dst, (X86insertps VR128X:$src1,
(v4f32 (scalar_to_vector (loadf32 addr:$src2))),
@@ -538,13 +538,13 @@ def : Pat<(insert_subvector undef, (v8f32 VR256X:$src), (iPTR 0)),
// vextractps - extract 32 bits from XMM
def VEXTRACTPSzrr : AVX512AIi8<0x17, MRMDestReg, (outs GR32:$dst),
- (ins VR128X:$src1, u32u8imm:$src2),
+ (ins VR128X:$src1, i32i8imm:$src2),
"vextractps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set GR32:$dst, (extractelt (bc_v4i32 (v4f32 VR128X:$src1)), imm:$src2))]>,
EVEX;
def VEXTRACTPSzmr : AVX512AIi8<0x17, MRMDestMem, (outs),
- (ins f32mem:$dst, VR128X:$src1, u32u8imm:$src2),
+ (ins f32mem:$dst, VR128X:$src1, i32i8imm:$src2),
"vextractps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(store (extractelt (bc_v4i32 (v4f32 VR128X:$src1)), imm:$src2),
addr:$dst)]>, EVEX, EVEX_CD8<32, CD8VT1>;
diff --git a/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td b/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td
index 79379ad33a9..eba5ec25136 100644
--- a/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td
+++ b/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td
@@ -83,7 +83,7 @@ def X86pinsrw : SDNode<"X86ISD::PINSRW",
SDTCisVT<2, i32>, SDTCisPtrTy<3>]>>;
def X86insertps : SDNode<"X86ISD::INSERTPS",
SDTypeProfile<1, 3, [SDTCisVT<0, v4f32>, SDTCisSameAs<0,1>,
- SDTCisVT<2, v4f32>, SDTCisPtrTy<3>]>>;
+ SDTCisVT<2, v4f32>, SDTCisVT<3, i8>]>>;
def X86vzmovl : SDNode<"X86ISD::VZEXT_MOVL",
SDTypeProfile<1, 1, [SDTCisSameAs<0,1>]>>;
@@ -197,7 +197,7 @@ def SDTVBroadcast : SDTypeProfile<1, 1, [SDTCisVec<0>]>;
def SDTVBroadcastm : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisVec<1>]>;
def SDTBlend : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0,1>,
- SDTCisSameAs<1,2>, SDTCisVT<3, i32>]>;
+ SDTCisSameAs<1,2>, SDTCisVT<3, i8>]>;
def SDTFma : SDTypeProfile<1, 3, [SDTCisSameAs<0,1>,
SDTCisSameAs<1,2>, SDTCisSameAs<1,3>]>;
diff --git a/llvm/lib/Target/X86/X86InstrInfo.td b/llvm/lib/Target/X86/X86InstrInfo.td
index a8743fd83da..867b6caaa81 100644
--- a/llvm/lib/Target/X86/X86InstrInfo.td
+++ b/llvm/lib/Target/X86/X86InstrInfo.td
@@ -551,11 +551,6 @@ class ImmSExtAsmOperandClass : AsmOperandClass {
let RenderMethod = "addImmOperands";
}
-class ImmZExtAsmOperandClass : AsmOperandClass {
- let SuperClasses = [ImmAsmOperand];
- let RenderMethod = "addImmOperands";
-}
-
def X86GR32orGR64AsmOperand : AsmOperandClass {
let Name = "GR32orGR64";
}
@@ -568,6 +563,7 @@ def AVX512RC : Operand<i32> {
let PrintMethod = "printRoundingControl";
let OperandType = "OPERAND_IMMEDIATE";
}
+
// Sign-extended immediate classes. We don't need to define the full lattice
// here because there is no instruction with an ambiguity between ImmSExti64i32
// and ImmSExti32i8.
@@ -595,12 +591,6 @@ def ImmSExti32i8AsmOperand : ImmSExtAsmOperandClass {
let Name = "ImmSExti32i8";
}
-// [0, 0x000000FF]
-def ImmZExtu32u8AsmOperand : ImmZExtAsmOperandClass {
- let Name = "ImmZExtu32u8";
-}
-
-
// [0, 0x0000007F] |
// [0xFFFFFFFFFFFFFF80, 0xFFFFFFFFFFFFFFFF]
def ImmSExti64i8AsmOperand : ImmSExtAsmOperandClass {
@@ -620,11 +610,6 @@ def i32i8imm : Operand<i32> {
let ParserMatchClass = ImmSExti32i8AsmOperand;
let OperandType = "OPERAND_IMMEDIATE";
}
-// 32-bits but only 8 bits are significant, and those 8 bits are unsigned.
-def u32u8imm : Operand<i32> {
- let ParserMatchClass = ImmZExtu32u8AsmOperand;
- let OperandType = "OPERAND_IMMEDIATE";
-}
// 64-bits but only 32 bits are significant.
def i64i32imm : Operand<i64> {
diff --git a/llvm/lib/Target/X86/X86InstrSSE.td b/llvm/lib/Target/X86/X86InstrSSE.td
index 2189d14c07b..71cc77214aa 100644
--- a/llvm/lib/Target/X86/X86InstrSSE.td
+++ b/llvm/lib/Target/X86/X86InstrSSE.td
@@ -5379,7 +5379,7 @@ let Predicates = [HasAVX] in {
// the corresponding elements in the second input vector.
def : Pat<(v8f32 (X86Blendi (v8f32 (fsub VR256:$lhs, VR256:$rhs)),
- (v8f32 (fadd VR256:$lhs, VR256:$rhs)), (i32 170))),
+ (v8f32 (fadd VR256:$lhs, VR256:$rhs)), (i8 170))),
(VADDSUBPSYrr VR256:$lhs, VR256:$rhs)>;
// Constant 10 corresponds to the binary mask '1010'.
@@ -5388,16 +5388,16 @@ let Predicates = [HasAVX] in {
// - the 2nd and 4th element from the second input vector (the 'fadd' node).
def : Pat<(v4f64 (X86Blendi (v4f64 (fsub VR256:$lhs, VR256:$rhs)),
- (v4f64 (fadd VR256:$lhs, VR256:$rhs)), (i32 10))),
+ (v4f64 (fadd VR256:$lhs, VR256:$rhs)), (i8 10))),
(VADDSUBPDYrr VR256:$lhs, VR256:$rhs)>;
def : Pat<(v4f64 (X86Blendi (v4f64 (fsub VR256:$lhs, VR256:$rhs)),
- (v4f64 (fadd VR256:$lhs, VR256:$rhs)), (i32 10))),
+ (v4f64 (fadd VR256:$lhs, VR256:$rhs)), (i8 10))),
(VADDSUBPDYrr VR256:$lhs, VR256:$rhs)>;
def : Pat<(v4f32 (X86Blendi (v4f32 (fsub VR128:$lhs, VR128:$rhs)),
- (v4f32 (fadd VR128:$lhs, VR128:$rhs)), (i32 10))),
+ (v4f32 (fadd VR128:$lhs, VR128:$rhs)), (i8 10))),
(VADDSUBPSrr VR128:$lhs, VR128:$rhs)>;
def : Pat<(v2f64 (X86Blendi (v2f64 (fsub VR128:$lhs, VR128:$rhs)),
- (v2f64 (fadd VR128:$lhs, VR128:$rhs)), (i32 2))),
+ (v2f64 (fadd VR128:$lhs, VR128:$rhs)), (i8 2))),
(VADDSUBPDrr VR128:$lhs, VR128:$rhs)>;
def : Pat<(v2f64 (X86Movsd (v2f64 (fadd VR128:$lhs, VR128:$rhs)),
(v2f64 (fsub VR128:$lhs, VR128:$rhs)))),
@@ -5411,11 +5411,11 @@ let Predicates = [UseSSE3] in {
// - the 2nd and 4th element from the second input vector (the fadd node).
def : Pat<(v4f32 (X86Blendi (v4f32 (fsub VR128:$lhs, VR128:$rhs)),
- (v4f32 (fadd VR128:$lhs, VR128:$rhs)), (i32 10))),
+ (v4f32 (fadd VR128:$lhs, VR128:$rhs)), (i8 10))),
(ADDSUBPSrr VR128:$lhs, VR128:$rhs)>;
def : Pat<(v2f64 (X86Blendi (v2f64 (fsub VR128:$lhs, VR128:$rhs)),
- (v2f64 (fadd VR128:$lhs, VR128:$rhs)), (i32 2))),
+ (v2f64 (fadd VR128:$lhs, VR128:$rhs)), (i8 2))),
(ADDSUBPDrr VR128:$lhs, VR128:$rhs)>;
def : Pat<(v2f64 (X86Movsd (v2f64 (fadd VR128:$lhs, VR128:$rhs)),
(v2f64 (fsub VR128:$lhs, VR128:$rhs)))),
@@ -6705,7 +6705,7 @@ let Constraints = "$src1 = $dst" in
multiclass SS41I_insertf32<bits<8> opc, string asm, bit Is2Addr = 1,
OpndItins itins = DEFAULT_ITINS> {
def rr : SS4AIi8<opc, MRMSrcReg, (outs VR128:$dst),
- (ins VR128:$src1, VR128:$src2, u32u8imm:$src3),
+ (ins VR128:$src1, VR128:$src2, i8imm:$src3),
!if(Is2Addr,
!strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
!strconcat(asm,
@@ -6714,7 +6714,7 @@ multiclass SS41I_insertf32<bits<8> opc, string asm, bit Is2Addr = 1,
(X86insertps VR128:$src1, VR128:$src2, imm:$src3))], itins.rr>,
Sched<[WriteFShuffle]>;
def rm : SS4AIi8<opc, MRMSrcMem, (outs VR128:$dst),
- (ins VR128:$src1, f32mem:$src2, u32u8imm:$src3),
+ (ins VR128:$src1, f32mem:$src2, i8imm:$src3),
!if(Is2Addr,
!strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
!strconcat(asm,
@@ -7350,7 +7350,7 @@ multiclass SS41I_binop_rmi_int<bits<8> opc, string OpcodeStr,
OpndItins itins = DEFAULT_ITINS> {
let isCommutable = 1 in
def rri : SS4AIi8<opc, MRMSrcReg, (outs RC:$dst),
- (ins RC:$src1, RC:$src2, u32u8imm:$src3),
+ (ins RC:$src1, RC:$src2, i8imm:$src3),
!if(Is2Addr,
!strconcat(OpcodeStr,
"\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
@@ -7359,7 +7359,7 @@ multiclass SS41I_binop_rmi_int<bits<8> opc, string OpcodeStr,
[(set RC:$dst, (IntId RC:$src1, RC:$src2, imm:$src3))], itins.rr>,
Sched<[itins.Sched]>;
def rmi : SS4AIi8<opc, MRMSrcMem, (outs RC:$dst),
- (ins RC:$src1, x86memop:$src2, u32u8imm:$src3),
+ (ins RC:$src1, x86memop:$src2, i8imm:$src3),
!if(Is2Addr,
!strconcat(OpcodeStr,
"\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
@@ -8579,13 +8579,13 @@ multiclass AVX2_binop_rmi_int<bits<8> opc, string OpcodeStr,
X86MemOperand x86memop> {
let isCommutable = 1 in
def rri : AVX2AIi8<opc, MRMSrcReg, (outs RC:$dst),
- (ins RC:$src1, RC:$src2, u32u8imm:$src3),
+ (ins RC:$src1, RC:$src2, i8imm:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
[(set RC:$dst, (IntId RC:$src1, RC:$src2, imm:$src3))]>,
Sched<[WriteBlend]>, VEX_4V;
def rmi : AVX2AIi8<opc, MRMSrcMem, (outs RC:$dst),
- (ins RC:$src1, x86memop:$src2, u32u8imm:$src3),
+ (ins RC:$src1, x86memop:$src2, i8imm:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
[(set RC:$dst,
diff --git a/llvm/test/CodeGen/X86/avx-intrinsics-x86-upgrade.ll b/llvm/test/CodeGen/X86/avx-intrinsics-x86-upgrade.ll
new file mode 100644
index 00000000000..d2b44cd64ef
--- /dev/null
+++ b/llvm/test/CodeGen/X86/avx-intrinsics-x86-upgrade.ll
@@ -0,0 +1,26 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -march=x86 -mcpu=corei7-avx | FileCheck %s
+
+define <4 x double> @test_x86_avx_blend_pd_256(<4 x double> %a0, <4 x double> %a1) {
+ ; CHECK: vblendpd
+ %res = call <4 x double> @llvm.x86.avx.blend.pd.256(<4 x double> %a0, <4 x double> %a1, i32 7) ; <<4 x double>> [#uses=1]
+ ret <4 x double> %res
+}
+declare <4 x double> @llvm.x86.avx.blend.pd.256(<4 x double>, <4 x double>, i32) nounwind readnone
+
+
+define <8 x float> @test_x86_avx_blend_ps_256(<8 x float> %a0, <8 x float> %a1) {
+ ; CHECK: vblendps
+ %res = call <8 x float> @llvm.x86.avx.blend.ps.256(<8 x float> %a0, <8 x float> %a1, i32 7) ; <<8 x float>> [#uses=1]
+ ret <8 x float> %res
+}
+declare <8 x float> @llvm.x86.avx.blend.ps.256(<8 x float>, <8 x float>, i32) nounwind readnone
+
+
+define <8 x float> @test_x86_avx_dp_ps_256(<8 x float> %a0, <8 x float> %a1) {
+ ; CHECK: vdpps
+ %res = call <8 x float> @llvm.x86.avx.dp.ps.256(<8 x float> %a0, <8 x float> %a1, i32 7) ; <<8 x float>> [#uses=1]
+ ret <8 x float> %res
+}
+declare <8 x float> @llvm.x86.avx.dp.ps.256(<8 x float>, <8 x float>, i32) nounwind readnone
+
+
diff --git a/llvm/test/CodeGen/X86/avx-intrinsics-x86.ll b/llvm/test/CodeGen/X86/avx-intrinsics-x86.ll
index 7382ddcd740..850d6f2a4a3 100644
--- a/llvm/test/CodeGen/X86/avx-intrinsics-x86.ll
+++ b/llvm/test/CodeGen/X86/avx-intrinsics-x86.ll
@@ -818,18 +818,18 @@ declare <16 x i8> @llvm.x86.sse3.ldu.dq(i8*) nounwind readonly
define <2 x double> @test_x86_sse41_blendpd(<2 x double> %a0, <2 x double> %a1) {
; CHECK: vblendpd
- %res = call <2 x double> @llvm.x86.sse41.blendpd(<2 x double> %a0, <2 x double> %a1, i32 7) ; <<2 x double>> [#uses=1]
+ %res = call <2 x double> @llvm.x86.sse41.blendpd(<2 x double> %a0, <2 x double> %a1, i8 7) ; <<2 x double>> [#uses=1]
ret <2 x double> %res
}
-declare <2 x double> @llvm.x86.sse41.blendpd(<2 x double>, <2 x double>, i32) nounwind readnone
+declare <2 x double> @llvm.x86.sse41.blendpd(<2 x double>, <2 x double>, i8) nounwind readnone
define <4 x float> @test_x86_sse41_blendps(<4 x float> %a0, <4 x float> %a1) {
; CHECK: vblendps
- %res = call <4 x float> @llvm.x86.sse41.blendps(<4 x float> %a0, <4 x float> %a1, i32 7) ; <<4 x float>> [#uses=1]
+ %res = call <4 x float> @llvm.x86.sse41.blendps(<4 x float> %a0, <4 x float> %a1, i8 7) ; <<4 x float>> [#uses=1]
ret <4 x float> %res
}
-declare <4 x float> @llvm.x86.sse41.blendps(<4 x float>, <4 x float>, i32) nounwind readnone
+declare <4 x float> @llvm.x86.sse41.blendps(<4 x float>, <4 x float>, i8) nounwind readnone
define <2 x double> @test_x86_sse41_blendvpd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) {
@@ -850,35 +850,35 @@ declare <4 x float> @llvm.x86.sse41.blendvps(<4 x float>, <4 x float>, <4 x floa
define <2 x double> @test_x86_sse41_dppd(<2 x double> %a0, <2 x double> %a1) {
; CHECK: vdppd
- %res = call <2 x double> @llvm.x86.sse41.dppd(<2 x double> %a0, <2 x double> %a1, i32 7) ; <<2 x double>> [#uses=1]
+ %res = call <2 x double> @llvm.x86.sse41.dppd(<2 x double> %a0, <2 x double> %a1, i8 7) ; <<2 x double>> [#uses=1]
ret <2 x double> %res
}
-declare <2 x double> @llvm.x86.sse41.dppd(<2 x double>, <2 x double>, i32) nounwind readnone
+declare <2 x double> @llvm.x86.sse41.dppd(<2 x double>, <2 x double>, i8) nounwind readnone
define <4 x float> @test_x86_sse41_dpps(<4 x float> %a0, <4 x float> %a1) {
; CHECK: vdpps
- %res = call <4 x float> @llvm.x86.sse41.dpps(<4 x float> %a0, <4 x float> %a1, i32 7) ; <<4 x float>> [#uses=1]
+ %res = call <4 x float> @llvm.x86.sse41.dpps(<4 x float> %a0, <4 x float> %a1, i8 7) ; <<4 x float>> [#uses=1]
ret <4 x float> %res
}
-declare <4 x float> @llvm.x86.sse41.dpps(<4 x float>, <4 x float>, i32) nounwind readnone
+declare <4 x float> @llvm.x86.sse41.dpps(<4 x float>, <4 x float>, i8) nounwind readnone
define <4 x float> @test_x86_sse41_insertps(<4 x float> %a0, <4 x float> %a1) {
; CHECK: vinsertps
- %res = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %a0, <4 x float> %a1, i32 7) ; <<4 x float>> [#uses=1]
+ %res = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %a0, <4 x float> %a1, i8 7) ; <<4 x float>> [#uses=1]
ret <4 x float> %res
}
-declare <4 x float> @llvm.x86.sse41.insertps(<4 x float>, <4 x float>, i32) nounwind readnone
+declare <4 x float> @llvm.x86.sse41.insertps(<4 x float>, <4 x float>, i8) nounwind readnone
define <8 x i16> @test_x86_sse41_mpsadbw(<16 x i8> %a0, <16 x i8> %a1) {
; CHECK: vmpsadbw
- %res = call <8 x i16> @llvm.x86.sse41.mpsadbw(<16 x i8> %a0, <16 x i8> %a1, i32 7) ; <<8 x i16>> [#uses=1]
+ %res = call <8 x i16> @llvm.x86.sse41.mpsadbw(<16 x i8> %a0, <16 x i8> %a1, i8 7) ; <<8 x i16>> [#uses=1]
ret <8 x i16> %res
}
-declare <8 x i16> @llvm.x86.sse41.mpsadbw(<16 x i8>, <16 x i8>, i32) nounwind readnone
+declare <8 x i16> @llvm.x86.sse41.mpsadbw(<16 x i8>, <16 x i8>, i8) nounwind readnone
define <8 x i16> @test_x86_sse41_packusdw(<4 x i32> %a0, <4 x i32> %a1) {
@@ -899,10 +899,10 @@ declare <16 x i8> @llvm.x86.sse41.pblendvb(<16 x i8>, <16 x i8>, <16 x i8>) noun
define <8 x i16> @test_x86_sse41_pblendw(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK: vpblendw
- %res = call <8 x i16> @llvm.x86.sse41.pblendw(<8 x i16> %a0, <8 x i16> %a1, i32 7) ; <<8 x i16>> [#uses=1]
+ %res = call <8 x i16> @llvm.x86.sse41.pblendw(<8 x i16> %a0, <8 x i16> %a1, i8 7) ; <<8 x i16>> [#uses=1]
ret <8 x i16> %res
}
-declare <8 x i16> @llvm.x86.sse41.pblendw(<8 x i16>, <8 x i16>, i32) nounwind readnone
+declare <8 x i16> @llvm.x86.sse41.pblendw(<8 x i16>, <8 x i16>, i8) nounwind readnone
define <8 x i16> @test_x86_sse41_phminposuw(<8 x i16> %a0) {
@@ -1770,18 +1770,18 @@ declare <8 x float> @llvm.x86.avx.addsub.ps.256(<8 x float>, <8 x float>) nounwi
define <4 x double> @test_x86_avx_blend_pd_256(<4 x double> %a0, <4 x double> %a1) {
; CHECK: vblendpd
- %res = call <4 x double> @llvm.x86.avx.blend.pd.256(<4 x double> %a0, <4 x double> %a1, i32 7) ; <<4 x double>> [#uses=1]
+ %res = call <4 x double> @llvm.x86.avx.blend.pd.256(<4 x double> %a0, <4 x double> %a1, i8 7) ; <<4 x double>> [#uses=1]
ret <4 x double> %res
}
-declare <4 x double> @llvm.x86.avx.blend.pd.256(<4 x double>, <4 x double>, i32) nounwind readnone
+declare <4 x double> @llvm.x86.avx.blend.pd.256(<4 x double>, <4 x double>, i8) nounwind readnone
define <8 x float> @test_x86_avx_blend_ps_256(<8 x float> %a0, <8 x float> %a1) {
; CHECK: vblendps
- %res = call <8 x float> @llvm.x86.avx.blend.ps.256(<8 x float> %a0, <8 x float> %a1, i32 7) ; <<8 x float>> [#uses=1]
+ %res = call <8 x float> @llvm.x86.avx.blend.ps.256(<8 x float> %a0, <8 x float> %a1, i8 7) ; <<8 x float>> [#uses=1]
ret <8 x float> %res
}
-declare <8 x float> @llvm.x86.avx.blend.ps.256(<8 x float>, <8 x float>, i32) nounwind readnone
+declare <8 x float> @llvm.x86.avx.blend.ps.256(<8 x float>, <8 x float>, i8) nounwind readnone
define <4 x double> @test_x86_avx_blendv_pd_256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) {
@@ -1950,10 +1950,10 @@ declare <8 x i32> @llvm.x86.avx.cvtt.ps2dq.256(<8 x float>) nounwind readnone
define <8 x float> @test_x86_avx_dp_ps_256(<8 x float> %a0, <8 x float> %a1) {
; CHECK: vdpps
- %res = call <8 x float> @llvm.x86.avx.dp.ps.256(<8 x float> %a0, <8 x float> %a1, i32 7) ; <<8 x float>> [#uses=1]
+ %res = call <8 x float> @llvm.x86.avx.dp.ps.256(<8 x float> %a0, <8 x float> %a1, i8 7) ; <<8 x float>> [#uses=1]
ret <8 x float> %res
}
-declare <8 x float> @llvm.x86.avx.dp.ps.256(<8 x float>, <8 x float>, i32) nounwind readnone
+declare <8 x float> @llvm.x86.avx.dp.ps.256(<8 x float>, <8 x float>, i8) nounwind readnone
define <4 x double> @test_x86_avx_hadd_pd_256(<4 x double> %a0, <4 x double> %a1) {
diff --git a/llvm/test/CodeGen/X86/avx.ll b/llvm/test/CodeGen/X86/avx.ll
index 6069c14f0d8..cba6d98f5a8 100644
--- a/llvm/test/CodeGen/X86/avx.ll
+++ b/llvm/test/CodeGen/X86/avx.ll
@@ -60,7 +60,7 @@ define <4 x float> @insertps_from_vector_load_offset_2(<4 x float> %a, <4 x floa
; X32: movl 8(%esp), %ecx
; CHECK-NOT: mov
;; Try to match a bit more of the instr, since we need the load's offset.
-; CHECK: vinsertps $192, 12(%{{...}},%{{...}}), %
+; CHECK: vinsertps $-64, 12(%{{...}},%{{...}}), %
; CHECK-NEXT: ret
%1 = getelementptr inbounds <4 x float>* %pb, i64 %index
%2 = load <4 x float>* %1, align 16
diff --git a/llvm/test/CodeGen/X86/avx2-intrinsics-x86-upgrade.ll b/llvm/test/CodeGen/X86/avx2-intrinsics-x86-upgrade.ll
new file mode 100644
index 00000000000..ac2c73bb932
--- /dev/null
+++ b/llvm/test/CodeGen/X86/avx2-intrinsics-x86-upgrade.ll
@@ -0,0 +1,33 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -march=x86 -mcpu=core-avx2 -mattr=avx2 | FileCheck %s
+
+define <16 x i16> @test_x86_avx2_pblendw(<16 x i16> %a0, <16 x i16> %a1) {
+ ; CHECK: vpblendw
+ %res = call <16 x i16> @llvm.x86.avx2.pblendw(<16 x i16> %a0, <16 x i16> %a1, i32 7) ; <<16 x i16>> [#uses=1]
+ ret <16 x i16> %res
+}
+declare <16 x i16> @llvm.x86.avx2.pblendw(<16 x i16>, <16 x i16>, i32) nounwind readnone
+
+
+define <4 x i32> @test_x86_avx2_pblendd_128(<4 x i32> %a0, <4 x i32> %a1) {
+ ; CHECK: vpblendd
+ %res = call <4 x i32> @llvm.x86.avx2.pblendd.128(<4 x i32> %a0, <4 x i32> %a1, i32 7) ; <<4 x i32>> [#uses=1]
+ ret <4 x i32> %res
+}
+declare <4 x i32> @llvm.x86.avx2.pblendd.128(<4 x i32>, <4 x i32>, i32) nounwind readnone
+
+
+define <8 x i32> @test_x86_avx2_pblendd_256(<8 x i32> %a0, <8 x i32> %a1) {
+ ; CHECK: vpblendd
+ %res = call <8 x i32> @llvm.x86.avx2.pblendd.256(<8 x i32> %a0, <8 x i32> %a1, i32 7) ; <<8 x i32>> [#uses=1]
+ ret <8 x i32> %res
+}
+declare <8 x i32> @llvm.x86.avx2.pblendd.256(<8 x i32>, <8 x i32>, i32) nounwind readnone
+
+
+define <16 x i16> @test_x86_avx2_mpsadbw(<32 x i8> %a0, <32 x i8> %a1) {
+ ; CHECK: vmpsadbw
+ %res = call <16 x i16> @llvm.x86.avx2.mpsadbw(<32 x i8> %a0, <32 x i8> %a1, i32 7) ; <<16 x i16>> [#uses=1]
+ ret <16 x i16> %res
+}
+declare <16 x i16> @llvm.x86.avx2.mpsadbw(<32 x i8>, <32 x i8>, i32) nounwind readnone
+
diff --git a/llvm/test/CodeGen/X86/avx2-intrinsics-x86.ll b/llvm/test/CodeGen/X86/avx2-intrinsics-x86.ll
index ab3d591e1d9..31635de3400 100644
--- a/llvm/test/CodeGen/X86/avx2-intrinsics-x86.ll
+++ b/llvm/test/CodeGen/X86/avx2-intrinsics-x86.ll
@@ -475,10 +475,10 @@ declare <4 x i64> @llvm.x86.avx2.movntdqa(i8*) nounwind readonly
define <16 x i16> @test_x86_avx2_mpsadbw(<32 x i8> %a0, <32 x i8> %a1) {
; CHECK: vmpsadbw
- %res = call <16 x i16> @llvm.x86.avx2.mpsadbw(<32 x i8> %a0, <32 x i8> %a1, i32 7) ; <<16 x i16>> [#uses=1]
+ %res = call <16 x i16> @llvm.x86.avx2.mpsadbw(<32 x i8> %a0, <32 x i8> %a1, i8 7) ; <<16 x i16>> [#uses=1]
ret <16 x i16> %res
}
-declare <16 x i16> @llvm.x86.avx2.mpsadbw(<32 x i8>, <32 x i8>, i32) nounwind readnone
+declare <16 x i16> @llvm.x86.avx2.mpsadbw(<32 x i8>, <32 x i8>, i8) nounwind readnone
define <16 x i16> @test_x86_avx2_packusdw(<8 x i32> %a0, <8 x i32> %a1) {
@@ -499,10 +499,10 @@ declare <32 x i8> @llvm.x86.avx2.pblendvb(<32 x i8>, <32 x i8>, <32 x i8>) nounw
define <16 x i16> @test_x86_avx2_pblendw(<16 x i16> %a0, <16 x i16> %a1) {
; CHECK: vpblendw
- %res = call <16 x i16> @llvm.x86.avx2.pblendw(<16 x i16> %a0, <16 x i16> %a1, i32 7) ; <<16 x i16>> [#uses=1]
+ %res = call <16 x i16> @llvm.x86.avx2.pblendw(<16 x i16> %a0, <16 x i16> %a1, i8 7) ; <<16 x i16>> [#uses=1]
ret <16 x i16> %res
}
-declare <16 x i16> @llvm.x86.avx2.pblendw(<16 x i16>, <16 x i16>, i32) nounwind readnone
+declare <16 x i16> @llvm.x86.avx2.pblendw(<16 x i16>, <16 x i16>, i8) nounwind readnone
define <32 x i8> @test_x86_avx2_pmaxsb(<32 x i8> %a0, <32 x i8> %a1) {
@@ -706,18 +706,18 @@ declare <8 x float> @llvm.x86.avx2.vbroadcast.ss.ps.256(<4 x float>) nounwind re
define <4 x i32> @test_x86_avx2_pblendd_128(<4 x i32> %a0, <4 x i32> %a1) {
; CHECK: vpblendd
- %res = call <4 x i32> @llvm.x86.avx2.pblendd.128(<4 x i32> %a0, <4 x i32> %a1, i32 7) ; <<4 x i32>> [#uses=1]
+ %res = call <4 x i32> @llvm.x86.avx2.pblendd.128(<4 x i32> %a0, <4 x i32> %a1, i8 7) ; <<4 x i32>> [#uses=1]
ret <4 x i32> %res
}
-declare <4 x i32> @llvm.x86.avx2.pblendd.128(<4 x i32>, <4 x i32>, i32) nounwind readnone
+declare <4 x i32> @llvm.x86.avx2.pblendd.128(<4 x i32>, <4 x i32>, i8) nounwind readnone
define <8 x i32> @test_x86_avx2_pblendd_256(<8 x i32> %a0, <8 x i32> %a1) {
; CHECK: vpblendd
- %res = call <8 x i32> @llvm.x86.avx2.pblendd.256(<8 x i32> %a0, <8 x i32> %a1, i32 7) ; <<8 x i32>> [#uses=1]
+ %res = call <8 x i32> @llvm.x86.avx2.pblendd.256(<8 x i32> %a0, <8 x i32> %a1, i8 7) ; <<8 x i32>> [#uses=1]
ret <8 x i32> %res
}
-declare <8 x i32> @llvm.x86.avx2.pblendd.256(<8 x i32>, <8 x i32>, i32) nounwind readnone
+declare <8 x i32> @llvm.x86.avx2.pblendd.256(<8 x i32>, <8 x i32>, i8) nounwind readnone
define <16 x i8> @test_x86_avx2_pbroadcastb_128(<16 x i8> %a0) {
diff --git a/llvm/test/CodeGen/X86/sse41-intrinsics-x86-upgrade.ll b/llvm/test/CodeGen/X86/sse41-intrinsics-x86-upgrade.ll
new file mode 100644
index 00000000000..6fab98e70a8
--- /dev/null
+++ b/llvm/test/CodeGen/X86/sse41-intrinsics-x86-upgrade.ll
@@ -0,0 +1,61 @@
+; RUN: llc < %s -mtriple=i386-apple-darwin -mattr=-avx,+sse4.1 | FileCheck %s
+; This test works just like the non-upgrade one except that it only checks
+; forms which require auto-upgrading.
+
+define <2 x double> @test_x86_sse41_blendpd(<2 x double> %a0, <2 x double> %a1) {
+ ; CHECK: blendpd
+ %res = call <2 x double> @llvm.x86.sse41.blendpd(<2 x double> %a0, <2 x double> %a1, i32 7) ; <<2 x double>> [#uses=1]
+ ret <2 x double> %res
+}
+declare <2 x double> @llvm.x86.sse41.blendpd(<2 x double>, <2 x double>, i32) nounwind readnone
+
+
+define <4 x float> @test_x86_sse41_blendps(<4 x float> %a0, <4 x float> %a1) {
+ ; CHECK: blendps
+ %res = call <4 x float> @llvm.x86.sse41.blendps(<4 x float> %a0, <4 x float> %a1, i32 7) ; <<4 x float>> [#uses=1]
+ ret <4 x float> %res
+}
+declare <4 x float> @llvm.x86.sse41.blendps(<4 x float>, <4 x float>, i32) nounwind readnone
+
+
+define <2 x double> @test_x86_sse41_dppd(<2 x double> %a0, <2 x double> %a1) {
+ ; CHECK: dppd
+ %res = call <2 x double> @llvm.x86.sse41.dppd(<2 x double> %a0, <2 x double> %a1, i32 7) ; <<2 x double>> [#uses=1]
+ ret <2 x double> %res
+}
+declare <2 x double> @llvm.x86.sse41.dppd(<2 x double>, <2 x double>, i32) nounwind readnone
+
+
+define <4 x float> @test_x86_sse41_dpps(<4 x float> %a0, <4 x float> %a1) {
+ ; CHECK: dpps
+ %res = call <4 x float> @llvm.x86.sse41.dpps(<4 x float> %a0, <4 x float> %a1, i32 7) ; <<4 x float>> [#uses=1]
+ ret <4 x float> %res
+}
+declare <4 x float> @llvm.x86.sse41.dpps(<4 x float>, <4 x float>, i32) nounwind readnone
+
+
+define <4 x float> @test_x86_sse41_insertps(<4 x float> %a0, <4 x float> %a1) {
+ ; CHECK: insertps
+ %res = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %a0, <4 x float> %a1, i32 7) ; <<4 x float>> [#uses=1]
+ ret <4 x float> %res
+}
+declare <4 x float> @llvm.x86.sse41.insertps(<4 x float>, <4 x float>, i32) nounwind readnone
+
+
+
+define <8 x i16> @test_x86_sse41_mpsadbw(<16 x i8> %a0, <16 x i8> %a1) {
+ ; CHECK: mpsadbw
+ %res = call <8 x i16> @llvm.x86.sse41.mpsadbw(<16 x i8> %a0, <16 x i8> %a1, i32 7) ; <<8 x i16>> [#uses=1]
+ ret <8 x i16> %res
+}
+declare <8 x i16> @llvm.x86.sse41.mpsadbw(<16 x i8>, <16 x i8>, i32) nounwind readnone
+
+
+define <8 x i16> @test_x86_sse41_pblendw(<8 x i16> %a0, <8 x i16> %a1) {
+ ; CHECK: pblendw
+ %res = call <8 x i16> @llvm.x86.sse41.pblendw(<8 x i16> %a0, <8 x i16> %a1, i32 7) ; <<8 x i16>> [#uses=1]
+ ret <8 x i16> %res
+}
+declare <8 x i16> @llvm.x86.sse41.pblendw(<8 x i16>, <8 x i16>, i32) nounwind readnone
+
+
diff --git a/llvm/test/CodeGen/X86/sse41-intrinsics-x86.ll b/llvm/test/CodeGen/X86/sse41-intrinsics-x86.ll
index 37eff43b28c..5f25a16380d 100644
--- a/llvm/test/CodeGen/X86/sse41-intrinsics-x86.ll
+++ b/llvm/test/CodeGen/X86/sse41-intrinsics-x86.ll
@@ -2,18 +2,18 @@
define <2 x double> @test_x86_sse41_blendpd(<2 x double> %a0, <2 x double> %a1) {
; CHECK: blendpd
- %res = call <2 x double> @llvm.x86.sse41.blendpd(<2 x double> %a0, <2 x double> %a1, i32 7) ; <<2 x double>> [#uses=1]
+ %res = call <2 x double> @llvm.x86.sse41.blendpd(<2 x double> %a0, <2 x double> %a1, i8 7) ; <<2 x double>> [#uses=1]
ret <2 x double> %res
}
-declare <2 x double> @llvm.x86.sse41.blendpd(<2 x double>, <2 x double>, i32) nounwind readnone
+declare <2 x double> @llvm.x86.sse41.blendpd(<2 x double>, <2 x double>, i8) nounwind readnone
define <4 x float> @test_x86_sse41_blendps(<4 x float> %a0, <4 x float> %a1) {
; CHECK: blendps
- %res = call <4 x float> @llvm.x86.sse41.blendps(<4 x float> %a0, <4 x float> %a1, i32 7) ; <<4 x float>> [#uses=1]
+ %res = call <4 x float> @llvm.x86.sse41.blendps(<4 x float> %a0, <4 x float> %a1, i8 7) ; <<4 x float>> [#uses=1]
ret <4 x float> %res
}
-declare <4 x float> @llvm.x86.sse41.blendps(<4 x float>, <4 x float>, i32) nounwind readnone
+declare <4 x float> @llvm.x86.sse41.blendps(<4 x float>, <4 x float>, i8) nounwind readnone
define <2 x double> @test_x86_sse41_blendvpd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) {
@@ -34,35 +34,35 @@ declare <4 x float> @llvm.x86.sse41.blendvps(<4 x float>, <4 x float>, <4 x floa
define <2 x double> @test_x86_sse41_dppd(<2 x double> %a0, <2 x double> %a1) {
; CHECK: dppd
- %res = call <2 x double> @llvm.x86.sse41.dppd(<2 x double> %a0, <2 x double> %a1, i32 7) ; <<2 x double>> [#uses=1]
+ %res = call <2 x double> @llvm.x86.sse41.dppd(<2 x double> %a0, <2 x double> %a1, i8 7) ; <<2 x double>> [#uses=1]
ret <2 x double> %res
}
-declare <2 x double> @llvm.x86.sse41.dppd(<2 x double>, <2 x double>, i32) nounwind readnone
+declare <2 x double> @llvm.x86.sse41.dppd(<2 x double>, <2 x double>, i8) nounwind readnone
define <4 x float> @test_x86_sse41_dpps(<4 x float> %a0, <4 x float> %a1) {
; CHECK: dpps
- %res = call <4 x float> @llvm.x86.sse41.dpps(<4 x float> %a0, <4 x float> %a1, i32 7) ; <<4 x float>> [#uses=1]
+ %res = call <4 x float> @llvm.x86.sse41.dpps(<4 x float> %a0, <4 x float> %a1, i8 7) ; <<4 x float>> [#uses=1]
ret <4 x float> %res
}
-declare <4 x float> @llvm.x86.sse41.dpps(<4 x float>, <4 x float>, i32) nounwind readnone
+declare <4 x float> @llvm.x86.sse41.dpps(<4 x float>, <4 x float>, i8) nounwind readnone
define <4 x float> @test_x86_sse41_insertps(<4 x float> %a0, <4 x float> %a1) {
; CHECK: insertps
- %res = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %a0, <4 x float> %a1, i32 7) ; <<4 x float>> [#uses=1]
+ %res = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %a0, <4 x float> %a1, i8 7) ; <<4 x float>> [#uses=1]
ret <4 x float> %res
}
-declare <4 x float> @llvm.x86.sse41.insertps(<4 x float>, <4 x float>, i32) nounwind readnone
+declare <4 x float> @llvm.x86.sse41.insertps(<4 x float>, <4 x float>, i8) nounwind readnone
define <8 x i16> @test_x86_sse41_mpsadbw(<16 x i8> %a0, <16 x i8> %a1) {
; CHECK: mpsadbw
- %res = call <8 x i16> @llvm.x86.sse41.mpsadbw(<16 x i8> %a0, <16 x i8> %a1, i32 7) ; <<8 x i16>> [#uses=1]
+ %res = call <8 x i16> @llvm.x86.sse41.mpsadbw(<16 x i8> %a0, <16 x i8> %a1, i8 7) ; <<8 x i16>> [#uses=1]
ret <8 x i16> %res
}
-declare <8 x i16> @llvm.x86.sse41.mpsadbw(<16 x i8>, <16 x i8>, i32) nounwind readnone
+declare <8 x i16> @llvm.x86.sse41.mpsadbw(<16 x i8>, <16 x i8>, i8) nounwind readnone
define <8 x i16> @test_x86_sse41_packusdw(<4 x i32> %a0, <4 x i32> %a1) {
@@ -83,10 +83,10 @@ declare <16 x i8> @llvm.x86.sse41.pblendvb(<16 x i8>, <16 x i8>, <16 x i8>) noun
define <8 x i16> @test_x86_sse41_pblendw(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK: pblendw
- %res = call <8 x i16> @llvm.x86.sse41.pblendw(<8 x i16> %a0, <8 x i16> %a1, i32 7) ; <<8 x i16>> [#uses=1]
+ %res = call <8 x i16> @llvm.x86.sse41.pblendw(<8 x i16> %a0, <8 x i16> %a1, i8 7) ; <<8 x i16>> [#uses=1]
ret <8 x i16> %res
}
-declare <8 x i16> @llvm.x86.sse41.pblendw(<8 x i16>, <8 x i16>, i32) nounwind readnone
+declare <8 x i16> @llvm.x86.sse41.pblendw(<8 x i16>, <8 x i16>, i8) nounwind readnone
define <8 x i16> @test_x86_sse41_phminposuw(<8 x i16> %a0) {
diff --git a/llvm/test/CodeGen/X86/sse41.ll b/llvm/test/CodeGen/X86/sse41.ll
index 986488f531e..aa305e2b3cf 100644
--- a/llvm/test/CodeGen/X86/sse41.ll
+++ b/llvm/test/CodeGen/X86/sse41.ll
@@ -618,7 +618,7 @@ define <4 x float> @insertps_from_vector_load_offset_2(<4 x float> %a, <4 x floa
; X32: movl 8(%esp), %ecx
; CHECK-NOT: mov
;; Try to match a bit more of the instr, since we need the load's offset.
-; CHECK: insertps $192, 12(%{{...}},%{{...}}), %
+; CHECK: insertps $-64, 12(%{{...}},%{{...}}), %
; CHECK-NEXT: ret
%1 = getelementptr inbounds <4 x float>* %pb, i64 %index
%2 = load <4 x float>* %1, align 16
diff --git a/llvm/test/MC/X86/x86-32-coverage.s b/llvm/test/MC/X86/x86-32-coverage.s
index 732874b9187..80c34ece624 100644
--- a/llvm/test/MC/X86/x86-32-coverage.s
+++ b/llvm/test/MC/X86/x86-32-coverage.s
@@ -19618,22 +19618,36 @@
// CHECK: blendvps %xmm2, %xmm1 # encoding: [0x66,0x0f,0x38,0x14,0xca]
blendvps %xmm2, %xmm1
-// rdar://9795008
-// These instructions take a mask not an 8-bit sign extended value.
+// These instructions can take an unsigned 8-bit mask as well as a signed 8-bit
+// immediate. Check both forms here.
// CHECK: blendps $129, %xmm2, %xmm1
blendps $0x81, %xmm2, %xmm1
+// CHECK: blendps $-64, %xmm2, %xmm1
+ blendps $-64, %xmm2, %xmm1
// CHECK: blendpd $129, %xmm2, %xmm1
blendpd $0x81, %xmm2, %xmm1
+// CHECK: blendpd $-64, %xmm2, %xmm1
+ blendpd $-64, %xmm2, %xmm1
// CHECK: pblendw $129, %xmm2, %xmm1
pblendw $0x81, %xmm2, %xmm1
+// CHECK: pblendw $-64, %xmm2, %xmm1
+ pblendw $-64, %xmm2, %xmm1
// CHECK: mpsadbw $129, %xmm2, %xmm1
mpsadbw $0x81, %xmm2, %xmm1
+// CHECK: mpsadbw $-64, %xmm2, %xmm1
+ mpsadbw $-64, %xmm2, %xmm1
// CHECK: dpps $129, %xmm2, %xmm1
dpps $0x81, %xmm2, %xmm1
+// CHECK: dpps $-64, %xmm2, %xmm1
+ dpps $-64, %xmm2, %xmm1
// CHECK: dppd $129, %xmm2, %xmm1
dppd $0x81, %xmm2, %xmm1
+// CHECK: dppd $-64, %xmm2, %xmm1
+ dppd $-64, %xmm2, %xmm1
// CHECK: insertps $129, %xmm2, %xmm1
insertps $0x81, %xmm2, %xmm1
+// CHECK: insertps $-64, %xmm2, %xmm1
+ insertps $-64, %xmm2, %xmm1
// PR13253 handle implicit optional third argument that must always be xmm0
// CHECK: pblendvb %xmm2, %xmm1
diff --git a/llvm/utils/TableGen/X86RecognizableInstr.cpp b/llvm/utils/TableGen/X86RecognizableInstr.cpp
index a891d66c016..aa4f37b6cdb 100644
--- a/llvm/utils/TableGen/X86RecognizableInstr.cpp
+++ b/llvm/utils/TableGen/X86RecognizableInstr.cpp
@@ -912,7 +912,6 @@ OperandType RecognizableInstr::typeFromString(const std::string &s,
TYPE("i32mem", TYPE_Mv)
TYPE("i32imm", TYPE_IMMv)
TYPE("i32i8imm", TYPE_IMM32)
- TYPE("u32u8imm", TYPE_IMM32)
TYPE("GR32", TYPE_R32)
TYPE("GR32orGR64", TYPE_R32)
TYPE("i64mem", TYPE_Mv)
@@ -1015,7 +1014,6 @@ RecognizableInstr::immediateEncodingFromString(const std::string &s,
ENCODING("i16imm", ENCODING_IW)
}
ENCODING("i32i8imm", ENCODING_IB)
- ENCODING("u32u8imm", ENCODING_IB)
ENCODING("SSECC", ENCODING_IB)
ENCODING("AVXCC", ENCODING_IB)
ENCODING("AVX512RC", ENCODING_IB)
OpenPOWER on IntegriCloud