diff options
Diffstat (limited to 'llvm/lib')
-rw-r--r-- | llvm/lib/IR/AutoUpgrade.cpp | 89 | ||||
-rw-r--r-- | llvm/lib/Target/X86/AsmParser/X86AsmParserCommon.h | 4 | ||||
-rw-r--r-- | llvm/lib/Target/X86/AsmParser/X86Operand.h | 14 | ||||
-rw-r--r-- | llvm/lib/Target/X86/X86InstrAVX512.td | 8 | ||||
-rw-r--r-- | llvm/lib/Target/X86/X86InstrFragmentsSIMD.td | 4 | ||||
-rw-r--r-- | llvm/lib/Target/X86/X86InstrInfo.td | 17 | ||||
-rw-r--r-- | llvm/lib/Target/X86/X86InstrSSE.td | 26 |
7 files changed, 109 insertions, 53 deletions
diff --git a/llvm/lib/IR/AutoUpgrade.cpp b/llvm/lib/IR/AutoUpgrade.cpp index 2b62ea24cec..bbb8462652c 100644 --- a/llvm/lib/IR/AutoUpgrade.cpp +++ b/llvm/lib/IR/AutoUpgrade.cpp @@ -43,6 +43,22 @@ static bool UpgradeSSE41Function(Function* F, Intrinsic::ID IID, return true; } +// Upgrade the declarations of intrinsic functions whose 8-bit immediate mask +// arguments have changed their type from i32 to i8. +static bool UpgradeX86IntrinsicsWith8BitMask(Function *F, Intrinsic::ID IID, + Function *&NewFn) { + // Check that the last argument is an i32. + Type *LastArgType = F->getFunctionType()->getParamType( + F->getFunctionType()->getNumParams() - 1); + if (!LastArgType->isIntegerTy(32)) + return false; + + // Move this function aside and map down. + F->setName(F->getName() + ".old"); + NewFn = Intrinsic::getDeclaration(F->getParent(), IID); + return true; +} + static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) { assert(F && "Illegal to upgrade a non-existent Function."); @@ -130,6 +146,51 @@ static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) { if (Name == "x86.sse41.ptestnzc") return UpgradeSSE41Function(F, Intrinsic::x86_sse41_ptestnzc, NewFn); } + // Several blend and other instructions with masks used the wrong number of + // bits. 
+ if (Name == "x86.sse41.pblendw") + return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_pblendw, + NewFn); + if (Name == "x86.sse41.blendpd") + return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_blendpd, + NewFn); + if (Name == "x86.sse41.blendps") + return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_blendps, + NewFn); + if (Name == "x86.sse41.insertps") + return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_insertps, + NewFn); + if (Name == "x86.sse41.dppd") + return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_dppd, + NewFn); + if (Name == "x86.sse41.dpps") + return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_dpps, + NewFn); + if (Name == "x86.sse41.mpsadbw") + return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_mpsadbw, + NewFn); + if (Name == "x86.avx.blend.pd.256") + return UpgradeX86IntrinsicsWith8BitMask( + F, Intrinsic::x86_avx_blend_pd_256, NewFn); + if (Name == "x86.avx.blend.ps.256") + return UpgradeX86IntrinsicsWith8BitMask( + F, Intrinsic::x86_avx_blend_ps_256, NewFn); + if (Name == "x86.avx.dp.ps.256") + return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_avx_dp_ps_256, + NewFn); + if (Name == "x86.avx2.pblendw") + return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_avx2_pblendw, + NewFn); + if (Name == "x86.avx2.pblendd.128") + return UpgradeX86IntrinsicsWith8BitMask( + F, Intrinsic::x86_avx2_pblendd_128, NewFn); + if (Name == "x86.avx2.pblendd.256") + return UpgradeX86IntrinsicsWith8BitMask( + F, Intrinsic::x86_avx2_pblendd_256, NewFn); + if (Name == "x86.avx2.mpsadbw") + return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_avx2_mpsadbw, + NewFn); + // frcz.ss/sd may need to have an argument dropped if (Name.startswith("x86.xop.vfrcz.ss") && F->arg_size() == 2) { F->setName(Name + ".old"); @@ -413,6 +474,34 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) { CI->eraseFromParent(); return; } + + case Intrinsic::x86_sse41_pblendw: + 
case Intrinsic::x86_sse41_blendpd: + case Intrinsic::x86_sse41_blendps: + case Intrinsic::x86_sse41_insertps: + case Intrinsic::x86_sse41_dppd: + case Intrinsic::x86_sse41_dpps: + case Intrinsic::x86_sse41_mpsadbw: + case Intrinsic::x86_avx_blend_pd_256: + case Intrinsic::x86_avx_blend_ps_256: + case Intrinsic::x86_avx_dp_ps_256: + case Intrinsic::x86_avx2_pblendw: + case Intrinsic::x86_avx2_pblendd_128: + case Intrinsic::x86_avx2_pblendd_256: + case Intrinsic::x86_avx2_mpsadbw: { + // Need to truncate the last argument from i32 to i8 -- this argument models + // an inherently 8-bit immediate operand to these x86 instructions. + SmallVector<Value *, 4> Args(CI->arg_operands().begin(), + CI->arg_operands().end()); + + // Replace the last argument with a trunc. + Args.back() = Builder.CreateTrunc(Args.back(), Type::getInt8Ty(C), "trunc"); + + CallInst *NewCall = Builder.CreateCall(NewFn, Args); + CI->replaceAllUsesWith(NewCall); + CI->eraseFromParent(); + return; + } } } diff --git a/llvm/lib/Target/X86/AsmParser/X86AsmParserCommon.h b/llvm/lib/Target/X86/AsmParser/X86AsmParserCommon.h index 5d9b6ab5b05..72aeeaac163 100644 --- a/llvm/lib/Target/X86/AsmParser/X86AsmParserCommon.h +++ b/llvm/lib/Target/X86/AsmParser/X86AsmParserCommon.h @@ -24,10 +24,6 @@ inline bool isImmSExti32i8Value(uint64_t Value) { (0xFFFFFFFFFFFFFF80ULL <= Value && Value <= 0xFFFFFFFFFFFFFFFFULL)); } -inline bool isImmZExtu32u8Value(uint64_t Value) { - return (Value <= 0x00000000000000FFULL); -} - inline bool isImmSExti64i8Value(uint64_t Value) { return (( Value <= 0x000000000000007FULL)|| (0xFFFFFFFFFFFFFF80ULL <= Value && Value <= 0xFFFFFFFFFFFFFFFFULL)); diff --git a/llvm/lib/Target/X86/AsmParser/X86Operand.h b/llvm/lib/Target/X86/AsmParser/X86Operand.h index 11a84157f58..e0fab8dcaf3 100644 --- a/llvm/lib/Target/X86/AsmParser/X86Operand.h +++ b/llvm/lib/Target/X86/AsmParser/X86Operand.h @@ -153,20 +153,6 @@ struct X86Operand : public MCParsedAsmOperand { // extension. 
return isImmSExti32i8Value(CE->getValue()); } - bool isImmZExtu32u8() const { - if (!isImm()) - return false; - - // If this isn't a constant expr, just assume it fits and let relaxation - // handle it. - const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); - if (!CE) - return true; - - // Otherwise, check the value is in a range that makes sense for this - // extension. - return isImmZExtu32u8Value(CE->getValue()); - } bool isImmSExti64i8() const { if (!isImm()) return false; diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td index 10bbfda6fe1..dbb58bb8ba8 100644 --- a/llvm/lib/Target/X86/X86InstrAVX512.td +++ b/llvm/lib/Target/X86/X86InstrAVX512.td @@ -393,12 +393,12 @@ def : Pat<(vinsert256_insert:$ins (v16i32 VR512:$src1), // vinsertps - insert f32 to XMM def VINSERTPSzrr : AVX512AIi8<0x21, MRMSrcReg, (outs VR128X:$dst), - (ins VR128X:$src1, VR128X:$src2, u32u8imm:$src3), + (ins VR128X:$src1, VR128X:$src2, i8imm:$src3), "vinsertps\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", [(set VR128X:$dst, (X86insertps VR128X:$src1, VR128X:$src2, imm:$src3))]>, EVEX_4V; def VINSERTPSzrm: AVX512AIi8<0x21, MRMSrcMem, (outs VR128X:$dst), - (ins VR128X:$src1, f32mem:$src2, u32u8imm:$src3), + (ins VR128X:$src1, f32mem:$src2, i8imm:$src3), "vinsertps\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", [(set VR128X:$dst, (X86insertps VR128X:$src1, (v4f32 (scalar_to_vector (loadf32 addr:$src2))), @@ -538,13 +538,13 @@ def : Pat<(insert_subvector undef, (v8f32 VR256X:$src), (iPTR 0)), // vextractps - extract 32 bits from XMM def VEXTRACTPSzrr : AVX512AIi8<0x17, MRMDestReg, (outs GR32:$dst), - (ins VR128X:$src1, u32u8imm:$src2), + (ins VR128X:$src1, i32i8imm:$src2), "vextractps\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set GR32:$dst, (extractelt (bc_v4i32 (v4f32 VR128X:$src1)), imm:$src2))]>, EVEX; def VEXTRACTPSzmr : AVX512AIi8<0x17, MRMDestMem, (outs), - (ins f32mem:$dst, VR128X:$src1, u32u8imm:$src2), + (ins 
f32mem:$dst, VR128X:$src1, i32i8imm:$src2), "vextractps\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(store (extractelt (bc_v4i32 (v4f32 VR128X:$src1)), imm:$src2), addr:$dst)]>, EVEX, EVEX_CD8<32, CD8VT1>; diff --git a/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td b/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td index 79379ad33a9..eba5ec25136 100644 --- a/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td +++ b/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td @@ -83,7 +83,7 @@ def X86pinsrw : SDNode<"X86ISD::PINSRW", SDTCisVT<2, i32>, SDTCisPtrTy<3>]>>; def X86insertps : SDNode<"X86ISD::INSERTPS", SDTypeProfile<1, 3, [SDTCisVT<0, v4f32>, SDTCisSameAs<0,1>, - SDTCisVT<2, v4f32>, SDTCisPtrTy<3>]>>; + SDTCisVT<2, v4f32>, SDTCisVT<3, i8>]>>; def X86vzmovl : SDNode<"X86ISD::VZEXT_MOVL", SDTypeProfile<1, 1, [SDTCisSameAs<0,1>]>>; @@ -197,7 +197,7 @@ def SDTVBroadcast : SDTypeProfile<1, 1, [SDTCisVec<0>]>; def SDTVBroadcastm : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisVec<1>]>; def SDTBlend : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0,1>, - SDTCisSameAs<1,2>, SDTCisVT<3, i32>]>; + SDTCisSameAs<1,2>, SDTCisVT<3, i8>]>; def SDTFma : SDTypeProfile<1, 3, [SDTCisSameAs<0,1>, SDTCisSameAs<1,2>, SDTCisSameAs<1,3>]>; diff --git a/llvm/lib/Target/X86/X86InstrInfo.td b/llvm/lib/Target/X86/X86InstrInfo.td index a8743fd83da..867b6caaa81 100644 --- a/llvm/lib/Target/X86/X86InstrInfo.td +++ b/llvm/lib/Target/X86/X86InstrInfo.td @@ -551,11 +551,6 @@ class ImmSExtAsmOperandClass : AsmOperandClass { let RenderMethod = "addImmOperands"; } -class ImmZExtAsmOperandClass : AsmOperandClass { - let SuperClasses = [ImmAsmOperand]; - let RenderMethod = "addImmOperands"; -} - def X86GR32orGR64AsmOperand : AsmOperandClass { let Name = "GR32orGR64"; } @@ -568,6 +563,7 @@ def AVX512RC : Operand<i32> { let PrintMethod = "printRoundingControl"; let OperandType = "OPERAND_IMMEDIATE"; } + // Sign-extended immediate classes. 
We don't need to define the full lattice // here because there is no instruction with an ambiguity between ImmSExti64i32 // and ImmSExti32i8. @@ -595,12 +591,6 @@ def ImmSExti32i8AsmOperand : ImmSExtAsmOperandClass { let Name = "ImmSExti32i8"; } -// [0, 0x000000FF] -def ImmZExtu32u8AsmOperand : ImmZExtAsmOperandClass { - let Name = "ImmZExtu32u8"; -} - - // [0, 0x0000007F] | // [0xFFFFFFFFFFFFFF80, 0xFFFFFFFFFFFFFFFF] def ImmSExti64i8AsmOperand : ImmSExtAsmOperandClass { @@ -620,11 +610,6 @@ def i32i8imm : Operand<i32> { let ParserMatchClass = ImmSExti32i8AsmOperand; let OperandType = "OPERAND_IMMEDIATE"; } -// 32-bits but only 8 bits are significant, and those 8 bits are unsigned. -def u32u8imm : Operand<i32> { - let ParserMatchClass = ImmZExtu32u8AsmOperand; - let OperandType = "OPERAND_IMMEDIATE"; -} // 64-bits but only 32 bits are significant. def i64i32imm : Operand<i64> { diff --git a/llvm/lib/Target/X86/X86InstrSSE.td b/llvm/lib/Target/X86/X86InstrSSE.td index 2189d14c07b..71cc77214aa 100644 --- a/llvm/lib/Target/X86/X86InstrSSE.td +++ b/llvm/lib/Target/X86/X86InstrSSE.td @@ -5379,7 +5379,7 @@ let Predicates = [HasAVX] in { // the corresponding elements in the second input vector. def : Pat<(v8f32 (X86Blendi (v8f32 (fsub VR256:$lhs, VR256:$rhs)), - (v8f32 (fadd VR256:$lhs, VR256:$rhs)), (i32 170))), + (v8f32 (fadd VR256:$lhs, VR256:$rhs)), (i8 170))), (VADDSUBPSYrr VR256:$lhs, VR256:$rhs)>; // Constant 10 corresponds to the binary mask '1010'. @@ -5388,16 +5388,16 @@ let Predicates = [HasAVX] in { // - the 2nd and 4th element from the second input vector (the 'fadd' node). 
def : Pat<(v4f64 (X86Blendi (v4f64 (fsub VR256:$lhs, VR256:$rhs)), - (v4f64 (fadd VR256:$lhs, VR256:$rhs)), (i32 10))), + (v4f64 (fadd VR256:$lhs, VR256:$rhs)), (i8 10))), (VADDSUBPDYrr VR256:$lhs, VR256:$rhs)>; def : Pat<(v4f64 (X86Blendi (v4f64 (fsub VR256:$lhs, VR256:$rhs)), - (v4f64 (fadd VR256:$lhs, VR256:$rhs)), (i32 10))), + (v4f64 (fadd VR256:$lhs, VR256:$rhs)), (i8 10))), (VADDSUBPDYrr VR256:$lhs, VR256:$rhs)>; def : Pat<(v4f32 (X86Blendi (v4f32 (fsub VR128:$lhs, VR128:$rhs)), - (v4f32 (fadd VR128:$lhs, VR128:$rhs)), (i32 10))), + (v4f32 (fadd VR128:$lhs, VR128:$rhs)), (i8 10))), (VADDSUBPSrr VR128:$lhs, VR128:$rhs)>; def : Pat<(v2f64 (X86Blendi (v2f64 (fsub VR128:$lhs, VR128:$rhs)), - (v2f64 (fadd VR128:$lhs, VR128:$rhs)), (i32 2))), + (v2f64 (fadd VR128:$lhs, VR128:$rhs)), (i8 2))), (VADDSUBPDrr VR128:$lhs, VR128:$rhs)>; def : Pat<(v2f64 (X86Movsd (v2f64 (fadd VR128:$lhs, VR128:$rhs)), (v2f64 (fsub VR128:$lhs, VR128:$rhs)))), @@ -5411,11 +5411,11 @@ let Predicates = [UseSSE3] in { // - the 2nd and 4th element from the second input vector (the fadd node). 
def : Pat<(v4f32 (X86Blendi (v4f32 (fsub VR128:$lhs, VR128:$rhs)), - (v4f32 (fadd VR128:$lhs, VR128:$rhs)), (i32 10))), + (v4f32 (fadd VR128:$lhs, VR128:$rhs)), (i8 10))), (ADDSUBPSrr VR128:$lhs, VR128:$rhs)>; def : Pat<(v2f64 (X86Blendi (v2f64 (fsub VR128:$lhs, VR128:$rhs)), - (v2f64 (fadd VR128:$lhs, VR128:$rhs)), (i32 2))), + (v2f64 (fadd VR128:$lhs, VR128:$rhs)), (i8 2))), (ADDSUBPDrr VR128:$lhs, VR128:$rhs)>; def : Pat<(v2f64 (X86Movsd (v2f64 (fadd VR128:$lhs, VR128:$rhs)), (v2f64 (fsub VR128:$lhs, VR128:$rhs)))), @@ -6705,7 +6705,7 @@ let Constraints = "$src1 = $dst" in multiclass SS41I_insertf32<bits<8> opc, string asm, bit Is2Addr = 1, OpndItins itins = DEFAULT_ITINS> { def rr : SS4AIi8<opc, MRMSrcReg, (outs VR128:$dst), - (ins VR128:$src1, VR128:$src2, u32u8imm:$src3), + (ins VR128:$src1, VR128:$src2, i8imm:$src3), !if(Is2Addr, !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), !strconcat(asm, @@ -6714,7 +6714,7 @@ multiclass SS41I_insertf32<bits<8> opc, string asm, bit Is2Addr = 1, (X86insertps VR128:$src1, VR128:$src2, imm:$src3))], itins.rr>, Sched<[WriteFShuffle]>; def rm : SS4AIi8<opc, MRMSrcMem, (outs VR128:$dst), - (ins VR128:$src1, f32mem:$src2, u32u8imm:$src3), + (ins VR128:$src1, f32mem:$src2, i8imm:$src3), !if(Is2Addr, !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), !strconcat(asm, @@ -7350,7 +7350,7 @@ multiclass SS41I_binop_rmi_int<bits<8> opc, string OpcodeStr, OpndItins itins = DEFAULT_ITINS> { let isCommutable = 1 in def rri : SS4AIi8<opc, MRMSrcReg, (outs RC:$dst), - (ins RC:$src1, RC:$src2, u32u8imm:$src3), + (ins RC:$src1, RC:$src2, i8imm:$src3), !if(Is2Addr, !strconcat(OpcodeStr, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), @@ -7359,7 +7359,7 @@ multiclass SS41I_binop_rmi_int<bits<8> opc, string OpcodeStr, [(set RC:$dst, (IntId RC:$src1, RC:$src2, imm:$src3))], itins.rr>, Sched<[itins.Sched]>; def rmi : SS4AIi8<opc, MRMSrcMem, (outs RC:$dst), - (ins RC:$src1, x86memop:$src2, u32u8imm:$src3), + (ins RC:$src1, 
x86memop:$src2, i8imm:$src3), !if(Is2Addr, !strconcat(OpcodeStr, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), @@ -8579,13 +8579,13 @@ multiclass AVX2_binop_rmi_int<bits<8> opc, string OpcodeStr, X86MemOperand x86memop> { let isCommutable = 1 in def rri : AVX2AIi8<opc, MRMSrcReg, (outs RC:$dst), - (ins RC:$src1, RC:$src2, u32u8imm:$src3), + (ins RC:$src1, RC:$src2, i8imm:$src3), !strconcat(OpcodeStr, "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), [(set RC:$dst, (IntId RC:$src1, RC:$src2, imm:$src3))]>, Sched<[WriteBlend]>, VEX_4V; def rmi : AVX2AIi8<opc, MRMSrcMem, (outs RC:$dst), - (ins RC:$src1, x86memop:$src2, u32u8imm:$src3), + (ins RC:$src1, x86memop:$src2, i8imm:$src3), !strconcat(OpcodeStr, "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), [(set RC:$dst, |