Diffstat (limited to 'llvm/lib')
-rw-r--r--  llvm/lib/Target/X86/X86ISelLowering.cpp  | 60
-rw-r--r--  llvm/lib/Target/X86/X86Subtarget.cpp     |  4
-rw-r--r--  llvm/lib/Target/X86/X86Subtarget.h       | 17
-rw-r--r--  llvm/lib/Target/X86/X86TargetMachine.cpp | 21
4 files changed, 77 insertions, 25 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 88411c1337d..ac3d7b550ce 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -1144,12 +1144,10 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
     }
   }
 
+  // This block controls legalization of the mask vector sizes that are
+  // available with AVX512. 512-bit vectors are in a separate block controlled
+  // by useAVX512Regs.
   if (!Subtarget.useSoftFloat() && Subtarget.hasAVX512()) {
-    addRegisterClass(MVT::v16i32, &X86::VR512RegClass);
-    addRegisterClass(MVT::v16f32, &X86::VR512RegClass);
-    addRegisterClass(MVT::v8i64, &X86::VR512RegClass);
-    addRegisterClass(MVT::v8f64, &X86::VR512RegClass);
-
     addRegisterClass(MVT::v1i1, &X86::VK1RegClass);
     addRegisterClass(MVT::v2i1, &X86::VK2RegClass);
     addRegisterClass(MVT::v4i1, &X86::VK4RegClass);
@@ -1160,8 +1158,6 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
     setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v1i1, Custom);
     setOperationAction(ISD::BUILD_VECTOR, MVT::v1i1, Custom);
 
-    setOperationPromotedToType(ISD::FP_TO_SINT, MVT::v16i1, MVT::v16i32);
-    setOperationPromotedToType(ISD::FP_TO_UINT, MVT::v16i1, MVT::v16i32);
     setOperationPromotedToType(ISD::FP_TO_SINT, MVT::v8i1, MVT::v8i32);
     setOperationPromotedToType(ISD::FP_TO_UINT, MVT::v8i1, MVT::v8i32);
     setOperationPromotedToType(ISD::FP_TO_SINT, MVT::v4i1, MVT::v4i32);
@@ -1200,6 +1196,16 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
       setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v16i1, Custom);
     for (auto VT : { MVT::v1i1, MVT::v2i1, MVT::v4i1, MVT::v8i1 })
       setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
+  }
+
+  // This block controls legalization for 512-bit operations with 32/64 bit
+  // elements. 512-bits can be disabled based on prefer-vector-width and
+  // required-vector-width function attributes.
+  if (!Subtarget.useSoftFloat() && Subtarget.useAVX512Regs()) {
+    addRegisterClass(MVT::v16i32, &X86::VR512RegClass);
+    addRegisterClass(MVT::v16f32, &X86::VR512RegClass);
+    addRegisterClass(MVT::v8i64, &X86::VR512RegClass);
+    addRegisterClass(MVT::v8f64, &X86::VR512RegClass);
 
     for (MVT VT : MVT::fp_vector_valuetypes())
       setLoadExtAction(ISD::EXTLOAD, VT, MVT::v8f32, Legal);
@@ -1222,7 +1228,9 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
     setOperationAction(ISD::FP_TO_SINT, MVT::v16i32, Legal);
     setOperationPromotedToType(ISD::FP_TO_SINT, MVT::v16i16, MVT::v16i32);
     setOperationPromotedToType(ISD::FP_TO_SINT, MVT::v16i8, MVT::v16i32);
+    setOperationPromotedToType(ISD::FP_TO_SINT, MVT::v16i1, MVT::v16i32);
     setOperationAction(ISD::FP_TO_UINT, MVT::v16i32, Legal);
+    setOperationPromotedToType(ISD::FP_TO_UINT, MVT::v16i1, MVT::v16i32);
     setOperationPromotedToType(ISD::FP_TO_UINT, MVT::v16i8, MVT::v16i32);
     setOperationPromotedToType(ISD::FP_TO_UINT, MVT::v16i16, MVT::v16i32);
     setOperationAction(ISD::SINT_TO_FP, MVT::v16i32, Legal);
@@ -1352,6 +1360,9 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
     }
   }// has AVX-512
 
+  // This block controls legalization for operations that don't have
+  // pre-AVX512 equivalents. Without VLX we use 512-bit operations for
+  // narrower widths.
   if (!Subtarget.useSoftFloat() && Subtarget.hasAVX512()) {
     // These operations are handled on non-VLX by artificially widening in
     // isel patterns.
@@ -1406,10 +1417,10 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
     }
   }
 
+  // This block controls legalization of v32i1/v64i1 which are available with
+  // AVX512BW. 512-bit v32i16 and v64i8 vector legalization is controlled with
+  // useBWIRegs.
   if (!Subtarget.useSoftFloat() && Subtarget.hasBWI()) {
-    addRegisterClass(MVT::v32i16, &X86::VR512RegClass);
-    addRegisterClass(MVT::v64i8, &X86::VR512RegClass);
-
     addRegisterClass(MVT::v32i1, &X86::VK32RegClass);
     addRegisterClass(MVT::v64i1, &X86::VK64RegClass);
 
@@ -1439,6 +1450,15 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
     setOperationAction(ISD::SIGN_EXTEND, MVT::v32i8, Custom);
     setOperationAction(ISD::ZERO_EXTEND, MVT::v32i8, Custom);
     setOperationAction(ISD::ANY_EXTEND, MVT::v32i8, Custom);
+  }
+
+  // This block controls legalization for v32i16 and v64i8. 512-bits can be
+  // disabled based on prefer-vector-width and required-vector-width function
+  // attributes.
+  if (!Subtarget.useSoftFloat() && Subtarget.useBWIRegs()) {
+    addRegisterClass(MVT::v32i16, &X86::VR512RegClass);
+    addRegisterClass(MVT::v64i8, &X86::VR512RegClass);
+
     // Extends from v64i1 masks to 512-bit vectors.
     setOperationAction(ISD::SIGN_EXTEND, MVT::v64i8, Custom);
     setOperationAction(ISD::ZERO_EXTEND, MVT::v64i8, Custom);
@@ -30049,7 +30069,7 @@ static bool isAddSubOrSubAdd(SDNode *N, const X86Subtarget &Subtarget,
   EVT VT = N->getValueType(0);
   if ((!Subtarget.hasSSE3() || (VT != MVT::v4f32 && VT != MVT::v2f64)) &&
       (!Subtarget.hasAVX() || (VT != MVT::v8f32 && VT != MVT::v4f64)) &&
-      (!Subtarget.hasAVX512() || (VT != MVT::v16f32 && VT != MVT::v8f64)))
+      (!Subtarget.useAVX512Regs() || (VT != MVT::v16f32 && VT != MVT::v8f64)))
     return false;
 
   // We only handle target-independent shuffles.
@@ -31086,7 +31106,7 @@ static SDValue combineBasicSADPattern(SDNode *Extract, SelectionDAG &DAG,
     return SDValue();
 
   unsigned RegSize = 128;
-  if (Subtarget.hasBWI())
+  if (Subtarget.useBWIRegs())
     RegSize = 512;
   else if (Subtarget.hasAVX2())
     RegSize = 256;
@@ -32664,7 +32684,7 @@ static SDValue combineMul(SDNode *N, SelectionDAG &DAG,
   if (Subtarget.getProcFamily() != X86Subtarget::IntelKNL &&
      ((VT == MVT::v4i32 && Subtarget.hasSSE2()) ||
       (VT == MVT::v8i32 && Subtarget.hasAVX2()) ||
-      (VT == MVT::v16i32 && Subtarget.hasBWI()))) {
+      (VT == MVT::v16i32 && Subtarget.useBWIRegs()))) {
     SDValue N0 = N->getOperand(0);
     SDValue N1 = N->getOperand(1);
     APInt Mask17 = APInt::getHighBitsSet(32, 17);
@@ -34190,7 +34210,7 @@ SDValue SplitBinaryOpsAndApply(SelectionDAG &DAG, const X86Subtarget &Subtarget,
                               SDValue Op1, F Builder) {
  assert(Subtarget.hasSSE2() && "Target assumed to support at least SSE2");
  unsigned NumSubs = 1;
-  if (Subtarget.hasBWI()) {
+  if (Subtarget.useBWIRegs()) {
    if (VT.getSizeInBits() > 512) {
      NumSubs = VT.getSizeInBits() / 512;
      assert((VT.getSizeInBits() % 512) == 0 && "Illegal vector size");
@@ -36181,7 +36201,7 @@ static SDValue combineToExtendVectorInReg(SDNode *N, SelectionDAG &DAG,
   // Also use this if we don't have SSE41 to allow the legalizer do its job.
   if (!Subtarget.hasSSE41() || VT.is128BitVector() ||
       (VT.is256BitVector() && Subtarget.hasInt256()) ||
-      (VT.is512BitVector() && Subtarget.hasAVX512())) {
+      (VT.is512BitVector() && Subtarget.useAVX512Regs())) {
     SDValue ExOp = ExtendVecSize(DL, N0, VT.getSizeInBits());
     return Opcode == ISD::SIGN_EXTEND ?
                DAG.getSignExtendVectorInReg(ExOp, DL, VT)
@@ -36214,7 +36234,7 @@ static SDValue combineToExtendVectorInReg(SDNode *N, SelectionDAG &DAG,
 
   // On pre-AVX512 targets, split into 256-bit nodes of
   // ISD::*_EXTEND_VECTOR_INREG.
-  if (!Subtarget.hasAVX512() && !(VT.getSizeInBits() % 256))
+  if (!Subtarget.useAVX512Regs() && !(VT.getSizeInBits() % 256))
     return SplitAndExtendInReg(256);
 
   return SDValue();
@@ -37169,7 +37189,7 @@ static SDValue combineLoopMAddPattern(SDNode *N, SelectionDAG &DAG,
   EVT VT = N->getValueType(0);
 
   unsigned RegSize = 128;
-  if (Subtarget.hasBWI())
+  if (Subtarget.useBWIRegs())
     RegSize = 512;
   else if (Subtarget.hasAVX2())
     RegSize = 256;
@@ -37214,7 +37234,7 @@ static SDValue combineLoopSADPattern(SDNode *N, SelectionDAG &DAG,
     return SDValue();
 
   unsigned RegSize = 128;
-  if (Subtarget.hasBWI())
+  if (Subtarget.useBWIRegs())
     RegSize = 512;
   else if (Subtarget.hasAVX2())
     RegSize = 256;
@@ -37442,8 +37462,8 @@ static SDValue combineSubToSubus(SDNode *N, SelectionDAG &DAG,
   if (!(Subtarget.hasSSE2() && (VT == MVT::v16i8 || VT == MVT::v8i16)) &&
       !(Subtarget.hasSSE41() && (VT == MVT::v8i32)) &&
       !(Subtarget.hasAVX2() && (VT == MVT::v32i8 || VT == MVT::v16i16)) &&
-      !(Subtarget.hasBWI() && (VT == MVT::v64i8 || VT == MVT::v32i16 ||
-                               VT == MVT::v16i32 || VT == MVT::v8i64)))
+      !(Subtarget.useBWIRegs() && (VT == MVT::v64i8 || VT == MVT::v32i16 ||
+                                   VT == MVT::v16i32 || VT == MVT::v8i64)))
     return SDValue();
 
   SDValue SubusLHS, SubusRHS;
diff --git a/llvm/lib/Target/X86/X86Subtarget.cpp b/llvm/lib/Target/X86/X86Subtarget.cpp
index 7c4af0aea57..0a6eda7e243 100644
--- a/llvm/lib/Target/X86/X86Subtarget.cpp
+++ b/llvm/lib/Target/X86/X86Subtarget.cpp
@@ -373,11 +373,13 @@ X86Subtarget &X86Subtarget::initializeSubtargetDependencies(StringRef CPU,
 X86Subtarget::X86Subtarget(const Triple &TT, StringRef CPU, StringRef FS,
                            const X86TargetMachine &TM,
                            unsigned StackAlignOverride,
-                           unsigned PreferVectorWidthOverride)
+                           unsigned PreferVectorWidthOverride,
+                           unsigned RequiredVectorWidth)
     : X86GenSubtargetInfo(TT, CPU, FS), X86ProcFamily(Others),
       PICStyle(PICStyles::None), TM(TM), TargetTriple(TT),
       StackAlignOverride(StackAlignOverride),
       PreferVectorWidthOverride(PreferVectorWidthOverride),
+      RequiredVectorWidth(RequiredVectorWidth),
       In64BitMode(TargetTriple.getArch() == Triple::x86_64),
       In32BitMode(TargetTriple.getArch() == Triple::x86 &&
                   TargetTriple.getEnvironment() != Triple::CODE16),
diff --git a/llvm/lib/Target/X86/X86Subtarget.h b/llvm/lib/Target/X86/X86Subtarget.h
index b8adb63ef03..4a23f4b1a2f 100644
--- a/llvm/lib/Target/X86/X86Subtarget.h
+++ b/llvm/lib/Target/X86/X86Subtarget.h
@@ -407,6 +407,9 @@ private:
   /// features.
   unsigned PreferVectorWidth;
 
+  /// Required vector width from function attribute.
+  unsigned RequiredVectorWidth;
+
   /// True if compiling for 64-bit, false for 16-bit or 32-bit.
   bool In64BitMode;
 
@@ -433,7 +436,8 @@ public:
   ///
   X86Subtarget(const Triple &TT, StringRef CPU, StringRef FS,
                const X86TargetMachine &TM, unsigned StackAlignOverride,
-               unsigned PreferVectorWidthOverride);
+               unsigned PreferVectorWidthOverride,
+               unsigned RequiredVectorWidth);
 
   const X86TargetLowering *getTargetLowering() const override {
     return &TLInfo;
@@ -622,6 +626,7 @@ public:
   bool useRetpolineExternalThunk() const { return UseRetpolineExternalThunk; }
 
   unsigned getPreferVectorWidth() const { return PreferVectorWidth; }
+  unsigned getRequiredVectorWidth() const { return RequiredVectorWidth; }
 
   // Helper functions to determine when we should allow widening to 512-bit
   // during codegen.
@@ -634,6 +639,16 @@ public:
     return hasBWI() && canExtendTo512DQ();
   }
 
+  // If there are no 512-bit vectors and we prefer not to use 512-bit registers,
+  // disable them in the legalizer.
+  bool useAVX512Regs() const {
+    return hasAVX512() && (canExtendTo512DQ() || RequiredVectorWidth > 256);
+  }
+
+  bool useBWIRegs() const {
+    return hasBWI() && useAVX512Regs();
+  }
+
   bool isXRaySupported() const override { return is64Bit(); }
 
   X86ProcFamilyEnum getProcFamily() const { return X86ProcFamily; }
diff --git a/llvm/lib/Target/X86/X86TargetMachine.cpp b/llvm/lib/Target/X86/X86TargetMachine.cpp
index 5f67949f8ef..c79f1b41722 100644
--- a/llvm/lib/Target/X86/X86TargetMachine.cpp
+++ b/llvm/lib/Target/X86/X86TargetMachine.cpp
@@ -259,8 +259,7 @@ X86TargetMachine::getSubtargetImpl(const Function &F) const {
   // the feature string out later.
   unsigned CPUFSWidth = Key.size();
 
-  // Translate vector width function attribute into subtarget features. This
-  // overrides any CPU specific turning parameter
+  // Extract prefer-vector-width attribute.
   unsigned PreferVectorWidthOverride = 0;
   if (F.hasFnAttribute("prefer-vector-width")) {
     StringRef Val = F.getFnAttribute("prefer-vector-width").getValueAsString();
@@ -272,6 +271,21 @@ X86TargetMachine::getSubtargetImpl(const Function &F) const {
     }
   }
 
+  // Extract required-vector-width attribute.
+  unsigned RequiredVectorWidth = UINT32_MAX;
+  if (F.hasFnAttribute("required-vector-width")) {
+    StringRef Val = F.getFnAttribute("required-vector-width").getValueAsString();
+    unsigned Width;
+    if (!Val.getAsInteger(0, Width)) {
+      Key += ",required-vector-width=";
+      Key += Val;
+      RequiredVectorWidth = Width;
+    }
+  }
+
+  // Extracted here so that we make sure there is backing for the StringRef. If
+  // we assigned earlier, it's possible the SmallString reallocated leaving a
+  // dangling StringRef.
   FS = Key.slice(CPU.size(), CPUFSWidth);
 
   auto &I = SubtargetMap[Key];
@@ -282,7 +296,8 @@ X86TargetMachine::getSubtargetImpl(const Function &F) const {
     resetTargetOptions(F);
     I = llvm::make_unique<X86Subtarget>(TargetTriple, CPU, FS, *this,
                                         Options.StackAlignmentOverride,
-                                        PreferVectorWidthOverride);
+                                        PreferVectorWidthOverride,
+                                        RequiredVectorWidth);
   }
   return I.get();
 }
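
For readers skimming the predicate changes above, here is a hedged standalone sketch of how prefer-vector-width and required-vector-width interact in useAVX512Regs()/useBWIRegs(). The struct, its defaults, and the body of canExtendTo512DQ() are reconstructed from the surrounding context (canExtendTo512DQ() is not shown in this diff), so treat this as an illustration rather than the patch's code:

#include <cstdint>
#include <cstdio>

// Hypothetical standalone model of the X86Subtarget predicates; the real
// definitions live in X86Subtarget.h.
struct SubtargetModel {
  bool HasAVX512 = true;
  bool HasBWI = true;
  bool HasVLX = true;
  unsigned PreferVectorWidth = 512;          // prefer-vector-width / CPU default
  unsigned RequiredVectorWidth = UINT32_MAX; // attribute absent

  // Assumed body of the pre-existing helper: without VLX, 512-bit
  // instructions are needed to express many AVX512 ops anyway.
  bool canExtendTo512DQ() const {
    return HasAVX512 && (!HasVLX || PreferVectorWidth >= 512);
  }
  bool useAVX512Regs() const {
    return HasAVX512 && (canExtendTo512DQ() || RequiredVectorWidth > 256);
  }
  bool useBWIRegs() const { return HasBWI && useAVX512Regs(); }
};

int main() {
  SubtargetModel M;          // e.g. an AVX512VL-capable CPU
  M.PreferVectorWidth = 256; // prefer-vector-width="256"

  // Attribute absent: UINT32_MAX > 256, so 512-bit types stay legal --
  // the function might still contain 512-bit vectors.
  std::printf("prefer=256, required=absent -> useAVX512Regs()=%d\n",
              M.useAVX512Regs());

  // Only when the attribute proves at most 256 bits are needed can the
  // legalizer drop the 512-bit register classes.
  M.RequiredVectorWidth = 256; // required-vector-width="256"
  std::printf("prefer=256, required=256    -> useAVX512Regs()=%d\n",
              M.useAVX512Regs());
  return 0;
}

Note the conservative default: a missing required-vector-width attribute yields UINT32_MAX, which keeps 512-bit legalization enabled; narrowing only happens when the attribute proves it safe.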
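The relocated FS assignment in X86TargetMachine.cpp guards against a subtle lifetime bug: StringRef slices point into the SmallString's buffer, and appending can reallocate that buffer. A minimal illustration of the same hazard using std::string and std::string_view as stand-ins (all values hypothetical):

#include <cstdio>
#include <string>
#include <string_view>

int main() {
  // Key plays the role of the SmallString cache key in getSubtargetImpl.
  std::string Key = "skylake-avx512,+avx512f";
  std::string_view Early(Key.data() + 15, 8); // slice taken before appends

  // Appending attribute text may reallocate Key's storage, at which point
  // Early dangles -- the failure mode the relocated assignment avoids.
  Key += ",prefer-vector-width=256,required-vector-width=256";

  std::string_view Safe(Key.data() + 15, 8);  // slice taken after all appends
  std::printf("%.*s\n", static_cast<int>(Safe.size()), Safe.data());
  return 0;
}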

