 llvm/lib/Target/X86/X86.td                       |  4
 llvm/lib/Target/X86/X86Subtarget.cpp             | 12
 llvm/lib/Target/X86/X86Subtarget.h               | 15
 llvm/lib/Target/X86/X86TargetMachine.cpp         | 22
 llvm/lib/Target/X86/X86TargetTransformInfo.cpp   |  9
 llvm/test/Transforms/LoopVectorize/X86/avx512.ll | 77
 6 files changed, 131 insertions(+), 8 deletions(-)
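
Taken together, the patch threads a single number through the backend: the "prefer-vector-width" function attribute, once parsed, overrides any CPU tuning default (the new prefer-256-bit subtarget feature), and the resolved width then clamps the register width that TTI reports to the vectorizer. Before reading the hunks, a minimal standalone sketch of that precedence and clamping may help; the function names here are ours, not LLVM's (the real logic lives in X86Subtarget::initSubtargetFeatures and X86TTIImpl::getRegisterBitWidth below):

  #include <cstdint>

  // Precedence: an explicit "prefer-vector-width" attribute (0 = unset)
  // beats the prefer-256-bit tuning feature; with neither, no limit.
  unsigned resolvePreferVectorWidth(unsigned AttrOverride, bool Prefer256Bit) {
    if (AttrOverride)
      return AttrOverride;
    if (Prefer256Bit)
      return 256;
    return UINT32_MAX;
  }

  // Clamping: the preferred width can narrow, but never widen, what the
  // ISA actually supports.
  unsigned vectorRegisterBitWidth(bool HasAVX512, bool HasAVX, bool HasSSE1,
                                  unsigned PreferVectorWidth) {
    if (HasAVX512 && PreferVectorWidth >= 512)
      return 512;
    if (HasAVX && PreferVectorWidth >= 256)
      return 256;
    if (HasSSE1 && PreferVectorWidth >= 128)
      return 128;
    return 0; // no vector registers available
  }
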
diff --git a/llvm/lib/Target/X86/X86.td b/llvm/lib/Target/X86/X86.td
index 6141df7a0f0..6e9f1d5c309 100644
--- a/llvm/lib/Target/X86/X86.td
+++ b/llvm/lib/Target/X86/X86.td
@@ -334,6 +334,10 @@ def FeatureHasFastGather
     : SubtargetFeature<"fast-gather", "HasFastGather", "true",
                        "Indicates if gather is reasonably fast.">;
 
+def FeaturePrefer256Bit
+    : SubtargetFeature<"prefer-256-bit", "Prefer256Bit", "true",
+                       "Prefer 256-bit AVX instructions">;
+
 //===----------------------------------------------------------------------===//
 // Register File Description
 //===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Target/X86/X86Subtarget.cpp b/llvm/lib/Target/X86/X86Subtarget.cpp
index f4478d182a9..082a4f74d46 100644
--- a/llvm/lib/Target/X86/X86Subtarget.cpp
+++ b/llvm/lib/Target/X86/X86Subtarget.cpp
@@ -254,6 +254,12 @@ void X86Subtarget::initSubtargetFeatures(StringRef CPU, StringRef FS) {
     GatherOverhead = 2;
   if (hasAVX512())
     ScatterOverhead = 2;
+
+  // Consume the vector width attribute or apply any target specific limit.
+  if (PreferVectorWidthOverride)
+    PreferVectorWidth = PreferVectorWidthOverride;
+  else if (Prefer256Bit)
+    PreferVectorWidth = 256;
 }
 
 void X86Subtarget::initializeEnvironment() {
@@ -347,6 +353,8 @@ void X86Subtarget::initializeEnvironment() {
   X86ProcFamily = Others;
   GatherOverhead = 1024;
   ScatterOverhead = 1024;
+  PreferVectorWidth = UINT32_MAX;
+  Prefer256Bit = false;
 }
 
 X86Subtarget &X86Subtarget::initializeSubtargetDependencies(StringRef CPU,
@@ -358,10 +366,12 @@ X86Subtarget &X86Subtarget::initializeSubtargetDependencies(StringRef CPU,
 
 X86Subtarget::X86Subtarget(const Triple &TT, StringRef CPU, StringRef FS,
                            const X86TargetMachine &TM,
-                           unsigned StackAlignOverride)
+                           unsigned StackAlignOverride,
+                           unsigned PreferVectorWidthOverride)
     : X86GenSubtargetInfo(TT, CPU, FS), X86ProcFamily(Others),
       PICStyle(PICStyles::None), TM(TM), TargetTriple(TT),
       StackAlignOverride(StackAlignOverride),
+      PreferVectorWidthOverride(PreferVectorWidthOverride),
       In64BitMode(TargetTriple.getArch() == Triple::x86_64),
       In32BitMode(TargetTriple.getArch() == Triple::x86 &&
                   TargetTriple.getEnvironment() != Triple::CODE16),
diff --git a/llvm/lib/Target/X86/X86Subtarget.h b/llvm/lib/Target/X86/X86Subtarget.h
index 77f4a16d1e4..08cc28eed8c 100644
--- a/llvm/lib/Target/X86/X86Subtarget.h
+++ b/llvm/lib/Target/X86/X86Subtarget.h
@@ -359,6 +359,9 @@ protected:
   ///
   unsigned MaxInlineSizeThreshold;
 
+  /// Indicates target prefers 256 bit instructions.
+  bool Prefer256Bit;
+
   /// What processor and OS we're targeting.
   Triple TargetTriple;
 
@@ -375,6 +378,13 @@ private:
   /// Override the stack alignment.
   unsigned StackAlignOverride;
 
+  /// Preferred vector width from function attribute.
+  unsigned PreferVectorWidthOverride;
+
+  /// Resolved preferred vector width from function attribute and subtarget
+  /// features.
+  unsigned PreferVectorWidth;
+
   /// True if compiling for 64-bit, false for 16-bit or 32-bit.
   bool In64BitMode;
 
@@ -400,7 +410,8 @@ public:
   /// of the specified triple.
   ///
   X86Subtarget(const Triple &TT, StringRef CPU, StringRef FS,
-               const X86TargetMachine &TM, unsigned StackAlignOverride);
+               const X86TargetMachine &TM, unsigned StackAlignOverride,
+               unsigned PreferVectorWidthOverride);
 
   const X86TargetLowering *getTargetLowering() const override {
     return &TLInfo;
   }
@@ -584,6 +595,8 @@ public:
   bool hasCLWB() const { return HasCLWB; }
   bool hasRDPID() const { return HasRDPID; }
 
+  unsigned getPreferVectorWidth() const { return PreferVectorWidth; }
+
   bool isXRaySupported() const override { return is64Bit(); }
 
   X86ProcFamilyEnum getProcFamily() const { return X86ProcFamily; }
diff --git a/llvm/lib/Target/X86/X86TargetMachine.cpp b/llvm/lib/Target/X86/X86TargetMachine.cpp
index 48e2073c41c..b3bf9269f73 100644
--- a/llvm/lib/Target/X86/X86TargetMachine.cpp
+++ b/llvm/lib/Target/X86/X86TargetMachine.cpp
@@ -255,7 +255,24 @@ X86TargetMachine::getSubtargetImpl(const Function &F) const {
   if (SoftFloat)
     Key += FS.empty() ? "+soft-float" : ",+soft-float";
 
-  FS = Key.substr(CPU.size());
+  // Keep track of the key width after all features are added so we can extract
+  // the feature string out later.
+  unsigned CPUFSWidth = Key.size();
+
+  // Translate the vector width function attribute into subtarget features.
+  // This overrides any CPU-specific tuning parameter.
+  unsigned PreferVectorWidthOverride = 0;
+  if (F.hasFnAttribute("prefer-vector-width")) {
+    StringRef Val = F.getFnAttribute("prefer-vector-width").getValueAsString();
+    unsigned Width;
+    if (!Val.getAsInteger(0, Width)) {
+      Key += ",prefer-vector-width=";
+      Key += Val;
+      PreferVectorWidthOverride = Width;
+    }
+  }
+
+  FS = Key.slice(CPU.size(), CPUFSWidth);
 
   auto &I = SubtargetMap[Key];
   if (!I) {
@@ -264,7 +281,8 @@ X86TargetMachine::getSubtargetImpl(const Function &F) const {
     // function that reside in TargetOptions.
     resetTargetOptions(F);
     I = llvm::make_unique<X86Subtarget>(TargetTriple, CPU, FS, *this,
-                                        Options.StackAlignmentOverride);
+                                        Options.StackAlignmentOverride,
+                                        PreferVectorWidthOverride);
   }
   return I.get();
 }
diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
index 967d67a84bc..e24c8dfcd54 100644
--- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
+++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
@@ -130,12 +130,13 @@ unsigned X86TTIImpl::getNumberOfRegisters(bool Vector) {
 }
 
 unsigned X86TTIImpl::getRegisterBitWidth(bool Vector) const {
+  unsigned PreferVectorWidth = ST->getPreferVectorWidth();
   if (Vector) {
-    if (ST->hasAVX512())
+    if (ST->hasAVX512() && PreferVectorWidth >= 512)
       return 512;
-    if (ST->hasAVX())
+    if (ST->hasAVX() && PreferVectorWidth >= 256)
       return 256;
-    if (ST->hasSSE1())
+    if (ST->hasSSE1() && PreferVectorWidth >= 128)
       return 128;
 
     return 0;
   }
@@ -2523,7 +2524,7 @@ bool X86TTIImpl::isLegalMaskedGather(Type *DataTy) {
   // TODO: Remove the explicit ST->hasAVX512()? That would mean we would only
   // enable gather with a -march.
   return (DataWidth == 32 || DataWidth == 64) &&
-      (ST->hasAVX512() || (ST->hasFastGather() && ST->hasAVX2()));
+         (ST->hasAVX512() || (ST->hasFastGather() && ST->hasAVX2()));
 }
 
 bool X86TTIImpl::isLegalMaskedScatter(Type *DataType) {
diff --git a/llvm/test/Transforms/LoopVectorize/X86/avx512.ll b/llvm/test/Transforms/LoopVectorize/X86/avx512.ll
index c7844c11306..0917e007224 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/avx512.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/avx512.ll
@@ -1,4 +1,5 @@
 ; RUN: opt -mattr=+avx512f --loop-vectorize -S < %s | llc -mattr=+avx512f | FileCheck %s
+; RUN: opt -mattr=+avx512vl,+prefer-256-bit --loop-vectorize -S < %s | llc -mattr=+avx512f | FileCheck %s --check-prefix=CHECK-PREFER-AVX256
 
 target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
 target triple = "x86_64-apple-macosx10.9.0"
@@ -10,6 +11,12 @@ target triple = "x86_64-apple-macosx10.9.0"
 ; CHECK: vmovdqu64 %zmm{{.}},
 ; CHECK-NOT: %ymm
 
+; Verify that we don't generate 512-bit wide vectors when the subtarget feature says not to.
+
+; CHECK-PREFER-AVX256-LABEL: f:
+; CHECK-PREFER-AVX256: vmovdqu %ymm{{.}},
+; CHECK-PREFER-AVX256-NOT: %zmm
+
 define void @f(i32* %a, i32 %n) {
 entry:
   %cmp4 = icmp sgt i32 %n, 0
@@ -33,3 +40,73 @@ for.end.loopexit:                                 ; preds = %for.body
 for.end:                                          ; preds = %for.end.loopexit, %entry
   ret void
 }
+
+; Verify that the "prefer-vector-width=256" attribute prevents the use of
+; 512-bit vectors.
+
+; CHECK-LABEL: g:
+; CHECK: vmovdqu %ymm{{.}},
+; CHECK-NOT: %zmm
+
+; CHECK-PREFER-AVX256-LABEL: g:
+; CHECK-PREFER-AVX256: vmovdqu %ymm{{.}},
+; CHECK-PREFER-AVX256-NOT: %zmm
+
+define void @g(i32* %a, i32 %n) "prefer-vector-width"="256" {
+entry:
+  %cmp4 = icmp sgt i32 %n, 0
+  br i1 %cmp4, label %for.body.preheader, label %for.end
+
+for.body.preheader:                               ; preds = %entry
+  br label %for.body
+
+for.body:                                         ; preds = %for.body.preheader, %for.body
+  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %for.body.preheader ]
+  %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
+  store i32 %n, i32* %arrayidx, align 4
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, %n
+  br i1 %exitcond, label %for.end.loopexit, label %for.body
+
+for.end.loopexit:                                 ; preds = %for.body
+  br label %for.end
+
+for.end:                                          ; preds = %for.end.loopexit, %entry
+  ret void
+}
+
+; Verify that the "prefer-vector-width=512" attribute overrides the subtarget's
+; preference for 256-bit vectors.
+
+; CHECK-LABEL: h:
+; CHECK: vmovdqu64 %zmm{{.}},
+; CHECK-NOT: %ymm
+
+; CHECK-PREFER-AVX256-LABEL: h:
+; CHECK-PREFER-AVX256: vmovdqu64 %zmm{{.}},
+; CHECK-PREFER-AVX256-NOT: %ymm
+
+define void @h(i32* %a, i32 %n) "prefer-vector-width"="512" {
+entry:
+  %cmp4 = icmp sgt i32 %n, 0
+  br i1 %cmp4, label %for.body.preheader, label %for.end
+
+for.body.preheader:                               ; preds = %entry
+  br label %for.body
+
+for.body:                                         ; preds = %for.body.preheader, %for.body
+  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %for.body.preheader ]
+  %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
+  store i32 %n, i32* %arrayidx, align 4
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, %n
+  br i1 %exitcond, label %for.end.loopexit, label %for.body
+
+for.end.loopexit:                                 ; preds = %for.body
+  br label %for.end
+
+for.end:                                          ; preds = %for.end.loopexit, %entry
+  ret void
+}
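
A note on the mechanics in X86TargetMachine::getSubtargetImpl above: the attribute value is appended to the subtarget cache key, so functions carrying different "prefer-vector-width" values get distinct cached X86Subtarget instances. The parse itself relies on StringRef::getAsInteger, which returns true on failure; a hedged sketch of just that step (the helper name is ours, not LLVM's):

  #include "llvm/ADT/StringRef.h"

  // Illustrative helper mirroring the parse above: a malformed attribute
  // value is silently ignored, and 0 means "no override".
  static unsigned parsePreferVectorWidth(llvm::StringRef Val) {
    unsigned Width = 0;
    if (Val.getAsInteger(0, Width)) // radix 0 auto-detects the base
      return 0;
    return Width;
  }

Read against that model, the tests line up: @f carries no attribute, so only the +prefer-256-bit run caps it at %ymm; @g pins 256 bits regardless of CPU features; and @h pins 512 bits, overriding even the +prefer-256-bit run.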