diff options
author | Craig Topper <craig.topper@intel.com> | 2018-01-20 00:26:08 +0000 |
---|---|---|
committer | Craig Topper <craig.topper@intel.com> | 2018-01-20 00:26:08 +0000 |
commit | 0d797a34d8be0707c4f17e88d7d22004d844fbf4 (patch) | |
tree | e350faaa70f176cdc5da5f9d6826a26d54defdfc /llvm/test/Transforms/LoopVectorize | |
parent | 2f8af79927c5902900a7ddeef2300ea92a9fcf22 (diff) | |
download | bcm5719-llvm-0d797a34d8be0707c4f17e88d7d22004d844fbf4.tar.gz bcm5719-llvm-0d797a34d8be0707c4f17e88d7d22004d844fbf4.zip |
[X86] Add support for passing 'prefer-vector-width' function attribute into X86Subtarget and exposing via X86's getRegisterWidth TTI interface.
This will cause the vectorizers to do some limiting of the vector widths they create. This is not a strict limit: there are cases I know of in which the loop vectorizer will still generate larger vectors.
I've written this in such a way that the interface will only return a properly supported width (0/128/256/512) even if the attribute says something funny like 384 or 10.
This has been split from D41895 with the remainder in a follow-up commit.
llvm-svn: 323015
Diffstat (limited to 'llvm/test/Transforms/LoopVectorize')
-rw-r--r-- | llvm/test/Transforms/LoopVectorize/X86/avx512.ll | 77 |
1 files changed, 77 insertions, 0 deletions
diff --git a/llvm/test/Transforms/LoopVectorize/X86/avx512.ll b/llvm/test/Transforms/LoopVectorize/X86/avx512.ll index c7844c11306..0917e007224 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/avx512.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/avx512.ll @@ -1,4 +1,5 @@ ; RUN: opt -mattr=+avx512f --loop-vectorize -S < %s | llc -mattr=+avx512f | FileCheck %s +; RUN: opt -mattr=+avx512vl,+prefer-256-bit --loop-vectorize -S < %s | llc -mattr=+avx512f | FileCheck %s --check-prefix=CHECK-PREFER-AVX256 target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-apple-macosx10.9.0" @@ -10,6 +11,12 @@ target triple = "x86_64-apple-macosx10.9.0" ; CHECK: vmovdqu64 %zmm{{.}}, ; CHECK-NOT: %ymm +; Verify that we don't generate 512-bit wide vectors when subtarget feature says not to + +; CHECK-PREFER-AVX256-LABEL: f: +; CHECK-PREFER-AVX256: vmovdqu %ymm{{.}}, +; CHECK-PREFER-AVX256-NOT: %zmm + define void @f(i32* %a, i32 %n) { entry: %cmp4 = icmp sgt i32 %n, 0 @@ -33,3 +40,73 @@ for.end.loopexit: ; preds = %for.body for.end: ; preds = %for.end.loopexit, %entry ret void } + +; Verify that the "prefer-vector-width=256" attribute prevents the use of 512-bit +; vectors + +; CHECK-LABEL: g: +; CHECK: vmovdqu %ymm{{.}}, +; CHECK-NOT: %zmm + +; CHECK-PREFER-AVX256-LABEL: g: +; CHECK-PREFER-AVX256: vmovdqu %ymm{{.}}, +; CHECK-PREFER-AVX256-NOT: %zmm + +define void @g(i32* %a, i32 %n) "prefer-vector-width"="256" { +entry: + %cmp4 = icmp sgt i32 %n, 0 + br i1 %cmp4, label %for.body.preheader, label %for.end + +for.body.preheader: ; preds = %entry + br label %for.body + +for.body: ; preds = %for.body.preheader, %for.body + %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %for.body.preheader ] + %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv + store i32 %n, i32* %arrayidx, align 4 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %lftr.wideiv = trunc i64 %indvars.iv.next to i32 + %exitcond = icmp eq i32 %lftr.wideiv, %n + br 
i1 %exitcond, label %for.end.loopexit, label %for.body + +for.end.loopexit: ; preds = %for.body + br label %for.end + +for.end: ; preds = %for.end.loopexit, %entry + ret void +} + +; Verify that the "prefer-vector-width=512" attribute override the subtarget +; vectors + +; CHECK-LABEL: h: +; CHECK: vmovdqu64 %zmm{{.}}, +; CHECK-NOT: %ymm + +; CHECK-PREFER-AVX256-LABEL: h: +; CHECK-PREFER-AVX256: vmovdqu64 %zmm{{.}}, +; CHECK-PREFER-AVX256-NOT: %ymm + +define void @h(i32* %a, i32 %n) "prefer-vector-width"="512" { +entry: + %cmp4 = icmp sgt i32 %n, 0 + br i1 %cmp4, label %for.body.preheader, label %for.end + +for.body.preheader: ; preds = %entry + br label %for.body + +for.body: ; preds = %for.body.preheader, %for.body + %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %for.body.preheader ] + %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv + store i32 %n, i32* %arrayidx, align 4 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %lftr.wideiv = trunc i64 %indvars.iv.next to i32 + %exitcond = icmp eq i32 %lftr.wideiv, %n + br i1 %exitcond, label %for.end.loopexit, label %for.body + +for.end.loopexit: ; preds = %for.body + br label %for.end + +for.end: ; preds = %for.end.loopexit, %entry + ret void +} |