summaryrefslogtreecommitdiffstats
path: root/llvm/test/Transforms/LoopVectorize
diff options
context:
space:
mode:
authorGeorge Burgess IV <george.burgess.iv@gmail.com>2017-06-09 03:56:15 +0000
committerGeorge Burgess IV <george.burgess.iv@gmail.com>2017-06-09 03:56:15 +0000
commita20352e13ed895e0c67a328a5da2dc3e5481ee58 (patch)
tree6de91f6be0f28abc2c483c6c485cda409554b572 /llvm/test/Transforms/LoopVectorize
parentcb9327b02db9a13379bb83b499166aff2930050a (diff)
downloadbcm5719-llvm-a20352e13ed895e0c67a328a5da2dc3e5481ee58.tar.gz
bcm5719-llvm-a20352e13ed895e0c67a328a5da2dc3e5481ee58.zip
[LoopVectorize] Don't preserve nsw/nuw flags on shrunken ops.
If we're shrinking a binary operation, it may be the case that the new operations wraps where the old didn't. If this happens, the behavior should be well-defined. So, we can't always carry wrapping flags with us when we shrink operations. If we do, we get incorrect optimizations in cases like: void foo(const unsigned char *from, unsigned char *to, int n) { for (int i = 0; i < n; i++) to[i] = from[i] - 128; } which gets optimized to: void foo(const unsigned char *from, unsigned char *to, int n) { for (int i = 0; i < n; i++) to[i] = from[i] | 128; } Because: - InstCombine turned `sub i32 %from.i, 128` into `add nuw nsw i32 %from.i, 128`. - LoopVectorize vectorized the add to be `add nuw nsw <16 x i8>` with a vector full of `i8 128`s - InstCombine took advantage of the fact that the newly-shrunken add "couldn't wrap", and changed the `add` to an `or`. InstCombine seems happy to figure out whether we can add nuw/nsw on its own, so I just decided to drop the flags. There are already a number of places in LoopVectorize where we rely on InstCombine to clean up. llvm-svn: 305053
Diffstat (limited to 'llvm/test/Transforms/LoopVectorize')
-rw-r--r--llvm/test/Transforms/LoopVectorize/AArch64/loop-vectorization-factors.ll46
1 files changed, 37 insertions, 9 deletions
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/loop-vectorization-factors.ll b/llvm/test/Transforms/LoopVectorize/AArch64/loop-vectorization-factors.ll
index d06e3fdba39..1149afe7b9f 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/loop-vectorization-factors.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/loop-vectorization-factors.ll
@@ -5,7 +5,7 @@ target triple = "aarch64"
; CHECK-LABEL: @add_a(
; CHECK: load <16 x i8>, <16 x i8>*
-; CHECK: add nuw nsw <16 x i8>
+; CHECK: add <16 x i8>
; CHECK: store <16 x i8>
; Function Attrs: nounwind
define void @add_a(i8* noalias nocapture readonly %p, i8* noalias nocapture %q, i32 %len) #0 {
@@ -31,9 +31,37 @@ for.body: ; preds = %entry, %for.body
br i1 %exitcond, label %for.cond.cleanup, label %for.body
}
+; Ensure that we preserve nuw/nsw if we're not shrinking the values we're
+; working with.
+; CHECK-LABEL: @add_a1(
+; CHECK: load <16 x i8>, <16 x i8>*
+; CHECK: add nuw nsw <16 x i8>
+; CHECK: store <16 x i8>
+; Function Attrs: nounwind
+define void @add_a1(i8* noalias nocapture readonly %p, i8* noalias nocapture %q, i32 %len) #0 {
+entry:
+ %cmp8 = icmp sgt i32 %len, 0
+ br i1 %cmp8, label %for.body, label %for.cond.cleanup
+
+for.cond.cleanup: ; preds = %for.body, %entry
+ ret void
+
+for.body: ; preds = %entry, %for.body
+ %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
+ %arrayidx = getelementptr inbounds i8, i8* %p, i64 %indvars.iv
+ %0 = load i8, i8* %arrayidx
+ %add = add nuw nsw i8 %0, 2
+ %arrayidx3 = getelementptr inbounds i8, i8* %q, i64 %indvars.iv
+ store i8 %add, i8* %arrayidx3
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp eq i32 %lftr.wideiv, %len
+ br i1 %exitcond, label %for.cond.cleanup, label %for.body
+}
+
; CHECK-LABEL: @add_b(
; CHECK: load <8 x i16>, <8 x i16>*
-; CHECK: add nuw nsw <8 x i16>
+; CHECK: add <8 x i16>
; CHECK: store <8 x i16>
; Function Attrs: nounwind
define void @add_b(i16* noalias nocapture readonly %p, i16* noalias nocapture %q, i32 %len) #0 {
@@ -61,7 +89,7 @@ for.body: ; preds = %entry, %for.body
; CHECK-LABEL: @add_c(
; CHECK: load <8 x i8>, <8 x i8>*
-; CHECK: add nuw nsw <8 x i16>
+; CHECK: add <8 x i16>
; CHECK: store <8 x i16>
; Function Attrs: nounwind
define void @add_c(i8* noalias nocapture readonly %p, i16* noalias nocapture %q, i32 %len) #0 {
@@ -116,12 +144,12 @@ for.body: ; preds = %entry, %for.body
; CHECK-LABEL: @add_e(
; CHECK: load <16 x i8>
; CHECK: shl <16 x i8>
-; CHECK: add nuw nsw <16 x i8>
+; CHECK: add <16 x i8>
; CHECK: or <16 x i8>
-; CHECK: mul nuw nsw <16 x i8>
+; CHECK: mul <16 x i8>
; CHECK: and <16 x i8>
; CHECK: xor <16 x i8>
-; CHECK: mul nuw nsw <16 x i8>
+; CHECK: mul <16 x i8>
; CHECK: store <16 x i8>
define void @add_e(i8* noalias nocapture readonly %p, i8* noalias nocapture %q, i8 %arg1, i8 %arg2, i32 %len) #0 {
entry:
@@ -162,12 +190,12 @@ for.body: ; preds = %for.body, %for.body
; CHECK: load <8 x i16>
; CHECK: trunc <8 x i16>
; CHECK: shl <8 x i8>
-; CHECK: add nsw <8 x i8>
+; CHECK: add <8 x i8>
; CHECK: or <8 x i8>
-; CHECK: mul nuw nsw <8 x i8>
+; CHECK: mul <8 x i8>
; CHECK: and <8 x i8>
; CHECK: xor <8 x i8>
-; CHECK: mul nuw nsw <8 x i8>
+; CHECK: mul <8 x i8>
; CHECK: store <8 x i8>
define void @add_f(i16* noalias nocapture readonly %p, i8* noalias nocapture %q, i8 %arg1, i8 %arg2, i32 %len) #0 {
entry:
OpenPOWER on IntegriCloud