diff options
| author | Fangrui Song <maskray@google.com> | 2019-08-23 02:17:04 +0000 |
|---|---|---|
| committer | Fangrui Song <maskray@google.com> | 2019-08-23 02:17:04 +0000 |
| commit | 3fc933af8b49519658e4c2fd82f93c6d680c8a08 (patch) | |
| tree | f4be5e7421f3997fc0e85fdbbee52eb9fe1e5bc4 /llvm/test/Transforms/AlignmentFromAssumptions | |
| parent | 7fbadf3b2793bf907d5a1fb7c1c9078500a0b030 (diff) | |
| download | bcm5719-llvm-3fc933af8b49519658e4c2fd82f93c6d680c8a08.tar.gz bcm5719-llvm-3fc933af8b49519658e4c2fd82f93c6d680c8a08.zip | |
[AlignmentFromAssumptions] getNewAlignmentDiff(): use getURemExpr()
The alignment is computed incorrectly, so aligned move instructions are sometimes not generated, as shown by the example below:
```
// b.cc
typedef long long index;
extern "C" index g_tid;
extern "C" index g_num;
void add3(float* __restrict__ a, float* __restrict__ b, float* __restrict__ c) {
index n = 64*1024;
index m = 16*1024;
index k = 4*1024;
index tid = g_tid;
index num = g_num;
__builtin_assume_aligned(a, 32);
__builtin_assume_aligned(b, 32);
__builtin_assume_aligned(c, 32);
for (index i0=tid*k; i0<m; i0+=num*k)
for (index i1=0; i1<n*m; i1+=m)
for (index i2=0; i2<k; i2++)
c[i1+i0+i2] = b[i0+i2] + a[i1+i0+i2];
}
```
Compile with `clang b.cc -Ofast -march=skylake -mavx2 -S`
```
vmovaps -224(%rdi,%rbx,4), %ymm0
vmovups -192(%rdi,%rbx,4), %ymm1 # should be vmovaps
vmovups -160(%rdi,%rbx,4), %ymm2 # should be vmovaps
vmovups -128(%rdi,%rbx,4), %ymm3 # should be vmovaps
vaddps -224(%rsi,%rbx,4), %ymm0, %ymm0
vaddps -192(%rsi,%rbx,4), %ymm1, %ymm1
vaddps -160(%rsi,%rbx,4), %ymm2, %ymm2
vaddps -128(%rsi,%rbx,4), %ymm3, %ymm3
vmovaps %ymm0, -224(%rdx,%rbx,4)
vmovups %ymm1, -192(%rdx,%rbx,4) # should be vmovaps
vmovups %ymm2, -160(%rdx,%rbx,4) # should be vmovaps
vmovups %ymm3, -128(%rdx,%rbx,4) # should be vmovaps
```
Differential Revision: https://reviews.llvm.org/D66575
Patch by Dun Liang
llvm-svn: 369723
Diffstat (limited to 'llvm/test/Transforms/AlignmentFromAssumptions')
| -rw-r--r-- | llvm/test/Transforms/AlignmentFromAssumptions/simple.ll | 55 |
1 files changed, 55 insertions, 0 deletions
```
diff --git a/llvm/test/Transforms/AlignmentFromAssumptions/simple.ll b/llvm/test/Transforms/AlignmentFromAssumptions/simple.ll
index 6ee08b81e27..14e764f042c 100644
--- a/llvm/test/Transforms/AlignmentFromAssumptions/simple.ll
+++ b/llvm/test/Transforms/AlignmentFromAssumptions/simple.ll
@@ -90,6 +90,61 @@ for.end: ; preds = %for.body
 ; CHECK: ret i32 %add.lcssa
 }
 
+; test D66575
+; def hoo2(a, id, num):
+;   for i0 in range(id*64, 4096, num*64):
+;     for i1 in range(0, 4096, 32):
+;       for i2 in range(0, 4096, 32):
+;         load(a, i0+i1+i2+32)
+define void @hoo2(i32* nocapture %a, i64 %id, i64 %num) nounwind uwtable readonly {
+entry:
+  %ptrint = ptrtoint i32* %a to i64
+  %maskedptr = and i64 %ptrint, 31
+  %maskcond = icmp eq i64 %maskedptr, 0
+  tail call void @llvm.assume(i1 %maskcond)
+  %id.mul = shl nsw i64 %id, 6
+  %num.mul = shl nsw i64 %num, 6
+  br label %for0.body
+
+for0.body:
+  %i0 = phi i64 [ %id.mul, %entry ], [ %i0.next, %for0.end ]
+  br label %for1.body
+
+for1.body:
+  %i1 = phi i64 [ 0, %for0.body ], [ %i1.next, %for1.end ]
+  br label %for2.body
+
+for2.body:
+  %i2 = phi i64 [ 0, %for1.body ], [ %i2.next, %for2.body ]
+
+  %t1 = add nuw nsw i64 %i0, %i1
+  %t2 = add nuw nsw i64 %t1, %i2
+  %t3 = add nuw nsw i64 %t2, 32
+  %arrayidx = getelementptr inbounds i32, i32* %a, i64 %t3
+  %x = load i32, i32* %arrayidx, align 4
+
+  %i2.next = add nuw nsw i64 %i2, 32
+  %cmp2 = icmp ult i64 %i2.next, 4096
+  br i1 %cmp2, label %for2.body, label %for1.end
+
+for1.end:
+  %i1.next = add nuw nsw i64 %i1, 32
+  %cmp1 = icmp ult i64 %i1.next, 4096
+  br i1 %cmp1, label %for1.body, label %for0.end
+
+for0.end:
+  %i0.next = add nuw nsw i64 %i0, %num.mul
+  %cmp0 = icmp ult i64 %i0.next, 4096
+  br i1 %cmp0, label %for0.body, label %return
+
+return:
+  ret void
+
+; CHECK-LABEL: @hoo2
+; CHECK: load i32, i32* %arrayidx, align 32
+; CHECK: ret void
+}
+
 define i32 @joo(i32* nocapture %a) nounwind uwtable readonly {
 entry:
   %ptrint = ptrtoint i32* %a to i64
```

