diff options
| author | David Green <david.green@arm.com> | 2019-09-15 14:14:47 +0000 |
|---|---|---|
| committer | David Green <david.green@arm.com> | 2019-09-15 14:14:47 +0000 |
| commit | b325c057322ce14b5c561d8ac49508adab7649e5 (patch) | |
| tree | 61afc1dbb9a328634e30c70be0261320a0fdc36d /llvm/test/Transforms/LoopVectorize | |
| parent | b6a0faaa0c793aede7911be241b1895a9ebea41c (diff) | |
| download | bcm5719-llvm-b325c057322ce14b5c561d8ac49508adab7649e5.tar.gz bcm5719-llvm-b325c057322ce14b5c561d8ac49508adab7649e5.zip | |
[ARM] Masked loads and stores
Masked loads and stores fit naturally with MVE, the instructions being easily
predicated. This adds lowering for the simple cases of masked loads and stores.
It does not yet deal with widening/narrowing or pre/post inc, and so is
currently behind an option.
The LLVM masked load intrinsic accepts a "passthru" value, dictating the
values used for the zero-masked lanes. In MVE the instructions write 0 to the
zero-predicated lanes, so we need to match a passthru that isn't 0 (or undef)
with a select instruction to pull in the correct data after the load.
Differential Revision: https://reviews.llvm.org/D67186
llvm-svn: 371932
Diffstat (limited to 'llvm/test/Transforms/LoopVectorize')
| -rw-r--r-- | llvm/test/Transforms/LoopVectorize/ARM/mve-maskedldst.ll | 40 |
1 files changed, 40 insertions, 0 deletions
diff --git a/llvm/test/Transforms/LoopVectorize/ARM/mve-maskedldst.ll b/llvm/test/Transforms/LoopVectorize/ARM/mve-maskedldst.ll new file mode 100644 index 00000000000..38d838576bd --- /dev/null +++ b/llvm/test/Transforms/LoopVectorize/ARM/mve-maskedldst.ll @@ -0,0 +1,40 @@ +; RUN: opt -loop-vectorize -enable-arm-maskedldst < %s -S -o - | FileCheck %s + +target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64" +target triple = "thumbv8.1-m.main-none-eabi" + +; CHECK-LABEL: test +; CHECK: llvm.masked.store.v4i32.p0v4i32 +define void @test(i32* nocapture %A, i32 %n) #0 { +entry: + %cmp12 = icmp sgt i32 %n, 0 + br i1 %cmp12, label %for.body.preheader, label %for.cond.cleanup + +for.body.preheader: ; preds = %entry + br label %for.body + +for.body: ; preds = %for.body.preheader, %for.inc + %i.013 = phi i32 [ %inc, %for.inc ], [ 0, %for.body.preheader ] + %arrayidx = getelementptr inbounds i32, i32* %A, i32 %i.013 + %0 = load i32, i32* %arrayidx, align 4 + %.off = add i32 %0, 9 + %1 = icmp ult i32 %.off, 19 + br i1 %1, label %if.then, label %for.inc + +if.then: ; preds = %for.body + store i32 0, i32* %arrayidx, align 4 + br label %for.inc + +for.inc: ; preds = %for.body, %if.then + %inc = add nuw nsw i32 %i.013, 1 + %exitcond = icmp eq i32 %inc, %n + br i1 %exitcond, label %for.cond.cleanup.loopexit, label %for.body + +for.cond.cleanup.loopexit: ; preds = %for.inc + br label %for.cond.cleanup + +for.cond.cleanup: ; preds = %for.cond.cleanup.loopexit, %entry + ret void +} + +attributes #0 = { "target-features"="+mve" } |

