[ARM] Masked loads and stores

Masked loads and stores fit naturally with MVE, as the instructions are easily predicated. This adds lowering for the simple cases of masked loads and stores. It does not yet deal with widening/narrowing or pre/post-increment, and so is currently behind an option. The llvm masked load intrinsic accepts a "passthru" value, dictating the values used for the zero-masked lanes. In MVE the instructions write 0 to the zero-predicated lanes, so we need to match a passthru that isn't 0 (or undef) with a select instruction to pull in the correct data after the load. Differential Revision: https://reviews.llvm.org/D67186 llvm-svn: 371932
author: David Green <david.green@arm.com> 2019-09-15 14:14:47 +0000
committer: David Green <david.green@arm.com> 2019-09-15 14:14:47 +0000
commit: b325c057322ce14b5c561d8ac49508adab7649e5 (patch)
tree: 61afc1dbb9a328634e30c70be0261320a0fdc36d /llvm/test/Transforms/LoopVectorize
parent: b6a0faaa0c793aede7911be241b1895a9ebea41c (diff)
download: bcm5719-llvm-b325c057322ce14b5c561d8ac49508adab7649e5.tar.gz
bcm5719-llvm-b325c057322ce14b5c561d8ac49508adab7649e5.zip
1 files changed, 40 insertions, 0 deletions
diff --git a/llvm/test/Transforms/LoopVectorize/ARM/mve-maskedldst.ll b/llvm/test/Transforms/LoopVectorize/ARM/mve-maskedldst.ll
new file mode 100644
index 00000000000..38d838576bd
--- /dev/null
+++ b/llvm/test/Transforms/LoopVectorize/ARM/mve-maskedldst.ll
@@ -0,0 +1,40 @@
+; RUN: opt -loop-vectorize -enable-arm-maskedldst < %s -S -o - | FileCheck %s
+
+target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64"
+target triple = "thumbv8.1-m.main-none-eabi"
+
+; CHECK-LABEL: test
+; CHECK: llvm.masked.store.v4i32.p0v4i32
+define void @test(i32* nocapture %A, i32 %n) #0 { ; for i in [0, n): if A[i] is in [-9, 9] then A[i] = 0
+entry:
+  %cmp12 = icmp sgt i32 %n, 0 ; signed test: the loop is skipped entirely when n <= 0
+  br i1 %cmp12, label %for.body.preheader, label %for.cond.cleanup
+
+for.body.preheader:                               ; preds = %entry
+  br label %for.body
+
+for.body:                                         ; preds = %for.body.preheader, %for.inc
+  %i.013 = phi i32 [ %inc, %for.inc ], [ 0, %for.body.preheader ] ; induction variable i: 0 on entry, i+1 from the latch
+  %arrayidx = getelementptr inbounds i32, i32* %A, i32 %i.013 ; &A[i]
+  %0 = load i32, i32* %arrayidx, align 4 ; A[i]
+  %.off = add i32 %0, 9 ; bias by 9 so one unsigned compare covers a signed range
+  %1 = icmp ult i32 %.off, 19 ; true iff A[i] + 9 is in [0, 18], i.e. A[i] is in [-9, 9]
+  br i1 %1, label %if.then, label %for.inc ; the store below only executes for in-range elements
+
+if.then:                                          ; preds = %for.body
+  store i32 0, i32* %arrayidx, align 4 ; A[i] = 0; the only store here, so the one the expected llvm.masked.store must come from
+  br label %for.inc
+
+for.inc:                                          ; preds = %for.body, %if.then
+  %inc = add nuw nsw i32 %i.013, 1 ; i + 1
+  %exitcond = icmp eq i32 %inc, %n ; leave the loop once i + 1 == n
+  br i1 %exitcond, label %for.cond.cleanup.loopexit, label %for.body
+
+for.cond.cleanup.loopexit:                        ; preds = %for.inc
+  br label %for.cond.cleanup
+
+for.cond.cleanup:                                 ; preds = %for.cond.cleanup.loopexit, %entry
+  ret void
+}
+
+attributes #0 = { "target-features"="+mve" }
author	David Green <david.green@arm.com>	2019-09-15 14:14:47 +0000
committer	David Green <david.green@arm.com>	2019-09-15 14:14:47 +0000
commit	b325c057322ce14b5c561d8ac49508adab7649e5 (patch)
tree	61afc1dbb9a328634e30c70be0261320a0fdc36d /llvm/test/Transforms/LoopVectorize
parent	b6a0faaa0c793aede7911be241b1895a9ebea41c (diff)
download	bcm5719-llvm-b325c057322ce14b5c561d8ac49508adab7649e5.tar.gz bcm5719-llvm-b325c057322ce14b5c561d8ac49508adab7649e5.zip