diff options
author | Silviu Baranga <silviu.baranga@arm.com> | 2015-08-19 14:11:27 +0000 |
---|---|---|
committer | Silviu Baranga <silviu.baranga@arm.com> | 2015-08-19 14:11:27 +0000 |
commit | ad1b19fcb718b1010aaab492e3cab80ab7a71406 (patch) | |
tree | f466116dfcd807a04d103d33fdc70e70afc1819e /llvm/test/CodeGen/ARM | |
parent | 746da5fe2a407254753965473728ee574d1b906c (diff) | |
download | bcm5719-llvm-ad1b19fcb718b1010aaab492e3cab80ab7a71406.tar.gz bcm5719-llvm-ad1b19fcb718b1010aaab492e3cab80ab7a71406.zip |
[ARM] Add instruction selection patterns for vmin/vmax
Summary:
The mid-end was generating vector smin/smax/umin/umax nodes, but
we were using vbsl to generate the code. This adds the vmin/vmax
patterns and a test to check that we are now generating vmin/vmax
instructions.
Reviewers: rengolin, jmolloy
Subscribers: aemerson, rengolin, llvm-commits
Differential Revision: http://reviews.llvm.org/D12105
llvm-svn: 245439
Diffstat (limited to 'llvm/test/CodeGen/ARM')
-rw-r--r-- | llvm/test/CodeGen/ARM/minmax.ll | 193 | ||||
-rw-r--r-- | llvm/test/CodeGen/ARM/vselect_imax.ll | 15 |
2 files changed, 200 insertions, 8 deletions
diff --git a/llvm/test/CodeGen/ARM/minmax.ll b/llvm/test/CodeGen/ARM/minmax.ll new file mode 100644 index 00000000000..78e8922fba0 --- /dev/null +++ b/llvm/test/CodeGen/ARM/minmax.ll @@ -0,0 +1,193 @@ +; RUN: llc < %s -mtriple=armv8-linux-gnu -mattr=+neon | FileCheck %s + +; CHECK-LABEL: t1 +; CHECK: vmax.s32 {{q[0-9]+}}, {{q[0-9]+}}, {{q[0-9]+}} +define <4 x i32> @t1(<4 x i32> %a, <4 x i32> %b) { + %t1 = icmp sgt <4 x i32> %a, %b + %t2 = select <4 x i1> %t1, <4 x i32> %a, <4 x i32> %b + ret <4 x i32> %t2 +} + +; CHECK-LABEL: t2 +; CHECK: vmin.s32 {{q[0-9]+}}, {{q[0-9]+}}, {{q[0-9]+}} +define <4 x i32> @t2(<4 x i32> %a, <4 x i32> %b) { + %t1 = icmp slt <4 x i32> %a, %b + %t2 = select <4 x i1> %t1, <4 x i32> %a, <4 x i32> %b + ret <4 x i32> %t2 +} + +; CHECK-LABEL: t3 +; CHECK: vmax.u32 {{q[0-9]+}}, {{q[0-9]+}}, {{q[0-9]+}} +define <4 x i32> @t3(<4 x i32> %a, <4 x i32> %b) { + %t1 = icmp ugt <4 x i32> %a, %b + %t2 = select <4 x i1> %t1, <4 x i32> %a, <4 x i32> %b + ret <4 x i32> %t2 +} + +; CHECK-LABEL: t4 +; CHECK: vmin.u32 {{q[0-9]+}}, {{q[0-9]+}}, {{q[0-9]+}} +define <4 x i32> @t4(<4 x i32> %a, <4 x i32> %b) { + %t1 = icmp ult <4 x i32> %a, %b + %t2 = select <4 x i1> %t1, <4 x i32> %a, <4 x i32> %b + ret <4 x i32> %t2 +} + +; CHECK-LABEL: t5 +; CHECK: vmax.s32 {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} +define <2 x i32> @t5(<2 x i32> %a, <2 x i32> %b) { + %t1 = icmp sgt <2 x i32> %a, %b + %t2 = select <2 x i1> %t1, <2 x i32> %a, <2 x i32> %b + ret <2 x i32> %t2 +} + +; CHECK-LABEL: t6 +; CHECK: vmin.s32 {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} +define <2 x i32> @t6(<2 x i32> %a, <2 x i32> %b) { + %t1 = icmp slt <2 x i32> %a, %b + %t2 = select <2 x i1> %t1, <2 x i32> %a, <2 x i32> %b + ret <2 x i32> %t2 +} + +; CHECK-LABEL: t7 +; CHECK: vmax.u32 {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} +define <2 x i32> @t7(<2 x i32> %a, <2 x i32> %b) { + %t1 = icmp ugt <2 x i32> %a, %b + %t2 = select <2 x i1> %t1, <2 x i32> %a, <2 x i32> %b + ret <2 x i32> %t2 +} + +; CHECK-LABEL: t8 +; 
CHECK: vmin.u32 {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} +define <2 x i32> @t8(<2 x i32> %a, <2 x i32> %b) { + %t1 = icmp ult <2 x i32> %a, %b + %t2 = select <2 x i1> %t1, <2 x i32> %a, <2 x i32> %b + ret <2 x i32> %t2 +} + +; CHECK-LABEL: t9 +; CHECK: vmax.s16 {{q[0-9]+}}, {{q[0-9]+}}, {{q[0-9]+}} +define <8 x i16> @t9(<8 x i16> %a, <8 x i16> %b) { + %t1 = icmp sgt <8 x i16> %a, %b + %t2 = select <8 x i1> %t1, <8 x i16> %a, <8 x i16> %b + ret <8 x i16> %t2 +} + +; CHECK-LABEL: t10 +; CHECK: vmin.s16 {{q[0-9]+}}, {{q[0-9]+}}, {{q[0-9]+}} +define <8 x i16> @t10(<8 x i16> %a, <8 x i16> %b) { + %t1 = icmp slt <8 x i16> %a, %b + %t2 = select <8 x i1> %t1, <8 x i16> %a, <8 x i16> %b + ret <8 x i16> %t2 +} + +; CHECK-LABEL: t11 +; CHECK: vmax.u16 {{q[0-9]+}}, {{q[0-9]+}}, {{q[0-9]+}} +define <8 x i16> @t11(<8 x i16> %a, <8 x i16> %b) { + %t1 = icmp ugt <8 x i16> %a, %b + %t2 = select <8 x i1> %t1, <8 x i16> %a, <8 x i16> %b + ret <8 x i16> %t2 +} + +; CHECK-LABEL: t12 +; CHECK: vmin.u16 {{q[0-9]+}}, {{q[0-9]+}}, {{q[0-9]+}} +define <8 x i16> @t12(<8 x i16> %a, <8 x i16> %b) { + %t1 = icmp ult <8 x i16> %a, %b + %t2 = select <8 x i1> %t1, <8 x i16> %a, <8 x i16> %b + ret <8 x i16> %t2 +} + +; CHECK-LABEL: t13 +; CHECK: vmax.s16 +define <4 x i16> @t13(<4 x i16> %a, <4 x i16> %b) { + %t1 = icmp sgt <4 x i16> %a, %b + %t2 = select <4 x i1> %t1, <4 x i16> %a, <4 x i16> %b + ret <4 x i16> %t2 +} + +; CHECK-LABEL: t14 +; CHECK: vmin.s16 {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} +define <4 x i16> @t14(<4 x i16> %a, <4 x i16> %b) { + %t1 = icmp slt <4 x i16> %a, %b + %t2 = select <4 x i1> %t1, <4 x i16> %a, <4 x i16> %b + ret <4 x i16> %t2 +} + +; CHECK-LABEL: t15 +; CHECK: vmax.u16 {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} +define <4 x i16> @t15(<4 x i16> %a, <4 x i16> %b) { + %t1 = icmp ugt <4 x i16> %a, %b + %t2 = select <4 x i1> %t1, <4 x i16> %a, <4 x i16> %b + ret <4 x i16> %t2 +} + +; CHECK-LABEL: t16 +; CHECK: vmin.u16 {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} +define <4 x i16> @t16(<4 
x i16> %a, <4 x i16> %b) { + %t1 = icmp ult <4 x i16> %a, %b + %t2 = select <4 x i1> %t1, <4 x i16> %a, <4 x i16> %b + ret <4 x i16> %t2 +} + +; CHECK-LABEL: t17 +; CHECK: vmax.s8 {{q[0-9]+}}, {{q[0-9]+}}, {{q[0-9]+}} +define <16 x i8> @t17(<16 x i8> %a, <16 x i8> %b) { + %t1 = icmp sgt <16 x i8> %a, %b + %t2 = select <16 x i1> %t1, <16 x i8> %a, <16 x i8> %b + ret <16 x i8> %t2 +} + +; CHECK-LABEL: t18 +; CHECK: vmin.s8 {{q[0-9]+}}, {{q[0-9]+}}, {{q[0-9]+}} +define <16 x i8> @t18(<16 x i8> %a, <16 x i8> %b) { + %t1 = icmp slt <16 x i8> %a, %b + %t2 = select <16 x i1> %t1, <16 x i8> %a, <16 x i8> %b + ret <16 x i8> %t2 +} + +; CHECK-LABEL: t19 +; CHECK: vmax.u8 {{q[0-9]+}}, {{q[0-9]+}}, {{q[0-9]+}} +define <16 x i8> @t19(<16 x i8> %a, <16 x i8> %b) { + %t1 = icmp ugt <16 x i8> %a, %b + %t2 = select <16 x i1> %t1, <16 x i8> %a, <16 x i8> %b + ret <16 x i8> %t2 +} + +; CHECK-LABEL: t20 +; CHECK: vmin.u8 {{q[0-9]+}}, {{q[0-9]+}}, {{q[0-9]+}} +define <16 x i8> @t20(<16 x i8> %a, <16 x i8> %b) { + %t1 = icmp ult <16 x i8> %a, %b + %t2 = select <16 x i1> %t1, <16 x i8> %a, <16 x i8> %b + ret <16 x i8> %t2 +} + +; CHECK-LABEL: t21 +; CHECK: vmax.s8 {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} +define <8 x i8> @t21(<8 x i8> %a, <8 x i8> %b) { + %t1 = icmp sgt <8 x i8> %a, %b + %t2 = select <8 x i1> %t1, <8 x i8> %a, <8 x i8> %b + ret <8 x i8> %t2 +} + +; CHECK-LABEL: t22 +; CHECK: vmin.s8 {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} +define <8 x i8> @t22(<8 x i8> %a, <8 x i8> %b) { + %t1 = icmp slt <8 x i8> %a, %b + %t2 = select <8 x i1> %t1, <8 x i8> %a, <8 x i8> %b + ret <8 x i8> %t2 +} + +; CHECK-LABEL: t23 +; CHECK: vmax.u8 {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} +define <8 x i8> @t23(<8 x i8> %a, <8 x i8> %b) { + %t1 = icmp ugt <8 x i8> %a, %b + %t2 = select <8 x i1> %t1, <8 x i8> %a, <8 x i8> %b + ret <8 x i8> %t2 +} + +; CHECK-LABEL: t24 +; CHECK: vmin.u8 {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} +define <8 x i8> @t24(<8 x i8> %a, <8 x i8> %b) { + %t1 = icmp ult <8 x i8> %a, %b + %t2 = 
select <8 x i1> %t1, <8 x i8> %a, <8 x i8> %b + ret <8 x i8> %t2 +} diff --git a/llvm/test/CodeGen/ARM/vselect_imax.ll b/llvm/test/CodeGen/ARM/vselect_imax.ll index 0eb051036d9..3f52ac2db87 100644 --- a/llvm/test/CodeGen/ARM/vselect_imax.ll +++ b/llvm/test/CodeGen/ARM/vselect_imax.ll @@ -3,8 +3,7 @@ ; Make sure that ARM backend with NEON handles vselect. define void @vmax_v4i32(<4 x i32>* %m, <4 x i32> %a, <4 x i32> %b) { -; CHECK: vcgt.s32 [[QR:q[0-9]+]], [[Q1:q[0-9]+]], [[Q2:q[0-9]+]] -; CHECK: vbsl [[QR]], [[Q1]], [[Q2]] +; CHECK: vmax.s32 {{q[0-9]+}}, {{q[0-9]+}}, {{q[0-9]+}} %cmpres = icmp sgt <4 x i32> %a, %b %maxres = select <4 x i1> %cmpres, <4 x i32> %a, <4 x i32> %b store <4 x i32> %maxres, <4 x i32>* %m @@ -21,8 +20,8 @@ define void @func_blend10(%T0_10* %loadaddr, %T0_10* %loadaddr2, %v0 = load %T0_10, %T0_10* %loadaddr %v1 = load %T0_10, %T0_10* %loadaddr2 %c = icmp slt %T0_10 %v0, %v1 -; CHECK: vbsl -; CHECK: vbsl +; CHECK: vmin.s16 +; CHECK: vmin.s16 ; COST: func_blend10 ; COST: cost of 40 {{.*}} select %r = select %T1_10 %c, %T0_10 %v0, %T0_10 %v1 @@ -37,8 +36,8 @@ define void @func_blend14(%T0_14* %loadaddr, %T0_14* %loadaddr2, %v0 = load %T0_14, %T0_14* %loadaddr %v1 = load %T0_14, %T0_14* %loadaddr2 %c = icmp slt %T0_14 %v0, %v1 -; CHECK: vbsl -; CHECK: vbsl +; CHECK: vmin.s32 +; CHECK: vmin.s32 ; COST: func_blend14 ; COST: cost of 41 {{.*}} select %r = select %T1_14 %c, %T0_14 %v0, %T0_14 %v1 @@ -50,8 +49,8 @@ define void @func_blend14(%T0_14* %loadaddr, %T0_14* %loadaddr2, ; CHECK-LABEL: func_blend15: define void @func_blend15(%T0_15* %loadaddr, %T0_15* %loadaddr2, %T1_15* %blend, %T0_15* %storeaddr) { -; CHECK: vbsl -; CHECK: vbsl +; CHECK: vmin.s32 +; CHECK: vmin.s32 %v0 = load %T0_15, %T0_15* %loadaddr %v1 = load %T0_15, %T0_15* %loadaddr2 %c = icmp slt %T0_15 %v0, %v1 |