1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
|
; RUN: llc -show-mc-encoding -march=arm -mcpu=cortex-a8 -mattr=+neon < %s | FileCheck %s
declare <8 x i8> @llvm.arm.neon.vmins.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
declare <4 x i16> @llvm.arm.neon.vmins.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
declare <2 x i32> @llvm.arm.neon.vmins.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
; CHECK: vmins_8xi8
define <8 x i8> @vmins_8xi8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
%tmp1 = load <8 x i8>* %A
%tmp2 = load <8 x i8>* %B
; CHECK: vmin.s8 d16, d16, d17 @ encoding: [0xb1,0x06,0x40,0xf2]
%tmp3 = call <8 x i8> @llvm.arm.neon.vmins.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
ret <8 x i8> %tmp3
}
; CHECK: vmins_4xi16
define <4 x i16> @vmins_4xi16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
%tmp1 = load <4 x i16>* %A
%tmp2 = load <4 x i16>* %B
; CHECK: vmin.s16 d16, d16, d17 @ encoding: [0xb1,0x06,0x50,0xf2]
%tmp3 = call <4 x i16> @llvm.arm.neon.vmins.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
ret <4 x i16> %tmp3
}
; CHECK: vmins_2xi32
define <2 x i32> @vmins_2xi32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
%tmp1 = load <2 x i32>* %A
%tmp2 = load <2 x i32>* %B
; CHECK: vmin.s32 d16, d16, d17 @ encoding: [0xb1,0x06,0x60,0xf2]
%tmp3 = call <2 x i32> @llvm.arm.neon.vmins.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
ret <2 x i32> %tmp3
}
declare <8 x i8> @llvm.arm.neon.vminu.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
declare <4 x i16> @llvm.arm.neon.vminu.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
declare <2 x i32> @llvm.arm.neon.vminu.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
; CHECK: vminu_8xi8
define <8 x i8> @vminu_8xi8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
%tmp1 = load <8 x i8>* %A
%tmp2 = load <8 x i8>* %B
; CHECK: vmin.u8 d16, d16, d17 @ encoding: [0xb1,0x06,0x40,0xf3]
%tmp3 = call <8 x i8> @llvm.arm.neon.vminu.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
ret <8 x i8> %tmp3
}
; CHECK: vminu_4xi16
define <4 x i16> @vminu_4xi16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
%tmp1 = load <4 x i16>* %A
%tmp2 = load <4 x i16>* %B
; CHECK: vmin.u16 d16, d16, d17 @ encoding: [0xb1,0x06,0x50,0xf3]
%tmp3 = call <4 x i16> @llvm.arm.neon.vminu.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
ret <4 x i16> %tmp3
}
; CHECK: vminu_2xi32
define <2 x i32> @vminu_2xi32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
%tmp1 = load <2 x i32>* %A
%tmp2 = load <2 x i32>* %B
; CHECK: vmin.u32 d16, d16, d17 @ encoding: [0xb1,0x06,0x60,0xf3]
%tmp3 = call <2 x i32> @llvm.arm.neon.vminu.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
ret <2 x i32> %tmp3
}
declare <2 x float> @llvm.arm.neon.vmins.v2f32(<2 x float>, <2 x float>) nounwind readnone
; CHECK: vmin_2xfloat
define <2 x float> @vmin_2xfloat(<2 x float>* %A, <2 x float>* %B) nounwind {
%tmp1 = load <2 x float>* %A
%tmp2 = load <2 x float>* %B
; CHECK: vmin.f32 d16, d16, d17 @ encoding: [0xa1,0x0f,0x60,0xf2]
%tmp3 = call <2 x float> @llvm.arm.neon.vmins.v2f32(<2 x float> %tmp1, <2 x float> %tmp2)
ret <2 x float> %tmp3
}
declare <16 x i8> @llvm.arm.neon.vmins.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
declare <8 x i16> @llvm.arm.neon.vmins.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
declare <4 x i32> @llvm.arm.neon.vmins.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
; CHECK: vmins_16xi8
define <16 x i8> @vmins_16xi8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
%tmp1 = load <16 x i8>* %A
%tmp2 = load <16 x i8>* %B
; CHECK: vmin.s8 q8, q8, q9 @ encoding: [0xf2,0x06,0x40,0xf2]
%tmp3 = call <16 x i8> @llvm.arm.neon.vmins.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
ret <16 x i8> %tmp3
}
; CHECK: vmins_8xi16
define <8 x i16> @vmins_8xi16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
%tmp1 = load <8 x i16>* %A
%tmp2 = load <8 x i16>* %B
; CHECK: vmin.s16 q8, q8, q9 @ encoding: [0xf2,0x06,0x50,0xf2]
%tmp3 = call <8 x i16> @llvm.arm.neon.vmins.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
ret <8 x i16> %tmp3
}
; CHECK: vmins_4xi32
define <4 x i32> @vmins_4xi32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
%tmp1 = load <4 x i32>* %A
%tmp2 = load <4 x i32>* %B
; CHECK: vmin.s32 q8, q8, q9 @ encoding: [0xf2,0x06,0x60,0xf2]
%tmp3 = call <4 x i32> @llvm.arm.neon.vmins.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
ret <4 x i32> %tmp3
}
declare <16 x i8> @llvm.arm.neon.vminu.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
declare <8 x i16> @llvm.arm.neon.vminu.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
declare <4 x i32> @llvm.arm.neon.vminu.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
; CHECK: vminu_16xi8
define <16 x i8> @vminu_16xi8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
%tmp1 = load <16 x i8>* %A
%tmp2 = load <16 x i8>* %B
; CHECK: vmin.u8 q8, q8, q9 @ encoding: [0xf2,0x06,0x40,0xf3]
%tmp3 = call <16 x i8> @llvm.arm.neon.vminu.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
ret <16 x i8> %tmp3
}
; CHECK: vminu_8xi16
define <8 x i16> @vminu_8xi16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
%tmp1 = load <8 x i16>* %A
%tmp2 = load <8 x i16>* %B
; CHECK: vmin.u16 q8, q8, q9 @ encoding: [0xf2,0x06,0x50,0xf3]
%tmp3 = call <8 x i16> @llvm.arm.neon.vminu.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
ret <8 x i16> %tmp3
}
; CHECK: vminu_4xi32
define <4 x i32> @vminu_4xi32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
%tmp1 = load <4 x i32>* %A
%tmp2 = load <4 x i32>* %B
; CHECK: vmin.u32 q8, q8, q9 @ encoding: [0xf2,0x06,0x60,0xf3]
%tmp3 = call <4 x i32> @llvm.arm.neon.vminu.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
ret <4 x i32> %tmp3
}
declare <4 x float> @llvm.arm.neon.vmins.v4f32(<4 x float>, <4 x float>) nounwind readnone
; CHECK: vmin_4xfloat
define <4 x float> @vmin_4xfloat(<4 x float>* %A, <4 x float>* %B) nounwind {
%tmp1 = load <4 x float>* %A
%tmp2 = load <4 x float>* %B
; CHECK: vmin.f32 q8, q8, q9 @ encoding: [0xe2,0x0f,0x60,0xf2]
%tmp3 = call <4 x float> @llvm.arm.neon.vmins.v4f32(<4 x float> %tmp1, <4 x float> %tmp2)
ret <4 x float> %tmp3
}
|