summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Igor Breger <igor.breger@intel.com> 2017-05-18 11:10:56 +0000
committer Igor Breger <igor.breger@intel.com> 2017-05-18 11:10:56 +0000
commit 842b5b36ba18f8db4b0fe7be56d5251208f85fe3 (patch)
tree e6994902006cc43134622dec88a9069abae531b3
parent 8d81c2216454b54c1c7d7c0c2a2e074a4978be44 (diff)
download bcm5719-llvm-842b5b36ba18f8db4b0fe7be56d5251208f85fe3.tar.gz
bcm5719-llvm-842b5b36ba18f8db4b0fe7be56d5251208f85fe3.zip
[GlobalISel][X86] G_ADD/G_SUB vector legalizer/selector support.
Summary: G_ADD/G_SUB vector legalizer/selector support.
Reviewers: zvi, guyblank
Reviewed By: guyblank
Subscribers: rovka, llvm-commits, kristof.beyls
Differential Revision: https://reviews.llvm.org/D33232
llvm-svn: 303345
-rw-r--r-- llvm/lib/Target/X86/X86LegalizerInfo.cpp 19
-rw-r--r-- llvm/test/CodeGen/X86/GlobalISel/add-vec.ll 111
-rw-r--r-- llvm/test/CodeGen/X86/GlobalISel/legalize-add-v128.mir 119
-rw-r--r-- llvm/test/CodeGen/X86/GlobalISel/legalize-add-v256.mir 157
-rw-r--r-- llvm/test/CodeGen/X86/GlobalISel/legalize-add-v512.mir 139
-rw-r--r-- llvm/test/CodeGen/X86/GlobalISel/legalize-sub-v128.mir 119
-rw-r--r-- llvm/test/CodeGen/X86/GlobalISel/legalize-sub-v256.mir 120
-rw-r--r-- llvm/test/CodeGen/X86/GlobalISel/legalize-sub-v512.mir 120
-rw-r--r-- llvm/test/CodeGen/X86/GlobalISel/regbankselect-AVX2.mir 55
-rw-r--r-- llvm/test/CodeGen/X86/GlobalISel/regbankselect-AVX512.mir 54
-rw-r--r-- llvm/test/CodeGen/X86/GlobalISel/select-add-v128.mir 195
-rw-r--r-- llvm/test/CodeGen/X86/GlobalISel/select-add-v256.mir 185
-rw-r--r-- llvm/test/CodeGen/X86/GlobalISel/select-add-v512.mir 130
-rw-r--r-- llvm/test/CodeGen/X86/GlobalISel/select-sub-v128.mir 195
-rw-r--r-- llvm/test/CodeGen/X86/GlobalISel/select-sub-v256.mir 185
-rw-r--r-- llvm/test/CodeGen/X86/GlobalISel/select-sub-v512.mir 130
-rw-r--r-- llvm/test/CodeGen/X86/GlobalISel/sub-vec.ll 111
17 files changed, 2143 insertions, 1 deletions
diff --git a/llvm/lib/Target/X86/X86LegalizerInfo.cpp b/llvm/lib/Target/X86/X86LegalizerInfo.cpp
index 8ce240714f1..da724f5d898 100644
--- a/llvm/lib/Target/X86/X86LegalizerInfo.cpp
+++ b/llvm/lib/Target/X86/X86LegalizerInfo.cpp
@@ -184,6 +184,7 @@ void X86LegalizerInfo::setLegalizerInfoSSE2() {
return;
const LLT s64 = LLT::scalar(64);
+ const LLT v16s8 = LLT::vector(16, 8);
const LLT v8s16 = LLT::vector(8, 16);
const LLT v4s32 = LLT::vector(4, 32);
const LLT v2s64 = LLT::vector(2, 64);
@@ -193,7 +194,7 @@ void X86LegalizerInfo::setLegalizerInfoSSE2() {
setAction({BinOp, Ty}, Legal);
for (unsigned BinOp : {G_ADD, G_SUB})
- for (auto Ty : {v4s32})
+ for (auto Ty : {v16s8, v8s16, v4s32, v2s64})
setAction({BinOp, Ty}, Legal);
setAction({G_MUL, v8s16}, Legal);
@@ -212,8 +213,14 @@ void X86LegalizerInfo::setLegalizerInfoAVX2() {
if (!Subtarget.hasAVX2())
return;
+ const LLT v32s8 = LLT::vector(32, 8);
const LLT v16s16 = LLT::vector(16, 16);
const LLT v8s32 = LLT::vector(8, 32);
+ const LLT v4s64 = LLT::vector(4, 64);
+
+ for (unsigned BinOp : {G_ADD, G_SUB})
+ for (auto Ty : {v32s8, v16s16, v8s32, v4s64})
+ setAction({BinOp, Ty}, Legal);
for (auto Ty : {v16s16, v8s32})
setAction({G_MUL, Ty}, Legal);
@@ -224,6 +231,11 @@ void X86LegalizerInfo::setLegalizerInfoAVX512() {
return;
const LLT v16s32 = LLT::vector(16, 32);
+ const LLT v8s64 = LLT::vector(8, 64);
+
+ for (unsigned BinOp : {G_ADD, G_SUB})
+ for (auto Ty : {v16s32, v8s64})
+ setAction({BinOp, Ty}, Legal);
setAction({G_MUL, v16s32}, Legal);
@@ -261,8 +273,13 @@ void X86LegalizerInfo::setLegalizerInfoAVX512BW() {
if (!(Subtarget.hasAVX512() && Subtarget.hasBWI()))
return;
+ const LLT v64s8 = LLT::vector(64, 8);
const LLT v32s16 = LLT::vector(32, 16);
+ for (unsigned BinOp : {G_ADD, G_SUB})
+ for (auto Ty : {v64s8, v32s16})
+ setAction({BinOp, Ty}, Legal);
+
setAction({G_MUL, v32s16}, Legal);
/************ VLX *******************/
diff --git a/llvm/test/CodeGen/X86/GlobalISel/add-vec.ll b/llvm/test/CodeGen/X86/GlobalISel/add-vec.ll
new file mode 100644
index 00000000000..e9b4466943d
--- /dev/null
+++ b/llvm/test/CodeGen/X86/GlobalISel/add-vec.ll
@@ -0,0 +1,111 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=x86_64-linux-gnu -mcpu=skx -global-isel < %s -o - | FileCheck %s --check-prefix=SKX
+
+define <16 x i8> @test_add_v16i8(<16 x i8> %arg1, <16 x i8> %arg2) {
+; SKX-LABEL: test_add_v16i8:
+; SKX: # BB#0:
+; SKX-NEXT: vpaddb %xmm1, %xmm0, %xmm0
+; SKX-NEXT: retq
+ %ret = add <16 x i8> %arg1, %arg2
+ ret <16 x i8> %ret
+}
+
+define <8 x i16> @test_add_v8i16(<8 x i16> %arg1, <8 x i16> %arg2) {
+; SKX-LABEL: test_add_v8i16:
+; SKX: # BB#0:
+; SKX-NEXT: vpaddw %xmm1, %xmm0, %xmm0
+; SKX-NEXT: retq
+ %ret = add <8 x i16> %arg1, %arg2
+ ret <8 x i16> %ret
+}
+
+define <4 x i32> @test_add_v4i32(<4 x i32> %arg1, <4 x i32> %arg2) {
+; SKX-LABEL: test_add_v4i32:
+; SKX: # BB#0:
+; SKX-NEXT: vpaddd %xmm1, %xmm0, %xmm0
+; SKX-NEXT: retq
+ %ret = add <4 x i32> %arg1, %arg2
+ ret <4 x i32> %ret
+}
+
+define <2 x i64> @test_add_v2i64(<2 x i64> %arg1, <2 x i64> %arg2) {
+; SKX-LABEL: test_add_v2i64:
+; SKX: # BB#0:
+; SKX-NEXT: vpaddq %xmm1, %xmm0, %xmm0
+; SKX-NEXT: retq
+ %ret = add <2 x i64> %arg1, %arg2
+ ret <2 x i64> %ret
+}
+
+define <32 x i8> @test_add_v32i8(<32 x i8> %arg1, <32 x i8> %arg2) {
+; SKX-LABEL: test_add_v32i8:
+; SKX: # BB#0:
+; SKX-NEXT: vpaddb %ymm1, %ymm0, %ymm0
+; SKX-NEXT: retq
+ %ret = add <32 x i8> %arg1, %arg2
+ ret <32 x i8> %ret
+}
+
+define <16 x i16> @test_add_v16i16(<16 x i16> %arg1, <16 x i16> %arg2) {
+; SKX-LABEL: test_add_v16i16:
+; SKX: # BB#0:
+; SKX-NEXT: vpaddw %ymm1, %ymm0, %ymm0
+; SKX-NEXT: retq
+ %ret = add <16 x i16> %arg1, %arg2
+ ret <16 x i16> %ret
+}
+
+define <8 x i32> @test_add_v8i32(<8 x i32> %arg1, <8 x i32> %arg2) {
+; SKX-LABEL: test_add_v8i32:
+; SKX: # BB#0:
+; SKX-NEXT: vpaddd %ymm1, %ymm0, %ymm0
+; SKX-NEXT: retq
+ %ret = add <8 x i32> %arg1, %arg2
+ ret <8 x i32> %ret
+}
+
+define <4 x i64> @test_add_v4i64(<4 x i64> %arg1, <4 x i64> %arg2) {
+; SKX-LABEL: test_add_v4i64:
+; SKX: # BB#0:
+; SKX-NEXT: vpaddq %ymm1, %ymm0, %ymm0
+; SKX-NEXT: retq
+ %ret = add <4 x i64> %arg1, %arg2
+ ret <4 x i64> %ret
+}
+
+define <64 x i8> @test_add_v64i8(<64 x i8> %arg1, <64 x i8> %arg2) {
+; SKX-LABEL: test_add_v64i8:
+; SKX: # BB#0:
+; SKX-NEXT: vpaddb %zmm1, %zmm0, %zmm0
+; SKX-NEXT: retq
+ %ret = add <64 x i8> %arg1, %arg2
+ ret <64 x i8> %ret
+}
+
+define <32 x i16> @test_add_v32i16(<32 x i16> %arg1, <32 x i16> %arg2) {
+; SKX-LABEL: test_add_v32i16:
+; SKX: # BB#0:
+; SKX-NEXT: vpaddw %zmm1, %zmm0, %zmm0
+; SKX-NEXT: retq
+ %ret = add <32 x i16> %arg1, %arg2
+ ret <32 x i16> %ret
+}
+
+define <16 x i32> @test_add_v16i32(<16 x i32> %arg1, <16 x i32> %arg2) {
+; SKX-LABEL: test_add_v16i32:
+; SKX: # BB#0:
+; SKX-NEXT: vpaddd %zmm1, %zmm0, %zmm0
+; SKX-NEXT: retq
+ %ret = add <16 x i32> %arg1, %arg2
+ ret <16 x i32> %ret
+}
+
+define <8 x i64> @test_add_v8i64(<8 x i64> %arg1, <8 x i64> %arg2) {
+; SKX-LABEL: test_add_v8i64:
+; SKX: # BB#0:
+; SKX-NEXT: vpaddq %zmm1, %zmm0, %zmm0
+; SKX-NEXT: retq
+ %ret = add <8 x i64> %arg1, %arg2
+ ret <8 x i64> %ret
+}
+
diff --git a/llvm/test/CodeGen/X86/GlobalISel/legalize-add-v128.mir b/llvm/test/CodeGen/X86/GlobalISel/legalize-add-v128.mir
new file mode 100644
index 00000000000..feba33ac91b
--- /dev/null
+++ b/llvm/test/CodeGen/X86/GlobalISel/legalize-add-v128.mir
@@ -0,0 +1,119 @@
+# RUN: llc -mtriple=x86_64-linux-gnu -mattr=+sse2 -global-isel -run-pass=legalizer %s -o - | FileCheck %s --check-prefix=ALL --check-prefix=SSE2
+
+--- |
+ define void @test_add_v16i8() {
+ %ret = add <16 x i8> undef, undef
+ ret void
+ }
+
+ define void @test_add_v8i16() {
+ %ret = add <8 x i16> undef, undef
+ ret void
+ }
+
+ define void @test_add_v4i32() {
+ %ret = add <4 x i32> undef, undef
+ ret void
+ }
+
+ define void @test_add_v2i64() {
+ %ret = add <2 x i64> undef, undef
+ ret void
+ }
+...
+---
+name: test_add_v16i8
+# ALL-LABEL: name: test_add_v16i8
+alignment: 4
+legalized: false
+regBankSelected: false
+registers:
+ - { id: 0, class: _ }
+ - { id: 1, class: _ }
+ - { id: 2, class: _ }
+# ALL: %0(<16 x s8>) = IMPLICIT_DEF
+# ALL-NEXT: %1(<16 x s8>) = IMPLICIT_DEF
+# ALL-NEXT: %2(<16 x s8>) = G_ADD %0, %1
+# ALL-NEXT: RET 0
+body: |
+ bb.1 (%ir-block.0):
+ liveins: %xmm0, %xmm1
+
+ %0(<16 x s8>) = IMPLICIT_DEF
+ %1(<16 x s8>) = IMPLICIT_DEF
+ %2(<16 x s8>) = G_ADD %0, %1
+ RET 0
+
+...
+---
+name: test_add_v8i16
+# ALL-LABEL: name: test_add_v8i16
+alignment: 4
+legalized: false
+regBankSelected: false
+registers:
+ - { id: 0, class: _ }
+ - { id: 1, class: _ }
+ - { id: 2, class: _ }
+# ALL: %0(<8 x s16>) = IMPLICIT_DEF
+# ALL-NEXT: %1(<8 x s16>) = IMPLICIT_DEF
+# ALL-NEXT: %2(<8 x s16>) = G_ADD %0, %1
+# ALL-NEXT: RET 0
+body: |
+ bb.1 (%ir-block.0):
+ liveins: %xmm0, %xmm1
+
+ %0(<8 x s16>) = IMPLICIT_DEF
+ %1(<8 x s16>) = IMPLICIT_DEF
+ %2(<8 x s16>) = G_ADD %0, %1
+ RET 0
+
+...
+---
+name: test_add_v4i32
+# ALL-LABEL: name: test_add_v4i32
+alignment: 4
+legalized: false
+regBankSelected: false
+registers:
+ - { id: 0, class: _ }
+ - { id: 1, class: _ }
+ - { id: 2, class: _ }
+# ALL: %0(<4 x s32>) = IMPLICIT_DEF
+# ALL-NEXT: %1(<4 x s32>) = IMPLICIT_DEF
+# ALL-NEXT: %2(<4 x s32>) = G_ADD %0, %1
+# ALL-NEXT: RET 0
+body: |
+ bb.1 (%ir-block.0):
+ liveins: %xmm0, %xmm1
+
+ %0(<4 x s32>) = IMPLICIT_DEF
+ %1(<4 x s32>) = IMPLICIT_DEF
+ %2(<4 x s32>) = G_ADD %0, %1
+ RET 0
+
+...
+---
+name: test_add_v2i64
+# ALL-LABEL: name: test_add_v2i64
+alignment: 4
+legalized: false
+regBankSelected: false
+registers:
+ - { id: 0, class: _ }
+ - { id: 1, class: _ }
+ - { id: 2, class: _ }
+# ALL: %0(<2 x s64>) = IMPLICIT_DEF
+# ALL-NEXT: %1(<2 x s64>) = IMPLICIT_DEF
+# ALL-NEXT: %2(<2 x s64>) = G_ADD %0, %1
+# ALL-NEXT: RET 0
+body: |
+ bb.1 (%ir-block.0):
+ liveins: %xmm0, %xmm1
+
+ %0(<2 x s64>) = IMPLICIT_DEF
+ %1(<2 x s64>) = IMPLICIT_DEF
+ %2(<2 x s64>) = G_ADD %0, %1
+ RET 0
+
+...
diff --git a/llvm/test/CodeGen/X86/GlobalISel/legalize-add-v256.mir b/llvm/test/CodeGen/X86/GlobalISel/legalize-add-v256.mir
new file mode 100644
index 00000000000..f7dc8031b4f
--- /dev/null
+++ b/llvm/test/CodeGen/X86/GlobalISel/legalize-add-v256.mir
@@ -0,0 +1,157 @@
+# RUN: llc -mtriple=x86_64-linux-gnu -mattr=+avx -global-isel -run-pass=legalizer %s -o - | FileCheck %s --check-prefix=ALL --check-prefix=AVX1
+# RUN: llc -mtriple=x86_64-linux-gnu -mattr=+avx2 -global-isel -run-pass=legalizer %s -o - | FileCheck %s --check-prefix=ALL --check-prefix=AVX2
+
+--- |
+ define void @test_add_v32i8() {
+ %ret = add <32 x i8> undef, undef
+ ret void
+ }
+
+ define void @test_add_v16i16() {
+ %ret = add <16 x i16> undef, undef
+ ret void
+ }
+
+ define void @test_add_v8i32() {
+ %ret = add <8 x i32> undef, undef
+ ret void
+ }
+
+ define void @test_add_v4i64() {
+ %ret = add <4 x i64> undef, undef
+ ret void
+ }
+
+...
+---
+name: test_add_v32i8
+# ALL-LABEL: name: test_add_v32i8
+alignment: 4
+legalized: false
+regBankSelected: false
+registers:
+ - { id: 0, class: _ }
+ - { id: 1, class: _ }
+ - { id: 2, class: _ }
+# AVX1: %0(<32 x s8>) = IMPLICIT_DEF
+# AVX1-NEXT: %1(<32 x s8>) = IMPLICIT_DEF
+# AVX1-NEXT: %3(<16 x s8>), %4(<16 x s8>) = G_UNMERGE_VALUES %0(<32 x s8>)
+# AVX1-NEXT: %5(<16 x s8>), %6(<16 x s8>) = G_UNMERGE_VALUES %1(<32 x s8>)
+# AVX1-NEXT: %7(<16 x s8>) = G_ADD %3, %5
+# AVX1-NEXT: %8(<16 x s8>) = G_ADD %4, %6
+# AVX1-NEXT: %2(<32 x s8>) = G_MERGE_VALUES %7(<16 x s8>), %8(<16 x s8>)
+# AVX1-NEXT: RET 0
+#
+# AVX2: %0(<32 x s8>) = IMPLICIT_DEF
+# AVX2-NEXT: %1(<32 x s8>) = IMPLICIT_DEF
+# AVX2-NEXT: %2(<32 x s8>) = G_ADD %0, %1
+# AVX2-NEXT: RET 0
+body: |
+ bb.1 (%ir-block.0):
+ liveins: %ymm0, %ymm1
+
+ %0(<32 x s8>) = IMPLICIT_DEF
+ %1(<32 x s8>) = IMPLICIT_DEF
+ %2(<32 x s8>) = G_ADD %0, %1
+ RET 0
+
+...
+---
+name: test_add_v16i16
+# ALL-LABEL: name: test_add_v16i16
+alignment: 4
+legalized: false
+regBankSelected: false
+registers:
+ - { id: 0, class: _ }
+ - { id: 1, class: _ }
+ - { id: 2, class: _ }
+# AVX1: %0(<16 x s16>) = IMPLICIT_DEF
+# AVX1-NEXT: %1(<16 x s16>) = IMPLICIT_DEF
+# AVX1-NEXT: %3(<8 x s16>), %4(<8 x s16>) = G_UNMERGE_VALUES %0(<16 x s16>)
+# AVX1-NEXT: %5(<8 x s16>), %6(<8 x s16>) = G_UNMERGE_VALUES %1(<16 x s16>)
+# AVX1-NEXT: %7(<8 x s16>) = G_ADD %3, %5
+# AVX1-NEXT: %8(<8 x s16>) = G_ADD %4, %6
+# AVX1-NEXT: %2(<16 x s16>) = G_MERGE_VALUES %7(<8 x s16>), %8(<8 x s16>)
+# AVX1-NEXT: RET 0
+#
+# AVX2: %0(<16 x s16>) = IMPLICIT_DEF
+# AVX2-NEXT: %1(<16 x s16>) = IMPLICIT_DEF
+# AVX2-NEXT: %2(<16 x s16>) = G_ADD %0, %1
+# AVX2-NEXT: RET 0
+body: |
+ bb.1 (%ir-block.0):
+ liveins: %ymm0, %ymm1
+
+ %0(<16 x s16>) = IMPLICIT_DEF
+ %1(<16 x s16>) = IMPLICIT_DEF
+ %2(<16 x s16>) = G_ADD %0, %1
+ RET 0
+
+...
+---
+name: test_add_v8i32
+# ALL-LABEL: name: test_add_v8i32
+alignment: 4
+legalized: false
+regBankSelected: false
+registers:
+ - { id: 0, class: _ }
+ - { id: 1, class: _ }
+ - { id: 2, class: _ }
+# AVX1: %0(<8 x s32>) = IMPLICIT_DEF
+# AVX1-NEXT: %1(<8 x s32>) = IMPLICIT_DEF
+# AVX1-NEXT: %3(<4 x s32>), %4(<4 x s32>) = G_UNMERGE_VALUES %0(<8 x s32>)
+# AVX1-NEXT: %5(<4 x s32>), %6(<4 x s32>) = G_UNMERGE_VALUES %1(<8 x s32>)
+# AVX1-NEXT: %7(<4 x s32>) = G_ADD %3, %5
+# AVX1-NEXT: %8(<4 x s32>) = G_ADD %4, %6
+# AVX1-NEXT: %2(<8 x s32>) = G_MERGE_VALUES %7(<4 x s32>), %8(<4 x s32>)
+# AVX1-NEXT: RET 0
+#
+# AVX2: %0(<8 x s32>) = IMPLICIT_DEF
+# AVX2-NEXT: %1(<8 x s32>) = IMPLICIT_DEF
+# AVX2-NEXT: %2(<8 x s32>) = G_ADD %0, %1
+# AVX2-NEXT: RET 0
+body: |
+ bb.1 (%ir-block.0):
+ liveins: %ymm0, %ymm1
+
+ %0(<8 x s32>) = IMPLICIT_DEF
+ %1(<8 x s32>) = IMPLICIT_DEF
+ %2(<8 x s32>) = G_ADD %0, %1
+ RET 0
+
+...
+---
+name: test_add_v4i64
+# ALL-LABEL: name: test_add_v4i64
+alignment: 4
+legalized: false
+regBankSelected: false
+registers:
+ - { id: 0, class: _ }
+ - { id: 1, class: _ }
+ - { id: 2, class: _ }
+# AVX1: %0(<4 x s64>) = IMPLICIT_DEF
+# AVX1-NEXT: %1(<4 x s64>) = IMPLICIT_DEF
+# AVX1-NEXT: %3(<2 x s64>), %4(<2 x s64>) = G_UNMERGE_VALUES %0(<4 x s64>)
+# AVX1-NEXT: %5(<2 x s64>), %6(<2 x s64>) = G_UNMERGE_VALUES %1(<4 x s64>)
+# AVX1-NEXT: %7(<2 x s64>) = G_ADD %3, %5
+# AVX1-NEXT: %8(<2 x s64>) = G_ADD %4, %6
+# AVX1-NEXT: %2(<4 x s64>) = G_MERGE_VALUES %7(<2 x s64>), %8(<2 x s64>)
+# AVX1-NEXT: RET 0
+#
+# AVX2: %0(<4 x s64>) = IMPLICIT_DEF
+# AVX2-NEXT: %1(<4 x s64>) = IMPLICIT_DEF
+# AVX2-NEXT: %2(<4 x s64>) = G_ADD %0, %1
+# AVX2-NEXT: RET 0
+body: |
+ bb.1 (%ir-block.0):
+ liveins: %ymm0, %ymm1
+
+ %0(<4 x s64>) = IMPLICIT_DEF
+ %1(<4 x s64>) = IMPLICIT_DEF
+ %2(<4 x s64>) = G_ADD %0, %1
+ RET 0
+
+...
diff --git a/llvm/test/CodeGen/X86/GlobalISel/legalize-add-v512.mir b/llvm/test/CodeGen/X86/GlobalISel/legalize-add-v512.mir
new file mode 100644
index 00000000000..2b8b51acaa5
--- /dev/null
+++ b/llvm/test/CodeGen/X86/GlobalISel/legalize-add-v512.mir
@@ -0,0 +1,139 @@
+# RUN: llc -mtriple=x86_64-linux-gnu -mattr=+avx512f -global-isel -run-pass=legalizer %s -o - | FileCheck %s --check-prefix=ALL --check-prefix=AVX512F
+# RUN: llc -mtriple=x86_64-linux-gnu -mattr=+avx512f,+avx512bw -global-isel -run-pass=legalizer %s -o - | FileCheck %s --check-prefix=ALL --check-prefix=AVX512BW
+
+--- |
+ define void @test_add_v64i8() {
+ %ret = add <64 x i8> undef, undef
+ ret void
+ }
+
+ define void @test_add_v32i16() {
+ %ret = add <32 x i16> undef, undef
+ ret void
+ }
+
+ define void @test_add_v16i32() {
+ %ret = add <16 x i32> undef, undef
+ ret void
+ }
+
+ define void @test_add_v8i64() {
+ %ret = add <8 x i64> undef, undef
+ ret void
+ }
+
+...
+---
+name: test_add_v64i8
+# ALL-LABEL: name: test_add_v64i8
+alignment: 4
+legalized: false
+regBankSelected: false
+registers:
+ - { id: 0, class: _ }
+ - { id: 1, class: _ }
+ - { id: 2, class: _ }
+# AVX512F: %0(<64 x s8>) = IMPLICIT_DEF
+# AVX512F-NEXT: %1(<64 x s8>) = IMPLICIT_DEF
+# AVX512F-NEXT: %3(<32 x s8>), %4(<32 x s8>) = G_UNMERGE_VALUES %0(<64 x s8>)
+# AVX512F-NEXT: %5(<32 x s8>), %6(<32 x s8>) = G_UNMERGE_VALUES %1(<64 x s8>)
+# AVX512F-NEXT: %7(<32 x s8>) = G_ADD %3, %5
+# AVX512F-NEXT: %8(<32 x s8>) = G_ADD %4, %6
+# AVX512F-NEXT: %2(<64 x s8>) = G_MERGE_VALUES %7(<32 x s8>), %8(<32 x s8>)
+# AVX512F-NEXT: RET 0
+#
+# AVX512BW: %0(<64 x s8>) = IMPLICIT_DEF
+# AVX512BW-NEXT: %1(<64 x s8>) = IMPLICIT_DEF
+# AVX512BW-NEXT: %2(<64 x s8>) = G_ADD %0, %1
+# AVX512BW-NEXT: RET 0
+body: |
+ bb.1 (%ir-block.0):
+ liveins: %zmm0, %zmm1
+
+ %0(<64 x s8>) = IMPLICIT_DEF
+ %1(<64 x s8>) = IMPLICIT_DEF
+ %2(<64 x s8>) = G_ADD %0, %1
+ RET 0
+
+...
+---
+name: test_add_v32i16
+# ALL-LABEL: name: test_add_v32i16
+alignment: 4
+legalized: false
+regBankSelected: false
+registers:
+ - { id: 0, class: _ }
+ - { id: 1, class: _ }
+ - { id: 2, class: _ }
+# AVX512F: %0(<32 x s16>) = IMPLICIT_DEF
+# AVX512F-NEXT: %1(<32 x s16>) = IMPLICIT_DEF
+# AVX512F-NEXT: %3(<16 x s16>), %4(<16 x s16>) = G_UNMERGE_VALUES %0(<32 x s16>)
+# AVX512F-NEXT: %5(<16 x s16>), %6(<16 x s16>) = G_UNMERGE_VALUES %1(<32 x s16>)
+# AVX512F-NEXT: %7(<16 x s16>) = G_ADD %3, %5
+# AVX512F-NEXT: %8(<16 x s16>) = G_ADD %4, %6
+# AVX512F-NEXT: %2(<32 x s16>) = G_MERGE_VALUES %7(<16 x s16>), %8(<16 x s16>)
+# AVX512F-NEXT: RET 0
+#
+# AVX512BW: %0(<32 x s16>) = IMPLICIT_DEF
+# AVX512BW-NEXT: %1(<32 x s16>) = IMPLICIT_DEF
+# AVX512BW-NEXT: %2(<32 x s16>) = G_ADD %0, %1
+# AVX512BW-NEXT: RET 0
+body: |
+ bb.1 (%ir-block.0):
+ liveins: %zmm0, %zmm1
+
+ %0(<32 x s16>) = IMPLICIT_DEF
+ %1(<32 x s16>) = IMPLICIT_DEF
+ %2(<32 x s16>) = G_ADD %0, %1
+ RET 0
+
+...
+---
+name: test_add_v16i32
+# ALL-LABEL: name: test_add_v16i32
+alignment: 4
+legalized: false
+regBankSelected: false
+registers:
+ - { id: 0, class: _ }
+ - { id: 1, class: _ }
+ - { id: 2, class: _ }
+# ALL: %0(<16 x s32>) = IMPLICIT_DEF
+# ALL-NEXT: %1(<16 x s32>) = IMPLICIT_DEF
+# ALL-NEXT: %2(<16 x s32>) = G_ADD %0, %1
+# ALL-NEXT: RET 0
+body: |
+ bb.1 (%ir-block.0):
+ liveins: %zmm0, %zmm1
+
+ %0(<16 x s32>) = IMPLICIT_DEF
+ %1(<16 x s32>) = IMPLICIT_DEF
+ %2(<16 x s32>) = G_ADD %0, %1
+ RET 0
+
+...
+---
+name: test_add_v8i64
+# ALL-LABEL: name: test_add_v8i64
+alignment: 4
+legalized: false
+regBankSelected: false
+registers:
+ - { id: 0, class: _ }
+ - { id: 1, class: _ }
+ - { id: 2, class: _ }
+# ALL: %0(<8 x s64>) = IMPLICIT_DEF
+# ALL-NEXT: %1(<8 x s64>) = IMPLICIT_DEF
+# ALL-NEXT: %2(<8 x s64>) = G_ADD %0, %1
+# ALL-NEXT: RET 0
+body: |
+ bb.1 (%ir-block.0):
+ liveins: %zmm0, %zmm1
+
+ %0(<8 x s64>) = IMPLICIT_DEF
+ %1(<8 x s64>) = IMPLICIT_DEF
+ %2(<8 x s64>) = G_ADD %0, %1
+ RET 0
+
+...
diff --git a/llvm/test/CodeGen/X86/GlobalISel/legalize-sub-v128.mir b/llvm/test/CodeGen/X86/GlobalISel/legalize-sub-v128.mir
new file mode 100644
index 00000000000..2f90fc9a3c9
--- /dev/null
+++ b/llvm/test/CodeGen/X86/GlobalISel/legalize-sub-v128.mir
@@ -0,0 +1,119 @@
+# RUN: llc -mtriple=x86_64-linux-gnu -mattr=+sse2 -global-isel -run-pass=legalizer %s -o - | FileCheck %s --check-prefix=ALL --check-prefix=SSE2
+
+--- |
+ define void @test_sub_v16i8() {
+ %ret = sub <16 x i8> undef, undef
+ ret void
+ }
+
+ define void @test_sub_v8i16() {
+ %ret = sub <8 x i16> undef, undef
+ ret void
+ }
+
+ define void @test_sub_v4i32() {
+ %ret = sub <4 x i32> undef, undef
+ ret void
+ }
+
+ define void @test_sub_v2i64() {
+ %ret = sub <2 x i64> undef, undef
+ ret void
+ }
+...
+---
+name: test_sub_v16i8
+# ALL-LABEL: name: test_sub_v16i8
+alignment: 4
+legalized: false
+regBankSelected: false
+registers:
+ - { id: 0, class: _ }
+ - { id: 1, class: _ }
+ - { id: 2, class: _ }
+# ALL: %0(<16 x s8>) = IMPLICIT_DEF
+# ALL-NEXT: %1(<16 x s8>) = IMPLICIT_DEF
+# ALL-NEXT: %2(<16 x s8>) = G_SUB %0, %1
+# ALL-NEXT: RET 0
+body: |
+ bb.1 (%ir-block.0):
+ liveins: %xmm0, %xmm1
+
+ %0(<16 x s8>) = IMPLICIT_DEF
+ %1(<16 x s8>) = IMPLICIT_DEF
+ %2(<16 x s8>) = G_SUB %0, %1
+ RET 0
+
+...
+---
+name: test_sub_v8i16
+# ALL-LABEL: name: test_sub_v8i16
+alignment: 4
+legalized: false
+regBankSelected: false
+registers:
+ - { id: 0, class: _ }
+ - { id: 1, class: _ }
+ - { id: 2, class: _ }
+# ALL: %0(<8 x s16>) = IMPLICIT_DEF
+# ALL-NEXT: %1(<8 x s16>) = IMPLICIT_DEF
+# ALL-NEXT: %2(<8 x s16>) = G_SUB %0, %1
+# ALL-NEXT: RET 0
+body: |
+ bb.1 (%ir-block.0):
+ liveins: %xmm0, %xmm1
+
+ %0(<8 x s16>) = IMPLICIT_DEF
+ %1(<8 x s16>) = IMPLICIT_DEF
+ %2(<8 x s16>) = G_SUB %0, %1
+ RET 0
+
+...
+---
+name: test_sub_v4i32
+# ALL-LABEL: name: test_sub_v4i32
+alignment: 4
+legalized: false
+regBankSelected: false
+registers:
+ - { id: 0, class: _ }
+ - { id: 1, class: _ }
+ - { id: 2, class: _ }
+# ALL: %0(<4 x s32>) = IMPLICIT_DEF
+# ALL-NEXT: %1(<4 x s32>) = IMPLICIT_DEF
+# ALL-NEXT: %2(<4 x s32>) = G_SUB %0, %1
+# ALL-NEXT: RET 0
+body: |
+ bb.1 (%ir-block.0):
+ liveins: %xmm0, %xmm1
+
+ %0(<4 x s32>) = IMPLICIT_DEF
+ %1(<4 x s32>) = IMPLICIT_DEF
+ %2(<4 x s32>) = G_SUB %0, %1
+ RET 0
+
+...
+---
+name: test_sub_v2i64
+# ALL-LABEL: name: test_sub_v2i64
+alignment: 4
+legalized: false
+regBankSelected: false
+registers:
+ - { id: 0, class: _ }
+ - { id: 1, class: _ }
+ - { id: 2, class: _ }
+# ALL: %0(<2 x s64>) = IMPLICIT_DEF
+# ALL-NEXT: %1(<2 x s64>) = IMPLICIT_DEF
+# ALL-NEXT: %2(<2 x s64>) = G_SUB %0, %1
+# ALL-NEXT: RET 0
+body: |
+ bb.1 (%ir-block.0):
+ liveins: %xmm0, %xmm1
+
+ %0(<2 x s64>) = IMPLICIT_DEF
+ %1(<2 x s64>) = IMPLICIT_DEF
+ %2(<2 x s64>) = G_SUB %0, %1
+ RET 0
+
+...
diff --git a/llvm/test/CodeGen/X86/GlobalISel/legalize-sub-v256.mir b/llvm/test/CodeGen/X86/GlobalISel/legalize-sub-v256.mir
new file mode 100644
index 00000000000..9d07787b8ec
--- /dev/null
+++ b/llvm/test/CodeGen/X86/GlobalISel/legalize-sub-v256.mir
@@ -0,0 +1,120 @@
+# RUN: llc -mtriple=x86_64-linux-gnu -mattr=+avx2 -global-isel -run-pass=legalizer %s -o - | FileCheck %s --check-prefix=ALL --check-prefix=AVX2
+# TODO: add tests for additional configurations once their legalization is supported.
+--- |
+ define void @test_sub_v32i8() {
+ %ret = sub <32 x i8> undef, undef
+ ret void
+ }
+
+ define void @test_sub_v16i16() {
+ %ret = sub <16 x i16> undef, undef
+ ret void
+ }
+
+ define void @test_sub_v8i32() {
+ %ret = sub <8 x i32> undef, undef
+ ret void
+ }
+
+ define void @test_sub_v4i64() {
+ %ret = sub <4 x i64> undef, undef
+ ret void
+ }
+
+...
+---
+name: test_sub_v32i8
+# ALL-LABEL: name: test_sub_v32i8
+alignment: 4
+legalized: false
+regBankSelected: false
+registers:
+ - { id: 0, class: _ }
+ - { id: 1, class: _ }
+ - { id: 2, class: _ }
+# AVX2: %0(<32 x s8>) = IMPLICIT_DEF
+# AVX2-NEXT: %1(<32 x s8>) = IMPLICIT_DEF
+# AVX2-NEXT: %2(<32 x s8>) = G_SUB %0, %1
+# AVX2-NEXT: RET 0
+body: |
+ bb.1 (%ir-block.0):
+ liveins: %ymm0, %ymm1
+
+ %0(<32 x s8>) = IMPLICIT_DEF
+ %1(<32 x s8>) = IMPLICIT_DEF
+ %2(<32 x s8>) = G_SUB %0, %1
+ RET 0
+
+...
+---
+name: test_sub_v16i16
+# ALL-LABEL: name: test_sub_v16i16
+alignment: 4
+legalized: false
+regBankSelected: false
+registers:
+ - { id: 0, class: _ }
+ - { id: 1, class: _ }
+ - { id: 2, class: _ }
+# AVX2: %0(<16 x s16>) = IMPLICIT_DEF
+# AVX2-NEXT: %1(<16 x s16>) = IMPLICIT_DEF
+# AVX2-NEXT: %2(<16 x s16>) = G_SUB %0, %1
+# AVX2-NEXT: RET 0
+body: |
+ bb.1 (%ir-block.0):
+ liveins: %ymm0, %ymm1
+
+ %0(<16 x s16>) = IMPLICIT_DEF
+ %1(<16 x s16>) = IMPLICIT_DEF
+ %2(<16 x s16>) = G_SUB %0, %1
+ RET 0
+
+...
+---
+name: test_sub_v8i32
+# ALL-LABEL: name: test_sub_v8i32
+alignment: 4
+legalized: false
+regBankSelected: false
+registers:
+ - { id: 0, class: _ }
+ - { id: 1, class: _ }
+ - { id: 2, class: _ }
+# AVX2: %0(<8 x s32>) = IMPLICIT_DEF
+# AVX2-NEXT: %1(<8 x s32>) = IMPLICIT_DEF
+# AVX2-NEXT: %2(<8 x s32>) = G_SUB %0, %1
+# AVX2-NEXT: RET 0
+body: |
+ bb.1 (%ir-block.0):
+ liveins: %ymm0, %ymm1
+
+ %0(<8 x s32>) = IMPLICIT_DEF
+ %1(<8 x s32>) = IMPLICIT_DEF
+ %2(<8 x s32>) = G_SUB %0, %1
+ RET 0
+
+...
+---
+name: test_sub_v4i64
+# ALL-LABEL: name: test_sub_v4i64
+alignment: 4
+legalized: false
+regBankSelected: false
+registers:
+ - { id: 0, class: _ }
+ - { id: 1, class: _ }
+ - { id: 2, class: _ }
+# AVX2: %0(<4 x s64>) = IMPLICIT_DEF
+# AVX2-NEXT: %1(<4 x s64>) = IMPLICIT_DEF
+# AVX2-NEXT: %2(<4 x s64>) = G_SUB %0, %1
+# AVX2-NEXT: RET 0
+body: |
+ bb.1 (%ir-block.0):
+ liveins: %ymm0, %ymm1
+
+ %0(<4 x s64>) = IMPLICIT_DEF
+ %1(<4 x s64>) = IMPLICIT_DEF
+ %2(<4 x s64>) = G_SUB %0, %1
+ RET 0
+
+...
diff --git a/llvm/test/CodeGen/X86/GlobalISel/legalize-sub-v512.mir b/llvm/test/CodeGen/X86/GlobalISel/legalize-sub-v512.mir
new file mode 100644
index 00000000000..c88e074ca41
--- /dev/null
+++ b/llvm/test/CodeGen/X86/GlobalISel/legalize-sub-v512.mir
@@ -0,0 +1,120 @@
+# RUN: llc -mtriple=x86_64-linux-gnu -mattr=+avx512f,+avx512bw -global-isel -run-pass=legalizer %s -o - | FileCheck %s --check-prefix=ALL --check-prefix=AVX512BW
+# TODO: add tests for additional configurations once their legalization is supported.
+--- |
+ define void @test_sub_v64i8() {
+ %ret = sub <64 x i8> undef, undef
+ ret void
+ }
+
+ define void @test_sub_v32i16() {
+ %ret = sub <32 x i16> undef, undef
+ ret void
+ }
+
+ define void @test_sub_v16i32() {
+ %ret = sub <16 x i32> undef, undef
+ ret void
+ }
+
+ define void @test_sub_v8i64() {
+ %ret = sub <8 x i64> undef, undef
+ ret void
+ }
+
+...
+---
+name: test_sub_v64i8
+# ALL-LABEL: name: test_sub_v64i8
+alignment: 4
+legalized: false
+regBankSelected: false
+registers:
+ - { id: 0, class: _ }
+ - { id: 1, class: _ }
+ - { id: 2, class: _ }
+# AVX512BW: %0(<64 x s8>) = IMPLICIT_DEF
+# AVX512BW-NEXT: %1(<64 x s8>) = IMPLICIT_DEF
+# AVX512BW-NEXT: %2(<64 x s8>) = G_SUB %0, %1
+# AVX512BW-NEXT: RET 0
+body: |
+ bb.1 (%ir-block.0):
+ liveins: %zmm0, %zmm1
+
+ %0(<64 x s8>) = IMPLICIT_DEF
+ %1(<64 x s8>) = IMPLICIT_DEF
+ %2(<64 x s8>) = G_SUB %0, %1
+ RET 0
+
+...
+---
+name: test_sub_v32i16
+# ALL-LABEL: name: test_sub_v32i16
+alignment: 4
+legalized: false
+regBankSelected: false
+registers:
+ - { id: 0, class: _ }
+ - { id: 1, class: _ }
+ - { id: 2, class: _ }
+# AVX512BW: %0(<32 x s16>) = IMPLICIT_DEF
+# AVX512BW-NEXT: %1(<32 x s16>) = IMPLICIT_DEF
+# AVX512BW-NEXT: %2(<32 x s16>) = G_SUB %0, %1
+# AVX512BW-NEXT: RET 0
+body: |
+ bb.1 (%ir-block.0):
+ liveins: %zmm0, %zmm1
+
+ %0(<32 x s16>) = IMPLICIT_DEF
+ %1(<32 x s16>) = IMPLICIT_DEF
+ %2(<32 x s16>) = G_SUB %0, %1
+ RET 0
+
+...
+---
+name: test_sub_v16i32
+# ALL-LABEL: name: test_sub_v16i32
+alignment: 4
+legalized: false
+regBankSelected: false
+registers:
+ - { id: 0, class: _ }
+ - { id: 1, class: _ }
+ - { id: 2, class: _ }
+# ALL: %0(<16 x s32>) = IMPLICIT_DEF
+# ALL-NEXT: %1(<16 x s32>) = IMPLICIT_DEF
+# ALL-NEXT: %2(<16 x s32>) = G_SUB %0, %1
+# ALL-NEXT: RET 0
+body: |
+ bb.1 (%ir-block.0):
+ liveins: %zmm0, %zmm1
+
+ %0(<16 x s32>) = IMPLICIT_DEF
+ %1(<16 x s32>) = IMPLICIT_DEF
+ %2(<16 x s32>) = G_SUB %0, %1
+ RET 0
+
+...
+---
+name: test_sub_v8i64
+# ALL-LABEL: name: test_sub_v8i64
+alignment: 4
+legalized: false
+regBankSelected: false
+registers:
+ - { id: 0, class: _ }
+ - { id: 1, class: _ }
+ - { id: 2, class: _ }
+# ALL: %0(<8 x s64>) = IMPLICIT_DEF
+# ALL-NEXT: %1(<8 x s64>) = IMPLICIT_DEF
+# ALL-NEXT: %2(<8 x s64>) = G_SUB %0, %1
+# ALL-NEXT: RET 0
+body: |
+ bb.1 (%ir-block.0):
+ liveins: %zmm0, %zmm1
+
+ %0(<8 x s64>) = IMPLICIT_DEF
+ %1(<8 x s64>) = IMPLICIT_DEF
+ %2(<8 x s64>) = G_SUB %0, %1
+ RET 0
+
+...
diff --git a/llvm/test/CodeGen/X86/GlobalISel/regbankselect-AVX2.mir b/llvm/test/CodeGen/X86/GlobalISel/regbankselect-AVX2.mir
index 446db56b992..f925c836f3d 100644
--- a/llvm/test/CodeGen/X86/GlobalISel/regbankselect-AVX2.mir
+++ b/llvm/test/CodeGen/X86/GlobalISel/regbankselect-AVX2.mir
@@ -5,6 +5,15 @@
define void @test_mul_vec256() {
ret void
}
+
+ define void @test_add_vec256() {
+ ret void
+ }
+
+ define void @test_sub_vec256() {
+ ret void
+ }
+
...
---
name: test_mul_vec256
@@ -29,3 +38,49 @@ body: |
RET 0
...
+---
+name: test_add_vec256
+alignment: 4
+legalized: true
+regBankSelected: false
+selected: false
+tracksRegLiveness: true
+# CHECK-LABEL: name: test_add_vec256
+# CHECK: registers:
+# CHECK: - { id: 0, class: vecr }
+# CHECK: - { id: 1, class: vecr }
+registers:
+ - { id: 0, class: _ }
+ - { id: 1, class: _ }
+ - { id: 2, class: _ }
+body: |
+ bb.1 (%ir-block.0):
+
+ %0(<8 x s32>) = IMPLICIT_DEF
+ %1(<8 x s32>) = G_ADD %0, %0
+ RET 0
+
+...
+---
+name: test_sub_vec256
+alignment: 4
+legalized: true
+regBankSelected: false
+selected: false
+tracksRegLiveness: true
+# CHECK-LABEL: name: test_sub_vec256
+# CHECK: registers:
+# CHECK: - { id: 0, class: vecr }
+# CHECK: - { id: 1, class: vecr }
+registers:
+ - { id: 0, class: _ }
+ - { id: 1, class: _ }
+ - { id: 2, class: _ }
+body: |
+ bb.1 (%ir-block.0):
+
+ %0(<8 x s32>) = IMPLICIT_DEF
+ %1(<8 x s32>) = G_SUB %0, %0
+ RET 0
+
+...
diff --git a/llvm/test/CodeGen/X86/GlobalISel/regbankselect-AVX512.mir b/llvm/test/CodeGen/X86/GlobalISel/regbankselect-AVX512.mir
index f824ee12dcf..e0c12ff44a2 100644
--- a/llvm/test/CodeGen/X86/GlobalISel/regbankselect-AVX512.mir
+++ b/llvm/test/CodeGen/X86/GlobalISel/regbankselect-AVX512.mir
@@ -7,6 +7,14 @@
ret void
}
+ define void @test_add_vec512() {
+ ret void
+ }
+
+ define void @test_sub_vec512() {
+ ret void
+ }
+
...
---
name: test_mul_vec512
@@ -31,3 +39,49 @@ body: |
RET 0
...
+---
+name: test_add_vec512
+alignment: 4
+legalized: true
+regBankSelected: false
+selected: false
+tracksRegLiveness: true
+# CHECK-LABEL: name: test_add_vec512
+# CHECK: registers:
+# CHECK: - { id: 0, class: vecr }
+# CHECK: - { id: 1, class: vecr }
+registers:
+ - { id: 0, class: _ }
+ - { id: 1, class: _ }
+ - { id: 2, class: _ }
+body: |
+ bb.1 (%ir-block.0):
+
+ %0(<16 x s32>) = IMPLICIT_DEF
+ %1(<16 x s32>) = G_ADD %0, %0
+ RET 0
+
+...
+---
+name: test_sub_vec512
+alignment: 4
+legalized: true
+regBankSelected: false
+selected: false
+tracksRegLiveness: true
+# CHECK-LABEL: name: test_sub_vec512
+# CHECK: registers:
+# CHECK: - { id: 0, class: vecr }
+# CHECK: - { id: 1, class: vecr }
+registers:
+ - { id: 0, class: _ }
+ - { id: 1, class: _ }
+ - { id: 2, class: _ }
+body: |
+ bb.1 (%ir-block.0):
+
+ %0(<16 x s32>) = IMPLICIT_DEF
+ %1(<16 x s32>) = G_SUB %0, %0
+ RET 0
+
+...
diff --git a/llvm/test/CodeGen/X86/GlobalISel/select-add-v128.mir b/llvm/test/CodeGen/X86/GlobalISel/select-add-v128.mir
new file mode 100644
index 00000000000..a39702340bc
--- /dev/null
+++ b/llvm/test/CodeGen/X86/GlobalISel/select-add-v128.mir
@@ -0,0 +1,195 @@
+# RUN: llc -mtriple=x86_64-linux-gnu -mattr=+sse2 -global-isel -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=ALL --check-prefix=NOVL --check-prefix=SSE2
+# RUN: llc -mtriple=x86_64-linux-gnu -mattr=+avx -global-isel -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=ALL --check-prefix=NOVL --check-prefix=AVX1
+# RUN: llc -mtriple=x86_64-linux-gnu -mattr=+avx512f,+avx512vl -global-isel -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=ALL --check-prefix=AVX512VL
+# RUN: llc -mtriple=x86_64-linux-gnu -mattr=+avx512f,+avx512vl,+avx512bw -global-isel -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=ALL --check-prefix=AVX512BWVL
+
+--- |
+ define <16 x i8> @test_add_v16i8(<16 x i8> %arg1, <16 x i8> %arg2) {
+ %ret = add <16 x i8> %arg1, %arg2
+ ret <16 x i8> %ret
+ }
+
+ define <8 x i16> @test_add_v8i16(<8 x i16> %arg1, <8 x i16> %arg2) {
+ %ret = add <8 x i16> %arg1, %arg2
+ ret <8 x i16> %ret
+ }
+
+ define <4 x i32> @test_add_v4i32(<4 x i32> %arg1, <4 x i32> %arg2) {
+ %ret = add <4 x i32> %arg1, %arg2
+ ret <4 x i32> %ret
+ }
+
+ define <2 x i64> @test_add_v2i64(<2 x i64> %arg1, <2 x i64> %arg2) {
+ %ret = add <2 x i64> %arg1, %arg2
+ ret <2 x i64> %ret
+ }
+
+...
+---
+name: test_add_v16i8
+# ALL-LABEL: name: test_add_v16i8
+alignment: 4
+legalized: true
+regBankSelected: true
+# NOVL: registers:
+# NOVL-NEXT: - { id: 0, class: vr128 }
+# NOVL-NEXT: - { id: 1, class: vr128 }
+# NOVL-NEXT: - { id: 2, class: vr128 }
+#
+# AVX512VL: registers:
+# AVX512VL-NEXT: - { id: 0, class: vr128 }
+# AVX512VL-NEXT: - { id: 1, class: vr128 }
+# AVX512VL-NEXT: - { id: 2, class: vr128 }
+#
+# AVX512BWVL: registers:
+# AVX512BWVL-NEXT: - { id: 0, class: vr128x }
+# AVX512BWVL-NEXT: - { id: 1, class: vr128x }
+# AVX512BWVL-NEXT: - { id: 2, class: vr128x }
+registers:
+ - { id: 0, class: vecr }
+ - { id: 1, class: vecr }
+ - { id: 2, class: vecr }
+# SSE2: %2 = PADDBrr %0, %1
+#
+# AVX1: %2 = VPADDBrr %0, %1
+#
+# AVX512VL: %2 = VPADDBrr %0, %1
+#
+# AVX512BWVL: %2 = VPADDBZ128rr %0, %1
+body: |
+ bb.1 (%ir-block.0):
+ liveins: %xmm0, %xmm1
+
+ %0(<16 x s8>) = COPY %xmm0
+ %1(<16 x s8>) = COPY %xmm1
+ %2(<16 x s8>) = G_ADD %0, %1
+ %xmm0 = COPY %2(<16 x s8>)
+ RET 0, implicit %xmm0
+
+...
+---
+name: test_add_v8i16
+# ALL-LABEL: name: test_add_v8i16
+alignment: 4
+legalized: true
+regBankSelected: true
+# NOVL: registers:
+# NOVL-NEXT: - { id: 0, class: vr128 }
+# NOVL-NEXT: - { id: 1, class: vr128 }
+# NOVL-NEXT: - { id: 2, class: vr128 }
+#
+# AVX512VL: registers:
+# AVX512VL-NEXT: - { id: 0, class: vr128 }
+# AVX512VL-NEXT: - { id: 1, class: vr128 }
+# AVX512VL-NEXT: - { id: 2, class: vr128 }
+#
+# AVX512BWVL: registers:
+# AVX512BWVL-NEXT: - { id: 0, class: vr128x }
+# AVX512BWVL-NEXT: - { id: 1, class: vr128x }
+# AVX512BWVL-NEXT: - { id: 2, class: vr128x }
+registers:
+ - { id: 0, class: vecr }
+ - { id: 1, class: vecr }
+ - { id: 2, class: vecr }
+# SSE2: %2 = PADDWrr %0, %1
+#
+# AVX1: %2 = VPADDWrr %0, %1
+#
+# AVX512VL: %2 = VPADDWrr %0, %1
+#
+# AVX512BWVL: %2 = VPADDWZ128rr %0, %1
+body: |
+ bb.1 (%ir-block.0):
+ liveins: %xmm0, %xmm1
+
+ %0(<8 x s16>) = COPY %xmm0
+ %1(<8 x s16>) = COPY %xmm1
+ %2(<8 x s16>) = G_ADD %0, %1
+ %xmm0 = COPY %2(<8 x s16>)
+ RET 0, implicit %xmm0
+
+...
+---
+name: test_add_v4i32
+# ALL-LABEL: name: test_add_v4i32
+alignment: 4
+legalized: true
+regBankSelected: true
+# NOVL: registers:
+# NOVL-NEXT: - { id: 0, class: vr128 }
+# NOVL-NEXT: - { id: 1, class: vr128 }
+# NOVL-NEXT: - { id: 2, class: vr128 }
+#
+# AVX512VL: registers:
+# AVX512VL-NEXT: - { id: 0, class: vr128x }
+# AVX512VL-NEXT: - { id: 1, class: vr128x }
+# AVX512VL-NEXT: - { id: 2, class: vr128x }
+#
+# AVX512BWVL: registers:
+# AVX512BWVL-NEXT: - { id: 0, class: vr128x }
+# AVX512BWVL-NEXT: - { id: 1, class: vr128x }
+# AVX512BWVL-NEXT: - { id: 2, class: vr128x }
+registers:
+ - { id: 0, class: vecr }
+ - { id: 1, class: vecr }
+ - { id: 2, class: vecr }
+# SSE2: %2 = PADDDrr %0, %1
+#
+# AVX1: %2 = VPADDDrr %0, %1
+#
+# AVX512VL: %2 = VPADDDZ128rr %0, %1
+#
+# AVX512BWVL: %2 = VPADDDZ128rr %0, %1
+body: |
+ bb.1 (%ir-block.0):
+ liveins: %xmm0, %xmm1
+
+ %0(<4 x s32>) = COPY %xmm0
+ %1(<4 x s32>) = COPY %xmm1
+ %2(<4 x s32>) = G_ADD %0, %1
+ %xmm0 = COPY %2(<4 x s32>)
+ RET 0, implicit %xmm0
+
+...
+---
+name: test_add_v2i64
+# ALL-LABEL: name: test_add_v2i64
+alignment: 4
+legalized: true
+regBankSelected: true
+# NOVL: registers:
+# NOVL-NEXT: - { id: 0, class: vr128 }
+# NOVL-NEXT: - { id: 1, class: vr128 }
+# NOVL-NEXT: - { id: 2, class: vr128 }
+#
+# AVX512VL: registers:
+# AVX512VL-NEXT: - { id: 0, class: vr128x }
+# AVX512VL-NEXT: - { id: 1, class: vr128x }
+# AVX512VL-NEXT: - { id: 2, class: vr128x }
+#
+# AVX512BWVL: registers:
+# AVX512BWVL-NEXT: - { id: 0, class: vr128x }
+# AVX512BWVL-NEXT: - { id: 1, class: vr128x }
+# AVX512BWVL-NEXT: - { id: 2, class: vr128x }
+registers:
+ - { id: 0, class: vecr }
+ - { id: 1, class: vecr }
+ - { id: 2, class: vecr }
+# SSE2: %2 = PADDQrr %0, %1
+#
+# AVX1: %2 = VPADDQrr %0, %1
+#
+# AVX512VL: %2 = VPADDQZ128rr %0, %1
+#
+# AVX512BWVL: %2 = VPADDQZ128rr %0, %1
+body: |
+ bb.1 (%ir-block.0):
+ liveins: %xmm0, %xmm1
+
+ %0(<2 x s64>) = COPY %xmm0
+ %1(<2 x s64>) = COPY %xmm1
+ %2(<2 x s64>) = G_ADD %0, %1
+ %xmm0 = COPY %2(<2 x s64>)
+ RET 0, implicit %xmm0
+
+...
diff --git a/llvm/test/CodeGen/X86/GlobalISel/select-add-v256.mir b/llvm/test/CodeGen/X86/GlobalISel/select-add-v256.mir
new file mode 100644
index 00000000000..7556c210412
--- /dev/null
+++ b/llvm/test/CodeGen/X86/GlobalISel/select-add-v256.mir
@@ -0,0 +1,185 @@
+# RUN: llc -mtriple=x86_64-linux-gnu -mattr=+avx2 -global-isel -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=ALL --check-prefix=AVX2
+# RUN: llc -mtriple=x86_64-linux-gnu -mattr=+avx512f,+avx512vl -global-isel -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=ALL --check-prefix=AVX512VL
+# RUN: llc -mtriple=x86_64-linux-gnu -mattr=+avx512f,+avx512vl,+avx512bw -global-isel -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=ALL --check-prefix=AVX512BWVL
+
+--- |
+ define <32 x i8> @test_add_v32i8(<32 x i8> %arg1, <32 x i8> %arg2) {
+ %ret = add <32 x i8> %arg1, %arg2
+ ret <32 x i8> %ret
+ }
+
+ define <16 x i16> @test_add_v16i16(<16 x i16> %arg1, <16 x i16> %arg2) {
+ %ret = add <16 x i16> %arg1, %arg2
+ ret <16 x i16> %ret
+ }
+
+ define <8 x i32> @test_add_v8i32(<8 x i32> %arg1, <8 x i32> %arg2) {
+ %ret = add <8 x i32> %arg1, %arg2
+ ret <8 x i32> %ret
+ }
+
+ define <4 x i64> @test_add_v4i64(<4 x i64> %arg1, <4 x i64> %arg2) {
+ %ret = add <4 x i64> %arg1, %arg2
+ ret <4 x i64> %ret
+ }
+...
+---
+name: test_add_v32i8
+# ALL-LABEL: name: test_add_v32i8
+alignment: 4
+legalized: true
+regBankSelected: true
+# AVX2: registers:
+# AVX2-NEXT: - { id: 0, class: vr256 }
+# AVX2-NEXT: - { id: 1, class: vr256 }
+# AVX2-NEXT: - { id: 2, class: vr256 }
+#
+# AVX512VL: registers:
+# AVX512VL-NEXT: - { id: 0, class: vr256 }
+# AVX512VL-NEXT: - { id: 1, class: vr256 }
+# AVX512VL-NEXT: - { id: 2, class: vr256 }
+#
+# AVX512BWVL: registers:
+# AVX512BWVL-NEXT: - { id: 0, class: vr256x }
+# AVX512BWVL-NEXT: - { id: 1, class: vr256x }
+# AVX512BWVL-NEXT: - { id: 2, class: vr256x }
+registers:
+ - { id: 0, class: vecr }
+ - { id: 1, class: vecr }
+ - { id: 2, class: vecr }
+# AVX2: %2 = VPADDBYrr %0, %1
+#
+# AVX512VL: %2 = VPADDBYrr %0, %1
+#
+# AVX512BWVL: %2 = VPADDBZ256rr %0, %1
+body: |
+ bb.1 (%ir-block.0):
+ liveins: %ymm0, %ymm1
+
+ %0(<32 x s8>) = COPY %ymm0
+ %1(<32 x s8>) = COPY %ymm1
+ %2(<32 x s8>) = G_ADD %0, %1
+ %ymm0 = COPY %2(<32 x s8>)
+ RET 0, implicit %ymm0
+
+...
+---
+name: test_add_v16i16
+# ALL-LABEL: name: test_add_v16i16
+alignment: 4
+legalized: true
+regBankSelected: true
+# AVX2: registers:
+# AVX2-NEXT: - { id: 0, class: vr256 }
+# AVX2-NEXT: - { id: 1, class: vr256 }
+# AVX2-NEXT: - { id: 2, class: vr256 }
+#
+# AVX512VL: registers:
+# AVX512VL-NEXT: - { id: 0, class: vr256 }
+# AVX512VL-NEXT: - { id: 1, class: vr256 }
+# AVX512VL-NEXT: - { id: 2, class: vr256 }
+#
+# AVX512BWVL: registers:
+# AVX512BWVL-NEXT: - { id: 0, class: vr256x }
+# AVX512BWVL-NEXT: - { id: 1, class: vr256x }
+# AVX512BWVL-NEXT: - { id: 2, class: vr256x }
+registers:
+ - { id: 0, class: vecr }
+ - { id: 1, class: vecr }
+ - { id: 2, class: vecr }
+# AVX2: %2 = VPADDWYrr %0, %1
+#
+# AVX512VL: %2 = VPADDWYrr %0, %1
+#
+# AVX512BWVL: %2 = VPADDWZ256rr %0, %1
+body: |
+ bb.1 (%ir-block.0):
+ liveins: %ymm0, %ymm1
+
+ %0(<16 x s16>) = COPY %ymm0
+ %1(<16 x s16>) = COPY %ymm1
+ %2(<16 x s16>) = G_ADD %0, %1
+ %ymm0 = COPY %2(<16 x s16>)
+ RET 0, implicit %ymm0
+
+...
+---
+name: test_add_v8i32
+# ALL-LABEL: name: test_add_v8i32
+alignment: 4
+legalized: true
+regBankSelected: true
+# AVX2: registers:
+# AVX2-NEXT: - { id: 0, class: vr256 }
+# AVX2-NEXT: - { id: 1, class: vr256 }
+# AVX2-NEXT: - { id: 2, class: vr256 }
+#
+# AVX512VL: registers:
+# AVX512VL-NEXT: - { id: 0, class: vr256x }
+# AVX512VL-NEXT: - { id: 1, class: vr256x }
+# AVX512VL-NEXT: - { id: 2, class: vr256x }
+#
+# AVX512BWVL: registers:
+# AVX512BWVL-NEXT: - { id: 0, class: vr256x }
+# AVX512BWVL-NEXT: - { id: 1, class: vr256x }
+# AVX512BWVL-NEXT: - { id: 2, class: vr256x }
+registers:
+ - { id: 0, class: vecr }
+ - { id: 1, class: vecr }
+ - { id: 2, class: vecr }
+# AVX2: %2 = VPADDDYrr %0, %1
+#
+# AVX512VL: %2 = VPADDDZ256rr %0, %1
+#
+# AVX512BWVL: %2 = VPADDDZ256rr %0, %1
+body: |
+ bb.1 (%ir-block.0):
+ liveins: %ymm0, %ymm1
+
+ %0(<8 x s32>) = COPY %ymm0
+ %1(<8 x s32>) = COPY %ymm1
+ %2(<8 x s32>) = G_ADD %0, %1
+ %ymm0 = COPY %2(<8 x s32>)
+ RET 0, implicit %ymm0
+
+...
+---
+name: test_add_v4i64
+# ALL-LABEL: name: test_add_v4i64
+alignment: 4
+legalized: true
+regBankSelected: true
+# AVX2: registers:
+# AVX2-NEXT: - { id: 0, class: vr256 }
+# AVX2-NEXT: - { id: 1, class: vr256 }
+# AVX2-NEXT: - { id: 2, class: vr256 }
+#
+# AVX512VL: registers:
+# AVX512VL-NEXT: - { id: 0, class: vr256x }
+# AVX512VL-NEXT: - { id: 1, class: vr256x }
+# AVX512VL-NEXT: - { id: 2, class: vr256x }
+#
+# AVX512BWVL: registers:
+# AVX512BWVL-NEXT: - { id: 0, class: vr256x }
+# AVX512BWVL-NEXT: - { id: 1, class: vr256x }
+# AVX512BWVL-NEXT: - { id: 2, class: vr256x }
+registers:
+ - { id: 0, class: vecr }
+ - { id: 1, class: vecr }
+ - { id: 2, class: vecr }
+# AVX2: %2 = VPADDQYrr %0, %1
+#
+# AVX512VL: %2 = VPADDQZ256rr %0, %1
+#
+# AVX512BWVL: %2 = VPADDQZ256rr %0, %1
+body: |
+ bb.1 (%ir-block.0):
+ liveins: %ymm0, %ymm1
+
+ %0(<4 x s64>) = COPY %ymm0
+ %1(<4 x s64>) = COPY %ymm1
+ %2(<4 x s64>) = G_ADD %0, %1
+ %ymm0 = COPY %2(<4 x s64>)
+ RET 0, implicit %ymm0
+
+...
diff --git a/llvm/test/CodeGen/X86/GlobalISel/select-add-v512.mir b/llvm/test/CodeGen/X86/GlobalISel/select-add-v512.mir
new file mode 100644
index 00000000000..e90be4e996f
--- /dev/null
+++ b/llvm/test/CodeGen/X86/GlobalISel/select-add-v512.mir
@@ -0,0 +1,130 @@
+# RUN: llc -mtriple=x86_64-linux-gnu -global-isel -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=ALL
+
+--- |
+ define <64 x i8> @test_add_v64i8(<64 x i8> %arg1, <64 x i8> %arg2) #0 {
+ %ret = add <64 x i8> %arg1, %arg2
+ ret <64 x i8> %ret
+ }
+
+ define <32 x i16> @test_add_v32i16(<32 x i16> %arg1, <32 x i16> %arg2) #0 {
+ %ret = add <32 x i16> %arg1, %arg2
+ ret <32 x i16> %ret
+ }
+
+ define <16 x i32> @test_add_v16i32(<16 x i32> %arg1, <16 x i32> %arg2) #1 {
+ %ret = add <16 x i32> %arg1, %arg2
+ ret <16 x i32> %ret
+ }
+
+ define <8 x i64> @test_add_v8i64(<8 x i64> %arg1, <8 x i64> %arg2) #1 {
+ %ret = add <8 x i64> %arg1, %arg2
+ ret <8 x i64> %ret
+ }
+
+ attributes #0 = { "target-features"="+avx512f,+avx512bw" }
+ attributes #1 = { "target-features"="+avx512f" }
+...
+---
+name: test_add_v64i8
+# ALL-LABEL: name: test_add_v64i8
+alignment: 4
+legalized: true
+regBankSelected: true
+# ALL: registers:
+# ALL-NEXT: - { id: 0, class: vr512 }
+# ALL-NEXT: - { id: 1, class: vr512 }
+# ALL-NEXT: - { id: 2, class: vr512 }
+registers:
+ - { id: 0, class: vecr }
+ - { id: 1, class: vecr }
+ - { id: 2, class: vecr }
+# ALL: %2 = VPADDBZrr %0, %1
+body: |
+ bb.1 (%ir-block.0):
+ liveins: %zmm0, %zmm1
+
+ %0(<64 x s8>) = COPY %zmm0
+ %1(<64 x s8>) = COPY %zmm1
+ %2(<64 x s8>) = G_ADD %0, %1
+ %zmm0 = COPY %2(<64 x s8>)
+ RET 0, implicit %zmm0
+
+...
+---
+name: test_add_v32i16
+# ALL-LABEL: name: test_add_v32i16
+alignment: 4
+legalized: true
+regBankSelected: true
+# ALL: registers:
+# ALL-NEXT: - { id: 0, class: vr512 }
+# ALL-NEXT: - { id: 1, class: vr512 }
+# ALL-NEXT: - { id: 2, class: vr512 }
+registers:
+ - { id: 0, class: vecr }
+ - { id: 1, class: vecr }
+ - { id: 2, class: vecr }
+# ALL: %2 = VPADDWZrr %0, %1
+body: |
+ bb.1 (%ir-block.0):
+ liveins: %zmm0, %zmm1
+
+ %0(<32 x s16>) = COPY %zmm0
+ %1(<32 x s16>) = COPY %zmm1
+ %2(<32 x s16>) = G_ADD %0, %1
+ %zmm0 = COPY %2(<32 x s16>)
+ RET 0, implicit %zmm0
+
+...
+---
+name: test_add_v16i32
+# ALL-LABEL: name: test_add_v16i32
+alignment: 4
+legalized: true
+regBankSelected: true
+# ALL: registers:
+# ALL-NEXT: - { id: 0, class: vr512 }
+# ALL-NEXT: - { id: 1, class: vr512 }
+# ALL-NEXT: - { id: 2, class: vr512 }
+registers:
+ - { id: 0, class: vecr }
+ - { id: 1, class: vecr }
+ - { id: 2, class: vecr }
+# ALL: %2 = VPADDDZrr %0, %1
+body: |
+ bb.1 (%ir-block.0):
+ liveins: %zmm0, %zmm1
+
+ %0(<16 x s32>) = COPY %zmm0
+ %1(<16 x s32>) = COPY %zmm1
+ %2(<16 x s32>) = G_ADD %0, %1
+ %zmm0 = COPY %2(<16 x s32>)
+ RET 0, implicit %zmm0
+
+...
+---
+name: test_add_v8i64
+# ALL-LABEL: name: test_add_v8i64
+alignment: 4
+legalized: true
+regBankSelected: true
+# ALL: registers:
+# ALL-NEXT: - { id: 0, class: vr512 }
+# ALL-NEXT: - { id: 1, class: vr512 }
+# ALL-NEXT: - { id: 2, class: vr512 }
+registers:
+ - { id: 0, class: vecr }
+ - { id: 1, class: vecr }
+ - { id: 2, class: vecr }
+# ALL: %2 = VPADDQZrr %0, %1
+body: |
+ bb.1 (%ir-block.0):
+ liveins: %zmm0, %zmm1
+
+ %0(<8 x s64>) = COPY %zmm0
+ %1(<8 x s64>) = COPY %zmm1
+ %2(<8 x s64>) = G_ADD %0, %1
+ %zmm0 = COPY %2(<8 x s64>)
+ RET 0, implicit %zmm0
+
+...
diff --git a/llvm/test/CodeGen/X86/GlobalISel/select-sub-v128.mir b/llvm/test/CodeGen/X86/GlobalISel/select-sub-v128.mir
new file mode 100644
index 00000000000..d60d4155e29
--- /dev/null
+++ b/llvm/test/CodeGen/X86/GlobalISel/select-sub-v128.mir
@@ -0,0 +1,195 @@
+# RUN: llc -mtriple=x86_64-linux-gnu -mattr=+sse2 -global-isel -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=ALL --check-prefix=NOVL --check-prefix=SSE2
+# RUN: llc -mtriple=x86_64-linux-gnu -mattr=+avx -global-isel -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=ALL --check-prefix=NOVL --check-prefix=AVX1
+# RUN: llc -mtriple=x86_64-linux-gnu -mattr=+avx512f,+avx512vl -global-isel -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=ALL --check-prefix=AVX512VL
+# RUN: llc -mtriple=x86_64-linux-gnu -mattr=+avx512f,+avx512vl,+avx512bw -global-isel -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=ALL --check-prefix=AVX512BWVL
+
+--- |
+ define <16 x i8> @test_sub_v16i8(<16 x i8> %arg1, <16 x i8> %arg2) {
+ %ret = sub <16 x i8> %arg1, %arg2
+ ret <16 x i8> %ret
+ }
+
+ define <8 x i16> @test_sub_v8i16(<8 x i16> %arg1, <8 x i16> %arg2) {
+ %ret = sub <8 x i16> %arg1, %arg2
+ ret <8 x i16> %ret
+ }
+
+ define <4 x i32> @test_sub_v4i32(<4 x i32> %arg1, <4 x i32> %arg2) {
+ %ret = sub <4 x i32> %arg1, %arg2
+ ret <4 x i32> %ret
+ }
+
+ define <2 x i64> @test_sub_v2i64(<2 x i64> %arg1, <2 x i64> %arg2) {
+ %ret = sub <2 x i64> %arg1, %arg2
+ ret <2 x i64> %ret
+ }
+
+...
+---
+name: test_sub_v16i8
+# ALL-LABEL: name: test_sub_v16i8
+alignment: 4
+legalized: true
+regBankSelected: true
+# NOVL: registers:
+# NOVL-NEXT: - { id: 0, class: vr128 }
+# NOVL-NEXT: - { id: 1, class: vr128 }
+# NOVL-NEXT: - { id: 2, class: vr128 }
+#
+# AVX512VL: registers:
+# AVX512VL-NEXT: - { id: 0, class: vr128 }
+# AVX512VL-NEXT: - { id: 1, class: vr128 }
+# AVX512VL-NEXT: - { id: 2, class: vr128 }
+#
+# AVX512BWVL: registers:
+# AVX512BWVL-NEXT: - { id: 0, class: vr128x }
+# AVX512BWVL-NEXT: - { id: 1, class: vr128x }
+# AVX512BWVL-NEXT: - { id: 2, class: vr128x }
+registers:
+ - { id: 0, class: vecr }
+ - { id: 1, class: vecr }
+ - { id: 2, class: vecr }
+# SSE2: %2 = PSUBBrr %0, %1
+#
+# AVX1: %2 = VPSUBBrr %0, %1
+#
+# AVX512VL: %2 = VPSUBBrr %0, %1
+#
+# AVX512BWVL: %2 = VPSUBBZ128rr %0, %1
+body: |
+ bb.1 (%ir-block.0):
+ liveins: %xmm0, %xmm1
+
+ %0(<16 x s8>) = COPY %xmm0
+ %1(<16 x s8>) = COPY %xmm1
+ %2(<16 x s8>) = G_SUB %0, %1
+ %xmm0 = COPY %2(<16 x s8>)
+ RET 0, implicit %xmm0
+
+...
+---
+name: test_sub_v8i16
+# ALL-LABEL: name: test_sub_v8i16
+alignment: 4
+legalized: true
+regBankSelected: true
+# NOVL: registers:
+# NOVL-NEXT: - { id: 0, class: vr128 }
+# NOVL-NEXT: - { id: 1, class: vr128 }
+# NOVL-NEXT: - { id: 2, class: vr128 }
+#
+# AVX512VL: registers:
+# AVX512VL-NEXT: - { id: 0, class: vr128 }
+# AVX512VL-NEXT: - { id: 1, class: vr128 }
+# AVX512VL-NEXT: - { id: 2, class: vr128 }
+#
+# AVX512BWVL: registers:
+# AVX512BWVL-NEXT: - { id: 0, class: vr128x }
+# AVX512BWVL-NEXT: - { id: 1, class: vr128x }
+# AVX512BWVL-NEXT: - { id: 2, class: vr128x }
+registers:
+ - { id: 0, class: vecr }
+ - { id: 1, class: vecr }
+ - { id: 2, class: vecr }
+# SSE2: %2 = PSUBWrr %0, %1
+#
+# AVX1: %2 = VPSUBWrr %0, %1
+#
+# AVX512VL: %2 = VPSUBWrr %0, %1
+#
+# AVX512BWVL: %2 = VPSUBWZ128rr %0, %1
+body: |
+ bb.1 (%ir-block.0):
+ liveins: %xmm0, %xmm1
+
+ %0(<8 x s16>) = COPY %xmm0
+ %1(<8 x s16>) = COPY %xmm1
+ %2(<8 x s16>) = G_SUB %0, %1
+ %xmm0 = COPY %2(<8 x s16>)
+ RET 0, implicit %xmm0
+
+...
+---
+name: test_sub_v4i32
+# ALL-LABEL: name: test_sub_v4i32
+alignment: 4
+legalized: true
+regBankSelected: true
+# NOVL: registers:
+# NOVL-NEXT: - { id: 0, class: vr128 }
+# NOVL-NEXT: - { id: 1, class: vr128 }
+# NOVL-NEXT: - { id: 2, class: vr128 }
+#
+# AVX512VL: registers:
+# AVX512VL-NEXT: - { id: 0, class: vr128x }
+# AVX512VL-NEXT: - { id: 1, class: vr128x }
+# AVX512VL-NEXT: - { id: 2, class: vr128x }
+#
+# AVX512BWVL: registers:
+# AVX512BWVL-NEXT: - { id: 0, class: vr128x }
+# AVX512BWVL-NEXT: - { id: 1, class: vr128x }
+# AVX512BWVL-NEXT: - { id: 2, class: vr128x }
+registers:
+ - { id: 0, class: vecr }
+ - { id: 1, class: vecr }
+ - { id: 2, class: vecr }
+# SSE2: %2 = PSUBDrr %0, %1
+#
+# AVX1: %2 = VPSUBDrr %0, %1
+#
+# AVX512VL: %2 = VPSUBDZ128rr %0, %1
+#
+# AVX512BWVL: %2 = VPSUBDZ128rr %0, %1
+body: |
+ bb.1 (%ir-block.0):
+ liveins: %xmm0, %xmm1
+
+ %0(<4 x s32>) = COPY %xmm0
+ %1(<4 x s32>) = COPY %xmm1
+ %2(<4 x s32>) = G_SUB %0, %1
+ %xmm0 = COPY %2(<4 x s32>)
+ RET 0, implicit %xmm0
+
+...
+---
+name: test_sub_v2i64
+# ALL-LABEL: name: test_sub_v2i64
+alignment: 4
+legalized: true
+regBankSelected: true
+# NOVL: registers:
+# NOVL-NEXT: - { id: 0, class: vr128 }
+# NOVL-NEXT: - { id: 1, class: vr128 }
+# NOVL-NEXT: - { id: 2, class: vr128 }
+#
+# AVX512VL: registers:
+# AVX512VL-NEXT: - { id: 0, class: vr128x }
+# AVX512VL-NEXT: - { id: 1, class: vr128x }
+# AVX512VL-NEXT: - { id: 2, class: vr128x }
+#
+# AVX512BWVL: registers:
+# AVX512BWVL-NEXT: - { id: 0, class: vr128x }
+# AVX512BWVL-NEXT: - { id: 1, class: vr128x }
+# AVX512BWVL-NEXT: - { id: 2, class: vr128x }
+registers:
+ - { id: 0, class: vecr }
+ - { id: 1, class: vecr }
+ - { id: 2, class: vecr }
+# SSE2: %2 = PSUBQrr %0, %1
+#
+# AVX1: %2 = VPSUBQrr %0, %1
+#
+# AVX512VL: %2 = VPSUBQZ128rr %0, %1
+#
+# AVX512BWVL: %2 = VPSUBQZ128rr %0, %1
+body: |
+ bb.1 (%ir-block.0):
+ liveins: %xmm0, %xmm1
+
+ %0(<2 x s64>) = COPY %xmm0
+ %1(<2 x s64>) = COPY %xmm1
+ %2(<2 x s64>) = G_SUB %0, %1
+ %xmm0 = COPY %2(<2 x s64>)
+ RET 0, implicit %xmm0
+
+...
diff --git a/llvm/test/CodeGen/X86/GlobalISel/select-sub-v256.mir b/llvm/test/CodeGen/X86/GlobalISel/select-sub-v256.mir
new file mode 100644
index 00000000000..fbc44997b4a
--- /dev/null
+++ b/llvm/test/CodeGen/X86/GlobalISel/select-sub-v256.mir
@@ -0,0 +1,185 @@
+# RUN: llc -mtriple=x86_64-linux-gnu -mattr=+avx2 -global-isel -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=ALL --check-prefix=AVX2
+# RUN: llc -mtriple=x86_64-linux-gnu -mattr=+avx512f,+avx512vl -global-isel -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=ALL --check-prefix=AVX512VL
+# RUN: llc -mtriple=x86_64-linux-gnu -mattr=+avx512f,+avx512vl,+avx512bw -global-isel -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=ALL --check-prefix=AVX512BWVL
+
+--- |
+ define <32 x i8> @test_sub_v32i8(<32 x i8> %arg1, <32 x i8> %arg2) {
+ %ret = sub <32 x i8> %arg1, %arg2
+ ret <32 x i8> %ret
+ }
+
+ define <16 x i16> @test_sub_v16i16(<16 x i16> %arg1, <16 x i16> %arg2) {
+ %ret = sub <16 x i16> %arg1, %arg2
+ ret <16 x i16> %ret
+ }
+
+ define <8 x i32> @test_sub_v8i32(<8 x i32> %arg1, <8 x i32> %arg2) {
+ %ret = sub <8 x i32> %arg1, %arg2
+ ret <8 x i32> %ret
+ }
+
+ define <4 x i64> @test_sub_v4i64(<4 x i64> %arg1, <4 x i64> %arg2) {
+ %ret = sub <4 x i64> %arg1, %arg2
+ ret <4 x i64> %ret
+ }
+...
+---
+name: test_sub_v32i8
+# ALL-LABEL: name: test_sub_v32i8
+alignment: 4
+legalized: true
+regBankSelected: true
+# AVX2: registers:
+# AVX2-NEXT: - { id: 0, class: vr256 }
+# AVX2-NEXT: - { id: 1, class: vr256 }
+# AVX2-NEXT: - { id: 2, class: vr256 }
+#
+# AVX512VL: registers:
+# AVX512VL-NEXT: - { id: 0, class: vr256 }
+# AVX512VL-NEXT: - { id: 1, class: vr256 }
+# AVX512VL-NEXT: - { id: 2, class: vr256 }
+#
+# AVX512BWVL: registers:
+# AVX512BWVL-NEXT: - { id: 0, class: vr256x }
+# AVX512BWVL-NEXT: - { id: 1, class: vr256x }
+# AVX512BWVL-NEXT: - { id: 2, class: vr256x }
+registers:
+ - { id: 0, class: vecr }
+ - { id: 1, class: vecr }
+ - { id: 2, class: vecr }
+# AVX2: %2 = VPSUBBYrr %0, %1
+#
+# AVX512VL: %2 = VPSUBBYrr %0, %1
+#
+# AVX512BWVL: %2 = VPSUBBZ256rr %0, %1
+body: |
+ bb.1 (%ir-block.0):
+ liveins: %ymm0, %ymm1
+
+ %0(<32 x s8>) = COPY %ymm0
+ %1(<32 x s8>) = COPY %ymm1
+ %2(<32 x s8>) = G_SUB %0, %1
+ %ymm0 = COPY %2(<32 x s8>)
+ RET 0, implicit %ymm0
+
+...
+---
+name: test_sub_v16i16
+# ALL-LABEL: name: test_sub_v16i16
+alignment: 4
+legalized: true
+regBankSelected: true
+# AVX2: registers:
+# AVX2-NEXT: - { id: 0, class: vr256 }
+# AVX2-NEXT: - { id: 1, class: vr256 }
+# AVX2-NEXT: - { id: 2, class: vr256 }
+#
+# AVX512VL: registers:
+# AVX512VL-NEXT: - { id: 0, class: vr256 }
+# AVX512VL-NEXT: - { id: 1, class: vr256 }
+# AVX512VL-NEXT: - { id: 2, class: vr256 }
+#
+# AVX512BWVL: registers:
+# AVX512BWVL-NEXT: - { id: 0, class: vr256x }
+# AVX512BWVL-NEXT: - { id: 1, class: vr256x }
+# AVX512BWVL-NEXT: - { id: 2, class: vr256x }
+registers:
+ - { id: 0, class: vecr }
+ - { id: 1, class: vecr }
+ - { id: 2, class: vecr }
+# AVX2: %2 = VPSUBWYrr %0, %1
+#
+# AVX512VL: %2 = VPSUBWYrr %0, %1
+#
+# AVX512BWVL: %2 = VPSUBWZ256rr %0, %1
+body: |
+ bb.1 (%ir-block.0):
+ liveins: %ymm0, %ymm1
+
+ %0(<16 x s16>) = COPY %ymm0
+ %1(<16 x s16>) = COPY %ymm1
+ %2(<16 x s16>) = G_SUB %0, %1
+ %ymm0 = COPY %2(<16 x s16>)
+ RET 0, implicit %ymm0
+
+...
+---
+name: test_sub_v8i32
+# ALL-LABEL: name: test_sub_v8i32
+alignment: 4
+legalized: true
+regBankSelected: true
+# AVX2: registers:
+# AVX2-NEXT: - { id: 0, class: vr256 }
+# AVX2-NEXT: - { id: 1, class: vr256 }
+# AVX2-NEXT: - { id: 2, class: vr256 }
+#
+# AVX512VL: registers:
+# AVX512VL-NEXT: - { id: 0, class: vr256x }
+# AVX512VL-NEXT: - { id: 1, class: vr256x }
+# AVX512VL-NEXT: - { id: 2, class: vr256x }
+#
+# AVX512BWVL: registers:
+# AVX512BWVL-NEXT: - { id: 0, class: vr256x }
+# AVX512BWVL-NEXT: - { id: 1, class: vr256x }
+# AVX512BWVL-NEXT: - { id: 2, class: vr256x }
+registers:
+ - { id: 0, class: vecr }
+ - { id: 1, class: vecr }
+ - { id: 2, class: vecr }
+# AVX2: %2 = VPSUBDYrr %0, %1
+#
+# AVX512VL: %2 = VPSUBDZ256rr %0, %1
+#
+# AVX512BWVL: %2 = VPSUBDZ256rr %0, %1
+body: |
+ bb.1 (%ir-block.0):
+ liveins: %ymm0, %ymm1
+
+ %0(<8 x s32>) = COPY %ymm0
+ %1(<8 x s32>) = COPY %ymm1
+ %2(<8 x s32>) = G_SUB %0, %1
+ %ymm0 = COPY %2(<8 x s32>)
+ RET 0, implicit %ymm0
+
+...
+---
+name: test_sub_v4i64
+# ALL-LABEL: name: test_sub_v4i64
+alignment: 4
+legalized: true
+regBankSelected: true
+# AVX2: registers:
+# AVX2-NEXT: - { id: 0, class: vr256 }
+# AVX2-NEXT: - { id: 1, class: vr256 }
+# AVX2-NEXT: - { id: 2, class: vr256 }
+#
+# AVX512VL: registers:
+# AVX512VL-NEXT: - { id: 0, class: vr256x }
+# AVX512VL-NEXT: - { id: 1, class: vr256x }
+# AVX512VL-NEXT: - { id: 2, class: vr256x }
+#
+# AVX512BWVL: registers:
+# AVX512BWVL-NEXT: - { id: 0, class: vr256x }
+# AVX512BWVL-NEXT: - { id: 1, class: vr256x }
+# AVX512BWVL-NEXT: - { id: 2, class: vr256x }
+registers:
+ - { id: 0, class: vecr }
+ - { id: 1, class: vecr }
+ - { id: 2, class: vecr }
+# AVX2: %2 = VPSUBQYrr %0, %1
+#
+# AVX512VL: %2 = VPSUBQZ256rr %0, %1
+#
+# AVX512BWVL: %2 = VPSUBQZ256rr %0, %1
+body: |
+ bb.1 (%ir-block.0):
+ liveins: %ymm0, %ymm1
+
+ %0(<4 x s64>) = COPY %ymm0
+ %1(<4 x s64>) = COPY %ymm1
+ %2(<4 x s64>) = G_SUB %0, %1
+ %ymm0 = COPY %2(<4 x s64>)
+ RET 0, implicit %ymm0
+
+...
diff --git a/llvm/test/CodeGen/X86/GlobalISel/select-sub-v512.mir b/llvm/test/CodeGen/X86/GlobalISel/select-sub-v512.mir
new file mode 100644
index 00000000000..dcd05f05694
--- /dev/null
+++ b/llvm/test/CodeGen/X86/GlobalISel/select-sub-v512.mir
@@ -0,0 +1,130 @@
+# RUN: llc -mtriple=x86_64-linux-gnu -global-isel -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=ALL
+
+--- |
+ define <64 x i8> @test_sub_v64i8(<64 x i8> %arg1, <64 x i8> %arg2) #0 {
+ %ret = sub <64 x i8> %arg1, %arg2
+ ret <64 x i8> %ret
+ }
+
+ define <32 x i16> @test_sub_v32i16(<32 x i16> %arg1, <32 x i16> %arg2) #0 {
+ %ret = sub <32 x i16> %arg1, %arg2
+ ret <32 x i16> %ret
+ }
+
+ define <16 x i32> @test_sub_v16i32(<16 x i32> %arg1, <16 x i32> %arg2) #1 {
+ %ret = sub <16 x i32> %arg1, %arg2
+ ret <16 x i32> %ret
+ }
+
+ define <8 x i64> @test_sub_v8i64(<8 x i64> %arg1, <8 x i64> %arg2) #1 {
+ %ret = sub <8 x i64> %arg1, %arg2
+ ret <8 x i64> %ret
+ }
+
+ attributes #0 = { "target-features"="+avx512f,+avx512bw" }
+ attributes #1 = { "target-features"="+avx512f" }
+...
+---
+name: test_sub_v64i8
+# ALL-LABEL: name: test_sub_v64i8
+alignment: 4
+legalized: true
+regBankSelected: true
+# ALL: registers:
+# ALL-NEXT: - { id: 0, class: vr512 }
+# ALL-NEXT: - { id: 1, class: vr512 }
+# ALL-NEXT: - { id: 2, class: vr512 }
+registers:
+ - { id: 0, class: vecr }
+ - { id: 1, class: vecr }
+ - { id: 2, class: vecr }
+# ALL: %2 = VPSUBBZrr %0, %1
+body: |
+ bb.1 (%ir-block.0):
+ liveins: %zmm0, %zmm1
+
+ %0(<64 x s8>) = COPY %zmm0
+ %1(<64 x s8>) = COPY %zmm1
+ %2(<64 x s8>) = G_SUB %0, %1
+ %zmm0 = COPY %2(<64 x s8>)
+ RET 0, implicit %zmm0
+
+...
+---
+name: test_sub_v32i16
+# ALL-LABEL: name: test_sub_v32i16
+alignment: 4
+legalized: true
+regBankSelected: true
+# ALL: registers:
+# ALL-NEXT: - { id: 0, class: vr512 }
+# ALL-NEXT: - { id: 1, class: vr512 }
+# ALL-NEXT: - { id: 2, class: vr512 }
+registers:
+ - { id: 0, class: vecr }
+ - { id: 1, class: vecr }
+ - { id: 2, class: vecr }
+# ALL: %2 = VPSUBWZrr %0, %1
+body: |
+ bb.1 (%ir-block.0):
+ liveins: %zmm0, %zmm1
+
+ %0(<32 x s16>) = COPY %zmm0
+ %1(<32 x s16>) = COPY %zmm1
+ %2(<32 x s16>) = G_SUB %0, %1
+ %zmm0 = COPY %2(<32 x s16>)
+ RET 0, implicit %zmm0
+
+...
+---
+name: test_sub_v16i32
+# ALL-LABEL: name: test_sub_v16i32
+alignment: 4
+legalized: true
+regBankSelected: true
+# ALL: registers:
+# ALL-NEXT: - { id: 0, class: vr512 }
+# ALL-NEXT: - { id: 1, class: vr512 }
+# ALL-NEXT: - { id: 2, class: vr512 }
+registers:
+ - { id: 0, class: vecr }
+ - { id: 1, class: vecr }
+ - { id: 2, class: vecr }
+# ALL: %2 = VPSUBDZrr %0, %1
+body: |
+ bb.1 (%ir-block.0):
+ liveins: %zmm0, %zmm1
+
+ %0(<16 x s32>) = COPY %zmm0
+ %1(<16 x s32>) = COPY %zmm1
+ %2(<16 x s32>) = G_SUB %0, %1
+ %zmm0 = COPY %2(<16 x s32>)
+ RET 0, implicit %zmm0
+
+...
+---
+name: test_sub_v8i64
+# ALL-LABEL: name: test_sub_v8i64
+alignment: 4
+legalized: true
+regBankSelected: true
+# ALL: registers:
+# ALL-NEXT: - { id: 0, class: vr512 }
+# ALL-NEXT: - { id: 1, class: vr512 }
+# ALL-NEXT: - { id: 2, class: vr512 }
+registers:
+ - { id: 0, class: vecr }
+ - { id: 1, class: vecr }
+ - { id: 2, class: vecr }
+# ALL: %2 = VPSUBQZrr %0, %1
+body: |
+ bb.1 (%ir-block.0):
+ liveins: %zmm0, %zmm1
+
+ %0(<8 x s64>) = COPY %zmm0
+ %1(<8 x s64>) = COPY %zmm1
+ %2(<8 x s64>) = G_SUB %0, %1
+ %zmm0 = COPY %2(<8 x s64>)
+ RET 0, implicit %zmm0
+
+...
diff --git a/llvm/test/CodeGen/X86/GlobalISel/sub-vec.ll b/llvm/test/CodeGen/X86/GlobalISel/sub-vec.ll
new file mode 100644
index 00000000000..3b2957793b3
--- /dev/null
+++ b/llvm/test/CodeGen/X86/GlobalISel/sub-vec.ll
@@ -0,0 +1,111 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=x86_64-linux-gnu -mcpu=skx -global-isel < %s -o - | FileCheck %s --check-prefix=SKX
+
+define <16 x i8> @test_sub_v16i8(<16 x i8> %arg1, <16 x i8> %arg2) {
+; SKX-LABEL: test_sub_v16i8:
+; SKX: # BB#0:
+; SKX-NEXT: vpsubb %xmm1, %xmm0, %xmm0
+; SKX-NEXT: retq
+ %ret = sub <16 x i8> %arg1, %arg2
+ ret <16 x i8> %ret
+}
+
+define <8 x i16> @test_sub_v8i16(<8 x i16> %arg1, <8 x i16> %arg2) {
+; SKX-LABEL: test_sub_v8i16:
+; SKX: # BB#0:
+; SKX-NEXT: vpsubw %xmm1, %xmm0, %xmm0
+; SKX-NEXT: retq
+ %ret = sub <8 x i16> %arg1, %arg2
+ ret <8 x i16> %ret
+}
+
+define <4 x i32> @test_sub_v4i32(<4 x i32> %arg1, <4 x i32> %arg2) {
+; SKX-LABEL: test_sub_v4i32:
+; SKX: # BB#0:
+; SKX-NEXT: vpsubd %xmm1, %xmm0, %xmm0
+; SKX-NEXT: retq
+ %ret = sub <4 x i32> %arg1, %arg2
+ ret <4 x i32> %ret
+}
+
+define <2 x i64> @test_sub_v2i64(<2 x i64> %arg1, <2 x i64> %arg2) {
+; SKX-LABEL: test_sub_v2i64:
+; SKX: # BB#0:
+; SKX-NEXT: vpsubq %xmm1, %xmm0, %xmm0
+; SKX-NEXT: retq
+ %ret = sub <2 x i64> %arg1, %arg2
+ ret <2 x i64> %ret
+}
+
+define <32 x i8> @test_sub_v32i8(<32 x i8> %arg1, <32 x i8> %arg2) {
+; SKX-LABEL: test_sub_v32i8:
+; SKX: # BB#0:
+; SKX-NEXT: vpsubb %ymm1, %ymm0, %ymm0
+; SKX-NEXT: retq
+ %ret = sub <32 x i8> %arg1, %arg2
+ ret <32 x i8> %ret
+}
+
+define <16 x i16> @test_sub_v16i16(<16 x i16> %arg1, <16 x i16> %arg2) {
+; SKX-LABEL: test_sub_v16i16:
+; SKX: # BB#0:
+; SKX-NEXT: vpsubw %ymm1, %ymm0, %ymm0
+; SKX-NEXT: retq
+ %ret = sub <16 x i16> %arg1, %arg2
+ ret <16 x i16> %ret
+}
+
+define <8 x i32> @test_sub_v8i32(<8 x i32> %arg1, <8 x i32> %arg2) {
+; SKX-LABEL: test_sub_v8i32:
+; SKX: # BB#0:
+; SKX-NEXT: vpsubd %ymm1, %ymm0, %ymm0
+; SKX-NEXT: retq
+ %ret = sub <8 x i32> %arg1, %arg2
+ ret <8 x i32> %ret
+}
+
+define <4 x i64> @test_sub_v4i64(<4 x i64> %arg1, <4 x i64> %arg2) {
+; SKX-LABEL: test_sub_v4i64:
+; SKX: # BB#0:
+; SKX-NEXT: vpsubq %ymm1, %ymm0, %ymm0
+; SKX-NEXT: retq
+ %ret = sub <4 x i64> %arg1, %arg2
+ ret <4 x i64> %ret
+}
+
+define <64 x i8> @test_sub_v64i8(<64 x i8> %arg1, <64 x i8> %arg2) {
+; SKX-LABEL: test_sub_v64i8:
+; SKX: # BB#0:
+; SKX-NEXT: vpsubb %zmm1, %zmm0, %zmm0
+; SKX-NEXT: retq
+ %ret = sub <64 x i8> %arg1, %arg2
+ ret <64 x i8> %ret
+}
+
+define <32 x i16> @test_sub_v32i16(<32 x i16> %arg1, <32 x i16> %arg2) {
+; SKX-LABEL: test_sub_v32i16:
+; SKX: # BB#0:
+; SKX-NEXT: vpsubw %zmm1, %zmm0, %zmm0
+; SKX-NEXT: retq
+ %ret = sub <32 x i16> %arg1, %arg2
+ ret <32 x i16> %ret
+}
+
+define <16 x i32> @test_sub_v16i32(<16 x i32> %arg1, <16 x i32> %arg2) {
+; SKX-LABEL: test_sub_v16i32:
+; SKX: # BB#0:
+; SKX-NEXT: vpsubd %zmm1, %zmm0, %zmm0
+; SKX-NEXT: retq
+ %ret = sub <16 x i32> %arg1, %arg2
+ ret <16 x i32> %ret
+}
+
+define <8 x i64> @test_sub_v8i64(<8 x i64> %arg1, <8 x i64> %arg2) {
+; SKX-LABEL: test_sub_v8i64:
+; SKX: # BB#0:
+; SKX-NEXT: vpsubq %zmm1, %zmm0, %zmm0
+; SKX-NEXT: retq
+ %ret = sub <8 x i64> %arg1, %arg2
+ ret <8 x i64> %ret
+}
+
OpenPOWER on IntegriCloud