| author | Coby Tayree <coby.tayree@intel.com> | 2017-11-21 09:30:33 +0000 |
|---|---|---|
| committer | Coby Tayree <coby.tayree@intel.com> | 2017-11-21 09:30:33 +0000 |
| commit | 7ca5e5873616c56f6ce473588a303af143b553f1 | |
| tree | 9758b0e16b6d532b4601a26a3d398d12d06246c3 /llvm/test/CodeGen | |
| parent | 2a1c02fcbc823438f16697f939005d26d6294de2 | |
[x86][icelake] vpclmulqdq introduction

An Icelake promotion of pclmulqdq: the instruction is extended from 128-bit operands to 256-bit and 512-bit vector widths under the VPCLMULQDQ feature, and the tests below cover the new VEX- and EVEX-encoded forms.
Differential Revision: https://reviews.llvm.org/D40101
llvm-svn: 318741
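For reference, the promoted operation is reachable from C through the VPCLMULQDQ intrinsics. The sketch below is illustrative only; it is not part of this patch (which adds codegen tests), and it assumes the immintrin.h names `_mm256_clmulepi64_epi128` and `_mm512_clmulepi64_epi128` together with the `-mvpclmulqdq`/`-mavx512f` compiler flags.

```c
/* Illustrative only: this patch adds codegen tests, not these intrinsics.
 * Assumes immintrin.h exposes the VPCLMULQDQ forms as
 * _mm256_clmulepi64_epi128 / _mm512_clmulepi64_epi128.
 * Build sketch: clang -mvpclmulqdq -mavx512f -c clmul.c */
#include <immintrin.h>

/* 256-bit form: one carry-less multiply per 128-bit lane.
 * imm 0x11 selects the high qword of each lane of both sources,
 * matching the `vpclmulqdq $17` encodings tested below. */
__m256i clmul_hi_ymm(__m256i a, __m256i b) {
    return _mm256_clmulepi64_epi128(a, b, 0x11);
}

/* 512-bit form: four lanes at once. imm 0x01 takes the first source's
 * high qword and the second source's low qword in each lane. */
__m512i clmul_mixed_zmm(__m512i a, __m512i b) {
    return _mm512_clmulepi64_epi128(a, b, 0x01);
}
```

Each intrinsic performs one carry-less multiply per 128-bit lane; the immediate selects one quadword from each source per lane, exactly as in the legacy 128-bit pclmulqdq.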
Diffstat (limited to 'llvm/test/CodeGen')
| mode | file | insertions |
|---|---|---|
| -rw-r--r-- | llvm/test/CodeGen/X86/avx-vpclmulqdq.ll | 13 |
| -rw-r--r-- | llvm/test/CodeGen/X86/avx512-vpclmulqdq.ll | 11 |
| -rw-r--r-- | llvm/test/CodeGen/X86/avx512vl-vpclmulqdq.ll | 22 |
| -rw-r--r-- | llvm/test/CodeGen/X86/commute-vpclmulqdq-avx.ll | 42 |
| -rw-r--r-- | llvm/test/CodeGen/X86/commute-vpclmulqdq-avx512.ll | 116 |
5 files changed, 204 insertions, 0 deletions
diff --git a/llvm/test/CodeGen/X86/avx-vpclmulqdq.ll b/llvm/test/CodeGen/X86/avx-vpclmulqdq.ll
new file mode 100644
index 00000000000..abf18fe5b0a
--- /dev/null
+++ b/llvm/test/CodeGen/X86/avx-vpclmulqdq.ll
@@ -0,0 +1,13 @@
+; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=avx,vpclmulqdq -show-mc-encoding | FileCheck %s --check-prefix=AVX_VPCLMULQDQ
+
+; Check for vpclmulqdq
+define <4 x i64> @test_x86_pclmulqdq(<4 x i64> %a0, <4 x i64> %a1) {
+; AVX_VPCLMULQDQ-LABEL: test_x86_pclmulqdq:
+; AVX_VPCLMULQDQ:       # BB#0:
+; AVX_VPCLMULQDQ-NEXT:    vpclmulqdq $17, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0x7d,0x44,0xc1,0x11]
+; AVX_VPCLMULQDQ-NEXT:    retl # encoding: [0xc3]
+  %res = call <4 x i64> @llvm.x86.pclmulqdq.256(<4 x i64> %a0, <4 x i64> %a1, i8 17)
+  ret <4 x i64> %res
+}
+declare <4 x i64> @llvm.x86.pclmulqdq.256(<4 x i64>, <4 x i64>, i8) nounwind readnone
+
diff --git a/llvm/test/CodeGen/X86/avx512-vpclmulqdq.ll b/llvm/test/CodeGen/X86/avx512-vpclmulqdq.ll
new file mode 100644
index 00000000000..186cdab05af
--- /dev/null
+++ b/llvm/test/CodeGen/X86/avx512-vpclmulqdq.ll
@@ -0,0 +1,11 @@
+; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+avx512f,+vpclmulqdq -show-mc-encoding | FileCheck %s --check-prefix=AVX512_VPCLMULQDQ
+
+define <8 x i64> @test_x86_pclmulqdq(<8 x i64> %a0, <8 x i64> %a1) {
+; AVX512_VPCLMULQDQ-LABEL: test_x86_pclmulqdq:
+; AVX512_VPCLMULQDQ:       # BB#0:
+; AVX512_VPCLMULQDQ-NEXT:    vpclmulqdq $1, %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf3,0x7d,0x48,0x44,0xc1,0x01]
+; AVX512_VPCLMULQDQ-NEXT:    retq # encoding: [0xc3]
+  %res = call <8 x i64> @llvm.x86.pclmulqdq.512(<8 x i64> %a0, <8 x i64> %a1, i8 1)
+  ret <8 x i64> %res
+}
+declare <8 x i64> @llvm.x86.pclmulqdq.512(<8 x i64>, <8 x i64>, i8) nounwind readnone
diff --git a/llvm/test/CodeGen/X86/avx512vl-vpclmulqdq.ll b/llvm/test/CodeGen/X86/avx512vl-vpclmulqdq.ll
new file mode 100644
index 00000000000..3db3b9ecff5
--- /dev/null
+++ b/llvm/test/CodeGen/X86/avx512vl-vpclmulqdq.ll
@@ -0,0 +1,22 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+avx512f,+avx512vl,+vpclmulqdq -show-mc-encoding | FileCheck %s --check-prefix=AVX512VL_VPCLMULQDQ
+
+define <2 x i64> @test_x86_pclmulqdq(<2 x i64> %a0, <2 x i64> %a1) {
+; AVX512VL_VPCLMULQDQ-LABEL: test_x86_pclmulqdq:
+; AVX512VL_VPCLMULQDQ:       # BB#0:
+; AVX512VL_VPCLMULQDQ-NEXT:    vpclmulqdq $1, %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x44,0xc1,0x01]
+; AVX512VL_VPCLMULQDQ-NEXT:    retq # encoding: [0xc3]
+  %res = call <2 x i64> @llvm.x86.pclmulqdq(<2 x i64> %a0, <2 x i64> %a1, i8 1)
+  ret <2 x i64> %res
+}
+declare <2 x i64> @llvm.x86.pclmulqdq(<2 x i64>, <2 x i64>, i8) nounwind readnone
+
+define <4 x i64> @test_x86_pclmulqdq_256(<4 x i64> %a0, <4 x i64> %a1) {
+; AVX512VL_VPCLMULQDQ-LABEL: test_x86_pclmulqdq_256:
+; AVX512VL_VPCLMULQDQ:       # BB#0:
+; AVX512VL_VPCLMULQDQ-NEXT:    vpclmulqdq $16, %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x7d,0x44,0xc1,0x10]
+; AVX512VL_VPCLMULQDQ-NEXT:    retq # encoding: [0xc3]
+  %res = call <4 x i64> @llvm.x86.pclmulqdq.256(<4 x i64> %a0, <4 x i64> %a1, i8 16)
+  ret <4 x i64> %res
+}
+declare <4 x i64> @llvm.x86.pclmulqdq.256(<4 x i64>, <4 x i64>, i8) nounwind readnone
diff --git a/llvm/test/CodeGen/X86/commute-vpclmulqdq-avx.ll b/llvm/test/CodeGen/X86/commute-vpclmulqdq-avx.ll
new file mode 100644
index 00000000000..ec75316bac4
--- /dev/null
+++ b/llvm/test/CodeGen/X86/commute-vpclmulqdq-avx.ll
@@ -0,0 +1,42 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+vpclmulqdq | FileCheck %s
+; FIXME: actual vpclmulqdq operation should be eliminated
+
+declare <4 x i64> @llvm.x86.pclmulqdq.256(<4 x i64>, <4 x i64>, i8) nounwind readnone
+
+define <4 x i64> @commute_v1(<4 x i64> %a0, <4 x i64> %a1) {
+; CHECK-LABEL: commute_v1:
+; CHECK:       # BB#0:
+; CHECK-NEXT:    vpclmulqdq $0, %ymm1, %ymm0, %ymm0
+; CHECK-NEXT:    vxorps %ymm0, %ymm0, %ymm0
+; CHECK-NEXT:    retq
+  %1 = call <4 x i64> @llvm.x86.pclmulqdq.256(<4 x i64> %a0, <4 x i64> %a1, i8 0)
+  %2 = call <4 x i64> @llvm.x86.pclmulqdq.256(<4 x i64> %a1, <4 x i64> %a0, i8 0)
+  %3 = xor <4 x i64> %1, %2
+  ret <4 x i64> %3
+}
+
+define <4 x i64> @commute_v2(<4 x i64> %a0, <4 x i64> %a1) {
+; CHECK-LABEL: commute_v2:
+; CHECK:       # BB#0:
+; CHECK-NEXT:    vpclmulqdq $16, %ymm1, %ymm0, %ymm0
+; CHECK-NEXT:    vxorps %ymm0, %ymm0, %ymm0
+; CHECK-NEXT:    retq
+  %1 = call <4 x i64> @llvm.x86.pclmulqdq.256(<4 x i64> %a0, <4 x i64> %a1, i8 16)
+  %2 = call <4 x i64> @llvm.x86.pclmulqdq.256(<4 x i64> %a1, <4 x i64> %a0, i8 1)
+  %3 = xor <4 x i64> %2, %1
+  ret <4 x i64> %3
+}
+
+define <4 x i64> @commute_v3(<4 x i64> %a0, <4 x i64> %a1) {
+; CHECK-LABEL: commute_v3:
+; CHECK:       # BB#0:
+; CHECK-NEXT:    vpclmulqdq $17, %ymm1, %ymm0, %ymm0
+; CHECK-NEXT:    vxorps %ymm0, %ymm0, %ymm0
+; CHECK-NEXT:    retq
+  %1 = call <4 x i64> @llvm.x86.pclmulqdq.256(<4 x i64> %a0, <4 x i64> %a1, i8 17)
+  %2 = call <4 x i64> @llvm.x86.pclmulqdq.256(<4 x i64> %a1, <4 x i64> %a0, i8 17)
+  %3 = xor <4 x i64> %2, %1
+  ret <4 x i64> %3
+}
+
diff --git a/llvm/test/CodeGen/X86/commute-vpclmulqdq-avx512.ll b/llvm/test/CodeGen/X86/commute-vpclmulqdq-avx512.ll
new file mode 100644
index 00000000000..c9366056a4f
--- /dev/null
+++ b/llvm/test/CodeGen/X86/commute-vpclmulqdq-avx512.ll
@@ -0,0 +1,116 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+vpclmulqdq,+avx512vl | FileCheck %s
+; FIXME: actual vpclmulqdq operation should be eliminated
+
+declare <2 x i64> @llvm.x86.pclmulqdq(<2 x i64>, <2 x i64>, i8) nounwind readnone
+declare <4 x i64> @llvm.x86.pclmulqdq.256(<4 x i64>, <4 x i64>, i8) nounwind readnone
+declare <8 x i64> @llvm.x86.pclmulqdq.512(<8 x i64>, <8 x i64>, i8) nounwind readnone
+
+define <2 x i64> @commute_xmm_v1(<2 x i64> %a0, <2 x i64> %a1) {
+; CHECK-LABEL: commute_xmm_v1:
+; CHECK:       # BB#0:
+; CHECK-NEXT:    vpclmulqdq $0, %xmm1, %xmm0, %xmm0
+; CHECK-NEXT:    vpxor %xmm0, %xmm0, %xmm0
+; CHECK-NEXT:    retq
+  %1 = call <2 x i64> @llvm.x86.pclmulqdq(<2 x i64> %a0, <2 x i64> %a1, i8 0)
+  %2 = call <2 x i64> @llvm.x86.pclmulqdq(<2 x i64> %a1, <2 x i64> %a0, i8 0)
+  %3 = xor <2 x i64> %1, %2
+  ret <2 x i64> %3
+}
+
+define <2 x i64> @commute_xmm_v2(<2 x i64> %a0, <2 x i64> %a1) {
+; CHECK-LABEL: commute_xmm_v2:
+; CHECK:       # BB#0:
+; CHECK-NEXT:    vpclmulqdq $16, %xmm1, %xmm0, %xmm0
+; CHECK-NEXT:    vpxor %xmm0, %xmm0, %xmm0
+; CHECK-NEXT:    retq
+  %1 = call <2 x i64> @llvm.x86.pclmulqdq(<2 x i64> %a0, <2 x i64> %a1, i8 16)
+  %2 = call <2 x i64> @llvm.x86.pclmulqdq(<2 x i64> %a1, <2 x i64> %a0, i8 1)
+  %3 = xor <2 x i64> %2, %1
+  ret <2 x i64> %3
+}
+
+define <2 x i64> @commute_xmm_v3(<2 x i64> %a0, <2 x i64> %a1) {
+; CHECK-LABEL: commute_xmm_v3:
+; CHECK:       # BB#0:
+; CHECK-NEXT:    vpclmulqdq $17, %xmm1, %xmm0, %xmm0
+; CHECK-NEXT:    vpxor %xmm0, %xmm0, %xmm0
+; CHECK-NEXT:    retq
+  %1 = call <2 x i64> @llvm.x86.pclmulqdq(<2 x i64> %a0, <2 x i64> %a1, i8 17)
+  %2 = call <2 x i64> @llvm.x86.pclmulqdq(<2 x i64> %a1, <2 x i64> %a0, i8 17)
+  %3 = xor <2 x i64> %2, %1
+  ret <2 x i64> %3
+}
+
+define <4 x i64> @commute_ymm_v1(<4 x i64> %a0, <4 x i64> %a1) {
+; CHECK-LABEL: commute_ymm_v1:
+; CHECK:       # BB#0:
+; CHECK-NEXT:    vpclmulqdq $0, %ymm1, %ymm0, %ymm0
+; CHECK-NEXT:    vpxor %ymm0, %ymm0, %ymm0
+; CHECK-NEXT:    retq
+  %1 = call <4 x i64> @llvm.x86.pclmulqdq.256(<4 x i64> %a0, <4 x i64> %a1, i8 0)
+  %2 = call <4 x i64> @llvm.x86.pclmulqdq.256(<4 x i64> %a1, <4 x i64> %a0, i8 0)
+  %3 = xor <4 x i64> %1, %2
+  ret <4 x i64> %3
+}
+
+define <4 x i64> @commute_ymm_v2(<4 x i64> %a0, <4 x i64> %a1) {
+; CHECK-LABEL: commute_ymm_v2:
+; CHECK:       # BB#0:
+; CHECK-NEXT:    vpclmulqdq $16, %ymm1, %ymm0, %ymm0
+; CHECK-NEXT:    vpxor %ymm0, %ymm0, %ymm0
+; CHECK-NEXT:    retq
+  %1 = call <4 x i64> @llvm.x86.pclmulqdq.256(<4 x i64> %a0, <4 x i64> %a1, i8 16)
+  %2 = call <4 x i64> @llvm.x86.pclmulqdq.256(<4 x i64> %a1, <4 x i64> %a0, i8 1)
+  %3 = xor <4 x i64> %2, %1
+  ret <4 x i64> %3
+}
+
+define <4 x i64> @commute_ymm_v3(<4 x i64> %a0, <4 x i64> %a1) {
+; CHECK-LABEL: commute_ymm_v3:
+; CHECK:       # BB#0:
+; CHECK-NEXT:    vpclmulqdq $17, %ymm1, %ymm0, %ymm0
+; CHECK-NEXT:    vpxor %ymm0, %ymm0, %ymm0
+; CHECK-NEXT:    retq
+  %1 = call <4 x i64> @llvm.x86.pclmulqdq.256(<4 x i64> %a0, <4 x i64> %a1, i8 17)
+  %2 = call <4 x i64> @llvm.x86.pclmulqdq.256(<4 x i64> %a1, <4 x i64> %a0, i8 17)
+  %3 = xor <4 x i64> %2, %1
+  ret <4 x i64> %3
+}
+
+define <8 x i64> @commute_zmm_v1(<8 x i64> %a0, <8 x i64> %a1) {
+; CHECK-LABEL: commute_zmm_v1:
+; CHECK:       # BB#0:
+; CHECK-NEXT:    vpclmulqdq $0, %zmm1, %zmm0, %zmm0
+; CHECK-NEXT:    vpxorq %zmm0, %zmm0, %zmm0
+; CHECK-NEXT:    retq
+  %1 = call <8 x i64> @llvm.x86.pclmulqdq.512(<8 x i64> %a0, <8 x i64> %a1, i8 0)
+  %2 = call <8 x i64> @llvm.x86.pclmulqdq.512(<8 x i64> %a1, <8 x i64> %a0, i8 0)
+  %3 = xor <8 x i64> %1, %2
+  ret <8 x i64> %3
+}
+
+define <8 x i64> @commute_zmm_v2(<8 x i64> %a0, <8 x i64> %a1) {
+; CHECK-LABEL: commute_zmm_v2:
+; CHECK:       # BB#0:
+; CHECK-NEXT:    vpclmulqdq $16, %zmm1, %zmm0, %zmm0
+; CHECK-NEXT:    vpxorq %zmm0, %zmm0, %zmm0
+; CHECK-NEXT:    retq
+  %1 = call <8 x i64> @llvm.x86.pclmulqdq.512(<8 x i64> %a0, <8 x i64> %a1, i8 16)
+  %2 = call <8 x i64> @llvm.x86.pclmulqdq.512(<8 x i64> %a1, <8 x i64> %a0, i8 1)
+  %3 = xor <8 x i64> %2, %1
+  ret <8 x i64> %3
+}
+
+define <8 x i64> @commute_zmm_v3(<8 x i64> %a0, <8 x i64> %a1) {
+; CHECK-LABEL: commute_zmm_v3:
+; CHECK:       # BB#0:
+; CHECK-NEXT:    vpclmulqdq $17, %zmm1, %zmm0, %zmm0
+; CHECK-NEXT:    vpxorq %zmm0, %zmm0, %zmm0
+; CHECK-NEXT:    retq
+  %1 = call <8 x i64> @llvm.x86.pclmulqdq.512(<8 x i64> %a0, <8 x i64> %a1, i8 17)
+  %2 = call <8 x i64> @llvm.x86.pclmulqdq.512(<8 x i64> %a1, <8 x i64> %a0, i8 17)
+  %3 = xor <8 x i64> %2, %1
+  ret <8 x i64> %3
+}
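Why the commute tests expect each xor to fold to zero: pclmulqdq's imm8 bit 0 selects a quadword of the first source and bit 4 a quadword of the second, so swapping the operands while swapping those two bits (0x10 and 0x01 trade places; 0x00 and 0x11 are unchanged) describes the same product, and x xor x is zero. The FIXME lines note that the backend does not yet exploit this to delete the multiply itself. Below is a minimal scalar model of those documented per-lane semantics, with hypothetical helper names; it is not LLVM code.

```c
/* Scalar model of one 128-bit lane of pclmulqdq (hypothetical helpers,
 * not LLVM code): imm8 bit 0 picks src1's qword, bit 4 picks src2's,
 * so (a, b, 0x10) and (b, a, 0x01) compute the identical product. */
#include <stdint.h>
#include <stdio.h>

typedef struct { uint64_t lo, hi; } u128;

/* Carry-less (GF(2)) multiply of two 64-bit values into 128 bits. */
static u128 clmul64(uint64_t a, uint64_t b) {
    u128 r = {0, 0};
    for (int i = 0; i < 64; i++) {
        if ((b >> i) & 1) {
            r.lo ^= a << i;
            if (i) r.hi ^= a >> (64 - i);  /* bits shifted past 64 */
        }
    }
    return r;
}

/* One 128-bit lane: the immediate selects one qword from each source. */
static u128 pclmulqdq_lane(const uint64_t src1[2], const uint64_t src2[2],
                           uint8_t imm) {
    return clmul64(src1[imm & 1], src2[(imm >> 4) & 1]);
}

int main(void) {
    uint64_t a[2] = {0x0123456789abcdefULL, 0xfedcba9876543210ULL};
    uint64_t b[2] = {0x0f1e2d3c4b5a6978ULL, 0x8796a5b4c3d2e1f0ULL};
    /* The commute_*_v2 pattern: (a, b, 0x10) vs. the commuted (b, a, 0x01). */
    u128 x = pclmulqdq_lane(a, b, 0x10);
    u128 y = pclmulqdq_lane(b, a, 0x01);
    /* Their xor is all-zero, which is what the FIXME says the backend
     * should eventually fold away. */
    printf("xor is zero: %d\n", (x.lo ^ y.lo) == 0 && (x.hi ^ y.hi) == 0);
    return 0;
}
```

The same argument covers the v1 and v3 variants, where both selector bits already match (0x00 or 0x11) and the commuted call reuses the same immediate.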

