Diffstat (limited to 'llvm')
-rw-r--r--   llvm/lib/Target/X86/X86InstrAVX512.td |  39
-rw-r--r--   llvm/lib/Target/X86/X86InstrSSE.td    |  52
-rw-r--r--   llvm/test/CodeGen/X86/commute-fcmp.ll | 257
3 files changed, 345 insertions, 3 deletions
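
The change, in short: new isel patterns fold a load that feeds the first operand of a floating-point compare whenever the immediate is one of the commutable predicates (EQ, UNORD, NEQ, ORD), covering the SSE, AVX, and AVX512 forms. A minimal sketch of the IR shape this affects (hypothetical function name; the shape mirrors the tests below):

    define <4 x i32> @fold_left_load(<4 x float>* %p, <4 x float> %v) {
      %ld = load <4 x float>, <4 x float>* %p   ; load is the FIRST fcmp operand
      %cmp = fcmp oeq <4 x float> %ld, %v       ; oeq compares equal either way around
      %ext = sext <4 x i1> %cmp to <4 x i32>
      ret <4 x i32> %ext
    }

Because oeq is symmetric, instruction selection may swap the operands and emit vcmpeqps (%rdi), %xmm0, %xmm0 (or the AVX512 mask-register form) directly, instead of first materializing the load in a register.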
diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td
index 59064b3ccaf..6eb9607bb9d 100644
--- a/llvm/lib/Target/X86/X86InstrAVX512.td
+++ b/llvm/lib/Target/X86/X86InstrAVX512.td
@@ -2077,7 +2077,33 @@ multiclass avx512_vcmp_common<X86VectorVTInfo _> {
                 "$cc, ${src2}"##_.BroadcastStr##", $src1",
                 "$src1, ${src2}"##_.BroadcastStr##", $cc">,EVEX_B;
   }
-  }
+  }
+
+  // Patterns for selecting with loads in other operand.
+  def : Pat<(X86cmpm (_.LdFrag addr:$src2), (_.VT _.RC:$src1),
+                     CommutableCMPCC:$cc),
+            (!cast<Instruction>(NAME#_.ZSuffix#"rmi") _.RC:$src1, addr:$src2,
+                                                      imm:$cc)>;
+
+  def : Pat<(and _.KRCWM:$mask, (X86cmpm (_.LdFrag addr:$src2),
+                                         (_.VT _.RC:$src1),
+                                         CommutableCMPCC:$cc)),
+            (!cast<Instruction>(NAME#_.ZSuffix#"rmik") _.KRCWM:$mask,
+                                                       _.RC:$src1, addr:$src2,
+                                                       imm:$cc)>;
+
+  def : Pat<(X86cmpm (X86VBroadcast (_.ScalarLdFrag addr:$src2)),
+                     (_.VT _.RC:$src1), CommutableCMPCC:$cc),
+            (!cast<Instruction>(NAME#_.ZSuffix#"rmbi") _.RC:$src1, addr:$src2,
+                                                       imm:$cc)>;
+
+  def : Pat<(and _.KRCWM:$mask, (X86cmpm (X86VBroadcast
+                                          (_.ScalarLdFrag addr:$src2)),
+                                         (_.VT _.RC:$src1),
+                                         CommutableCMPCC:$cc)),
+            (!cast<Instruction>(NAME#_.ZSuffix#"rmbik") _.KRCWM:$mask,
+                                                        _.RC:$src1, addr:$src2,
+                                                        imm:$cc)>;
 }
 
 multiclass avx512_vcmp_sae<X86VectorVTInfo _> {
@@ -2119,6 +2145,17 @@ defm VCMPPS : avx512_vcmp<avx512vl_f32_info>,
                          AVX512PSIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>;
 
+// Patterns to select fp compares with load as first operand.
+let Predicates = [HasAVX512] in {
+  def : Pat<(v1i1 (X86cmpms (loadf64 addr:$src2), FR64X:$src1,
+                            CommutableCMPCC:$cc)),
+            (VCMPSDZrm FR64X:$src1, addr:$src2, imm:$cc)>;
+
+  def : Pat<(v1i1 (X86cmpms (loadf32 addr:$src2), FR32X:$src1,
+                            CommutableCMPCC:$cc)),
+            (VCMPSSZrm FR32X:$src1, addr:$src2, imm:$cc)>;
+}
+
 // ----------------------------------------------------------------
 //  FPClass
 //handle fpclass instruction  mask = op(reg_scalar,imm)
diff --git a/llvm/lib/Target/X86/X86InstrSSE.td b/llvm/lib/Target/X86/X86InstrSSE.td
index 77eb33d32b4..e328c2fa6a4 100644
--- a/llvm/lib/Target/X86/X86InstrSSE.td
+++ b/llvm/lib/Target/X86/X86InstrSSE.td
@@ -2308,6 +2308,58 @@ let Constraints = "$src1 = $dst" in {
                  SSEPackedDouble, memopv2f64, SSE_ALU_F64P>, PD;
 }
 
+def CommutableCMPCC : PatLeaf<(imm), [{
+  return (N->getZExtValue() == 0x00 || N->getZExtValue() == 0x03 ||
+          N->getZExtValue() == 0x04 || N->getZExtValue() == 0x07);
+}]>;
+
+// Patterns to select compares with loads in first operand.
+let Predicates = [HasAVX] in {
+  def : Pat<(v4f64 (X86cmpp (loadv4f64 addr:$src2), VR256:$src1,
+                            CommutableCMPCC:$cc)),
+            (VCMPPDYrmi VR256:$src1, addr:$src2, imm:$cc)>;
+
+  def : Pat<(v8f32 (X86cmpp (loadv8f32 addr:$src2), VR256:$src1,
+                            CommutableCMPCC:$cc)),
+            (VCMPPSYrmi VR256:$src1, addr:$src2, imm:$cc)>;
+
+  def : Pat<(v2f64 (X86cmpp (loadv2f64 addr:$src2), VR128:$src1,
+                            CommutableCMPCC:$cc)),
+            (VCMPPDrmi VR128:$src1, addr:$src2, imm:$cc)>;
+
+  def : Pat<(v4f32 (X86cmpp (loadv4f32 addr:$src2), VR128:$src1,
+                            CommutableCMPCC:$cc)),
+            (VCMPPSrmi VR128:$src1, addr:$src2, imm:$cc)>;
+
+  def : Pat<(f64 (X86cmps (loadf64 addr:$src2), FR64:$src1,
+                          CommutableCMPCC:$cc)),
+            (VCMPSDrm FR64:$src1, addr:$src2, imm:$cc)>;
+
+  def : Pat<(f32 (X86cmps (loadf32 addr:$src2), FR32:$src1,
+                          CommutableCMPCC:$cc)),
+            (VCMPSSrm FR32:$src1, addr:$src2, imm:$cc)>;
+}
+
+let Predicates = [UseSSE2] in {
+  def : Pat<(v2f64 (X86cmpp (memopv2f64 addr:$src2), VR128:$src1,
+                            CommutableCMPCC:$cc)),
+            (CMPPDrmi VR128:$src1, addr:$src2, imm:$cc)>;
+
+  def : Pat<(f64 (X86cmps (loadf64 addr:$src2), FR64:$src1,
+                          CommutableCMPCC:$cc)),
+            (CMPSDrm FR64:$src1, addr:$src2, imm:$cc)>;
+}
+
+let Predicates = [UseSSE1] in {
+  def : Pat<(v4f32 (X86cmpp (memopv4f32 addr:$src2), VR128:$src1,
+                            CommutableCMPCC:$cc)),
+            (CMPPSrmi VR128:$src1, addr:$src2, imm:$cc)>;
+
+  def : Pat<(f32 (X86cmps (loadf32 addr:$src2), FR32:$src1,
+                          CommutableCMPCC:$cc)),
+            (CMPSSrm FR32:$src1, addr:$src2, imm:$cc)>;
+}
+
 //===----------------------------------------------------------------------===//
 // SSE 1 & 2 - Shuffle Instructions
 //===----------------------------------------------------------------------===//
diff --git a/llvm/test/CodeGen/X86/commute-fcmp.ll b/llvm/test/CodeGen/X86/commute-fcmp.ll
index f05fb805b41..02820e4b39a 100644
--- a/llvm/test/CodeGen/X86/commute-fcmp.ll
+++ b/llvm/test/CodeGen/X86/commute-fcmp.ll
@@ -1,6 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+sse2 | FileCheck %s --check-prefix=SSE
-; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx2 | FileCheck %s --check-prefix=AVX
+; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+sse2 -disable-peephole | FileCheck %s --check-prefix=SSE
+; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx2 -disable-peephole | FileCheck %s --check-prefix=AVX
+; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx512vl -disable-peephole | FileCheck %s --check-prefix=AVX512
 ;
 ; Float Comparisons
 ;
@@ -17,6 +18,13 @@ define <4 x i32> @commute_cmpps_eq(<4 x float>* %a0, <4 x float> %a1) {
 ; AVX:       # BB#0:
 ; AVX-NEXT:    vcmpeqps (%rdi), %xmm0, %xmm0
 ; AVX-NEXT:    retq
+;
+; AVX512-LABEL: commute_cmpps_eq:
+; AVX512:       # BB#0:
+; AVX512-NEXT:    vcmpeqps (%rdi), %xmm0, %k1
+; AVX512-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; AVX512-NEXT:    vmovdqa32 %xmm0, %xmm0 {%k1} {z}
+; AVX512-NEXT:    retq
   %1 = load <4 x float>, <4 x float>* %a0
   %2 = fcmp oeq <4 x float> %1, %a1
   %3 = sext <4 x i1> %2 to <4 x i32>
@@ -33,6 +41,13 @@ define <4 x i32> @commute_cmpps_ne(<4 x float>* %a0, <4 x float> %a1) {
 ; AVX:       # BB#0:
 ; AVX-NEXT:    vcmpneqps (%rdi), %xmm0, %xmm0
 ; AVX-NEXT:    retq
+;
+; AVX512-LABEL: commute_cmpps_ne:
+; AVX512:       # BB#0:
+; AVX512-NEXT:    vcmpneqps (%rdi), %xmm0, %k1
+; AVX512-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; AVX512-NEXT:    vmovdqa32 %xmm0, %xmm0 {%k1} {z}
+; AVX512-NEXT:    retq
   %1 = load <4 x float>, <4 x float>* %a0
   %2 = fcmp une <4 x float> %1, %a1
   %3 = sext <4 x i1> %2 to <4 x i32>
@@ -49,6 +64,13 @@ define <4 x i32> @commute_cmpps_ord(<4 x float>* %a0, <4 x float> %a1) {
 ; AVX:       # BB#0:
 ; AVX-NEXT:    vcmpordps (%rdi), %xmm0, %xmm0
 ; AVX-NEXT:    retq
+;
+; AVX512-LABEL: commute_cmpps_ord:
+; AVX512:       # BB#0:
+; AVX512-NEXT:    vcmpordps (%rdi), %xmm0, %k1
+; AVX512-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; AVX512-NEXT:    vmovdqa32 %xmm0, %xmm0 {%k1} {z}
+; AVX512-NEXT:    retq
   %1 = load <4 x float>, <4 x float>* %a0
   %2 = fcmp ord <4 x float> %1, %a1
   %3 = sext <4 x i1> %2 to <4 x i32>
@@ -65,6 +87,13 @@ define <4 x i32> @commute_cmpps_uno(<4 x float>* %a0, <4 x float> %a1) {
 ; AVX:       # BB#0:
 ; AVX-NEXT:    vcmpunordps (%rdi), %xmm0, %xmm0
 ; AVX-NEXT:    retq
+;
+; AVX512-LABEL: commute_cmpps_uno:
+; AVX512:       # BB#0:
+; AVX512-NEXT:    vcmpunordps (%rdi), %xmm0, %k1
+; AVX512-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; AVX512-NEXT:    vmovdqa32 %xmm0, %xmm0 {%k1} {z}
+; AVX512-NEXT:    retq
   %1 = load <4 x float>, <4 x float>* %a0
   %2 = fcmp uno <4 x float> %1, %a1
   %3 = sext <4 x i1> %2 to <4 x i32>
@@ -88,6 +117,16 @@ define <4 x i32> @commute_cmpps_ueq(<4 x float>* %a0, <4 x float> %a1) {
 ; AVX-NEXT:    vcmpunordps %xmm0, %xmm1, %xmm0
 ; AVX-NEXT:    vorps %xmm2, %xmm0, %xmm0
 ; AVX-NEXT:    retq
+;
+; AVX512-LABEL: commute_cmpps_ueq:
+; AVX512:       # BB#0:
+; AVX512-NEXT:    vmovaps (%rdi), %xmm1
+; AVX512-NEXT:    vcmpeqps %xmm0, %xmm1, %k0
+; AVX512-NEXT:    vcmpunordps %xmm0, %xmm1, %k1
+; AVX512-NEXT:    korw %k0, %k1, %k1
+; AVX512-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; AVX512-NEXT:    vmovdqa32 %xmm0, %xmm0 {%k1} {z}
+; AVX512-NEXT:    retq
   %1 = load <4 x float>, <4 x float>* %a0
   %2 = fcmp ueq <4 x float> %1, %a1
   %3 = sext <4 x i1> %2 to <4 x i32>
@@ -111,6 +150,15 @@ define <4 x i32> @commute_cmpps_one(<4 x float>* %a0, <4 x float> %a1) {
 ; AVX-NEXT:    vcmpordps %xmm0, %xmm1, %xmm0
 ; AVX-NEXT:    vandps %xmm2, %xmm0, %xmm0
 ; AVX-NEXT:    retq
+;
+; AVX512-LABEL: commute_cmpps_one:
+; AVX512:       # BB#0:
+; AVX512-NEXT:    vmovaps (%rdi), %xmm1
+; AVX512-NEXT:    vcmpordps %xmm0, %xmm1, %k1
+; AVX512-NEXT:    vcmpneqps %xmm0, %xmm1, %k1 {%k1}
+; AVX512-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; AVX512-NEXT:    vmovdqa32 %xmm0, %xmm0 {%k1} {z}
+; AVX512-NEXT:    retq
   %1 = load <4 x float>, <4 x float>* %a0
   %2 = fcmp one <4 x float> %1, %a1
   %3 = sext <4 x i1> %2 to <4 x i32>
@@ -130,6 +178,14 @@ define <4 x i32> @commute_cmpps_lt(<4 x float>* %a0, <4 x float> %a1) {
 ; AVX-NEXT:    vmovaps (%rdi), %xmm1
 ; AVX-NEXT:    vcmpltps %xmm0, %xmm1, %xmm0
 ; AVX-NEXT:    retq
+;
+; AVX512-LABEL: commute_cmpps_lt:
+; AVX512:       # BB#0:
+; AVX512-NEXT:    vmovaps (%rdi), %xmm1
+; AVX512-NEXT:    vcmpltps %xmm0, %xmm1, %k1
+; AVX512-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; AVX512-NEXT:    vmovdqa32 %xmm0, %xmm0 {%k1} {z}
+; AVX512-NEXT:    retq
   %1 = load <4 x float>, <4 x float>* %a0
   %2 = fcmp olt <4 x float> %1, %a1
   %3 = sext <4 x i1> %2 to <4 x i32>
@@ -149,6 +205,14 @@ define <4 x i32> @commute_cmpps_le(<4 x float>* %a0, <4 x float> %a1) {
 ; AVX-NEXT:    vmovaps (%rdi), %xmm1
 ; AVX-NEXT:    vcmpleps %xmm0, %xmm1, %xmm0
 ; AVX-NEXT:    retq
+;
+; AVX512-LABEL: commute_cmpps_le:
+; AVX512:       # BB#0:
+; AVX512-NEXT:    vmovaps (%rdi), %xmm1
+; AVX512-NEXT:    vcmpleps %xmm0, %xmm1, %k1
+; AVX512-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; AVX512-NEXT:    vmovdqa32 %xmm0, %xmm0 {%k1} {z}
+; AVX512-NEXT:    retq
   %1 = load <4 x float>, <4 x float>* %a0
   %2 = fcmp ole <4 x float> %1, %a1
   %3 = sext <4 x i1> %2 to <4 x i32>
@@ -166,6 +230,13 @@ define <8 x i32> @commute_cmpps_eq_ymm(<8 x float>* %a0, <8 x float> %a1) {
 ; AVX:       # BB#0:
 ; AVX-NEXT:    vcmpeqps (%rdi), %ymm0, %ymm0
 ; AVX-NEXT:    retq
+;
+; AVX512-LABEL: commute_cmpps_eq_ymm:
+; AVX512:       # BB#0:
+; AVX512-NEXT:    vcmpeqps (%rdi), %ymm0, %k1
+; AVX512-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; AVX512-NEXT:    vmovdqa32 %ymm0, %ymm0 {%k1} {z}
+; AVX512-NEXT:    retq
   %1 = load <8 x float>, <8 x float>* %a0
   %2 = fcmp oeq <8 x float> %1, %a1
   %3 = sext <8 x i1> %2 to <8 x i32>
@@ -183,6 +254,13 @@ define <8 x i32> @commute_cmpps_ne_ymm(<8 x float>* %a0, <8 x float> %a1) {
 ; AVX:       # BB#0:
 ; AVX-NEXT:    vcmpneqps (%rdi), %ymm0, %ymm0
 ; AVX-NEXT:    retq
+;
+; AVX512-LABEL: commute_cmpps_ne_ymm:
+; AVX512:       # BB#0:
+; AVX512-NEXT:    vcmpneqps (%rdi), %ymm0, %k1
+; AVX512-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; AVX512-NEXT:    vmovdqa32 %ymm0, %ymm0 {%k1} {z}
+; AVX512-NEXT:    retq
   %1 = load <8 x float>, <8 x float>* %a0
   %2 = fcmp une <8 x float> %1, %a1
   %3 = sext <8 x i1> %2 to <8 x i32>
@@ -200,6 +278,13 @@ define <8 x i32> @commute_cmpps_ord_ymm(<8 x float>* %a0, <8 x float> %a1) {
 ; AVX:       # BB#0:
 ; AVX-NEXT:    vcmpordps (%rdi), %ymm0, %ymm0
 ; AVX-NEXT:    retq
+;
+; AVX512-LABEL: commute_cmpps_ord_ymm:
+; AVX512:       # BB#0:
+; AVX512-NEXT:    vcmpordps (%rdi), %ymm0, %k1
+; AVX512-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; AVX512-NEXT:    vmovdqa32 %ymm0, %ymm0 {%k1} {z}
+; AVX512-NEXT:    retq
   %1 = load <8 x float>, <8 x float>* %a0
   %2 = fcmp ord <8 x float> %1, %a1
   %3 = sext <8 x i1> %2 to <8 x i32>
@@ -217,6 +302,13 @@ define <8 x i32> @commute_cmpps_uno_ymm(<8 x float>* %a0, <8 x float> %a1) {
 ; AVX:       # BB#0:
 ; AVX-NEXT:    vcmpunordps (%rdi), %ymm0, %ymm0
 ; AVX-NEXT:    retq
+;
+; AVX512-LABEL: commute_cmpps_uno_ymm:
+; AVX512:       # BB#0:
+; AVX512-NEXT:    vcmpunordps (%rdi), %ymm0, %k1
+; AVX512-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; AVX512-NEXT:    vmovdqa32 %ymm0, %ymm0 {%k1} {z}
+; AVX512-NEXT:    retq
   %1 = load <8 x float>, <8 x float>* %a0
   %2 = fcmp uno <8 x float> %1, %a1
   %3 = sext <8 x i1> %2 to <8 x i32>
@@ -245,6 +337,16 @@ define <8 x i32> @commute_cmpps_ueq_ymm(<8 x float>* %a0, <8 x float> %a1) {
 ; AVX-NEXT:    vcmpunordps %ymm0, %ymm1, %ymm0
 ; AVX-NEXT:    vorps %ymm2, %ymm0, %ymm0
 ; AVX-NEXT:    retq
+;
+; AVX512-LABEL: commute_cmpps_ueq_ymm:
+; AVX512:       # BB#0:
+; AVX512-NEXT:    vmovaps (%rdi), %ymm1
+; AVX512-NEXT:    vcmpeqps %ymm0, %ymm1, %k0
+; AVX512-NEXT:    vcmpunordps %ymm0, %ymm1, %k1
+; AVX512-NEXT:    korw %k0, %k1, %k1
+; AVX512-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; AVX512-NEXT:    vmovdqa32 %ymm0, %ymm0 {%k1} {z}
+; AVX512-NEXT:    retq
   %1 = load <8 x float>, <8 x float>* %a0
   %2 = fcmp ueq <8 x float> %1, %a1
   %3 = sext <8 x i1> %2 to <8 x i32>
@@ -273,6 +375,15 @@ define <8 x i32> @commute_cmpps_one_ymm(<8 x float>* %a0, <8 x float> %a1) {
 ; AVX-NEXT:    vcmpordps %ymm0, %ymm1, %ymm0
 ; AVX-NEXT:    vandps %ymm2, %ymm0, %ymm0
 ; AVX-NEXT:    retq
+;
+; AVX512-LABEL: commute_cmpps_one_ymm:
+; AVX512:       # BB#0:
+; AVX512-NEXT:    vmovaps (%rdi), %ymm1
+; AVX512-NEXT:    vcmpordps %ymm0, %ymm1, %k1
+; AVX512-NEXT:    vcmpneqps %ymm0, %ymm1, %k1 {%k1}
+; AVX512-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; AVX512-NEXT:    vmovdqa32 %ymm0, %ymm0 {%k1} {z}
+; AVX512-NEXT:    retq
   %1 = load <8 x float>, <8 x float>* %a0
   %2 = fcmp one <8 x float> %1, %a1
   %3 = sext <8 x i1> %2 to <8 x i32>
@@ -295,6 +406,14 @@ define <8 x i32> @commute_cmpps_lt_ymm(<8 x float>* %a0, <8 x float> %a1) {
 ; AVX-NEXT:    vmovaps (%rdi), %ymm1
 ; AVX-NEXT:    vcmpltps %ymm0, %ymm1, %ymm0
 ; AVX-NEXT:    retq
+;
+; AVX512-LABEL: commute_cmpps_lt_ymm:
+; AVX512:       # BB#0:
+; AVX512-NEXT:    vmovaps (%rdi), %ymm1
+; AVX512-NEXT:    vcmpltps %ymm0, %ymm1, %k1
+; AVX512-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; AVX512-NEXT:    vmovdqa32 %ymm0, %ymm0 {%k1} {z}
+; AVX512-NEXT:    retq
   %1 = load <8 x float>, <8 x float>* %a0
   %2 = fcmp olt <8 x float> %1, %a1
   %3 = sext <8 x i1> %2 to <8 x i32>
@@ -317,6 +436,14 @@ define <8 x i32> @commute_cmpps_le_ymm(<8 x float>* %a0, <8 x float> %a1) {
 ; AVX-NEXT:    vmovaps (%rdi), %ymm1
 ; AVX-NEXT:    vcmpleps %ymm0, %ymm1, %ymm0
 ; AVX-NEXT:    retq
+;
+; AVX512-LABEL: commute_cmpps_le_ymm:
+; AVX512:       # BB#0:
+; AVX512-NEXT:    vmovaps (%rdi), %ymm1
+; AVX512-NEXT:    vcmpleps %ymm0, %ymm1, %k1
+; AVX512-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; AVX512-NEXT:    vmovdqa32 %ymm0, %ymm0 {%k1} {z}
+; AVX512-NEXT:    retq
   %1 = load <8 x float>, <8 x float>* %a0
   %2 = fcmp ole <8 x float> %1, %a1
   %3 = sext <8 x i1> %2 to <8 x i32>
@@ -338,6 +465,13 @@ define <2 x i64> @commute_cmppd_eq(<2 x double>* %a0, <2 x double> %a1) {
 ; AVX:       # BB#0:
 ; AVX-NEXT:    vcmpeqpd (%rdi), %xmm0, %xmm0
 ; AVX-NEXT:    retq
+;
+; AVX512-LABEL: commute_cmppd_eq:
+; AVX512:       # BB#0:
+; AVX512-NEXT:    vcmpeqpd (%rdi), %xmm0, %k1
+; AVX512-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; AVX512-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
+; AVX512-NEXT:    retq
   %1 = load <2 x double>, <2 x double>* %a0
   %2 = fcmp oeq <2 x double> %1, %a1
   %3 = sext <2 x i1> %2 to <2 x i64>
@@ -354,6 +488,13 @@ define <2 x i64> @commute_cmppd_ne(<2 x double>* %a0, <2 x double> %a1) {
 ; AVX:       # BB#0:
 ; AVX-NEXT:    vcmpneqpd (%rdi), %xmm0, %xmm0
 ; AVX-NEXT:    retq
+;
+; AVX512-LABEL: commute_cmppd_ne:
+; AVX512:       # BB#0:
+; AVX512-NEXT:    vcmpneqpd (%rdi), %xmm0, %k1
+; AVX512-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; AVX512-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
+; AVX512-NEXT:    retq
   %1 = load <2 x double>, <2 x double>* %a0
   %2 = fcmp une <2 x double> %1, %a1
   %3 = sext <2 x i1> %2 to <2 x i64>
@@ -370,6 +511,13 @@ define <2 x i64> @commute_cmppd_ord(<2 x double>* %a0, <2 x double> %a1) {
 ; AVX:       # BB#0:
 ; AVX-NEXT:    vcmpordpd (%rdi), %xmm0, %xmm0
 ; AVX-NEXT:    retq
+;
+; AVX512-LABEL: commute_cmppd_ord:
+; AVX512:       # BB#0:
+; AVX512-NEXT:    vcmpordpd (%rdi), %xmm0, %k1
+; AVX512-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; AVX512-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
+; AVX512-NEXT:    retq
   %1 = load <2 x double>, <2 x double>* %a0
   %2 = fcmp ord <2 x double> %1, %a1
   %3 = sext <2 x i1> %2 to <2 x i64>
@@ -393,6 +541,16 @@ define <2 x i64> @commute_cmppd_ueq(<2 x double>* %a0, <2 x double> %a1) {
 ; AVX-NEXT:    vcmpunordpd %xmm0, %xmm1, %xmm0
 ; AVX-NEXT:    vorpd %xmm2, %xmm0, %xmm0
 ; AVX-NEXT:    retq
+;
+; AVX512-LABEL: commute_cmppd_ueq:
+; AVX512:       # BB#0:
+; AVX512-NEXT:    vmovapd (%rdi), %xmm1
+; AVX512-NEXT:    vcmpeqpd %xmm0, %xmm1, %k0
+; AVX512-NEXT:    vcmpunordpd %xmm0, %xmm1, %k1
+; AVX512-NEXT:    korw %k0, %k1, %k1
+; AVX512-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; AVX512-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
+; AVX512-NEXT:    retq
   %1 = load <2 x double>, <2 x double>* %a0
   %2 = fcmp ueq <2 x double> %1, %a1
   %3 = sext <2 x i1> %2 to <2 x i64>
@@ -416,6 +574,15 @@ define <2 x i64> @commute_cmppd_one(<2 x double>* %a0, <2 x double> %a1) {
 ; AVX-NEXT:    vcmpordpd %xmm0, %xmm1, %xmm0
 ; AVX-NEXT:    vandpd %xmm2, %xmm0, %xmm0
 ; AVX-NEXT:    retq
+;
+; AVX512-LABEL: commute_cmppd_one:
+; AVX512:       # BB#0:
+; AVX512-NEXT:    vmovapd (%rdi), %xmm1
+; AVX512-NEXT:    vcmpordpd %xmm0, %xmm1, %k1
+; AVX512-NEXT:    vcmpneqpd %xmm0, %xmm1, %k1 {%k1}
+; AVX512-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; AVX512-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
+; AVX512-NEXT:    retq
   %1 = load <2 x double>, <2 x double>* %a0
   %2 = fcmp one <2 x double> %1, %a1
   %3 = sext <2 x i1> %2 to <2 x i64>
@@ -432,6 +599,13 @@ define <2 x i64> @commute_cmppd_uno(<2 x double>* %a0, <2 x double> %a1) {
 ; AVX:       # BB#0:
 ; AVX-NEXT:    vcmpunordpd (%rdi), %xmm0, %xmm0
 ; AVX-NEXT:    retq
+;
+; AVX512-LABEL: commute_cmppd_uno:
+; AVX512:       # BB#0:
+; AVX512-NEXT:    vcmpunordpd (%rdi), %xmm0, %k1
+; AVX512-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; AVX512-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
+; AVX512-NEXT:    retq
   %1 = load <2 x double>, <2 x double>* %a0
   %2 = fcmp uno <2 x double> %1, %a1
   %3 = sext <2 x i1> %2 to <2 x i64>
@@ -451,6 +625,14 @@ define <2 x i64> @commute_cmppd_lt(<2 x double>* %a0, <2 x double> %a1) {
 ; AVX-NEXT:    vmovapd (%rdi), %xmm1
 ; AVX-NEXT:    vcmpltpd %xmm0, %xmm1, %xmm0
 ; AVX-NEXT:    retq
+;
+; AVX512-LABEL: commute_cmppd_lt:
+; AVX512:       # BB#0:
+; AVX512-NEXT:    vmovapd (%rdi), %xmm1
+; AVX512-NEXT:    vcmpltpd %xmm0, %xmm1, %k1
+; AVX512-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; AVX512-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
+; AVX512-NEXT:    retq
   %1 = load <2 x double>, <2 x double>* %a0
   %2 = fcmp olt <2 x double> %1, %a1
   %3 = sext <2 x i1> %2 to <2 x i64>
@@ -470,6 +652,14 @@ define <2 x i64> @commute_cmppd_le(<2 x double>* %a0, <2 x double> %a1) {
 ; AVX-NEXT:    vmovapd (%rdi), %xmm1
 ; AVX-NEXT:    vcmplepd %xmm0, %xmm1, %xmm0
 ; AVX-NEXT:    retq
+;
+; AVX512-LABEL: commute_cmppd_le:
+; AVX512:       # BB#0:
+; AVX512-NEXT:    vmovapd (%rdi), %xmm1
+; AVX512-NEXT:    vcmplepd %xmm0, %xmm1, %k1
+; AVX512-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; AVX512-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
+; AVX512-NEXT:    retq
   %1 = load <2 x double>, <2 x double>* %a0
   %2 = fcmp ole <2 x double> %1, %a1
   %3 = sext <2 x i1> %2 to <2 x i64>
@@ -487,6 +677,13 @@ define <4 x i64> @commute_cmppd_eq_ymmm(<4 x double>* %a0, <4 x double> %a1) {
 ; AVX:       # BB#0:
 ; AVX-NEXT:    vcmpeqpd (%rdi), %ymm0, %ymm0
 ; AVX-NEXT:    retq
+;
+; AVX512-LABEL: commute_cmppd_eq_ymmm:
+; AVX512:       # BB#0:
+; AVX512-NEXT:    vcmpeqpd (%rdi), %ymm0, %k1
+; AVX512-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; AVX512-NEXT:    vmovdqa64 %ymm0, %ymm0 {%k1} {z}
+; AVX512-NEXT:    retq
   %1 = load <4 x double>, <4 x double>* %a0
   %2 = fcmp oeq <4 x double> %1, %a1
   %3 = sext <4 x i1> %2 to <4 x i64>
@@ -504,6 +701,13 @@ define <4 x i64> @commute_cmppd_ne_ymmm(<4 x double>* %a0, <4 x double> %a1) {
 ; AVX:       # BB#0:
 ; AVX-NEXT:    vcmpneqpd (%rdi), %ymm0, %ymm0
 ; AVX-NEXT:    retq
+;
+; AVX512-LABEL: commute_cmppd_ne_ymmm:
+; AVX512:       # BB#0:
+; AVX512-NEXT:    vcmpneqpd (%rdi), %ymm0, %k1
+; AVX512-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; AVX512-NEXT:    vmovdqa64 %ymm0, %ymm0 {%k1} {z}
+; AVX512-NEXT:    retq
   %1 = load <4 x double>, <4 x double>* %a0
   %2 = fcmp une <4 x double> %1, %a1
   %3 = sext <4 x i1> %2 to <4 x i64>
@@ -521,6 +725,13 @@ define <4 x i64> @commute_cmppd_ord_ymmm(<4 x double>* %a0, <4 x double> %a1) {
 ; AVX:       # BB#0:
 ; AVX-NEXT:    vcmpordpd (%rdi), %ymm0, %ymm0
 ; AVX-NEXT:    retq
+;
+; AVX512-LABEL: commute_cmppd_ord_ymmm:
+; AVX512:       # BB#0:
+; AVX512-NEXT:    vcmpordpd (%rdi), %ymm0, %k1
+; AVX512-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; AVX512-NEXT:    vmovdqa64 %ymm0, %ymm0 {%k1} {z}
+; AVX512-NEXT:    retq
   %1 = load <4 x double>, <4 x double>* %a0
   %2 = fcmp ord <4 x double> %1, %a1
   %3 = sext <4 x i1> %2 to <4 x i64>
@@ -538,6 +749,13 @@ define <4 x i64> @commute_cmppd_uno_ymmm(<4 x double>* %a0, <4 x double> %a1) {
 ; AVX:       # BB#0:
 ; AVX-NEXT:    vcmpunordpd (%rdi), %ymm0, %ymm0
 ; AVX-NEXT:    retq
+;
+; AVX512-LABEL: commute_cmppd_uno_ymmm:
+; AVX512:       # BB#0:
+; AVX512-NEXT:    vcmpunordpd (%rdi), %ymm0, %k1
+; AVX512-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; AVX512-NEXT:    vmovdqa64 %ymm0, %ymm0 {%k1} {z}
+; AVX512-NEXT:    retq
   %1 = load <4 x double>, <4 x double>* %a0
   %2 = fcmp uno <4 x double> %1, %a1
   %3 = sext <4 x i1> %2 to <4 x i64>
@@ -566,6 +784,16 @@ define <4 x i64> @commute_cmppd_ueq_ymmm(<4 x double>* %a0, <4 x double> %a1) {
 ; AVX-NEXT:    vcmpunordpd %ymm0, %ymm1, %ymm0
 ; AVX-NEXT:    vorpd %ymm2, %ymm0, %ymm0
 ; AVX-NEXT:    retq
+;
+; AVX512-LABEL: commute_cmppd_ueq_ymmm:
+; AVX512:       # BB#0:
+; AVX512-NEXT:    vmovapd (%rdi), %ymm1
+; AVX512-NEXT:    vcmpeqpd %ymm0, %ymm1, %k0
+; AVX512-NEXT:    vcmpunordpd %ymm0, %ymm1, %k1
+; AVX512-NEXT:    korw %k0, %k1, %k1
+; AVX512-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; AVX512-NEXT:    vmovdqa64 %ymm0, %ymm0 {%k1} {z}
+; AVX512-NEXT:    retq
   %1 = load <4 x double>, <4 x double>* %a0
   %2 = fcmp ueq <4 x double> %1, %a1
   %3 = sext <4 x i1> %2 to <4 x i64>
@@ -594,6 +822,15 @@ define <4 x i64> @commute_cmppd_one_ymmm(<4 x double>* %a0, <4 x double> %a1) {
 ; AVX-NEXT:    vcmpordpd %ymm0, %ymm1, %ymm0
 ; AVX-NEXT:    vandpd %ymm2, %ymm0, %ymm0
 ; AVX-NEXT:    retq
+;
+; AVX512-LABEL: commute_cmppd_one_ymmm:
+; AVX512:       # BB#0:
+; AVX512-NEXT:    vmovapd (%rdi), %ymm1
+; AVX512-NEXT:    vcmpordpd %ymm0, %ymm1, %k1
+; AVX512-NEXT:    vcmpneqpd %ymm0, %ymm1, %k1 {%k1}
+; AVX512-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; AVX512-NEXT:    vmovdqa64 %ymm0, %ymm0 {%k1} {z}
+; AVX512-NEXT:    retq
   %1 = load <4 x double>, <4 x double>* %a0
   %2 = fcmp one <4 x double> %1, %a1
   %3 = sext <4 x i1> %2 to <4 x i64>
@@ -616,6 +853,14 @@ define <4 x i64> @commute_cmppd_lt_ymmm(<4 x double>* %a0, <4 x double> %a1) {
 ; AVX-NEXT:    vmovapd (%rdi), %ymm1
 ; AVX-NEXT:    vcmpltpd %ymm0, %ymm1, %ymm0
 ; AVX-NEXT:    retq
+;
+; AVX512-LABEL: commute_cmppd_lt_ymmm:
+; AVX512:       # BB#0:
+; AVX512-NEXT:    vmovapd (%rdi), %ymm1
+; AVX512-NEXT:    vcmpltpd %ymm0, %ymm1, %k1
+; AVX512-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; AVX512-NEXT:    vmovdqa64 %ymm0, %ymm0 {%k1} {z}
+; AVX512-NEXT:    retq
   %1 = load <4 x double>, <4 x double>* %a0
   %2 = fcmp olt <4 x double> %1, %a1
   %3 = sext <4 x i1> %2 to <4 x i64>
@@ -638,6 +883,14 @@ define <4 x i64> @commute_cmppd_le_ymmm(<4 x double>* %a0, <4 x double> %a1) {
 ; AVX-NEXT:    vmovapd (%rdi), %ymm1
 ; AVX-NEXT:    vcmplepd %ymm0, %ymm1, %ymm0
 ; AVX-NEXT:    retq
+;
+; AVX512-LABEL: commute_cmppd_le_ymmm:
+; AVX512:       # BB#0:
+; AVX512-NEXT:    vmovapd (%rdi), %ymm1
+; AVX512-NEXT:    vcmplepd %ymm0, %ymm1, %k1
+; AVX512-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; AVX512-NEXT:    vmovdqa64 %ymm0, %ymm0 {%k1} {z}
+; AVX512-NEXT:    retq
   %1 = load <4 x double>, <4 x double>* %a0
   %2 = fcmp ole <4 x double> %1, %a1
   %3 = sext <4 x i1> %2 to <4 x i64>
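
For reference, the four immediates accepted by CommutableCMPCC are the symmetric predicates of the cmpps/cmppd immediate encoding: 0x00 (EQ), 0x03 (UNORD), 0x04 (NEQ), 0x07 (ORD). Ordered predicates such as LT and LE are not symmetric, which is why the lt/le tests above still load the memory operand into a register and keep the original operand order. Following the RUN lines, a single configuration can be checked with, e.g.:

    llc < llvm/test/CodeGen/X86/commute-fcmp.ll -mtriple=x86_64-unknown \
        -mattr=+avx512vl -disable-peephole \
      | FileCheck llvm/test/CodeGen/X86/commute-fcmp.ll --check-prefix=AVX512

The -disable-peephole flag keeps the post-isel peephole load folding from masking whether instruction selection itself picked the memory form.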

