diff options
| author | Craig Topper <craig.topper@intel.com> | 2019-06-03 19:29:14 +0000 |
|---|---|---|
| committer | Craig Topper <craig.topper@intel.com> | 2019-06-03 19:29:14 +0000 |
| commit | dcf865f0ca557e60b1e902f004012bc54e418878 (patch) | |
| tree | a775eabfa0c49df80696df7601a7e6a39ac998d6 | |
| parent | 0b7f98da6569bbc49aed2675fdda95f89198d356 (diff) | |
| download | bcm5719-llvm-dcf865f0ca557e60b1e902f004012bc54e418878.tar.gz bcm5719-llvm-dcf865f0ca557e60b1e902f004012bc54e418878.zip | |
[X86] Fix the pattern for merge masked vcvtps2pd.
r362199 fixed it for zero masking, but not zero masking. The load
folding in the peephole pass hid the bug. This patch turns off
the peephole pass on the relevant test to ensure coverage.
llvm-svn: 362440
| -rw-r--r-- | llvm/lib/Target/X86/X86InstrAVX512.td | 5 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/avx512-cvt.ll | 43 |
2 files changed, 35 insertions, 13 deletions
diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td index 753f1b71b07..eebb6401db0 100644 --- a/llvm/lib/Target/X86/X86InstrAVX512.td +++ b/llvm/lib/Target/X86/X86InstrAVX512.td @@ -7629,10 +7629,7 @@ multiclass avx512_vcvt_fp<bits<8> opc, string OpcodeStr, X86VectorVTInfo _, (ins MaskRC:$mask, MemOp:$src), OpcodeStr#Alias, "$src", "$src", LdDAG, - (vselect MaskRC:$mask, - (_.VT (OpNode (_Src.VT - (_Src.LdFrag addr:$src)))), - _.RC:$src0), + (vselect MaskRC:$mask, LdDAG, _.RC:$src0), vselect, "$src0 = $dst">, EVEX, Sched<[sched.Folded]>; diff --git a/llvm/test/CodeGen/X86/avx512-cvt.ll b/llvm/test/CodeGen/X86/avx512-cvt.ll index c42be0d0f1c..2c8978d4a01 100644 --- a/llvm/test/CodeGen/X86/avx512-cvt.ll +++ b/llvm/test/CodeGen/X86/avx512-cvt.ll @@ -1,11 +1,11 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefix=ALL --check-prefix=NOVL --check-prefix=NODQ --check-prefix=NOVLDQ --check-prefix=KNL -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw,+avx512vl,+avx512dq | FileCheck %s --check-prefix=ALL --check-prefix=VL --check-prefix=VLDQ --check-prefix=VLBW --check-prefix=SKX -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512vl | FileCheck %s --check-prefix=ALL --check-prefix=NODQ --check-prefix=VL --check-prefix=VLNODQ --check-prefix=VLNOBW --check-prefix=AVX512VL -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512dq | FileCheck %s --check-prefix=ALL --check-prefix=NOVL --check-prefix=DQNOVL --check-prefix=AVX512DQ -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512bw | FileCheck %s --check-prefix=ALL --check-prefix=NOVL --check-prefix=NODQ --check-prefix=NOVLDQ --check-prefix=AVX512BW -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512vl,avx512dq | FileCheck %s --check-prefix=ALL --check-prefix=VL --check-prefix=VLDQ --check-prefix=VLNOBW --check-prefix=AVX512VLDQ -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512vl,avx512bw | FileCheck %s --check-prefix=ALL --check-prefix=NODQ --check-prefix=VL --check-prefix=VLNODQ --check-prefix=VLBW --check-prefix=AVX512VLBW +; RUN: llc < %s -disable-peephole -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefix=ALL --check-prefix=NOVL --check-prefix=NODQ --check-prefix=NOVLDQ --check-prefix=KNL +; RUN: llc < %s -disable-peephole -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw,+avx512vl,+avx512dq | FileCheck %s --check-prefix=ALL --check-prefix=VL --check-prefix=VLDQ --check-prefix=VLBW --check-prefix=SKX +; RUN: llc < %s -disable-peephole -mtriple=x86_64-unknown-unknown -mattr=avx512vl | FileCheck %s --check-prefix=ALL --check-prefix=NODQ --check-prefix=VL --check-prefix=VLNODQ --check-prefix=VLNOBW --check-prefix=AVX512VL +; RUN: llc < %s -disable-peephole -mtriple=x86_64-unknown-unknown -mattr=avx512dq | FileCheck %s --check-prefix=ALL --check-prefix=NOVL --check-prefix=DQNOVL --check-prefix=AVX512DQ +; RUN: llc < %s -disable-peephole -mtriple=x86_64-unknown-unknown -mattr=avx512bw | FileCheck %s --check-prefix=ALL --check-prefix=NOVL --check-prefix=NODQ --check-prefix=NOVLDQ --check-prefix=AVX512BW +; RUN: llc < %s -disable-peephole -mtriple=x86_64-unknown-unknown -mattr=avx512vl,avx512dq | FileCheck %s --check-prefix=ALL --check-prefix=VL --check-prefix=VLDQ --check-prefix=VLNOBW --check-prefix=AVX512VLDQ +; RUN: llc < %s -disable-peephole -mtriple=x86_64-unknown-unknown -mattr=avx512vl,avx512bw | FileCheck %s --check-prefix=ALL --check-prefix=NODQ --check-prefix=VL --check-prefix=VLNODQ --check-prefix=VLBW --check-prefix=AVX512VLBW define <16 x float> @sitof32(<16 x i32> %a) nounwind { @@ -786,9 +786,34 @@ define <4 x double> @f32to4f64_mask(<4 x float> %b, <4 x double> %b1, <4 x doubl ret <4 x double> %c } -define <4 x double> @f32to4f64_mask_load(<4 x float>* %p, <4 x double> %b1, <4 x double> %a1) { +define <4 x double> @f32to4f64_mask_load(<4 x float>* %p, <4 x double> %b1, <4 x double> %a1, <4 x double> %passthru) { ; NOVL-LABEL: f32to4f64_mask_load: ; NOVL: # %bb.0: +; NOVL-NEXT: # kill: def $ymm2 killed $ymm2 def $zmm2 +; NOVL-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1 +; NOVL-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 +; NOVL-NEXT: vcvtps2pd (%rdi), %ymm3 +; NOVL-NEXT: vcmpltpd %zmm1, %zmm0, %k1 +; NOVL-NEXT: vblendmpd %zmm3, %zmm2, %zmm0 {%k1} +; NOVL-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 +; NOVL-NEXT: retq +; +; VL-LABEL: f32to4f64_mask_load: +; VL: # %bb.0: +; VL-NEXT: vcmpltpd %ymm1, %ymm0, %k1 +; VL-NEXT: vcvtps2pd (%rdi), %ymm2 {%k1} +; VL-NEXT: vmovaps %ymm2, %ymm0 +; VL-NEXT: retq + %b = load <4 x float>, <4 x float>* %p + %a = fpext <4 x float> %b to <4 x double> + %mask = fcmp ogt <4 x double> %a1, %b1 + %c = select <4 x i1> %mask, <4 x double> %a, <4 x double> %passthru + ret <4 x double> %c +} + +define <4 x double> @f32to4f64_maskz_load(<4 x float>* %p, <4 x double> %b1, <4 x double> %a1) { +; NOVL-LABEL: f32to4f64_maskz_load: +; NOVL: # %bb.0: ; NOVL-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1 ; NOVL-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; NOVL-NEXT: vcvtps2pd (%rdi), %ymm2 @@ -797,7 +822,7 @@ define <4 x double> @f32to4f64_mask_load(<4 x float>* %p, <4 x double> %b1, <4 x ; NOVL-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 ; NOVL-NEXT: retq ; -; VL-LABEL: f32to4f64_mask_load: +; VL-LABEL: f32to4f64_maskz_load: ; VL: # %bb.0: ; VL-NEXT: vcmpltpd %ymm1, %ymm0, %k1 ; VL-NEXT: vcvtps2pd (%rdi), %ymm0 {%k1} {z} |

