summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--llvm/lib/Target/X86/X86InstrAVX512.td5
-rw-r--r--llvm/test/CodeGen/X86/avx512-cvt.ll43
2 files changed, 35 insertions, 13 deletions
diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td
index 753f1b71b07..eebb6401db0 100644
--- a/llvm/lib/Target/X86/X86InstrAVX512.td
+++ b/llvm/lib/Target/X86/X86InstrAVX512.td
@@ -7629,10 +7629,7 @@ multiclass avx512_vcvt_fp<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
(ins MaskRC:$mask, MemOp:$src),
OpcodeStr#Alias, "$src", "$src",
LdDAG,
- (vselect MaskRC:$mask,
- (_.VT (OpNode (_Src.VT
- (_Src.LdFrag addr:$src)))),
- _.RC:$src0),
+ (vselect MaskRC:$mask, LdDAG, _.RC:$src0),
vselect, "$src0 = $dst">,
EVEX, Sched<[sched.Folded]>;
diff --git a/llvm/test/CodeGen/X86/avx512-cvt.ll b/llvm/test/CodeGen/X86/avx512-cvt.ll
index c42be0d0f1c..2c8978d4a01 100644
--- a/llvm/test/CodeGen/X86/avx512-cvt.ll
+++ b/llvm/test/CodeGen/X86/avx512-cvt.ll
@@ -1,11 +1,11 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefix=ALL --check-prefix=NOVL --check-prefix=NODQ --check-prefix=NOVLDQ --check-prefix=KNL
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw,+avx512vl,+avx512dq | FileCheck %s --check-prefix=ALL --check-prefix=VL --check-prefix=VLDQ --check-prefix=VLBW --check-prefix=SKX
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512vl | FileCheck %s --check-prefix=ALL --check-prefix=NODQ --check-prefix=VL --check-prefix=VLNODQ --check-prefix=VLNOBW --check-prefix=AVX512VL
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512dq | FileCheck %s --check-prefix=ALL --check-prefix=NOVL --check-prefix=DQNOVL --check-prefix=AVX512DQ
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512bw | FileCheck %s --check-prefix=ALL --check-prefix=NOVL --check-prefix=NODQ --check-prefix=NOVLDQ --check-prefix=AVX512BW
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512vl,avx512dq | FileCheck %s --check-prefix=ALL --check-prefix=VL --check-prefix=VLDQ --check-prefix=VLNOBW --check-prefix=AVX512VLDQ
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512vl,avx512bw | FileCheck %s --check-prefix=ALL --check-prefix=NODQ --check-prefix=VL --check-prefix=VLNODQ --check-prefix=VLBW --check-prefix=AVX512VLBW
+; RUN: llc < %s -disable-peephole -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefix=ALL --check-prefix=NOVL --check-prefix=NODQ --check-prefix=NOVLDQ --check-prefix=KNL
+; RUN: llc < %s -disable-peephole -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw,+avx512vl,+avx512dq | FileCheck %s --check-prefix=ALL --check-prefix=VL --check-prefix=VLDQ --check-prefix=VLBW --check-prefix=SKX
+; RUN: llc < %s -disable-peephole -mtriple=x86_64-unknown-unknown -mattr=avx512vl | FileCheck %s --check-prefix=ALL --check-prefix=NODQ --check-prefix=VL --check-prefix=VLNODQ --check-prefix=VLNOBW --check-prefix=AVX512VL
+; RUN: llc < %s -disable-peephole -mtriple=x86_64-unknown-unknown -mattr=avx512dq | FileCheck %s --check-prefix=ALL --check-prefix=NOVL --check-prefix=DQNOVL --check-prefix=AVX512DQ
+; RUN: llc < %s -disable-peephole -mtriple=x86_64-unknown-unknown -mattr=avx512bw | FileCheck %s --check-prefix=ALL --check-prefix=NOVL --check-prefix=NODQ --check-prefix=NOVLDQ --check-prefix=AVX512BW
+; RUN: llc < %s -disable-peephole -mtriple=x86_64-unknown-unknown -mattr=avx512vl,avx512dq | FileCheck %s --check-prefix=ALL --check-prefix=VL --check-prefix=VLDQ --check-prefix=VLNOBW --check-prefix=AVX512VLDQ
+; RUN: llc < %s -disable-peephole -mtriple=x86_64-unknown-unknown -mattr=avx512vl,avx512bw | FileCheck %s --check-prefix=ALL --check-prefix=NODQ --check-prefix=VL --check-prefix=VLNODQ --check-prefix=VLBW --check-prefix=AVX512VLBW
define <16 x float> @sitof32(<16 x i32> %a) nounwind {
@@ -786,9 +786,34 @@ define <4 x double> @f32to4f64_mask(<4 x float> %b, <4 x double> %b1, <4 x doubl
ret <4 x double> %c
}
-define <4 x double> @f32to4f64_mask_load(<4 x float>* %p, <4 x double> %b1, <4 x double> %a1) {
+define <4 x double> @f32to4f64_mask_load(<4 x float>* %p, <4 x double> %b1, <4 x double> %a1, <4 x double> %passthru) {
; NOVL-LABEL: f32to4f64_mask_load:
; NOVL: # %bb.0:
+; NOVL-NEXT: # kill: def $ymm2 killed $ymm2 def $zmm2
+; NOVL-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
+; NOVL-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
+; NOVL-NEXT: vcvtps2pd (%rdi), %ymm3
+; NOVL-NEXT: vcmpltpd %zmm1, %zmm0, %k1
+; NOVL-NEXT: vblendmpd %zmm3, %zmm2, %zmm0 {%k1}
+; NOVL-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
+; NOVL-NEXT: retq
+;
+; VL-LABEL: f32to4f64_mask_load:
+; VL: # %bb.0:
+; VL-NEXT: vcmpltpd %ymm1, %ymm0, %k1
+; VL-NEXT: vcvtps2pd (%rdi), %ymm2 {%k1}
+; VL-NEXT: vmovaps %ymm2, %ymm0
+; VL-NEXT: retq
+ %b = load <4 x float>, <4 x float>* %p
+ %a = fpext <4 x float> %b to <4 x double>
+ %mask = fcmp ogt <4 x double> %a1, %b1
+ %c = select <4 x i1> %mask, <4 x double> %a, <4 x double> %passthru
+ ret <4 x double> %c
+}
+
+define <4 x double> @f32to4f64_maskz_load(<4 x float>* %p, <4 x double> %b1, <4 x double> %a1) {
+; NOVL-LABEL: f32to4f64_maskz_load:
+; NOVL: # %bb.0:
; NOVL-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
; NOVL-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
; NOVL-NEXT: vcvtps2pd (%rdi), %ymm2
@@ -797,7 +822,7 @@ define <4 x double> @f32to4f64_mask_load(<4 x float>* %p, <4 x double> %b1, <4 x
; NOVL-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
; NOVL-NEXT: retq
;
-; VL-LABEL: f32to4f64_mask_load:
+; VL-LABEL: f32to4f64_maskz_load:
; VL: # %bb.0:
; VL-NEXT: vcmpltpd %ymm1, %ymm0, %k1
; VL-NEXT: vcvtps2pd (%rdi), %ymm0 {%k1} {z}
OpenPOWER on IntegriCloud