diff options
-rw-r--r-- | llvm/lib/Target/X86/X86InstrAVX512.td | 21 | ||||
-rw-r--r-- | llvm/test/CodeGen/X86/fast-isel-select-sse.ll | 12 |
2 files changed, 16 insertions, 17 deletions
diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td index 057fa3208f3..c2d8f8a8fa6 100644 --- a/llvm/lib/Target/X86/X86InstrAVX512.td +++ b/llvm/lib/Target/X86/X86InstrAVX512.td @@ -3194,20 +3194,22 @@ multiclass avx512_move_scalar<string asm, SDNode OpNode, (scalar_to_vector _.FRC:$src2))))], _.ExeDomain,IIC_SSE_MOV_S_RR>, EVEX_4V; def rrkz : AVX512PI<0x10, MRMSrcReg, (outs _.RC:$dst), - (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2), + (ins _.KRCWM:$mask, _.RC:$src1, _.FRC:$src2), !strconcat(asm, "\t{$src2, $src1, $dst {${mask}} {z}|", "$dst {${mask}} {z}, $src1, $src2}"), [(set _.RC:$dst, (_.VT (X86selects _.KRCWM:$mask, - (_.VT (OpNode _.RC:$src1, _.RC:$src2)), + (_.VT (OpNode _.RC:$src1, + (scalar_to_vector _.FRC:$src2))), _.ImmAllZerosV)))], _.ExeDomain,IIC_SSE_MOV_S_RR>, EVEX_4V, EVEX_KZ; let Constraints = "$src0 = $dst" in def rrk : AVX512PI<0x10, MRMSrcReg, (outs _.RC:$dst), - (ins _.RC:$src0, _.KRCWM:$mask, _.RC:$src1, _.RC:$src2), + (ins _.RC:$src0, _.KRCWM:$mask, _.RC:$src1, _.FRC:$src2), !strconcat(asm, "\t{$src2, $src1, $dst {${mask}}|", "$dst {${mask}}, $src1, $src2}"), [(set _.RC:$dst, (_.VT (X86selects _.KRCWM:$mask, - (_.VT (OpNode _.RC:$src1, _.RC:$src2)), + (_.VT (OpNode _.RC:$src1, + (scalar_to_vector _.FRC:$src2))), (_.VT _.RC:$src0))))], _.ExeDomain,IIC_SSE_MOV_S_RR>, EVEX_4V, EVEX_K; let canFoldAsLoad = 1, isReMaterializable = 1 in @@ -3257,8 +3259,7 @@ def : Pat<(_.VT (OpNode _.RC:$src0, (COPY_TO_REGCLASS (!cast<Instruction>(InstrStr#rrk) (COPY_TO_REGCLASS _.FRC:$src2, _.RC), (COPY_TO_REGCLASS GR32:$mask, VK1WM), - (_.VT _.RC:$src0), - (COPY_TO_REGCLASS _.FRC:$src1, _.RC)), + (_.VT _.RC:$src0), _.FRC:$src1), _.RC)>; def : Pat<(_.VT (OpNode _.RC:$src0, @@ -3268,10 +3269,8 @@ def : Pat<(_.VT (OpNode _.RC:$src0, (_.EltVT ZeroFP))))))), (COPY_TO_REGCLASS (!cast<Instruction>(InstrStr#rrkz) (COPY_TO_REGCLASS GR32:$mask, VK1WM), - (_.VT _.RC:$src0), - (COPY_TO_REGCLASS _.FRC:$src1, _.RC)), + (_.VT _.RC:$src0), _.FRC:$src1), _.RC)>; - } multiclass avx512_store_scalar_lowering<string InstrStr, AVX512VLVectorVTInfo _, @@ -3334,11 +3333,11 @@ defm : avx512_load_scalar_lowering<"VMOVSDZ", avx512vl_f64_info, def : Pat<(f32 (X86selects VK1WM:$mask, (f32 FR32X:$src1), (f32 FR32X:$src2))), (COPY_TO_REGCLASS (VMOVSSZrrk (COPY_TO_REGCLASS FR32X:$src2, VR128X), - VK1WM:$mask, (v4f32 (IMPLICIT_DEF)),(COPY_TO_REGCLASS FR32X:$src1, VR128X)), FR32X)>; + VK1WM:$mask, (v4f32 (IMPLICIT_DEF)), FR32X:$src1), FR32X)>; def : Pat<(f64 (X86selects VK1WM:$mask, (f64 FR64X:$src1), (f64 FR64X:$src2))), (COPY_TO_REGCLASS (VMOVSDZrrk (COPY_TO_REGCLASS FR64X:$src2, VR128X), - VK1WM:$mask, (v2f64 (IMPLICIT_DEF)), (COPY_TO_REGCLASS FR64X:$src1, VR128X)), FR64X)>; + VK1WM:$mask, (v2f64 (IMPLICIT_DEF)), FR64X:$src1), FR64X)>; def : Pat<(int_x86_avx512_mask_store_ss addr:$dst, VR128X:$src, GR8:$mask), (VMOVSSZmrk addr:$dst, (i1 (COPY_TO_REGCLASS GR8:$mask, VK1WM)), diff --git a/llvm/test/CodeGen/X86/fast-isel-select-sse.ll b/llvm/test/CodeGen/X86/fast-isel-select-sse.ll index e519af5e033..499fe5ba54a 100644 --- a/llvm/test/CodeGen/X86/fast-isel-select-sse.ll +++ b/llvm/test/CodeGen/X86/fast-isel-select-sse.ll @@ -1,10 +1,10 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s --check-prefix=SSE -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -fast-isel -fast-isel-abort=1 | FileCheck %s --check-prefix=SSE -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx | FileCheck %s --check-prefix=AVX -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -fast-isel -fast-isel-abort=1 -mattr=avx | FileCheck %s --check-prefix=AVX -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512f | FileCheck %s --check-prefix=AVX512 -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -fast-isel -fast-isel-abort=1 -mattr=avx512f | FileCheck %s --check-prefix=AVX512 +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -verify-machineinstrs | FileCheck %s --check-prefix=SSE +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -verify-machineinstrs -fast-isel -fast-isel-abort=1 | FileCheck %s --check-prefix=SSE +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -verify-machineinstrs -mattr=avx | FileCheck %s --check-prefix=AVX +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -verify-machineinstrs -fast-isel -fast-isel-abort=1 -mattr=avx | FileCheck %s --check-prefix=AVX +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -verify-machineinstrs -mattr=avx512f | FileCheck %s --check-prefix=AVX512 +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -verify-machineinstrs -fast-isel -fast-isel-abort=1 -mattr=avx512f | FileCheck %s --check-prefix=AVX512 ; Test all cmp predicates that can be used with SSE. |