diff options
author | Craig Topper <craig.topper@intel.com> | 2018-02-24 00:15:05 +0000 |
---|---|---|
committer | Craig Topper <craig.topper@intel.com> | 2018-02-24 00:15:05 +0000 |
commit | 7bcac492d49d8f2d208a73491235d29c568b5e53 (patch) | |
tree | d01dd63a30a0de334b2d5a96f0996277db7f33cd | |
parent | b7a9bedfcda215bf317147e81939dd57902c6da7 (diff) | |
download | bcm5719-llvm-7bcac492d49d8f2d208a73491235d29c568b5e53.tar.gz bcm5719-llvm-7bcac492d49d8f2d208a73491235d29c568b5e53.zip |
[X86] Remove checks for '(scalar_to_vector (i8 (trunc GR32:)))' from scalar masked move patterns.
This portion can be matched by other patterns. We don't need it to make the larger pattern valid. It's sufficient to have a v1i1 mask input without caring where it came from.
llvm-svn: 325999
-rw-r--r-- | llvm/lib/Target/X86/X86InstrAVX512.td | 8 | ||||
-rw-r--r-- | llvm/test/CodeGen/X86/avx512-load-store.ll | 14 |
2 files changed, 8 insertions, 14 deletions
```diff
diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td
index 2e6566f6216..7b17ab322e9 100644
--- a/llvm/lib/Target/X86/X86InstrAVX512.td
+++ b/llvm/lib/Target/X86/X86InstrAVX512.td
@@ -3878,22 +3878,22 @@ multiclass avx512_move_scalar_lowering<string InstrStr, SDNode OpNode,
 
 def : Pat<(_.VT (OpNode _.RC:$src0,
                         (_.VT (scalar_to_vector
-                  (_.EltVT (X86selects (scalar_to_vector (i8 (trunc GR32:$mask))),
+                  (_.EltVT (X86selects VK1WM:$mask,
                                        (_.EltVT _.FRC:$src1),
                                        (_.EltVT _.FRC:$src2))))))),
           (!cast<Instruction>(InstrStr#rrk)
                         (COPY_TO_REGCLASS _.FRC:$src2, _.RC),
-                        (COPY_TO_REGCLASS GR32:$mask, VK1WM),
+                        VK1WM:$mask,
                         (_.VT _.RC:$src0),
                         (COPY_TO_REGCLASS _.FRC:$src1, _.RC))>;
 
 def : Pat<(_.VT (OpNode _.RC:$src0,
                         (_.VT (scalar_to_vector
-                  (_.EltVT (X86selects (scalar_to_vector (i8 (trunc GR32:$mask))),
+                  (_.EltVT (X86selects VK1WM:$mask,
                                        (_.EltVT _.FRC:$src1),
                                        (_.EltVT ZeroFP))))))),
           (!cast<Instruction>(InstrStr#rrkz)
-                        (COPY_TO_REGCLASS GR32:$mask, VK1WM),
+                        VK1WM:$mask,
                         (_.VT _.RC:$src0),
                         (COPY_TO_REGCLASS _.FRC:$src1, _.RC))>;
 }
diff --git a/llvm/test/CodeGen/X86/avx512-load-store.ll b/llvm/test/CodeGen/X86/avx512-load-store.ll
index 8589215f4a1..b6000c09da2 100644
--- a/llvm/test/CodeGen/X86/avx512-load-store.ll
+++ b/llvm/test/CodeGen/X86/avx512-load-store.ll
@@ -13,8 +13,7 @@ define <4 x float> @test_mm_mask_move_ss(<4 x float> %__W, i8 zeroext %__U, <4 x
 ; CHECK32: # %bb.0: # %entry
 ; CHECK32-NEXT: movb {{[0-9]+}}(%esp), %al
 ; CHECK32-NEXT: kmovw %eax, %k1
-; CHECK32-NEXT: vmovss %xmm2, %xmm0, %xmm0 {%k1}
-; CHECK32-NEXT: vmovss {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
+; CHECK32-NEXT: vmovss %xmm2, %xmm1, %xmm0 {%k1}
 ; CHECK32-NEXT: retl
 entry:
   %0 = and i8 %__U, 1
@@ -37,9 +36,7 @@ define <4 x float> @test_mm_maskz_move_ss(i8 zeroext %__U, <4 x float> %__A, <4
 ; CHECK32: # %bb.0: # %entry
 ; CHECK32-NEXT: movb {{[0-9]+}}(%esp), %al
 ; CHECK32-NEXT: kmovw %eax, %k1
-; CHECK32-NEXT: vxorps %xmm2, %xmm2, %xmm2
-; CHECK32-NEXT: vmovss %xmm1, %xmm0, %xmm2 {%k1}
-; CHECK32-NEXT: vmovss {{.*#+}} xmm0 = xmm2[0],xmm0[1,2,3]
+; CHECK32-NEXT: vmovss %xmm1, %xmm0, %xmm0 {%k1} {z}
 ; CHECK32-NEXT: retl
 entry:
   %0 = and i8 %__U, 1
@@ -61,8 +58,7 @@ define <2 x double> @test_mm_mask_move_sd(<2 x double> %__W, i8 zeroext %__U, <2
 ; CHECK32: # %bb.0: # %entry
 ; CHECK32-NEXT: movb {{[0-9]+}}(%esp), %al
 ; CHECK32-NEXT: kmovw %eax, %k1
-; CHECK32-NEXT: vmovsd %xmm2, %xmm0, %xmm0 {%k1}
-; CHECK32-NEXT: vmovsd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
+; CHECK32-NEXT: vmovsd %xmm2, %xmm1, %xmm0 {%k1}
 ; CHECK32-NEXT: retl
 entry:
   %0 = and i8 %__U, 1
@@ -85,9 +81,7 @@ define <2 x double> @test_mm_maskz_move_sd(i8 zeroext %__U, <2 x double> %__A, <
 ; CHECK32: # %bb.0: # %entry
 ; CHECK32-NEXT: movb {{[0-9]+}}(%esp), %al
 ; CHECK32-NEXT: kmovw %eax, %k1
-; CHECK32-NEXT: vxorpd %xmm2, %xmm2, %xmm2
-; CHECK32-NEXT: vmovsd %xmm1, %xmm0, %xmm2 {%k1}
-; CHECK32-NEXT: vmovsd {{.*#+}} xmm0 = xmm2[0],xmm0[1]
+; CHECK32-NEXT: vmovsd %xmm1, %xmm0, %xmm0 {%k1} {z}
 ; CHECK32-NEXT: retl
 entry:
   %0 = and i8 %__U, 1
```