summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--llvm/lib/Target/X86/X86InstrSSE.td22
-rw-r--r--llvm/test/CodeGen/X86/avx512bwvl-intrinsics.ll4
2 files changed, 18 insertions, 8 deletions
diff --git a/llvm/lib/Target/X86/X86InstrSSE.td b/llvm/lib/Target/X86/X86InstrSSE.td
index 3686bbe1ede..3972fa9f8c0 100644
--- a/llvm/lib/Target/X86/X86InstrSSE.td
+++ b/llvm/lib/Target/X86/X86InstrSSE.td
@@ -5552,6 +5552,14 @@ multiclass SS3I_binop_rm_int_y<bits<8> opc, string OpcodeStr,
Sched<[Sched.Folded, ReadAfterLd]>;
}
+let ImmT = NoImm, Predicates = [HasAVX, NoVLX_Or_NoBWI] in {
+let isCommutable = 0 in {
+ defm VPSHUFB : SS3I_binop_rm<0x00, "vpshufb", X86pshufb, v16i8, VR128,
+ loadv2i64, i128mem,
+ SSE_PSHUFB, 0>, VEX_4V;
+}
+}
+
let ImmT = NoImm, Predicates = [HasAVX] in {
let isCommutable = 0 in {
defm VPHADDW : SS3I_binop_rm<0x01, "vphaddw", X86hadd, v8i16, VR128,
@@ -5575,9 +5583,6 @@ let isCommutable = 0 in {
defm VPSIGND : SS3I_binop_rm_int<0x0A, "vpsignd",
int_x86_ssse3_psign_d_128,
SSE_PSIGN, loadv2i64, 0>, VEX_4V;
- defm VPSHUFB : SS3I_binop_rm<0x00, "vpshufb", X86pshufb, v16i8, VR128,
- loadv2i64, i128mem,
- SSE_PSHUFB, 0>, VEX_4V;
defm VPHADDSW : SS3I_binop_rm_int<0x03, "vphaddsw",
int_x86_ssse3_phadd_sw_128,
SSE_PHADDSUBSW, loadv2i64, 0>, VEX_4V;
@@ -5593,6 +5598,14 @@ defm VPMULHRSW : SS3I_binop_rm_int<0x0B, "vpmulhrsw",
SSE_PMULHRSW, loadv2i64, 0>, VEX_4V;
}
+let ImmT = NoImm, Predicates = [HasAVX2, NoVLX_Or_NoBWI] in {
+let isCommutable = 0 in {
+ defm VPSHUFBY : SS3I_binop_rm<0x00, "vpshufb", X86pshufb, v32i8, VR256,
+ loadv4i64, i256mem,
+ SSE_PSHUFB, 0>, VEX_4V, VEX_L;
+}
+}
+
let ImmT = NoImm, Predicates = [HasAVX2] in {
let isCommutable = 0 in {
defm VPHADDWY : SS3I_binop_rm<0x01, "vphaddw", X86hadd, v16i16, VR256,
@@ -5613,9 +5626,6 @@ let isCommutable = 0 in {
WriteVecALU>, VEX_4V, VEX_L;
defm VPSIGNDY : SS3I_binop_rm_int_y<0x0A, "vpsignd", int_x86_avx2_psign_d,
WriteVecALU>, VEX_4V, VEX_L;
- defm VPSHUFBY : SS3I_binop_rm<0x00, "vpshufb", X86pshufb, v32i8, VR256,
- loadv4i64, i256mem,
- SSE_PSHUFB, 0>, VEX_4V, VEX_L;
defm VPHADDSW : SS3I_binop_rm_int_y<0x03, "vphaddsw",
int_x86_avx2_phadd_sw,
WriteVecALU>, VEX_4V, VEX_L;
diff --git a/llvm/test/CodeGen/X86/avx512bwvl-intrinsics.ll b/llvm/test/CodeGen/X86/avx512bwvl-intrinsics.ll
index 534d5c85f00..cf1b15e804f 100644
--- a/llvm/test/CodeGen/X86/avx512bwvl-intrinsics.ll
+++ b/llvm/test/CodeGen/X86/avx512bwvl-intrinsics.ll
@@ -4727,7 +4727,7 @@ define <16 x i8>@test_int_x86_avx512_mask_pshuf_b_128(<16 x i8> %x0, <16 x i8> %
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT: vpshufb %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0x00,0xd1]
-; CHECK-NEXT: vpshufb %xmm1, %xmm0, %xmm0 ## encoding: [0xc4,0xe2,0x79,0x00,0xc1]
+; CHECK-NEXT: vpshufb %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf2,0x7d,0x08,0x00,0xc1]
; CHECK-NEXT: vpaddb %xmm0, %xmm2, %xmm0 ## encoding: [0x62,0xf1,0x6d,0x08,0xfc,0xc0]
; CHECK-NEXT: retq ## encoding: [0xc3]
%res = call <16 x i8> @llvm.x86.avx512.mask.pshuf.b.128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 %x3)
@@ -4743,7 +4743,7 @@ define <32 x i8>@test_int_x86_avx512_mask_pshuf_b_256(<32 x i8> %x0, <32 x i8> %
; CHECK: ## BB#0:
; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf]
; CHECK-NEXT: vpshufb %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0x00,0xd1]
-; CHECK-NEXT: vpshufb %ymm1, %ymm0, %ymm0 ## encoding: [0xc4,0xe2,0x7d,0x00,0xc1]
+; CHECK-NEXT: vpshufb %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf2,0x7d,0x28,0x00,0xc1]
; CHECK-NEXT: vpaddb %ymm0, %ymm2, %ymm0 ## encoding: [0x62,0xf1,0x6d,0x28,0xfc,0xc0]
; CHECK-NEXT: retq ## encoding: [0xc3]
%res = call <32 x i8> @llvm.x86.avx512.mask.pshuf.b.256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 %x3)
OpenPOWER on IntegriCloud