diff options
-rw-r--r-- | llvm/lib/Target/X86/X86ISelDAGToDAG.cpp | 53 | ||||
-rw-r--r-- | llvm/lib/Target/X86/X86InstrAVX512.td | 150 | ||||
-rw-r--r-- | llvm/test/CodeGen/X86/combine-bitselect.ll | 6 | ||||
-rw-r--r-- | llvm/test/CodeGen/X86/vec-copysign-avx512.ll | 6 | ||||
-rw-r--r-- | llvm/test/CodeGen/X86/vector-fshl-128.ll | 18 | ||||
-rw-r--r-- | llvm/test/CodeGen/X86/vector-fshl-256.ll | 18 | ||||
-rw-r--r-- | llvm/test/CodeGen/X86/vector-fshl-512.ll | 24 | ||||
-rw-r--r-- | llvm/test/CodeGen/X86/vector-fshl-rot-128.ll | 12 | ||||
-rw-r--r-- | llvm/test/CodeGen/X86/vector-fshl-rot-256.ll | 24 | ||||
-rw-r--r-- | llvm/test/CodeGen/X86/vector-fshl-rot-512.ll | 12 | ||||
-rw-r--r-- | llvm/test/CodeGen/X86/vector-fshr-128.ll | 18 | ||||
-rw-r--r-- | llvm/test/CodeGen/X86/vector-fshr-256.ll | 18 | ||||
-rw-r--r-- | llvm/test/CodeGen/X86/vector-fshr-512.ll | 24 | ||||
-rw-r--r-- | llvm/test/CodeGen/X86/vector-fshr-rot-128.ll | 12 | ||||
-rw-r--r-- | llvm/test/CodeGen/X86/vector-fshr-rot-256.ll | 24 | ||||
-rw-r--r-- | llvm/test/CodeGen/X86/vector-fshr-rot-512.ll | 12 | ||||
-rw-r--r-- | llvm/test/CodeGen/X86/vector-rotate-256.ll | 24 |
17 files changed, 286 insertions, 169 deletions
diff --git a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp index 83c1251265f..cb1c7d3c339 100644 --- a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp +++ b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp @@ -510,6 +510,7 @@ namespace { bool combineIncDecVector(SDNode *Node); bool tryShrinkShlLogicImm(SDNode *N); bool tryVPTESTM(SDNode *Root, SDValue Setcc, SDValue Mask); + bool tryMatchBitSelect(SDNode *N); MachineSDNode *emitPCMPISTR(unsigned ROpc, unsigned MOpc, bool MayFoldLoad, const SDLoc &dl, MVT VT, SDNode *Node); @@ -4275,6 +4276,55 @@ bool X86DAGToDAGISel::tryVPTESTM(SDNode *Root, SDValue Setcc, return true; } +// Try to match the bitselect pattern (or (and A, B), (andn A, C)). Turn it +// into vpternlog. +bool X86DAGToDAGISel::tryMatchBitSelect(SDNode *N) { + assert(N->getOpcode() == ISD::OR && "Unexpected opcode!"); + + MVT NVT = N->getSimpleValueType(0); + + // Make sure we support VPTERNLOG. + if (!NVT.isVector() || !Subtarget->hasAVX512()) + return false; + + // We need VLX for 128/256-bit. + if (!(Subtarget->hasVLX() || NVT.is512BitVector())) + return false; + + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + + // Canonicalize AND to LHS. + if (N1.getOpcode() == ISD::AND) + std::swap(N0, N1); + + if (N0.getOpcode() != ISD::AND || + N1.getOpcode() != X86ISD::ANDNP || + !N0.hasOneUse() || !N1.hasOneUse()) + return false; + + // ANDN is not commutable, use it to pick down A and C. + SDValue A = N1.getOperand(0); + SDValue C = N1.getOperand(1); + + // AND is commutable, if one operand matches A, the other operand is B. + // Otherwise this isn't a match. + SDValue B; + if (N0.getOperand(0) == A) + B = N0.getOperand(1); + else if (N0.getOperand(1) == A) + B = N0.getOperand(0); + else + return false; + + SDLoc dl(N); + SDValue Imm = CurDAG->getTargetConstant(0xCA, dl, MVT::i8); + SDValue Ternlog = CurDAG->getNode(X86ISD::VPTERNLOG, dl, NVT, A, B, C, Imm); + ReplaceNode(N, Ternlog.getNode()); + SelectCode(Ternlog.getNode()); + return true; +} + void X86DAGToDAGISel::Select(SDNode *Node) { MVT NVT = Node->getSimpleValueType(0); unsigned Opcode = Node->getOpcode(); @@ -4433,6 +4483,9 @@ void X86DAGToDAGISel::Select(SDNode *Node) { if (tryShrinkShlLogicImm(Node)) return; + if (Opcode == ISD::OR && tryMatchBitSelect(Node)) + return; + LLVM_FALLTHROUGH; case ISD::ADD: case ISD::SUB: { diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td index 60bcf3e2dfd..18c95a631c9 100644 --- a/llvm/lib/Target/X86/X86InstrAVX512.td +++ b/llvm/lib/Target/X86/X86InstrAVX512.td @@ -11436,6 +11436,113 @@ defm VPTERNLOGD : avx512_common_ternlog<"vpternlogd", SchedWriteVecALU, defm VPTERNLOGQ : avx512_common_ternlog<"vpternlogq", SchedWriteVecALU, avx512vl_i64_info>, VEX_W; +// Patterns to use VPTERNLOG for vXi16/vXi8 vectors. +let Predicates = [HasVLX] in { + def : Pat<(v16i8 (X86vpternlog VR128X:$src1, VR128X:$src2, VR128X:$src3, + (i8 timm:$src4))), + (VPTERNLOGQZ128rri VR128X:$src1, VR128X:$src2, VR128X:$src3, + timm:$src4)>; + def : Pat<(v16i8 (X86vpternlog VR128X:$src1, VR128X:$src2, + (loadv16i8 addr:$src3), (i8 timm:$src4))), + (VPTERNLOGQZ128rmi VR128X:$src1, VR128X:$src2, addr:$src3, + timm:$src4)>; + def : Pat<(v16i8 (X86vpternlog (loadv16i8 addr:$src3), VR128X:$src2, + VR128X:$src1, (i8 timm:$src4))), + (VPTERNLOGQZ128rmi VR128X:$src1, VR128X:$src2, addr:$src3, + (VPTERNLOG321_imm8 timm:$src4))>; + def : Pat<(v16i8 (X86vpternlog VR128X:$src1, (loadv16i8 addr:$src3), + VR128X:$src2, (i8 timm:$src4))), + (VPTERNLOGQZ128rmi VR128X:$src1, VR128X:$src2, addr:$src3, + (VPTERNLOG132_imm8 timm:$src4))>; + + def : Pat<(v8i16 (X86vpternlog VR128X:$src1, VR128X:$src2, VR128X:$src3, + (i8 timm:$src4))), + (VPTERNLOGQZ128rri VR128X:$src1, VR128X:$src2, VR128X:$src3, + timm:$src4)>; + def : Pat<(v8i16 (X86vpternlog VR128X:$src1, VR128X:$src2, + (loadv8i16 addr:$src3), (i8 timm:$src4))), + (VPTERNLOGQZ128rmi VR128X:$src1, VR128X:$src2, addr:$src3, + timm:$src4)>; + def : Pat<(v8i16 (X86vpternlog (loadv8i16 addr:$src3), VR128X:$src2, + VR128X:$src1, (i8 timm:$src4))), + (VPTERNLOGQZ128rmi VR128X:$src1, VR128X:$src2, addr:$src3, + (VPTERNLOG321_imm8 timm:$src4))>; + def : Pat<(v8i16 (X86vpternlog VR128X:$src1, (loadv8i16 addr:$src3), + VR128X:$src2, (i8 timm:$src4))), + (VPTERNLOGQZ128rmi VR128X:$src1, VR128X:$src2, addr:$src3, + (VPTERNLOG132_imm8 timm:$src4))>; + + def : Pat<(v32i8 (X86vpternlog VR256X:$src1, VR256X:$src2, VR256X:$src3, + (i8 timm:$src4))), + (VPTERNLOGQZ256rri VR256X:$src1, VR256X:$src2, VR256X:$src3, + timm:$src4)>; + def : Pat<(v32i8 (X86vpternlog VR256X:$src1, VR256X:$src2, + (loadv32i8 addr:$src3), (i8 timm:$src4))), + (VPTERNLOGQZ256rmi VR256X:$src1, VR256X:$src2, addr:$src3, + timm:$src4)>; + def : Pat<(v32i8 (X86vpternlog (loadv32i8 addr:$src3), VR256X:$src2, + VR256X:$src1, (i8 timm:$src4))), + (VPTERNLOGQZ256rmi VR256X:$src1, VR256X:$src2, addr:$src3, + (VPTERNLOG321_imm8 timm:$src4))>; + def : Pat<(v32i8 (X86vpternlog VR256X:$src1, (loadv32i8 addr:$src3), + VR256X:$src2, (i8 timm:$src4))), + (VPTERNLOGQZ256rmi VR256X:$src1, VR256X:$src2, addr:$src3, + (VPTERNLOG132_imm8 timm:$src4))>; + + def : Pat<(v16i16 (X86vpternlog VR256X:$src1, VR256X:$src2, VR256X:$src3, + (i8 timm:$src4))), + (VPTERNLOGQZ256rri VR256X:$src1, VR256X:$src2, VR256X:$src3, + timm:$src4)>; + def : Pat<(v16i16 (X86vpternlog VR256X:$src1, VR256X:$src2, + (loadv16i16 addr:$src3), (i8 timm:$src4))), + (VPTERNLOGQZ256rmi VR256X:$src1, VR256X:$src2, addr:$src3, + timm:$src4)>; + def : Pat<(v16i16 (X86vpternlog (loadv16i16 addr:$src3), VR256X:$src2, + VR256X:$src1, (i8 timm:$src4))), + (VPTERNLOGQZ256rmi VR256X:$src1, VR256X:$src2, addr:$src3, + (VPTERNLOG321_imm8 timm:$src4))>; + def : Pat<(v16i16 (X86vpternlog VR256X:$src1, (loadv16i16 addr:$src3), + VR256X:$src2, (i8 timm:$src4))), + (VPTERNLOGQZ256rmi VR256X:$src1, VR256X:$src2, addr:$src3, + (VPTERNLOG132_imm8 timm:$src4))>; +} + +let Predicates = [HasAVX512] in { + def : Pat<(v64i8 (X86vpternlog VR512:$src1, VR512:$src2, VR512:$src3, + (i8 timm:$src4))), + (VPTERNLOGQZrri VR512:$src1, VR512:$src2, VR512:$src3, + timm:$src4)>; + def : Pat<(v64i8 (X86vpternlog VR512:$src1, VR512:$src2, + (loadv64i8 addr:$src3), (i8 timm:$src4))), + (VPTERNLOGQZrmi VR512:$src1, VR512:$src2, addr:$src3, + timm:$src4)>; + def : Pat<(v64i8 (X86vpternlog (loadv64i8 addr:$src3), VR512:$src2, + VR512:$src1, (i8 timm:$src4))), + (VPTERNLOGQZrmi VR512:$src1, VR512:$src2, addr:$src3, + (VPTERNLOG321_imm8 timm:$src4))>; + def : Pat<(v64i8 (X86vpternlog VR512:$src1, (loadv64i8 addr:$src3), + VR512:$src2, (i8 timm:$src4))), + (VPTERNLOGQZrmi VR512:$src1, VR512:$src2, addr:$src3, + (VPTERNLOG132_imm8 timm:$src4))>; + + def : Pat<(v32i16 (X86vpternlog VR512:$src1, VR512:$src2, VR512:$src3, + (i8 timm:$src4))), + (VPTERNLOGQZrri VR512:$src1, VR512:$src2, VR512:$src3, + timm:$src4)>; + def : Pat<(v32i16 (X86vpternlog VR512:$src1, VR512:$src2, + (loadv32i16 addr:$src3), (i8 timm:$src4))), + (VPTERNLOGQZrmi VR512:$src1, VR512:$src2, addr:$src3, + timm:$src4)>; + def : Pat<(v32i16 (X86vpternlog (loadv32i16 addr:$src3), VR512:$src2, + VR512:$src1, (i8 timm:$src4))), + (VPTERNLOGQZrmi VR512:$src1, VR512:$src2, addr:$src3, + (VPTERNLOG321_imm8 timm:$src4))>; + def : Pat<(v32i16 (X86vpternlog VR512:$src1, (loadv32i16 addr:$src3), + VR512:$src2, (i8 timm:$src4))), + (VPTERNLOGQZrmi VR512:$src1, VR512:$src2, addr:$src3, + (VPTERNLOG132_imm8 timm:$src4))>; +} + // Patterns to implement vnot using vpternlog instead of creating all ones // using pcmpeq or vpternlog and then xoring with that. The value 15 is chosen // so that the result is only dependent on src0. But we use the same source @@ -11533,49 +11640,6 @@ let Predicates = [HasVLX] in { (VPTERNLOGQZ256rri VR256X:$src, VR256X:$src, VR256X:$src, (i8 15))>; } -let Predicates = [HasVLX] in { - def : Pat<(v16i8 (or (and VR128X:$src1, VR128X:$src2), - (X86andnp VR128X:$src1, VR128X:$src3))), - (VPTERNLOGQZ128rri VR128X:$src1, VR128X:$src2, VR128X:$src3, (i8 202))>; - def : Pat<(v8i16 (or (and VR128X:$src1, VR128X:$src2), - (X86andnp VR128X:$src1, VR128X:$src3))), - (VPTERNLOGQZ128rri VR128X:$src1, VR128X:$src2, VR128X:$src3, (i8 202))>; - def : Pat<(v4i32 (or (and VR128X:$src1, VR128X:$src2), - (X86andnp VR128X:$src1, VR128X:$src3))), - (VPTERNLOGQZ128rri VR128X:$src1, VR128X:$src2, VR128X:$src3, (i8 202))>; - def : Pat<(v2i64 (or (and VR128X:$src1, VR128X:$src2), - (X86andnp VR128X:$src1, VR128X:$src3))), - (VPTERNLOGQZ128rri VR128X:$src1, VR128X:$src2, VR128X:$src3, (i8 202))>; - - def : Pat<(v32i8 (or (and VR256X:$src1, VR256X:$src2), - (X86andnp VR256X:$src1, VR256X:$src3))), - (VPTERNLOGQZ256rri VR256X:$src1, VR256X:$src2, VR256X:$src3, (i8 202))>; - def : Pat<(v16i16 (or (and VR256X:$src1, VR256X:$src2), - (X86andnp VR256X:$src1, VR256X:$src3))), - (VPTERNLOGQZ256rri VR256X:$src1, VR256X:$src2, VR256X:$src3, (i8 202))>; - def : Pat<(v8i32 (or (and VR256X:$src1, VR256X:$src2), - (X86andnp VR256X:$src1, VR256X:$src3))), - (VPTERNLOGQZ256rri VR256X:$src1, VR256X:$src2, VR256X:$src3, (i8 202))>; - def : Pat<(v4i64 (or (and VR256X:$src1, VR256X:$src2), - (X86andnp VR256X:$src1, VR256X:$src3))), - (VPTERNLOGQZ256rri VR256X:$src1, VR256X:$src2, VR256X:$src3, (i8 202))>; -} - -let Predicates = [HasAVX512] in { - def : Pat<(v64i8 (or (and VR512:$src1, VR512:$src2), - (X86andnp VR512:$src1, VR512:$src3))), - (VPTERNLOGQZrri VR512:$src1, VR512:$src2, VR512:$src3, (i8 202))>; - def : Pat<(v32i16 (or (and VR512:$src1, VR512:$src2), - (X86andnp VR512:$src1, VR512:$src3))), - (VPTERNLOGQZrri VR512:$src1, VR512:$src2, VR512:$src3, (i8 202))>; - def : Pat<(v16i32 (or (and VR512:$src1, VR512:$src2), - (X86andnp VR512:$src1, VR512:$src3))), - (VPTERNLOGQZrri VR512:$src1, VR512:$src2, VR512:$src3, (i8 202))>; - def : Pat<(v8i64 (or (and VR512:$src1, VR512:$src2), - (X86andnp VR512:$src1, VR512:$src3))), - (VPTERNLOGQZrri VR512:$src1, VR512:$src2, VR512:$src3, (i8 202))>; -} - //===----------------------------------------------------------------------===// // AVX-512 - FixupImm //===----------------------------------------------------------------------===// diff --git a/llvm/test/CodeGen/X86/combine-bitselect.ll b/llvm/test/CodeGen/X86/combine-bitselect.ll index 743cde84317..ccb969b747f 100644 --- a/llvm/test/CodeGen/X86/combine-bitselect.ll +++ b/llvm/test/CodeGen/X86/combine-bitselect.ll @@ -548,9 +548,9 @@ define <8 x i64> @bitselect_v8i64_mm(<8 x i64>* nocapture readonly, <8 x i64>* n ; ; AVX512F-LABEL: bitselect_v8i64_mm: ; AVX512F: # %bb.0: -; AVX512F-NEXT: vmovdqa64 (%rdi), %zmm1 -; AVX512F-NEXT: vmovdqa64 (%rsi), %zmm0 -; AVX512F-NEXT: vpternlogq $228, {{.*}}(%rip), %zmm1, %zmm0 +; AVX512F-NEXT: vmovdqa64 (%rsi), %zmm1 +; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm0 = [18446744073709551612,18446744065119617022,18446744073709551612,18446744065119617022,18446744073709551612,18446744065119617022,18446744073709551612,18446744065119617022] +; AVX512F-NEXT: vpternlogq $202, (%rdi), %zmm1, %zmm0 ; AVX512F-NEXT: retq %3 = load <8 x i64>, <8 x i64>* %0 %4 = load <8 x i64>, <8 x i64>* %1 diff --git a/llvm/test/CodeGen/X86/vec-copysign-avx512.ll b/llvm/test/CodeGen/X86/vec-copysign-avx512.ll index 13e2e12c928..5ec547f1db8 100644 --- a/llvm/test/CodeGen/X86/vec-copysign-avx512.ll +++ b/llvm/test/CodeGen/X86/vec-copysign-avx512.ll @@ -6,7 +6,7 @@ define <4 x float> @v4f32(<4 x float> %a, <4 x float> %b) nounwind { ; CHECK-LABEL: v4f32: ; CHECK: ## %bb.0: ; CHECK-NEXT: vpbroadcastd {{.*#+}} xmm2 = [NaN,NaN,NaN,NaN] -; CHECK-NEXT: vpternlogq $226, %xmm1, %xmm2, %xmm0 +; CHECK-NEXT: vpternlogd $226, %xmm1, %xmm2, %xmm0 ; CHECK-NEXT: retq %tmp = tail call <4 x float> @llvm.copysign.v4f32( <4 x float> %a, <4 x float> %b ) ret <4 x float> %tmp @@ -16,7 +16,7 @@ define <8 x float> @v8f32(<8 x float> %a, <8 x float> %b) nounwind { ; CHECK-LABEL: v8f32: ; CHECK: ## %bb.0: ; CHECK-NEXT: vpbroadcastd {{.*#+}} ymm2 = [NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN] -; CHECK-NEXT: vpternlogq $226, %ymm1, %ymm2, %ymm0 +; CHECK-NEXT: vpternlogd $226, %ymm1, %ymm2, %ymm0 ; CHECK-NEXT: retq %tmp = tail call <8 x float> @llvm.copysign.v8f32( <8 x float> %a, <8 x float> %b ) ret <8 x float> %tmp @@ -26,7 +26,7 @@ define <16 x float> @v16f32(<16 x float> %a, <16 x float> %b) nounwind { ; CHECK-LABEL: v16f32: ; CHECK: ## %bb.0: ; CHECK-NEXT: vpbroadcastd {{.*#+}} zmm2 = [NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN] -; CHECK-NEXT: vpternlogq $226, %zmm1, %zmm2, %zmm0 +; CHECK-NEXT: vpternlogd $226, %zmm1, %zmm2, %zmm0 ; CHECK-NEXT: retq %tmp = tail call <16 x float> @llvm.copysign.v16f32( <16 x float> %a, <16 x float> %b ) ret <16 x float> %tmp diff --git a/llvm/test/CodeGen/X86/vector-fshl-128.ll b/llvm/test/CodeGen/X86/vector-fshl-128.ll index 5530b9920d4..12a5f2bc2cc 100644 --- a/llvm/test/CodeGen/X86/vector-fshl-128.ll +++ b/llvm/test/CodeGen/X86/vector-fshl-128.ll @@ -2991,9 +2991,9 @@ define <16 x i8> @splatconstant_funnnel_v16i8(<16 x i8> %x, <16 x i8> %y) nounwi ; ; AVX512VL-LABEL: splatconstant_funnnel_v16i8: ; AVX512VL: # %bb.0: -; AVX512VL-NEXT: vpsrlw $4, %xmm1, %xmm1 -; AVX512VL-NEXT: vpsllw $4, %xmm0, %xmm0 -; AVX512VL-NEXT: vpternlogq $228, {{.*}}(%rip), %xmm1, %xmm0 +; AVX512VL-NEXT: vpsllw $4, %xmm0, %xmm2 +; AVX512VL-NEXT: vpsrlw $4, %xmm1, %xmm0 +; AVX512VL-NEXT: vpternlogq $216, {{.*}}(%rip), %xmm2, %xmm0 ; AVX512VL-NEXT: retq ; ; AVX512BW-LABEL: splatconstant_funnnel_v16i8: @@ -3016,16 +3016,16 @@ define <16 x i8> @splatconstant_funnnel_v16i8(<16 x i8> %x, <16 x i8> %y) nounwi ; ; AVX512VLBW-LABEL: splatconstant_funnnel_v16i8: ; AVX512VLBW: # %bb.0: -; AVX512VLBW-NEXT: vpsrlw $4, %xmm1, %xmm1 -; AVX512VLBW-NEXT: vpsllw $4, %xmm0, %xmm0 -; AVX512VLBW-NEXT: vpternlogq $228, {{.*}}(%rip), %xmm1, %xmm0 +; AVX512VLBW-NEXT: vpsllw $4, %xmm0, %xmm2 +; AVX512VLBW-NEXT: vpsrlw $4, %xmm1, %xmm0 +; AVX512VLBW-NEXT: vpternlogq $216, {{.*}}(%rip), %xmm2, %xmm0 ; AVX512VLBW-NEXT: retq ; ; AVX512VLVBMI2-LABEL: splatconstant_funnnel_v16i8: ; AVX512VLVBMI2: # %bb.0: -; AVX512VLVBMI2-NEXT: vpsrlw $4, %xmm1, %xmm1 -; AVX512VLVBMI2-NEXT: vpsllw $4, %xmm0, %xmm0 -; AVX512VLVBMI2-NEXT: vpternlogq $228, {{.*}}(%rip), %xmm1, %xmm0 +; AVX512VLVBMI2-NEXT: vpsllw $4, %xmm0, %xmm2 +; AVX512VLVBMI2-NEXT: vpsrlw $4, %xmm1, %xmm0 +; AVX512VLVBMI2-NEXT: vpternlogq $216, {{.*}}(%rip), %xmm2, %xmm0 ; AVX512VLVBMI2-NEXT: retq ; ; XOP-LABEL: splatconstant_funnnel_v16i8: diff --git a/llvm/test/CodeGen/X86/vector-fshl-256.ll b/llvm/test/CodeGen/X86/vector-fshl-256.ll index ed5ebcde68e..cf8a80cf9db 100644 --- a/llvm/test/CodeGen/X86/vector-fshl-256.ll +++ b/llvm/test/CodeGen/X86/vector-fshl-256.ll @@ -2514,9 +2514,9 @@ define <32 x i8> @splatconstant_funnnel_v32i8(<32 x i8> %x, <32 x i8> %y) nounwi ; ; AVX512VL-LABEL: splatconstant_funnnel_v32i8: ; AVX512VL: # %bb.0: -; AVX512VL-NEXT: vpsrlw $4, %ymm1, %ymm1 -; AVX512VL-NEXT: vpsllw $4, %ymm0, %ymm0 -; AVX512VL-NEXT: vpternlogq $228, {{.*}}(%rip), %ymm1, %ymm0 +; AVX512VL-NEXT: vpsllw $4, %ymm0, %ymm2 +; AVX512VL-NEXT: vpsrlw $4, %ymm1, %ymm0 +; AVX512VL-NEXT: vpternlogq $216, {{.*}}(%rip), %ymm2, %ymm0 ; AVX512VL-NEXT: retq ; ; AVX512BW-LABEL: splatconstant_funnnel_v32i8: @@ -2539,16 +2539,16 @@ define <32 x i8> @splatconstant_funnnel_v32i8(<32 x i8> %x, <32 x i8> %y) nounwi ; ; AVX512VLBW-LABEL: splatconstant_funnnel_v32i8: ; AVX512VLBW: # %bb.0: -; AVX512VLBW-NEXT: vpsrlw $4, %ymm1, %ymm1 -; AVX512VLBW-NEXT: vpsllw $4, %ymm0, %ymm0 -; AVX512VLBW-NEXT: vpternlogq $228, {{.*}}(%rip), %ymm1, %ymm0 +; AVX512VLBW-NEXT: vpsllw $4, %ymm0, %ymm2 +; AVX512VLBW-NEXT: vpsrlw $4, %ymm1, %ymm0 +; AVX512VLBW-NEXT: vpternlogq $216, {{.*}}(%rip), %ymm2, %ymm0 ; AVX512VLBW-NEXT: retq ; ; AVX512VLVBMI2-LABEL: splatconstant_funnnel_v32i8: ; AVX512VLVBMI2: # %bb.0: -; AVX512VLVBMI2-NEXT: vpsrlw $4, %ymm1, %ymm1 -; AVX512VLVBMI2-NEXT: vpsllw $4, %ymm0, %ymm0 -; AVX512VLVBMI2-NEXT: vpternlogq $228, {{.*}}(%rip), %ymm1, %ymm0 +; AVX512VLVBMI2-NEXT: vpsllw $4, %ymm0, %ymm2 +; AVX512VLVBMI2-NEXT: vpsrlw $4, %ymm1, %ymm0 +; AVX512VLVBMI2-NEXT: vpternlogq $216, {{.*}}(%rip), %ymm2, %ymm0 ; AVX512VLVBMI2-NEXT: retq ; ; XOPAVX1-LABEL: splatconstant_funnnel_v32i8: diff --git a/llvm/test/CodeGen/X86/vector-fshl-512.ll b/llvm/test/CodeGen/X86/vector-fshl-512.ll index 0f5558d7d5f..b6c5d9f744e 100644 --- a/llvm/test/CodeGen/X86/vector-fshl-512.ll +++ b/llvm/test/CodeGen/X86/vector-fshl-512.ll @@ -1559,30 +1559,30 @@ define <64 x i8> @splatconstant_funnnel_v64i8(<64 x i8> %x, <64 x i8> %y) nounwi ; ; AVX512BW-LABEL: splatconstant_funnnel_v64i8: ; AVX512BW: # %bb.0: -; AVX512BW-NEXT: vpsrlw $4, %zmm1, %zmm1 -; AVX512BW-NEXT: vpsllw $4, %zmm0, %zmm0 -; AVX512BW-NEXT: vpternlogq $228, {{.*}}(%rip), %zmm1, %zmm0 +; AVX512BW-NEXT: vpsllw $4, %zmm0, %zmm2 +; AVX512BW-NEXT: vpsrlw $4, %zmm1, %zmm0 +; AVX512BW-NEXT: vpternlogq $216, {{.*}}(%rip), %zmm2, %zmm0 ; AVX512BW-NEXT: retq ; ; AVX512VBMI2-LABEL: splatconstant_funnnel_v64i8: ; AVX512VBMI2: # %bb.0: -; AVX512VBMI2-NEXT: vpsrlw $4, %zmm1, %zmm1 -; AVX512VBMI2-NEXT: vpsllw $4, %zmm0, %zmm0 -; AVX512VBMI2-NEXT: vpternlogq $228, {{.*}}(%rip), %zmm1, %zmm0 +; AVX512VBMI2-NEXT: vpsllw $4, %zmm0, %zmm2 +; AVX512VBMI2-NEXT: vpsrlw $4, %zmm1, %zmm0 +; AVX512VBMI2-NEXT: vpternlogq $216, {{.*}}(%rip), %zmm2, %zmm0 ; AVX512VBMI2-NEXT: retq ; ; AVX512VLBW-LABEL: splatconstant_funnnel_v64i8: ; AVX512VLBW: # %bb.0: -; AVX512VLBW-NEXT: vpsrlw $4, %zmm1, %zmm1 -; AVX512VLBW-NEXT: vpsllw $4, %zmm0, %zmm0 -; AVX512VLBW-NEXT: vpternlogq $228, {{.*}}(%rip), %zmm1, %zmm0 +; AVX512VLBW-NEXT: vpsllw $4, %zmm0, %zmm2 +; AVX512VLBW-NEXT: vpsrlw $4, %zmm1, %zmm0 +; AVX512VLBW-NEXT: vpternlogq $216, {{.*}}(%rip), %zmm2, %zmm0 ; AVX512VLBW-NEXT: retq ; ; AVX512VLVBMI2-LABEL: splatconstant_funnnel_v64i8: ; AVX512VLVBMI2: # %bb.0: -; AVX512VLVBMI2-NEXT: vpsrlw $4, %zmm1, %zmm1 -; AVX512VLVBMI2-NEXT: vpsllw $4, %zmm0, %zmm0 -; AVX512VLVBMI2-NEXT: vpternlogq $228, {{.*}}(%rip), %zmm1, %zmm0 +; AVX512VLVBMI2-NEXT: vpsllw $4, %zmm0, %zmm2 +; AVX512VLVBMI2-NEXT: vpsrlw $4, %zmm1, %zmm0 +; AVX512VLVBMI2-NEXT: vpternlogq $216, {{.*}}(%rip), %zmm2, %zmm0 ; AVX512VLVBMI2-NEXT: retq %res = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %x, <64 x i8> %y, <64 x i8> <i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4>) ret <64 x i8> %res diff --git a/llvm/test/CodeGen/X86/vector-fshl-rot-128.ll b/llvm/test/CodeGen/X86/vector-fshl-rot-128.ll index ff56dddd383..ce521ad8896 100644 --- a/llvm/test/CodeGen/X86/vector-fshl-rot-128.ll +++ b/llvm/test/CodeGen/X86/vector-fshl-rot-128.ll @@ -1846,9 +1846,9 @@ define <16 x i8> @splatconstant_funnnel_v16i8(<16 x i8> %x) nounwind { ; ; AVX512VL-LABEL: splatconstant_funnnel_v16i8: ; AVX512VL: # %bb.0: -; AVX512VL-NEXT: vpsrlw $4, %xmm0, %xmm1 -; AVX512VL-NEXT: vpsllw $4, %xmm0, %xmm0 -; AVX512VL-NEXT: vpternlogq $228, {{.*}}(%rip), %xmm1, %xmm0 +; AVX512VL-NEXT: vpsllw $4, %xmm0, %xmm1 +; AVX512VL-NEXT: vpsrlw $4, %xmm0, %xmm0 +; AVX512VL-NEXT: vpternlogq $216, {{.*}}(%rip), %xmm1, %xmm0 ; AVX512VL-NEXT: retq ; ; AVX512BW-LABEL: splatconstant_funnnel_v16i8: @@ -1862,9 +1862,9 @@ define <16 x i8> @splatconstant_funnnel_v16i8(<16 x i8> %x) nounwind { ; ; AVX512VLBW-LABEL: splatconstant_funnnel_v16i8: ; AVX512VLBW: # %bb.0: -; AVX512VLBW-NEXT: vpsrlw $4, %xmm0, %xmm1 -; AVX512VLBW-NEXT: vpsllw $4, %xmm0, %xmm0 -; AVX512VLBW-NEXT: vpternlogq $228, {{.*}}(%rip), %xmm1, %xmm0 +; AVX512VLBW-NEXT: vpsllw $4, %xmm0, %xmm1 +; AVX512VLBW-NEXT: vpsrlw $4, %xmm0, %xmm0 +; AVX512VLBW-NEXT: vpternlogq $216, {{.*}}(%rip), %xmm1, %xmm0 ; AVX512VLBW-NEXT: retq ; ; XOP-LABEL: splatconstant_funnnel_v16i8: diff --git a/llvm/test/CodeGen/X86/vector-fshl-rot-256.ll b/llvm/test/CodeGen/X86/vector-fshl-rot-256.ll index be141995329..ca624b0a82e 100644 --- a/llvm/test/CodeGen/X86/vector-fshl-rot-256.ll +++ b/llvm/test/CodeGen/X86/vector-fshl-rot-256.ll @@ -436,14 +436,14 @@ define <32 x i8> @var_funnnel_v32i8(<32 x i8> %x, <32 x i8> %amt) nounwind { ; ; AVX512VL-LABEL: var_funnnel_v32i8: ; AVX512VL: # %bb.0: -; AVX512VL-NEXT: vpsrlw $4, %ymm0, %ymm2 -; AVX512VL-NEXT: vpsllw $4, %ymm0, %ymm3 -; AVX512VL-NEXT: vpternlogq $228, {{.*}}(%rip), %ymm2, %ymm3 +; AVX512VL-NEXT: vpsllw $4, %ymm0, %ymm2 +; AVX512VL-NEXT: vpsrlw $4, %ymm0, %ymm3 +; AVX512VL-NEXT: vpternlogq $216, {{.*}}(%rip), %ymm2, %ymm3 ; AVX512VL-NEXT: vpsllw $5, %ymm1, %ymm1 ; AVX512VL-NEXT: vpblendvb %ymm1, %ymm3, %ymm0, %ymm0 -; AVX512VL-NEXT: vpsrlw $6, %ymm0, %ymm2 -; AVX512VL-NEXT: vpsllw $2, %ymm0, %ymm3 -; AVX512VL-NEXT: vpternlogq $228, {{.*}}(%rip), %ymm2, %ymm3 +; AVX512VL-NEXT: vpsllw $2, %ymm0, %ymm2 +; AVX512VL-NEXT: vpsrlw $6, %ymm0, %ymm3 +; AVX512VL-NEXT: vpternlogq $216, {{.*}}(%rip), %ymm2, %ymm3 ; AVX512VL-NEXT: vpaddb %ymm1, %ymm1, %ymm1 ; AVX512VL-NEXT: vpblendvb %ymm1, %ymm3, %ymm0, %ymm0 ; AVX512VL-NEXT: vpaddb %ymm0, %ymm0, %ymm2 @@ -1504,9 +1504,9 @@ define <32 x i8> @splatconstant_funnnel_v32i8(<32 x i8> %x) nounwind { ; ; AVX512VL-LABEL: splatconstant_funnnel_v32i8: ; AVX512VL: # %bb.0: -; AVX512VL-NEXT: vpsrlw $4, %ymm0, %ymm1 -; AVX512VL-NEXT: vpsllw $4, %ymm0, %ymm0 -; AVX512VL-NEXT: vpternlogq $228, {{.*}}(%rip), %ymm1, %ymm0 +; AVX512VL-NEXT: vpsllw $4, %ymm0, %ymm1 +; AVX512VL-NEXT: vpsrlw $4, %ymm0, %ymm0 +; AVX512VL-NEXT: vpternlogq $216, {{.*}}(%rip), %ymm1, %ymm0 ; AVX512VL-NEXT: retq ; ; AVX512BW-LABEL: splatconstant_funnnel_v32i8: @@ -1520,9 +1520,9 @@ define <32 x i8> @splatconstant_funnnel_v32i8(<32 x i8> %x) nounwind { ; ; AVX512VLBW-LABEL: splatconstant_funnnel_v32i8: ; AVX512VLBW: # %bb.0: -; AVX512VLBW-NEXT: vpsrlw $4, %ymm0, %ymm1 -; AVX512VLBW-NEXT: vpsllw $4, %ymm0, %ymm0 -; AVX512VLBW-NEXT: vpternlogq $228, {{.*}}(%rip), %ymm1, %ymm0 +; AVX512VLBW-NEXT: vpsllw $4, %ymm0, %ymm1 +; AVX512VLBW-NEXT: vpsrlw $4, %ymm0, %ymm0 +; AVX512VLBW-NEXT: vpternlogq $216, {{.*}}(%rip), %ymm1, %ymm0 ; AVX512VLBW-NEXT: retq ; ; XOPAVX1-LABEL: splatconstant_funnnel_v32i8: diff --git a/llvm/test/CodeGen/X86/vector-fshl-rot-512.ll b/llvm/test/CodeGen/X86/vector-fshl-rot-512.ll index 94c0be32bc8..8cb0f36a176 100644 --- a/llvm/test/CodeGen/X86/vector-fshl-rot-512.ll +++ b/llvm/test/CodeGen/X86/vector-fshl-rot-512.ll @@ -829,16 +829,16 @@ define <64 x i8> @splatconstant_funnnel_v64i8(<64 x i8> %x) nounwind { ; ; AVX512BW-LABEL: splatconstant_funnnel_v64i8: ; AVX512BW: # %bb.0: -; AVX512BW-NEXT: vpsrlw $4, %zmm0, %zmm1 -; AVX512BW-NEXT: vpsllw $4, %zmm0, %zmm0 -; AVX512BW-NEXT: vpternlogq $228, {{.*}}(%rip), %zmm1, %zmm0 +; AVX512BW-NEXT: vpsllw $4, %zmm0, %zmm1 +; AVX512BW-NEXT: vpsrlw $4, %zmm0, %zmm0 +; AVX512BW-NEXT: vpternlogq $216, {{.*}}(%rip), %zmm1, %zmm0 ; AVX512BW-NEXT: retq ; ; AVX512VLBW-LABEL: splatconstant_funnnel_v64i8: ; AVX512VLBW: # %bb.0: -; AVX512VLBW-NEXT: vpsrlw $4, %zmm0, %zmm1 -; AVX512VLBW-NEXT: vpsllw $4, %zmm0, %zmm0 -; AVX512VLBW-NEXT: vpternlogq $228, {{.*}}(%rip), %zmm1, %zmm0 +; AVX512VLBW-NEXT: vpsllw $4, %zmm0, %zmm1 +; AVX512VLBW-NEXT: vpsrlw $4, %zmm0, %zmm0 +; AVX512VLBW-NEXT: vpternlogq $216, {{.*}}(%rip), %zmm1, %zmm0 ; AVX512VLBW-NEXT: retq %res = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %x, <64 x i8> %x, <64 x i8> <i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4>) ret <64 x i8> %res diff --git a/llvm/test/CodeGen/X86/vector-fshr-128.ll b/llvm/test/CodeGen/X86/vector-fshr-128.ll index 253826976db..00f5d73a4de 100644 --- a/llvm/test/CodeGen/X86/vector-fshr-128.ll +++ b/llvm/test/CodeGen/X86/vector-fshr-128.ll @@ -3012,9 +3012,9 @@ define <16 x i8> @splatconstant_funnnel_v16i8(<16 x i8> %x, <16 x i8> %y) nounwi ; ; AVX512VL-LABEL: splatconstant_funnnel_v16i8: ; AVX512VL: # %bb.0: -; AVX512VL-NEXT: vpsrlw $4, %xmm1, %xmm1 -; AVX512VL-NEXT: vpsllw $4, %xmm0, %xmm0 -; AVX512VL-NEXT: vpternlogq $228, {{.*}}(%rip), %xmm1, %xmm0 +; AVX512VL-NEXT: vpsllw $4, %xmm0, %xmm2 +; AVX512VL-NEXT: vpsrlw $4, %xmm1, %xmm0 +; AVX512VL-NEXT: vpternlogq $216, {{.*}}(%rip), %xmm2, %xmm0 ; AVX512VL-NEXT: retq ; ; AVX512BW-LABEL: splatconstant_funnnel_v16i8: @@ -3037,16 +3037,16 @@ define <16 x i8> @splatconstant_funnnel_v16i8(<16 x i8> %x, <16 x i8> %y) nounwi ; ; AVX512VLBW-LABEL: splatconstant_funnnel_v16i8: ; AVX512VLBW: # %bb.0: -; AVX512VLBW-NEXT: vpsrlw $4, %xmm1, %xmm1 -; AVX512VLBW-NEXT: vpsllw $4, %xmm0, %xmm0 -; AVX512VLBW-NEXT: vpternlogq $228, {{.*}}(%rip), %xmm1, %xmm0 +; AVX512VLBW-NEXT: vpsllw $4, %xmm0, %xmm2 +; AVX512VLBW-NEXT: vpsrlw $4, %xmm1, %xmm0 +; AVX512VLBW-NEXT: vpternlogq $216, {{.*}}(%rip), %xmm2, %xmm0 ; AVX512VLBW-NEXT: retq ; ; AVX512VLVBMI2-LABEL: splatconstant_funnnel_v16i8: ; AVX512VLVBMI2: # %bb.0: -; AVX512VLVBMI2-NEXT: vpsrlw $4, %xmm1, %xmm1 -; AVX512VLVBMI2-NEXT: vpsllw $4, %xmm0, %xmm0 -; AVX512VLVBMI2-NEXT: vpternlogq $228, {{.*}}(%rip), %xmm1, %xmm0 +; AVX512VLVBMI2-NEXT: vpsllw $4, %xmm0, %xmm2 +; AVX512VLVBMI2-NEXT: vpsrlw $4, %xmm1, %xmm0 +; AVX512VLVBMI2-NEXT: vpternlogq $216, {{.*}}(%rip), %xmm2, %xmm0 ; AVX512VLVBMI2-NEXT: retq ; ; XOP-LABEL: splatconstant_funnnel_v16i8: diff --git a/llvm/test/CodeGen/X86/vector-fshr-256.ll b/llvm/test/CodeGen/X86/vector-fshr-256.ll index 918270dc668..8898373bfe8 100644 --- a/llvm/test/CodeGen/X86/vector-fshr-256.ll +++ b/llvm/test/CodeGen/X86/vector-fshr-256.ll @@ -2515,9 +2515,9 @@ define <32 x i8> @splatconstant_funnnel_v32i8(<32 x i8> %x, <32 x i8> %y) nounwi ; ; AVX512VL-LABEL: splatconstant_funnnel_v32i8: ; AVX512VL: # %bb.0: -; AVX512VL-NEXT: vpsrlw $4, %ymm1, %ymm1 -; AVX512VL-NEXT: vpsllw $4, %ymm0, %ymm0 -; AVX512VL-NEXT: vpternlogq $228, {{.*}}(%rip), %ymm1, %ymm0 +; AVX512VL-NEXT: vpsllw $4, %ymm0, %ymm2 +; AVX512VL-NEXT: vpsrlw $4, %ymm1, %ymm0 +; AVX512VL-NEXT: vpternlogq $216, {{.*}}(%rip), %ymm2, %ymm0 ; AVX512VL-NEXT: retq ; ; AVX512BW-LABEL: splatconstant_funnnel_v32i8: @@ -2540,16 +2540,16 @@ define <32 x i8> @splatconstant_funnnel_v32i8(<32 x i8> %x, <32 x i8> %y) nounwi ; ; AVX512VLBW-LABEL: splatconstant_funnnel_v32i8: ; AVX512VLBW: # %bb.0: -; AVX512VLBW-NEXT: vpsrlw $4, %ymm1, %ymm1 -; AVX512VLBW-NEXT: vpsllw $4, %ymm0, %ymm0 -; AVX512VLBW-NEXT: vpternlogq $228, {{.*}}(%rip), %ymm1, %ymm0 +; AVX512VLBW-NEXT: vpsllw $4, %ymm0, %ymm2 +; AVX512VLBW-NEXT: vpsrlw $4, %ymm1, %ymm0 +; AVX512VLBW-NEXT: vpternlogq $216, {{.*}}(%rip), %ymm2, %ymm0 ; AVX512VLBW-NEXT: retq ; ; AVX512VLVBMI2-LABEL: splatconstant_funnnel_v32i8: ; AVX512VLVBMI2: # %bb.0: -; AVX512VLVBMI2-NEXT: vpsrlw $4, %ymm1, %ymm1 -; AVX512VLVBMI2-NEXT: vpsllw $4, %ymm0, %ymm0 -; AVX512VLVBMI2-NEXT: vpternlogq $228, {{.*}}(%rip), %ymm1, %ymm0 +; AVX512VLVBMI2-NEXT: vpsllw $4, %ymm0, %ymm2 +; AVX512VLVBMI2-NEXT: vpsrlw $4, %ymm1, %ymm0 +; AVX512VLVBMI2-NEXT: vpternlogq $216, {{.*}}(%rip), %ymm2, %ymm0 ; AVX512VLVBMI2-NEXT: retq ; ; XOPAVX1-LABEL: splatconstant_funnnel_v32i8: diff --git a/llvm/test/CodeGen/X86/vector-fshr-512.ll b/llvm/test/CodeGen/X86/vector-fshr-512.ll index 748aa84974d..ca559a6911a 100644 --- a/llvm/test/CodeGen/X86/vector-fshr-512.ll +++ b/llvm/test/CodeGen/X86/vector-fshr-512.ll @@ -1543,30 +1543,30 @@ define <64 x i8> @splatconstant_funnnel_v64i8(<64 x i8> %x, <64 x i8> %y) nounwi ; ; AVX512BW-LABEL: splatconstant_funnnel_v64i8: ; AVX512BW: # %bb.0: -; AVX512BW-NEXT: vpsrlw $4, %zmm1, %zmm1 -; AVX512BW-NEXT: vpsllw $4, %zmm0, %zmm0 -; AVX512BW-NEXT: vpternlogq $228, {{.*}}(%rip), %zmm1, %zmm0 +; AVX512BW-NEXT: vpsllw $4, %zmm0, %zmm2 +; AVX512BW-NEXT: vpsrlw $4, %zmm1, %zmm0 +; AVX512BW-NEXT: vpternlogq $216, {{.*}}(%rip), %zmm2, %zmm0 ; AVX512BW-NEXT: retq ; ; AVX512VBMI2-LABEL: splatconstant_funnnel_v64i8: ; AVX512VBMI2: # %bb.0: -; AVX512VBMI2-NEXT: vpsrlw $4, %zmm1, %zmm1 -; AVX512VBMI2-NEXT: vpsllw $4, %zmm0, %zmm0 -; AVX512VBMI2-NEXT: vpternlogq $228, {{.*}}(%rip), %zmm1, %zmm0 +; AVX512VBMI2-NEXT: vpsllw $4, %zmm0, %zmm2 +; AVX512VBMI2-NEXT: vpsrlw $4, %zmm1, %zmm0 +; AVX512VBMI2-NEXT: vpternlogq $216, {{.*}}(%rip), %zmm2, %zmm0 ; AVX512VBMI2-NEXT: retq ; ; AVX512VLBW-LABEL: splatconstant_funnnel_v64i8: ; AVX512VLBW: # %bb.0: -; AVX512VLBW-NEXT: vpsrlw $4, %zmm1, %zmm1 -; AVX512VLBW-NEXT: vpsllw $4, %zmm0, %zmm0 -; AVX512VLBW-NEXT: vpternlogq $228, {{.*}}(%rip), %zmm1, %zmm0 +; AVX512VLBW-NEXT: vpsllw $4, %zmm0, %zmm2 +; AVX512VLBW-NEXT: vpsrlw $4, %zmm1, %zmm0 +; AVX512VLBW-NEXT: vpternlogq $216, {{.*}}(%rip), %zmm2, %zmm0 ; AVX512VLBW-NEXT: retq ; ; AVX512VLVBMI2-LABEL: splatconstant_funnnel_v64i8: ; AVX512VLVBMI2: # %bb.0: -; AVX512VLVBMI2-NEXT: vpsrlw $4, %zmm1, %zmm1 -; AVX512VLVBMI2-NEXT: vpsllw $4, %zmm0, %zmm0 -; AVX512VLVBMI2-NEXT: vpternlogq $228, {{.*}}(%rip), %zmm1, %zmm0 +; AVX512VLVBMI2-NEXT: vpsllw $4, %zmm0, %zmm2 +; AVX512VLVBMI2-NEXT: vpsrlw $4, %zmm1, %zmm0 +; AVX512VLVBMI2-NEXT: vpternlogq $216, {{.*}}(%rip), %zmm2, %zmm0 ; AVX512VLVBMI2-NEXT: retq %res = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %x, <64 x i8> %y, <64 x i8> <i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4>) ret <64 x i8> %res diff --git a/llvm/test/CodeGen/X86/vector-fshr-rot-128.ll b/llvm/test/CodeGen/X86/vector-fshr-rot-128.ll index f3918daae5e..d88a2a214ca 100644 --- a/llvm/test/CodeGen/X86/vector-fshr-rot-128.ll +++ b/llvm/test/CodeGen/X86/vector-fshr-rot-128.ll @@ -1928,9 +1928,9 @@ define <16 x i8> @splatconstant_funnnel_v16i8(<16 x i8> %x) nounwind { ; ; AVX512VL-LABEL: splatconstant_funnnel_v16i8: ; AVX512VL: # %bb.0: -; AVX512VL-NEXT: vpsrlw $4, %xmm0, %xmm1 -; AVX512VL-NEXT: vpsllw $4, %xmm0, %xmm0 -; AVX512VL-NEXT: vpternlogq $228, {{.*}}(%rip), %xmm1, %xmm0 +; AVX512VL-NEXT: vpsllw $4, %xmm0, %xmm1 +; AVX512VL-NEXT: vpsrlw $4, %xmm0, %xmm0 +; AVX512VL-NEXT: vpternlogq $216, {{.*}}(%rip), %xmm1, %xmm0 ; AVX512VL-NEXT: retq ; ; AVX512BW-LABEL: splatconstant_funnnel_v16i8: @@ -1944,9 +1944,9 @@ define <16 x i8> @splatconstant_funnnel_v16i8(<16 x i8> %x) nounwind { ; ; AVX512VLBW-LABEL: splatconstant_funnnel_v16i8: ; AVX512VLBW: # %bb.0: -; AVX512VLBW-NEXT: vpsrlw $4, %xmm0, %xmm1 -; AVX512VLBW-NEXT: vpsllw $4, %xmm0, %xmm0 -; AVX512VLBW-NEXT: vpternlogq $228, {{.*}}(%rip), %xmm1, %xmm0 +; AVX512VLBW-NEXT: vpsllw $4, %xmm0, %xmm1 +; AVX512VLBW-NEXT: vpsrlw $4, %xmm0, %xmm0 +; AVX512VLBW-NEXT: vpternlogq $216, {{.*}}(%rip), %xmm1, %xmm0 ; AVX512VLBW-NEXT: retq ; ; XOP-LABEL: splatconstant_funnnel_v16i8: diff --git a/llvm/test/CodeGen/X86/vector-fshr-rot-256.ll b/llvm/test/CodeGen/X86/vector-fshr-rot-256.ll index 4f940f464b3..bf7c057965b 100644 --- a/llvm/test/CodeGen/X86/vector-fshr-rot-256.ll +++ b/llvm/test/CodeGen/X86/vector-fshr-rot-256.ll @@ -475,16 +475,16 @@ define <32 x i8> @var_funnnel_v32i8(<32 x i8> %x, <32 x i8> %amt) nounwind { ; ; AVX512VL-LABEL: var_funnnel_v32i8: ; AVX512VL: # %bb.0: -; AVX512VL-NEXT: vpsrlw $4, %ymm0, %ymm2 -; AVX512VL-NEXT: vpsllw $4, %ymm0, %ymm3 -; AVX512VL-NEXT: vpternlogq $228, {{.*}}(%rip), %ymm2, %ymm3 +; AVX512VL-NEXT: vpsllw $4, %ymm0, %ymm2 +; AVX512VL-NEXT: vpsrlw $4, %ymm0, %ymm3 +; AVX512VL-NEXT: vpternlogq $216, {{.*}}(%rip), %ymm2, %ymm3 ; AVX512VL-NEXT: vpxor %xmm2, %xmm2, %xmm2 ; AVX512VL-NEXT: vpsubb %ymm1, %ymm2, %ymm1 ; AVX512VL-NEXT: vpsllw $5, %ymm1, %ymm1 ; AVX512VL-NEXT: vpblendvb %ymm1, %ymm3, %ymm0, %ymm0 -; AVX512VL-NEXT: vpsrlw $6, %ymm0, %ymm2 -; AVX512VL-NEXT: vpsllw $2, %ymm0, %ymm3 -; AVX512VL-NEXT: vpternlogq $228, {{.*}}(%rip), %ymm2, %ymm3 +; AVX512VL-NEXT: vpsllw $2, %ymm0, %ymm2 +; AVX512VL-NEXT: vpsrlw $6, %ymm0, %ymm3 +; AVX512VL-NEXT: vpternlogq $216, {{.*}}(%rip), %ymm2, %ymm3 ; AVX512VL-NEXT: vpaddb %ymm1, %ymm1, %ymm1 ; AVX512VL-NEXT: vpblendvb %ymm1, %ymm3, %ymm0, %ymm0 ; AVX512VL-NEXT: vpaddb %ymm0, %ymm0, %ymm2 @@ -1582,9 +1582,9 @@ define <32 x i8> @splatconstant_funnnel_v32i8(<32 x i8> %x) nounwind { ; ; AVX512VL-LABEL: splatconstant_funnnel_v32i8: ; AVX512VL: # %bb.0: -; AVX512VL-NEXT: vpsrlw $4, %ymm0, %ymm1 -; AVX512VL-NEXT: vpsllw $4, %ymm0, %ymm0 -; AVX512VL-NEXT: vpternlogq $228, {{.*}}(%rip), %ymm1, %ymm0 +; AVX512VL-NEXT: vpsllw $4, %ymm0, %ymm1 +; AVX512VL-NEXT: vpsrlw $4, %ymm0, %ymm0 +; AVX512VL-NEXT: vpternlogq $216, {{.*}}(%rip), %ymm1, %ymm0 ; AVX512VL-NEXT: retq ; ; AVX512BW-LABEL: splatconstant_funnnel_v32i8: @@ -1598,9 +1598,9 @@ define <32 x i8> @splatconstant_funnnel_v32i8(<32 x i8> %x) nounwind { ; ; AVX512VLBW-LABEL: splatconstant_funnnel_v32i8: ; AVX512VLBW: # %bb.0: -; AVX512VLBW-NEXT: vpsrlw $4, %ymm0, %ymm1 -; AVX512VLBW-NEXT: vpsllw $4, %ymm0, %ymm0 -; AVX512VLBW-NEXT: vpternlogq $228, {{.*}}(%rip), %ymm1, %ymm0 +; AVX512VLBW-NEXT: vpsllw $4, %ymm0, %ymm1 +; AVX512VLBW-NEXT: vpsrlw $4, %ymm0, %ymm0 +; AVX512VLBW-NEXT: vpternlogq $216, {{.*}}(%rip), %ymm1, %ymm0 ; AVX512VLBW-NEXT: retq ; ; XOPAVX1-LABEL: splatconstant_funnnel_v32i8: diff --git a/llvm/test/CodeGen/X86/vector-fshr-rot-512.ll b/llvm/test/CodeGen/X86/vector-fshr-rot-512.ll index 33b681861aa..3838dfd4dd1 100644 --- a/llvm/test/CodeGen/X86/vector-fshr-rot-512.ll +++ b/llvm/test/CodeGen/X86/vector-fshr-rot-512.ll @@ -849,16 +849,16 @@ define <64 x i8> @splatconstant_funnnel_v64i8(<64 x i8> %x) nounwind { ; ; AVX512BW-LABEL: splatconstant_funnnel_v64i8: ; AVX512BW: # %bb.0: -; AVX512BW-NEXT: vpsrlw $4, %zmm0, %zmm1 -; AVX512BW-NEXT: vpsllw $4, %zmm0, %zmm0 -; AVX512BW-NEXT: vpternlogq $228, {{.*}}(%rip), %zmm1, %zmm0 +; AVX512BW-NEXT: vpsllw $4, %zmm0, %zmm1 +; AVX512BW-NEXT: vpsrlw $4, %zmm0, %zmm0 +; AVX512BW-NEXT: vpternlogq $216, {{.*}}(%rip), %zmm1, %zmm0 ; AVX512BW-NEXT: retq ; ; AVX512VLBW-LABEL: splatconstant_funnnel_v64i8: ; AVX512VLBW: # %bb.0: -; AVX512VLBW-NEXT: vpsrlw $4, %zmm0, %zmm1 -; AVX512VLBW-NEXT: vpsllw $4, %zmm0, %zmm0 -; AVX512VLBW-NEXT: vpternlogq $228, {{.*}}(%rip), %zmm1, %zmm0 +; AVX512VLBW-NEXT: vpsllw $4, %zmm0, %zmm1 +; AVX512VLBW-NEXT: vpsrlw $4, %zmm0, %zmm0 +; AVX512VLBW-NEXT: vpternlogq $216, {{.*}}(%rip), %zmm1, %zmm0 ; AVX512VLBW-NEXT: retq %res = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %x, <64 x i8> %x, <64 x i8> <i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4>) ret <64 x i8> %res diff --git a/llvm/test/CodeGen/X86/vector-rotate-256.ll b/llvm/test/CodeGen/X86/vector-rotate-256.ll index 1b7555cebdf..df76a7738f8 100644 --- a/llvm/test/CodeGen/X86/vector-rotate-256.ll +++ b/llvm/test/CodeGen/X86/vector-rotate-256.ll @@ -432,14 +432,14 @@ define <32 x i8> @var_rotate_v32i8(<32 x i8> %a, <32 x i8> %b) nounwind { ; ; AVX512VL-LABEL: var_rotate_v32i8: ; AVX512VL: # %bb.0: -; AVX512VL-NEXT: vpsrlw $4, %ymm0, %ymm2 -; AVX512VL-NEXT: vpsllw $4, %ymm0, %ymm3 -; AVX512VL-NEXT: vpternlogq $228, {{.*}}(%rip), %ymm2, %ymm3 +; AVX512VL-NEXT: vpsllw $4, %ymm0, %ymm2 +; AVX512VL-NEXT: vpsrlw $4, %ymm0, %ymm3 +; AVX512VL-NEXT: vpternlogq $216, {{.*}}(%rip), %ymm2, %ymm3 ; AVX512VL-NEXT: vpsllw $5, %ymm1, %ymm1 ; AVX512VL-NEXT: vpblendvb %ymm1, %ymm3, %ymm0, %ymm0 -; AVX512VL-NEXT: vpsrlw $6, %ymm0, %ymm2 -; AVX512VL-NEXT: vpsllw $2, %ymm0, %ymm3 -; AVX512VL-NEXT: vpternlogq $228, {{.*}}(%rip), %ymm2, %ymm3 +; AVX512VL-NEXT: vpsllw $2, %ymm0, %ymm2 +; AVX512VL-NEXT: vpsrlw $6, %ymm0, %ymm3 +; AVX512VL-NEXT: vpternlogq $216, {{.*}}(%rip), %ymm2, %ymm3 ; AVX512VL-NEXT: vpaddb %ymm1, %ymm1, %ymm1 ; AVX512VL-NEXT: vpblendvb %ymm1, %ymm3, %ymm0, %ymm0 ; AVX512VL-NEXT: vpaddb %ymm0, %ymm0, %ymm2 @@ -1505,9 +1505,9 @@ define <32 x i8> @splatconstant_rotate_v32i8(<32 x i8> %a) nounwind { ; ; AVX512VL-LABEL: splatconstant_rotate_v32i8: ; AVX512VL: # %bb.0: -; AVX512VL-NEXT: vpsrlw $4, %ymm0, %ymm1 -; AVX512VL-NEXT: vpsllw $4, %ymm0, %ymm0 -; AVX512VL-NEXT: vpternlogq $228, {{.*}}(%rip), %ymm1, %ymm0 +; AVX512VL-NEXT: vpsllw $4, %ymm0, %ymm1 +; AVX512VL-NEXT: vpsrlw $4, %ymm0, %ymm0 +; AVX512VL-NEXT: vpternlogq $216, {{.*}}(%rip), %ymm1, %ymm0 ; AVX512VL-NEXT: retq ; ; AVX512BW-LABEL: splatconstant_rotate_v32i8: @@ -1787,9 +1787,9 @@ define <32 x i8> @splatconstant_rotate_mask_v32i8(<32 x i8> %a) nounwind { ; ; AVX512VL-LABEL: splatconstant_rotate_mask_v32i8: ; AVX512VL: # %bb.0: -; AVX512VL-NEXT: vpsrlw $4, %ymm0, %ymm1 -; AVX512VL-NEXT: vpsllw $4, %ymm0, %ymm0 -; AVX512VL-NEXT: vpternlogq $228, {{.*}}(%rip), %ymm1, %ymm0 +; AVX512VL-NEXT: vpsllw $4, %ymm0, %ymm1 +; AVX512VL-NEXT: vpsrlw $4, %ymm0, %ymm0 +; AVX512VL-NEXT: vpternlogq $216, {{.*}}(%rip), %ymm1, %ymm0 ; AVX512VL-NEXT: vpand {{.*}}(%rip), %ymm0, %ymm0 ; AVX512VL-NEXT: retq ; |