summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rwxr-xr-xllvm/lib/Target/X86/X86SchedBroadwell.td13
-rw-r--r--llvm/lib/Target/X86/X86SchedHaswell.td13
-rw-r--r--llvm/lib/Target/X86/X86SchedSandyBridge.td13
-rw-r--r--llvm/lib/Target/X86/X86SchedSkylakeClient.td13
-rwxr-xr-xllvm/lib/Target/X86/X86SchedSkylakeServer.td29
-rw-r--r--llvm/lib/Target/X86/X86Schedule.td3
-rw-r--r--llvm/lib/Target/X86/X86ScheduleAtom.td1
-rw-r--r--llvm/lib/Target/X86/X86ScheduleBtVer2.td22
-rw-r--r--llvm/lib/Target/X86/X86ScheduleSLM.td3
-rw-r--r--llvm/lib/Target/X86/X86ScheduleZnver1.td1
-rwxr-xr-xllvm/test/CodeGen/X86/avx512-shuffle-schedule.ll148
11 files changed, 96 insertions, 163 deletions
diff --git a/llvm/lib/Target/X86/X86SchedBroadwell.td b/llvm/lib/Target/X86/X86SchedBroadwell.td
index 3b58b528097..daa624492bc 100755
--- a/llvm/lib/Target/X86/X86SchedBroadwell.td
+++ b/llvm/lib/Target/X86/X86SchedBroadwell.td
@@ -168,7 +168,8 @@ defm : BWWriteResPair<WriteFMAY, [BWPort01], 5, [1], 1, 6>; // Fused Multiply
defm : BWWriteResPair<WriteFSign, [BWPort5], 1>; // Floating point fabs/fchs.
defm : BWWriteResPair<WriteFLogic, [BWPort5], 1, [1], 1, 5>; // Floating point and/or/xor logicals.
defm : BWWriteResPair<WriteFLogicY, [BWPort5], 1, [1], 1, 6>; // Floating point and/or/xor logicals (YMM/ZMM).
-defm : BWWriteResPair<WriteFShuffle, [BWPort5], 1>; // Floating point vector shuffles.
+defm : BWWriteResPair<WriteFShuffle, [BWPort5], 1, [1], 1, 5>; // Floating point vector shuffles.
+defm : BWWriteResPair<WriteFShuffleY, [BWPort5], 1, [1], 1, 6>; // Floating point vector shuffles (YMM/ZMM).
defm : BWWriteResPair<WriteFVarShuffle, [BWPort5], 1, [1], 1, 5>; // Floating point vector variable shuffles.
defm : BWWriteResPair<WriteFVarShuffleY, [BWPort5], 1, [1], 1, 6>; // Floating point vector variable shuffles.
defm : BWWriteResPair<WriteFBlend, [BWPort015], 1, [1], 1, 5>; // Floating point vector blends.
@@ -1088,8 +1089,6 @@ def: InstRW<[BWWriteResGroup75], (instregex "VPACKSSDWYrm",
"VPACKUSWBYrm",
"VPALIGNRYrmi",
"VPBLENDWYrmi",
- "VPERMILPDYmi",
- "VPERMILPSYmi",
"VPSHUFBYrm",
"VPSHUFDYmi",
"VPSHUFHWYmi",
@@ -1101,13 +1100,7 @@ def: InstRW<[BWWriteResGroup75], (instregex "VPACKSSDWYrm",
"VPUNPCKLBWYrm",
"VPUNPCKLDQYrm",
"VPUNPCKLQDQYrm",
- "VPUNPCKLWDYrm",
- "VSHUFPDYrmi",
- "VSHUFPSYrmi",
- "VUNPCKHPDYrm",
- "VUNPCKHPSYrm",
- "VUNPCKLPDYrm",
- "VUNPCKLPSYrm")>;
+ "VPUNPCKLWDYrm")>;
def BWWriteResGroup76 : SchedWriteRes<[BWPort23,BWPort15]> {
let Latency = 7;
diff --git a/llvm/lib/Target/X86/X86SchedHaswell.td b/llvm/lib/Target/X86/X86SchedHaswell.td
index 0c36c5be315..fc55578fa4b 100644
--- a/llvm/lib/Target/X86/X86SchedHaswell.td
+++ b/llvm/lib/Target/X86/X86SchedHaswell.td
@@ -165,7 +165,8 @@ defm : HWWriteResPair<WriteFMAY, [HWPort01], 5, [1], 1, 7>;
defm : HWWriteResPair<WriteFSign, [HWPort0], 1>;
defm : HWWriteResPair<WriteFLogic, [HWPort5], 1, [1], 1, 6>;
defm : HWWriteResPair<WriteFLogicY, [HWPort5], 1, [1], 1, 7>;
-defm : HWWriteResPair<WriteFShuffle, [HWPort5], 1>;
+defm : HWWriteResPair<WriteFShuffle, [HWPort5], 1, [1], 1, 5>;
+defm : HWWriteResPair<WriteFShuffleY, [HWPort5], 1, [1], 1, 7>;
defm : HWWriteResPair<WriteFVarShuffle, [HWPort5], 1, [1], 1, 6>;
defm : HWWriteResPair<WriteFVarShuffleY, [HWPort5], 1, [1], 1, 7>;
defm : HWWriteResPair<WriteFBlend, [HWPort015], 1, [1], 1, 6>;
@@ -900,8 +901,6 @@ def: InstRW<[HWWriteResGroup13_1], (instregex "VPACKSSDWYrm",
"VPACKUSWBYrm",
"VPALIGNRYrmi",
"VPBLENDWYrmi",
- "VPERMILPDYmi",
- "VPERMILPSYmi",
"VPMOVSXBDYrm",
"VPMOVSXBQYrm",
"VPMOVSXWQYrm",
@@ -916,13 +915,7 @@ def: InstRW<[HWWriteResGroup13_1], (instregex "VPACKSSDWYrm",
"VPUNPCKLBWYrm",
"VPUNPCKLDQYrm",
"VPUNPCKLQDQYrm",
- "VPUNPCKLWDYrm",
- "VSHUFPDYrmi",
- "VSHUFPSYrmi",
- "VUNPCKHPDYrm",
- "VUNPCKHPSYrm",
- "VUNPCKLPDYrm",
- "VUNPCKLPSYrm")>;
+ "VPUNPCKLWDYrm")>;
def HWWriteResGroup14 : SchedWriteRes<[HWPort6,HWPort23]> {
let Latency = 6;
diff --git a/llvm/lib/Target/X86/X86SchedSandyBridge.td b/llvm/lib/Target/X86/X86SchedSandyBridge.td
index f2d6a3f4a64..5ef051fe666 100644
--- a/llvm/lib/Target/X86/X86SchedSandyBridge.td
+++ b/llvm/lib/Target/X86/X86SchedSandyBridge.td
@@ -152,7 +152,8 @@ defm : SBWriteResPair<WriteCvtF2F, [SBPort1], 3>;
defm : SBWriteResPair<WriteFSign, [SBPort5], 1>;
defm : SBWriteResPair<WriteFLogic, [SBPort5], 1, [1], 1, 6>;
defm : SBWriteResPair<WriteFLogicY, [SBPort5], 1, [1], 1, 7>;
-defm : SBWriteResPair<WriteFShuffle, [SBPort5], 1>;
+defm : SBWriteResPair<WriteFShuffle, [SBPort5], 1, [1], 1, 5>;
+defm : SBWriteResPair<WriteFShuffleY,[SBPort5], 1, [1], 1, 7>;
defm : SBWriteResPair<WriteFVarShuffle, [SBPort5], 1>;
defm : SBWriteResPair<WriteFVarShuffleY,[SBPort5], 1>;
defm : SBWriteResPair<WriteFBlend, [SBPort05], 1, [1], 1, 6>;
@@ -1140,16 +1141,8 @@ def SBWriteResGroup73 : SchedWriteRes<[SBPort5,SBPort23]> {
let ResourceCycles = [1,1];
}
def: InstRW<[SBWriteResGroup73], (instregex "VPERM2F128rm",
- "VPERMILPDYmi",
"VPERMILPDYrm",
- "VPERMILPSYmi",
- "VPERMILPSYrm",
- "VSHUFPDYrmi",
- "VSHUFPSYrmi",
- "VUNPCKHPDYrm",
- "VUNPCKHPSYrm",
- "VUNPCKLPDYrm",
- "VUNPCKLPSYrm")>;
+ "VPERMILPSYrm")>;
def SBWriteResGroup75 : SchedWriteRes<[SBPort23,SBPort05]> {
let Latency = 8;
diff --git a/llvm/lib/Target/X86/X86SchedSkylakeClient.td b/llvm/lib/Target/X86/X86SchedSkylakeClient.td
index 3f68e927dc5..63469e4d973 100644
--- a/llvm/lib/Target/X86/X86SchedSkylakeClient.td
+++ b/llvm/lib/Target/X86/X86SchedSkylakeClient.td
@@ -165,7 +165,8 @@ defm : SKLWriteResPair<WriteFMAY, [SKLPort01], 4, [1], 1, 7>; // Fused Multipl
defm : SKLWriteResPair<WriteFSign, [SKLPort0], 1>; // Floating point fabs/fchs.
defm : SKLWriteResPair<WriteFLogic, [SKLPort015], 1, [1], 1, 6>; // Floating point and/or/xor logicals.
defm : SKLWriteResPair<WriteFLogicY, [SKLPort015], 1, [1], 1, 7>; // Floating point and/or/xor logicals (YMM/ZMM).
-defm : SKLWriteResPair<WriteFShuffle, [SKLPort5], 1>; // Floating point vector shuffles.
+defm : SKLWriteResPair<WriteFShuffle, [SKLPort5], 1, [1], 1, 5>; // Floating point vector shuffles.
+defm : SKLWriteResPair<WriteFShuffleY, [SKLPort5], 1, [1], 1, 7>; // Floating point vector shuffles (YMM/ZMM).
defm : SKLWriteResPair<WriteFVarShuffle, [SKLPort5], 1>; // Floating point vector shuffles.
defm : SKLWriteResPair<WriteFVarShuffleY, [SKLPort5], 1>; // Floating point vector shuffles.
defm : SKLWriteResPair<WriteFBlend, [SKLPort015], 1, [1], 1, 6>; // Floating point vector blends.
@@ -1519,9 +1520,7 @@ def: InstRW<[SKLWriteResGroup108], (instregex "FCOM32m",
"VPBLENDWYrmi",
"VPBROADCASTBYrm",
"VPBROADCASTWYrm",
- "VPERMILPDYmi",
"VPERMILPDYrm",
- "VPERMILPSYmi",
"VPERMILPSYrm",
"VPMOVSXBDYrm",
"VPMOVSXBQYrm",
@@ -1537,13 +1536,7 @@ def: InstRW<[SKLWriteResGroup108], (instregex "FCOM32m",
"VPUNPCKLBWYrm",
"VPUNPCKLDQYrm",
"VPUNPCKLQDQYrm",
- "VPUNPCKLWDYrm",
- "VSHUFPDYrmi",
- "VSHUFPSYrmi",
- "VUNPCKHPDYrm",
- "VUNPCKHPSYrm",
- "VUNPCKLPDYrm",
- "VUNPCKLPSYrm")>;
+ "VPUNPCKLWDYrm")>;
def SKLWriteResGroup109 : SchedWriteRes<[SKLPort01,SKLPort23]> {
let Latency = 8;
diff --git a/llvm/lib/Target/X86/X86SchedSkylakeServer.td b/llvm/lib/Target/X86/X86SchedSkylakeServer.td
index cce237cfe50..89e4577e60c 100755
--- a/llvm/lib/Target/X86/X86SchedSkylakeServer.td
+++ b/llvm/lib/Target/X86/X86SchedSkylakeServer.td
@@ -165,7 +165,8 @@ defm : SKXWriteResPair<WriteFMAY, [SKXPort015], 4, [1], 1, 7>; // Fused Multiply
defm : SKXWriteResPair<WriteFSign, [SKXPort0], 1>; // Floating point fabs/fchs.
defm : SKXWriteResPair<WriteFLogic, [SKXPort015], 1, [1], 1, 6>; // Floating point and/or/xor logicals.
defm : SKXWriteResPair<WriteFLogicY, [SKXPort015], 1, [1], 1, 7>; // Floating point and/or/xor logicals (YMM/ZMM).
-defm : SKXWriteResPair<WriteFShuffle, [SKXPort5], 1>; // Floating point vector shuffles.
+defm : SKXWriteResPair<WriteFShuffle, [SKXPort5], 1, [1], 1, 5>; // Floating point vector shuffles.
+defm : SKXWriteResPair<WriteFShuffleY, [SKXPort5], 1, [1], 1, 7>; // Floating point vector shuffles (YMM/ZMM).
defm : SKXWriteResPair<WriteFVarShuffle, [SKXPort5], 1>; // Floating point vector variable shuffles.
defm : SKXWriteResPair<WriteFVarShuffleY, [SKXPort5], 1>; // Floating point vector variable shuffles.
defm : SKXWriteResPair<WriteFBlend, [SKXPort015], 1, [1], 1, 6>; // Floating point vector blends.
@@ -2703,17 +2704,11 @@ def: InstRW<[SKXWriteResGroup119], (instregex "FCOM32m",
"VPBROADCASTWYrm",
"VPBROADCASTWZ256m(b?)",
"VPBROADCASTWZm(b?)",
- "VPERMILPDYmi",
"VPERMILPDYrm",
- "VPERMILPDZ256m(b?)i",
"VPERMILPDZ256rm(b?)",
- "VPERMILPDZm(b?)i",
"VPERMILPDZrm(b?)",
- "VPERMILPSYmi",
"VPERMILPSYrm",
- "VPERMILPSZ256m(b?)i",
"VPERMILPSZ256rm(b?)",
- "VPERMILPSZm(b?)i",
"VPERMILPSZrm(b?)",
"VPMOVSXBDYrm",
"VPMOVSXBQYrm",
@@ -2757,25 +2752,7 @@ def: InstRW<[SKXWriteResGroup119], (instregex "FCOM32m",
"VPUNPCKLQDQZrm(b?)",
"VPUNPCKLWDYrm",
"VPUNPCKLWDZ256rm(b?)",
- "VPUNPCKLWDZrm(b?)",
- "VSHUFPDYrmi",
- "VSHUFPDZ256rm(b?)i",
- "VSHUFPDZrm(b?)i",
- "VSHUFPSYrmi",
- "VSHUFPSZ256rm(b?)i",
- "VSHUFPSZrm(b?)i",
- "VUNPCKHPDYrm",
- "VUNPCKHPDZ256rm(b?)",
- "VUNPCKHPDZrm(b?)",
- "VUNPCKHPSYrm",
- "VUNPCKHPSZ256rm(b?)",
- "VUNPCKHPSZrm(b?)",
- "VUNPCKLPDYrm",
- "VUNPCKLPDZ256rm(b?)",
- "VUNPCKLPDZrm(b?)",
- "VUNPCKLPSYrm",
- "VUNPCKLPSZ256rm(b?)",
- "VUNPCKLPSZrm(b?)")>;
+ "VPUNPCKLWDZrm(b?)")>;
def SKXWriteResGroup120 : SchedWriteRes<[SKXPort01,SKXPort23]> {
let Latency = 8;
diff --git a/llvm/lib/Target/X86/X86Schedule.td b/llvm/lib/Target/X86/X86Schedule.td
index 758f035e5ef..cf0e3db0b0d 100644
--- a/llvm/lib/Target/X86/X86Schedule.td
+++ b/llvm/lib/Target/X86/X86Schedule.td
@@ -105,6 +105,7 @@ defm WriteFSign : X86SchedWritePair; // Floating point fabs/fchs.
defm WriteFLogic : X86SchedWritePair; // Floating point and/or/xor logicals.
defm WriteFLogicY : X86SchedWritePair; // Floating point and/or/xor logicals (YMM/ZMM).
defm WriteFShuffle : X86SchedWritePair; // Floating point vector shuffles.
+defm WriteFShuffleY : X86SchedWritePair; // Floating point vector shuffles (YMM/ZMM).
defm WriteFVarShuffle : X86SchedWritePair; // Floating point vector variable shuffles.
defm WriteFVarShuffleY : X86SchedWritePair; // Floating point vector variable shuffles (YMM/ZMM).
defm WriteFBlend : X86SchedWritePair; // Floating point vector blends.
@@ -212,7 +213,7 @@ def SchedWriteFLogic
def SchedWriteFShuffle
: X86SchedWriteWidths<WriteFShuffle, WriteFShuffle,
- WriteFShuffle, WriteFShuffle>;
+ WriteFShuffleY, WriteFShuffleY>;
def SchedWriteFVarShuffle
: X86SchedWriteWidths<WriteFVarShuffle, WriteFVarShuffle,
WriteFVarShuffleY, WriteFVarShuffleY>;
diff --git a/llvm/lib/Target/X86/X86ScheduleAtom.td b/llvm/lib/Target/X86/X86ScheduleAtom.td
index 3090d25b516..6979044c452 100644
--- a/llvm/lib/Target/X86/X86ScheduleAtom.td
+++ b/llvm/lib/Target/X86/X86ScheduleAtom.td
@@ -214,6 +214,7 @@ defm : AtomWriteResPair<WriteFSign, [AtomPort1], [AtomPort1]>;
defm : AtomWriteResPair<WriteFLogic, [AtomPort01], [AtomPort0]>;
defm : AtomWriteResPair<WriteFLogicY, [AtomPort01], [AtomPort0]>; // NOTE: Doesn't exist on Atom.
defm : AtomWriteResPair<WriteFShuffle, [AtomPort0], [AtomPort0]>;
+defm : AtomWriteResPair<WriteFShuffleY, [AtomPort0], [AtomPort0]>; // NOTE: Doesn't exist on Atom.
defm : AtomWriteResPair<WriteFVarShuffle, [AtomPort0], [AtomPort0]>; // NOTE: Doesn't exist on Atom.
defm : AtomWriteResPair<WriteFVarShuffleY, [AtomPort0], [AtomPort0]>; // NOTE: Doesn't exist on Atom.
defm : AtomWriteResPair<WriteFMA, [AtomPort0], [AtomPort0]>; // NOTE: Doesn't exist on Atom.
diff --git a/llvm/lib/Target/X86/X86ScheduleBtVer2.td b/llvm/lib/Target/X86/X86ScheduleBtVer2.td
index 10a695d4b7f..15f3464241c 100644
--- a/llvm/lib/Target/X86/X86ScheduleBtVer2.td
+++ b/llvm/lib/Target/X86/X86ScheduleBtVer2.td
@@ -330,6 +330,7 @@ defm : JWriteResFpuPair<WriteFSign, [JFPU1, JFPM], 2>;
defm : JWriteResFpuPair<WriteFLogic, [JFPU01, JFPX], 1>;
defm : JWriteResYMMPair<WriteFLogicY, [JFPU01, JFPX], 1, [2, 2], 2>;
defm : JWriteResFpuPair<WriteFShuffle, [JFPU01, JFPX], 1>;
+defm : JWriteResYMMPair<WriteFShuffleY, [JFPU01, JFPX], 1, [2, 2], 2>;
defm : JWriteResFpuPair<WriteFVarShuffle, [JFPU01, JFPX], 2, [1, 4], 3>;
defm : JWriteResYMMPair<WriteFVarShuffleY,[JFPU01, JFPX], 3, [2, 6], 6>;
defm : JWriteResFpuPair<WriteFBlend, [JFPU01, JFPX], 1>;
@@ -685,27 +686,6 @@ def JWriteVCVTPDYLd: SchedWriteRes<[JLAGU, JFPU1, JSTC, JFPX]> {
}
def : InstRW<[JWriteVCVTPDYLd, ReadAfterLd], (instrs VCVTPD2DQYrm, VCVTTPD2DQYrm, VCVTPD2PSYrm)>;
-def JWriteShuffleY: SchedWriteRes<[JFPU01, JFPX]> {
- let ResourceCycles = [2, 2];
- let NumMicroOps = 2;
-}
-def : InstRW<[JWriteShuffleY], (instrs VMOVDDUPYrr, VMOVSHDUPYrr, VMOVSLDUPYrr,
- VPERMILPDYri, VPERMILPSYri, VSHUFPDYrri,
- VSHUFPSYrri, VUNPCKHPDYrr, VUNPCKHPSYrr,
- VUNPCKLPDYrr, VUNPCKLPSYrr)>;
-
-def JWriteShuffleYLd: SchedWriteRes<[JLAGU, JFPU01, JFPX]> {
- let Latency = 6;
- let ResourceCycles = [2, 2, 2];
- let NumMicroOps = 2;
-}
-def : InstRW<[JWriteShuffleYLd, ReadAfterLd], (instrs VMOVDDUPYrm, VMOVSHDUPYrm,
- VMOVSLDUPYrm, VPERMILPDYmi,
- VPERMILPSYmi, VSHUFPDYrmi,
- VSHUFPSYrmi, VUNPCKHPDYrm,
- VUNPCKHPSYrm, VUNPCKLPDYrm,
- VUNPCKLPSYrm)>;
-
def JWriteVBROADCASTYLd: SchedWriteRes<[JLAGU, JFPU01, JFPX]> {
let Latency = 6;
let ResourceCycles = [1, 2, 4];
diff --git a/llvm/lib/Target/X86/X86ScheduleSLM.td b/llvm/lib/Target/X86/X86ScheduleSLM.td
index 9ea9eb00cee..6c4e1faea77 100644
--- a/llvm/lib/Target/X86/X86ScheduleSLM.td
+++ b/llvm/lib/Target/X86/X86ScheduleSLM.td
@@ -143,7 +143,8 @@ defm : SLMWriteResPair<WriteCvtF2F, [SLM_FPC_RSV01], 4>;
defm : SLMWriteResPair<WriteFSign, [SLM_FPC_RSV01], 1>;
defm : SLMWriteResPair<WriteFLogic, [SLM_FPC_RSV01], 1>;
defm : SLMWriteResPair<WriteFLogicY, [SLM_FPC_RSV01], 1>;
-defm : SLMWriteResPair<WriteFShuffle, [SLM_FPC_RSV0], 1>;
+defm : SLMWriteResPair<WriteFShuffle, [SLM_FPC_RSV0], 1>;
+defm : SLMWriteResPair<WriteFShuffleY, [SLM_FPC_RSV0], 1>;
defm : SLMWriteResPair<WriteFVarShuffle, [SLM_FPC_RSV0], 1>;
defm : SLMWriteResPair<WriteFVarShuffleY,[SLM_FPC_RSV0], 1>;
defm : SLMWriteResPair<WriteFBlend, [SLM_FPC_RSV0], 1>;
diff --git a/llvm/lib/Target/X86/X86ScheduleZnver1.td b/llvm/lib/Target/X86/X86ScheduleZnver1.td
index 2e92c53b531..4d8887186a0 100644
--- a/llvm/lib/Target/X86/X86ScheduleZnver1.td
+++ b/llvm/lib/Target/X86/X86ScheduleZnver1.td
@@ -209,6 +209,7 @@ defm : ZnWriteResFpuPair<WriteFSign, [ZnFPU3], 2>;
defm : ZnWriteResFpuPair<WriteFLogic, [ZnFPU], 1>;
defm : ZnWriteResFpuPair<WriteFLogicY, [ZnFPU], 1>;
defm : ZnWriteResFpuPair<WriteFShuffle, [ZnFPU12], 1>;
+defm : ZnWriteResFpuPair<WriteFShuffleY, [ZnFPU12], 1>;
defm : ZnWriteResFpuPair<WriteFVarShuffle, [ZnFPU12], 1>;
defm : ZnWriteResFpuPair<WriteFVarShuffleY,[ZnFPU12], 1>;
defm : ZnWriteResFpuPair<WriteFMul, [ZnFPU0], 5>;
diff --git a/llvm/test/CodeGen/X86/avx512-shuffle-schedule.ll b/llvm/test/CodeGen/X86/avx512-shuffle-schedule.ll
index d1af9d44608..90da799bd36 100755
--- a/llvm/test/CodeGen/X86/avx512-shuffle-schedule.ll
+++ b/llvm/test/CodeGen/X86/avx512-shuffle-schedule.ll
@@ -8572,7 +8572,7 @@ define <16 x i32> @test2_masked_z_16xi32_perm_mask3(<16 x i32> %vec, <16 x i32>
define <16 x i32> @test2_16xi32_perm_mem_mask0(<16 x i32>* %vp) {
; GENERIC-LABEL: test2_16xi32_perm_mem_mask0:
; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpermilps {{.*#+}} zmm0 = mem[1,0,1,3,5,4,5,7,9,8,9,11,13,12,13,15] sched: [6:1.00]
+; GENERIC-NEXT: vpermilps {{.*#+}} zmm0 = mem[1,0,1,3,5,4,5,7,9,8,9,11,13,12,13,15] sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test2_16xi32_perm_mem_mask0:
@@ -8700,7 +8700,7 @@ define <16 x i32> @test2_masked_z_16xi32_perm_mem_mask2(<16 x i32>* %vp, <16 x i
define <16 x i32> @test2_16xi32_perm_mem_mask3(<16 x i32>* %vp) {
; GENERIC-LABEL: test2_16xi32_perm_mem_mask3:
; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpermilps {{.*#+}} zmm0 = mem[3,1,1,1,7,5,5,5,11,9,9,9,15,13,13,13] sched: [6:1.00]
+; GENERIC-NEXT: vpermilps {{.*#+}} zmm0 = mem[3,1,1,1,7,5,5,5,11,9,9,9,15,13,13,13] sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test2_16xi32_perm_mem_mask3:
@@ -12199,7 +12199,7 @@ define <8 x float> @test_8xfloat_masked_unpack_low_mem_mask0(<8 x float> %vec1,
; GENERIC-LABEL: test_8xfloat_masked_unpack_low_mem_mask0:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vunpcklps {{.*#+}} ymm1 {%k1} = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] sched: [6:1.00]
+; GENERIC-NEXT: vunpcklps {{.*#+}} ymm1 {%k1} = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] sched: [8:1.00]
; GENERIC-NEXT: vmovaps %ymm1, %ymm0 # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -12220,7 +12220,7 @@ define <8 x float> @test_8xfloat_zero_masked_unpack_low_mem_mask0(<8 x float> %v
; GENERIC-LABEL: test_8xfloat_zero_masked_unpack_low_mem_mask0:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vunpcklps {{.*#+}} ymm0 {%k1} {z} = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] sched: [6:1.00]
+; GENERIC-NEXT: vunpcklps {{.*#+}} ymm0 {%k1} {z} = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_8xfloat_zero_masked_unpack_low_mem_mask0:
@@ -12239,7 +12239,7 @@ define <8 x float> @test_8xfloat_masked_unpack_low_mem_mask1(<8 x float> %vec1,
; GENERIC-LABEL: test_8xfloat_masked_unpack_low_mem_mask1:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vunpcklps {{.*#+}} ymm1 {%k1} = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] sched: [6:1.00]
+; GENERIC-NEXT: vunpcklps {{.*#+}} ymm1 {%k1} = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] sched: [8:1.00]
; GENERIC-NEXT: vmovaps %ymm1, %ymm0 # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -12260,7 +12260,7 @@ define <8 x float> @test_8xfloat_zero_masked_unpack_low_mem_mask1(<8 x float> %v
; GENERIC-LABEL: test_8xfloat_zero_masked_unpack_low_mem_mask1:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vunpcklps {{.*#+}} ymm0 {%k1} {z} = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] sched: [6:1.00]
+; GENERIC-NEXT: vunpcklps {{.*#+}} ymm0 {%k1} {z} = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_8xfloat_zero_masked_unpack_low_mem_mask1:
@@ -12279,7 +12279,7 @@ define <8 x float> @test_8xfloat_masked_unpack_low_mem_mask2(<8 x float> %vec1,
; GENERIC-LABEL: test_8xfloat_masked_unpack_low_mem_mask2:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vunpcklps {{.*#+}} ymm1 {%k1} = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] sched: [6:1.00]
+; GENERIC-NEXT: vunpcklps {{.*#+}} ymm1 {%k1} = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] sched: [8:1.00]
; GENERIC-NEXT: vmovaps %ymm1, %ymm0 # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -12300,7 +12300,7 @@ define <8 x float> @test_8xfloat_zero_masked_unpack_low_mem_mask2(<8 x float> %v
; GENERIC-LABEL: test_8xfloat_zero_masked_unpack_low_mem_mask2:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vunpcklps {{.*#+}} ymm0 {%k1} {z} = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] sched: [6:1.00]
+; GENERIC-NEXT: vunpcklps {{.*#+}} ymm0 {%k1} {z} = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_8xfloat_zero_masked_unpack_low_mem_mask2:
@@ -12333,7 +12333,7 @@ define <8 x float> @test_8xfloat_masked_unpack_low_mem_mask3(<8 x float> %vec1,
; GENERIC-LABEL: test_8xfloat_masked_unpack_low_mem_mask3:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vunpcklps {{.*#+}} ymm1 {%k1} = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] sched: [6:1.00]
+; GENERIC-NEXT: vunpcklps {{.*#+}} ymm1 {%k1} = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] sched: [8:1.00]
; GENERIC-NEXT: vmovaps %ymm1, %ymm0 # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -12354,7 +12354,7 @@ define <8 x float> @test_8xfloat_zero_masked_unpack_low_mem_mask3(<8 x float> %v
; GENERIC-LABEL: test_8xfloat_zero_masked_unpack_low_mem_mask3:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vunpcklps {{.*#+}} ymm0 {%k1} {z} = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] sched: [6:1.00]
+; GENERIC-NEXT: vunpcklps {{.*#+}} ymm0 {%k1} {z} = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_8xfloat_zero_masked_unpack_low_mem_mask3:
@@ -12546,7 +12546,7 @@ define <16 x float> @test_16xfloat_zero_masked_unpack_low_mask3(<16 x float> %ve
define <16 x float> @test_16xfloat_unpack_low_mem_mask0(<16 x float> %vec1, <16 x float>* %vec2p) {
; GENERIC-LABEL: test_16xfloat_unpack_low_mem_mask0:
; GENERIC: # %bb.0:
-; GENERIC-NEXT: vunpcklps {{.*#+}} zmm0 = zmm0[0],mem[0],zmm0[1],mem[1],zmm0[4],mem[4],zmm0[5],mem[5],zmm0[8],mem[8],zmm0[9],mem[9],zmm0[12],mem[12],zmm0[13],mem[13] sched: [6:1.00]
+; GENERIC-NEXT: vunpcklps {{.*#+}} zmm0 = zmm0[0],mem[0],zmm0[1],mem[1],zmm0[4],mem[4],zmm0[5],mem[5],zmm0[8],mem[8],zmm0[9],mem[9],zmm0[12],mem[12],zmm0[13],mem[13] sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_16xfloat_unpack_low_mem_mask0:
@@ -12561,7 +12561,7 @@ define <16 x float> @test_16xfloat_masked_unpack_low_mem_mask0(<16 x float> %vec
; GENERIC-LABEL: test_16xfloat_masked_unpack_low_mem_mask0:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vunpcklps {{.*#+}} zmm1 {%k1} = zmm0[0],mem[0],zmm0[1],mem[1],zmm0[4],mem[4],zmm0[5],mem[5],zmm0[8],mem[8],zmm0[9],mem[9],zmm0[12],mem[12],zmm0[13],mem[13] sched: [6:1.00]
+; GENERIC-NEXT: vunpcklps {{.*#+}} zmm1 {%k1} = zmm0[0],mem[0],zmm0[1],mem[1],zmm0[4],mem[4],zmm0[5],mem[5],zmm0[8],mem[8],zmm0[9],mem[9],zmm0[12],mem[12],zmm0[13],mem[13] sched: [8:1.00]
; GENERIC-NEXT: vmovaps %zmm1, %zmm0 # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -12582,7 +12582,7 @@ define <16 x float> @test_16xfloat_zero_masked_unpack_low_mem_mask0(<16 x float>
; GENERIC-LABEL: test_16xfloat_zero_masked_unpack_low_mem_mask0:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vunpcklps {{.*#+}} zmm0 {%k1} {z} = zmm0[0],mem[0],zmm0[1],mem[1],zmm0[4],mem[4],zmm0[5],mem[5],zmm0[8],mem[8],zmm0[9],mem[9],zmm0[12],mem[12],zmm0[13],mem[13] sched: [6:1.00]
+; GENERIC-NEXT: vunpcklps {{.*#+}} zmm0 {%k1} {z} = zmm0[0],mem[0],zmm0[1],mem[1],zmm0[4],mem[4],zmm0[5],mem[5],zmm0[8],mem[8],zmm0[9],mem[9],zmm0[12],mem[12],zmm0[13],mem[13] sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_16xfloat_zero_masked_unpack_low_mem_mask0:
@@ -12601,7 +12601,7 @@ define <16 x float> @test_16xfloat_masked_unpack_low_mem_mask1(<16 x float> %vec
; GENERIC-LABEL: test_16xfloat_masked_unpack_low_mem_mask1:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vunpcklps {{.*#+}} zmm1 {%k1} = zmm0[0],mem[0],zmm0[1],mem[1],zmm0[4],mem[4],zmm0[5],mem[5],zmm0[8],mem[8],zmm0[9],mem[9],zmm0[12],mem[12],zmm0[13],mem[13] sched: [6:1.00]
+; GENERIC-NEXT: vunpcklps {{.*#+}} zmm1 {%k1} = zmm0[0],mem[0],zmm0[1],mem[1],zmm0[4],mem[4],zmm0[5],mem[5],zmm0[8],mem[8],zmm0[9],mem[9],zmm0[12],mem[12],zmm0[13],mem[13] sched: [8:1.00]
; GENERIC-NEXT: vmovaps %zmm1, %zmm0 # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -12622,7 +12622,7 @@ define <16 x float> @test_16xfloat_zero_masked_unpack_low_mem_mask1(<16 x float>
; GENERIC-LABEL: test_16xfloat_zero_masked_unpack_low_mem_mask1:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vunpcklps {{.*#+}} zmm0 {%k1} {z} = zmm0[0],mem[0],zmm0[1],mem[1],zmm0[4],mem[4],zmm0[5],mem[5],zmm0[8],mem[8],zmm0[9],mem[9],zmm0[12],mem[12],zmm0[13],mem[13] sched: [6:1.00]
+; GENERIC-NEXT: vunpcklps {{.*#+}} zmm0 {%k1} {z} = zmm0[0],mem[0],zmm0[1],mem[1],zmm0[4],mem[4],zmm0[5],mem[5],zmm0[8],mem[8],zmm0[9],mem[9],zmm0[12],mem[12],zmm0[13],mem[13] sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_16xfloat_zero_masked_unpack_low_mem_mask1:
@@ -12641,7 +12641,7 @@ define <16 x float> @test_16xfloat_masked_unpack_low_mem_mask2(<16 x float> %vec
; GENERIC-LABEL: test_16xfloat_masked_unpack_low_mem_mask2:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vunpcklps {{.*#+}} zmm1 {%k1} = zmm0[0],mem[0],zmm0[1],mem[1],zmm0[4],mem[4],zmm0[5],mem[5],zmm0[8],mem[8],zmm0[9],mem[9],zmm0[12],mem[12],zmm0[13],mem[13] sched: [6:1.00]
+; GENERIC-NEXT: vunpcklps {{.*#+}} zmm1 {%k1} = zmm0[0],mem[0],zmm0[1],mem[1],zmm0[4],mem[4],zmm0[5],mem[5],zmm0[8],mem[8],zmm0[9],mem[9],zmm0[12],mem[12],zmm0[13],mem[13] sched: [8:1.00]
; GENERIC-NEXT: vmovaps %zmm1, %zmm0 # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -12662,7 +12662,7 @@ define <16 x float> @test_16xfloat_zero_masked_unpack_low_mem_mask2(<16 x float>
; GENERIC-LABEL: test_16xfloat_zero_masked_unpack_low_mem_mask2:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vunpcklps {{.*#+}} zmm0 {%k1} {z} = zmm0[0],mem[0],zmm0[1],mem[1],zmm0[4],mem[4],zmm0[5],mem[5],zmm0[8],mem[8],zmm0[9],mem[9],zmm0[12],mem[12],zmm0[13],mem[13] sched: [6:1.00]
+; GENERIC-NEXT: vunpcklps {{.*#+}} zmm0 {%k1} {z} = zmm0[0],mem[0],zmm0[1],mem[1],zmm0[4],mem[4],zmm0[5],mem[5],zmm0[8],mem[8],zmm0[9],mem[9],zmm0[12],mem[12],zmm0[13],mem[13] sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_16xfloat_zero_masked_unpack_low_mem_mask2:
@@ -12680,7 +12680,7 @@ define <16 x float> @test_16xfloat_zero_masked_unpack_low_mem_mask2(<16 x float>
define <16 x float> @test_16xfloat_unpack_low_mem_mask3(<16 x float> %vec1, <16 x float>* %vec2p) {
; GENERIC-LABEL: test_16xfloat_unpack_low_mem_mask3:
; GENERIC: # %bb.0:
-; GENERIC-NEXT: vunpcklps {{.*#+}} zmm0 = zmm0[0],mem[0],zmm0[1],mem[1],zmm0[4],mem[4],zmm0[5],mem[5],zmm0[8],mem[8],zmm0[9],mem[9],zmm0[12],mem[12],zmm0[13],mem[13] sched: [6:1.00]
+; GENERIC-NEXT: vunpcklps {{.*#+}} zmm0 = zmm0[0],mem[0],zmm0[1],mem[1],zmm0[4],mem[4],zmm0[5],mem[5],zmm0[8],mem[8],zmm0[9],mem[9],zmm0[12],mem[12],zmm0[13],mem[13] sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_16xfloat_unpack_low_mem_mask3:
@@ -12695,7 +12695,7 @@ define <16 x float> @test_16xfloat_masked_unpack_low_mem_mask3(<16 x float> %vec
; GENERIC-LABEL: test_16xfloat_masked_unpack_low_mem_mask3:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vunpcklps {{.*#+}} zmm1 {%k1} = zmm0[0],mem[0],zmm0[1],mem[1],zmm0[4],mem[4],zmm0[5],mem[5],zmm0[8],mem[8],zmm0[9],mem[9],zmm0[12],mem[12],zmm0[13],mem[13] sched: [6:1.00]
+; GENERIC-NEXT: vunpcklps {{.*#+}} zmm1 {%k1} = zmm0[0],mem[0],zmm0[1],mem[1],zmm0[4],mem[4],zmm0[5],mem[5],zmm0[8],mem[8],zmm0[9],mem[9],zmm0[12],mem[12],zmm0[13],mem[13] sched: [8:1.00]
; GENERIC-NEXT: vmovaps %zmm1, %zmm0 # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -12716,7 +12716,7 @@ define <16 x float> @test_16xfloat_zero_masked_unpack_low_mem_mask3(<16 x float>
; GENERIC-LABEL: test_16xfloat_zero_masked_unpack_low_mem_mask3:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vunpcklps {{.*#+}} zmm0 {%k1} {z} = zmm0[0],mem[0],zmm0[1],mem[1],zmm0[4],mem[4],zmm0[5],mem[5],zmm0[8],mem[8],zmm0[9],mem[9],zmm0[12],mem[12],zmm0[13],mem[13] sched: [6:1.00]
+; GENERIC-NEXT: vunpcklps {{.*#+}} zmm0 {%k1} {z} = zmm0[0],mem[0],zmm0[1],mem[1],zmm0[4],mem[4],zmm0[5],mem[5],zmm0[8],mem[8],zmm0[9],mem[9],zmm0[12],mem[12],zmm0[13],mem[13] sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_16xfloat_zero_masked_unpack_low_mem_mask3:
@@ -13104,7 +13104,7 @@ define <4 x double> @test_4xdouble_masked_unpack_low_mem_mask0(<4 x double> %vec
; GENERIC-LABEL: test_4xdouble_masked_unpack_low_mem_mask0:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vunpcklpd {{.*#+}} ymm1 {%k1} = ymm0[0],mem[0],ymm0[2],mem[2] sched: [6:1.00]
+; GENERIC-NEXT: vunpcklpd {{.*#+}} ymm1 {%k1} = ymm0[0],mem[0],ymm0[2],mem[2] sched: [8:1.00]
; GENERIC-NEXT: vmovapd %ymm1, %ymm0 # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -13125,7 +13125,7 @@ define <4 x double> @test_4xdouble_zero_masked_unpack_low_mem_mask0(<4 x double>
; GENERIC-LABEL: test_4xdouble_zero_masked_unpack_low_mem_mask0:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vunpcklpd {{.*#+}} ymm0 {%k1} {z} = ymm0[0],mem[0],ymm0[2],mem[2] sched: [6:1.00]
+; GENERIC-NEXT: vunpcklpd {{.*#+}} ymm0 {%k1} {z} = ymm0[0],mem[0],ymm0[2],mem[2] sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_4xdouble_zero_masked_unpack_low_mem_mask0:
@@ -13144,7 +13144,7 @@ define <4 x double> @test_4xdouble_masked_unpack_low_mem_mask1(<4 x double> %vec
; GENERIC-LABEL: test_4xdouble_masked_unpack_low_mem_mask1:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vunpcklpd {{.*#+}} ymm1 {%k1} = ymm0[0],mem[0],ymm0[2],mem[2] sched: [6:1.00]
+; GENERIC-NEXT: vunpcklpd {{.*#+}} ymm1 {%k1} = ymm0[0],mem[0],ymm0[2],mem[2] sched: [8:1.00]
; GENERIC-NEXT: vmovapd %ymm1, %ymm0 # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -13165,7 +13165,7 @@ define <4 x double> @test_4xdouble_zero_masked_unpack_low_mem_mask1(<4 x double>
; GENERIC-LABEL: test_4xdouble_zero_masked_unpack_low_mem_mask1:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vunpcklpd {{.*#+}} ymm0 {%k1} {z} = ymm0[0],mem[0],ymm0[2],mem[2] sched: [6:1.00]
+; GENERIC-NEXT: vunpcklpd {{.*#+}} ymm0 {%k1} {z} = ymm0[0],mem[0],ymm0[2],mem[2] sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_4xdouble_zero_masked_unpack_low_mem_mask1:
@@ -13184,7 +13184,7 @@ define <4 x double> @test_4xdouble_masked_unpack_low_mem_mask2(<4 x double> %vec
; GENERIC-LABEL: test_4xdouble_masked_unpack_low_mem_mask2:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vunpcklpd {{.*#+}} ymm1 {%k1} = ymm0[0],mem[0],ymm0[2],mem[2] sched: [6:1.00]
+; GENERIC-NEXT: vunpcklpd {{.*#+}} ymm1 {%k1} = ymm0[0],mem[0],ymm0[2],mem[2] sched: [8:1.00]
; GENERIC-NEXT: vmovapd %ymm1, %ymm0 # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -13205,7 +13205,7 @@ define <4 x double> @test_4xdouble_zero_masked_unpack_low_mem_mask2(<4 x double>
; GENERIC-LABEL: test_4xdouble_zero_masked_unpack_low_mem_mask2:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vunpcklpd {{.*#+}} ymm0 {%k1} {z} = ymm0[0],mem[0],ymm0[2],mem[2] sched: [6:1.00]
+; GENERIC-NEXT: vunpcklpd {{.*#+}} ymm0 {%k1} {z} = ymm0[0],mem[0],ymm0[2],mem[2] sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_4xdouble_zero_masked_unpack_low_mem_mask2:
@@ -13238,7 +13238,7 @@ define <4 x double> @test_4xdouble_masked_unpack_low_mem_mask3(<4 x double> %vec
; GENERIC-LABEL: test_4xdouble_masked_unpack_low_mem_mask3:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vunpcklpd {{.*#+}} ymm1 {%k1} = ymm0[0],mem[0],ymm0[2],mem[2] sched: [6:1.00]
+; GENERIC-NEXT: vunpcklpd {{.*#+}} ymm1 {%k1} = ymm0[0],mem[0],ymm0[2],mem[2] sched: [8:1.00]
; GENERIC-NEXT: vmovapd %ymm1, %ymm0 # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -13259,7 +13259,7 @@ define <4 x double> @test_4xdouble_zero_masked_unpack_low_mem_mask3(<4 x double>
; GENERIC-LABEL: test_4xdouble_zero_masked_unpack_low_mem_mask3:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vunpcklpd {{.*#+}} ymm0 {%k1} {z} = ymm0[0],mem[0],ymm0[2],mem[2] sched: [6:1.00]
+; GENERIC-NEXT: vunpcklpd {{.*#+}} ymm0 {%k1} {z} = ymm0[0],mem[0],ymm0[2],mem[2] sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_4xdouble_zero_masked_unpack_low_mem_mask3:
@@ -13451,7 +13451,7 @@ define <8 x double> @test_8xdouble_zero_masked_unpack_low_mask3(<8 x double> %ve
define <8 x double> @test_8xdouble_unpack_low_mem_mask0(<8 x double> %vec1, <8 x double>* %vec2p) {
; GENERIC-LABEL: test_8xdouble_unpack_low_mem_mask0:
; GENERIC: # %bb.0:
-; GENERIC-NEXT: vunpcklpd {{.*#+}} zmm0 = zmm0[0],mem[0],zmm0[2],mem[2],zmm0[4],mem[4],zmm0[6],mem[6] sched: [6:1.00]
+; GENERIC-NEXT: vunpcklpd {{.*#+}} zmm0 = zmm0[0],mem[0],zmm0[2],mem[2],zmm0[4],mem[4],zmm0[6],mem[6] sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_8xdouble_unpack_low_mem_mask0:
@@ -13466,7 +13466,7 @@ define <8 x double> @test_8xdouble_masked_unpack_low_mem_mask0(<8 x double> %vec
; GENERIC-LABEL: test_8xdouble_masked_unpack_low_mem_mask0:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vunpcklpd {{.*#+}} zmm1 {%k1} = zmm0[0],mem[0],zmm0[2],mem[2],zmm0[4],mem[4],zmm0[6],mem[6] sched: [6:1.00]
+; GENERIC-NEXT: vunpcklpd {{.*#+}} zmm1 {%k1} = zmm0[0],mem[0],zmm0[2],mem[2],zmm0[4],mem[4],zmm0[6],mem[6] sched: [8:1.00]
; GENERIC-NEXT: vmovapd %zmm1, %zmm0 # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -13487,7 +13487,7 @@ define <8 x double> @test_8xdouble_zero_masked_unpack_low_mem_mask0(<8 x double>
; GENERIC-LABEL: test_8xdouble_zero_masked_unpack_low_mem_mask0:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vunpcklpd {{.*#+}} zmm0 {%k1} {z} = zmm0[0],mem[0],zmm0[2],mem[2],zmm0[4],mem[4],zmm0[6],mem[6] sched: [6:1.00]
+; GENERIC-NEXT: vunpcklpd {{.*#+}} zmm0 {%k1} {z} = zmm0[0],mem[0],zmm0[2],mem[2],zmm0[4],mem[4],zmm0[6],mem[6] sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_8xdouble_zero_masked_unpack_low_mem_mask0:
@@ -13506,7 +13506,7 @@ define <8 x double> @test_8xdouble_masked_unpack_low_mem_mask1(<8 x double> %vec
; GENERIC-LABEL: test_8xdouble_masked_unpack_low_mem_mask1:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vunpcklpd {{.*#+}} zmm1 {%k1} = zmm0[0],mem[0],zmm0[2],mem[2],zmm0[4],mem[4],zmm0[6],mem[6] sched: [6:1.00]
+; GENERIC-NEXT: vunpcklpd {{.*#+}} zmm1 {%k1} = zmm0[0],mem[0],zmm0[2],mem[2],zmm0[4],mem[4],zmm0[6],mem[6] sched: [8:1.00]
; GENERIC-NEXT: vmovapd %zmm1, %zmm0 # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -13527,7 +13527,7 @@ define <8 x double> @test_8xdouble_zero_masked_unpack_low_mem_mask1(<8 x double>
; GENERIC-LABEL: test_8xdouble_zero_masked_unpack_low_mem_mask1:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vunpcklpd {{.*#+}} zmm0 {%k1} {z} = zmm0[0],mem[0],zmm0[2],mem[2],zmm0[4],mem[4],zmm0[6],mem[6] sched: [6:1.00]
+; GENERIC-NEXT: vunpcklpd {{.*#+}} zmm0 {%k1} {z} = zmm0[0],mem[0],zmm0[2],mem[2],zmm0[4],mem[4],zmm0[6],mem[6] sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_8xdouble_zero_masked_unpack_low_mem_mask1:
@@ -13546,7 +13546,7 @@ define <8 x double> @test_8xdouble_masked_unpack_low_mem_mask2(<8 x double> %vec
; GENERIC-LABEL: test_8xdouble_masked_unpack_low_mem_mask2:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vunpcklpd {{.*#+}} zmm1 {%k1} = zmm0[0],mem[0],zmm0[2],mem[2],zmm0[4],mem[4],zmm0[6],mem[6] sched: [6:1.00]
+; GENERIC-NEXT: vunpcklpd {{.*#+}} zmm1 {%k1} = zmm0[0],mem[0],zmm0[2],mem[2],zmm0[4],mem[4],zmm0[6],mem[6] sched: [8:1.00]
; GENERIC-NEXT: vmovapd %zmm1, %zmm0 # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -13567,7 +13567,7 @@ define <8 x double> @test_8xdouble_zero_masked_unpack_low_mem_mask2(<8 x double>
; GENERIC-LABEL: test_8xdouble_zero_masked_unpack_low_mem_mask2:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vunpcklpd {{.*#+}} zmm0 {%k1} {z} = zmm0[0],mem[0],zmm0[2],mem[2],zmm0[4],mem[4],zmm0[6],mem[6] sched: [6:1.00]
+; GENERIC-NEXT: vunpcklpd {{.*#+}} zmm0 {%k1} {z} = zmm0[0],mem[0],zmm0[2],mem[2],zmm0[4],mem[4],zmm0[6],mem[6] sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_8xdouble_zero_masked_unpack_low_mem_mask2:
@@ -13585,7 +13585,7 @@ define <8 x double> @test_8xdouble_zero_masked_unpack_low_mem_mask2(<8 x double>
define <8 x double> @test_8xdouble_unpack_low_mem_mask3(<8 x double> %vec1, <8 x double>* %vec2p) {
; GENERIC-LABEL: test_8xdouble_unpack_low_mem_mask3:
; GENERIC: # %bb.0:
-; GENERIC-NEXT: vunpcklpd {{.*#+}} zmm0 = zmm0[0],mem[0],zmm0[2],mem[2],zmm0[4],mem[4],zmm0[6],mem[6] sched: [6:1.00]
+; GENERIC-NEXT: vunpcklpd {{.*#+}} zmm0 = zmm0[0],mem[0],zmm0[2],mem[2],zmm0[4],mem[4],zmm0[6],mem[6] sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_8xdouble_unpack_low_mem_mask3:
@@ -13600,7 +13600,7 @@ define <8 x double> @test_8xdouble_masked_unpack_low_mem_mask3(<8 x double> %vec
; GENERIC-LABEL: test_8xdouble_masked_unpack_low_mem_mask3:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vunpcklpd {{.*#+}} zmm1 {%k1} = zmm0[0],mem[0],zmm0[2],mem[2],zmm0[4],mem[4],zmm0[6],mem[6] sched: [6:1.00]
+; GENERIC-NEXT: vunpcklpd {{.*#+}} zmm1 {%k1} = zmm0[0],mem[0],zmm0[2],mem[2],zmm0[4],mem[4],zmm0[6],mem[6] sched: [8:1.00]
; GENERIC-NEXT: vmovapd %zmm1, %zmm0 # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -13621,7 +13621,7 @@ define <8 x double> @test_8xdouble_zero_masked_unpack_low_mem_mask3(<8 x double>
; GENERIC-LABEL: test_8xdouble_zero_masked_unpack_low_mem_mask3:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vunpcklpd {{.*#+}} zmm0 {%k1} {z} = zmm0[0],mem[0],zmm0[2],mem[2],zmm0[4],mem[4],zmm0[6],mem[6] sched: [6:1.00]
+; GENERIC-NEXT: vunpcklpd {{.*#+}} zmm0 {%k1} {z} = zmm0[0],mem[0],zmm0[2],mem[2],zmm0[4],mem[4],zmm0[6],mem[6] sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_8xdouble_zero_masked_unpack_low_mem_mask3:
@@ -14190,7 +14190,7 @@ define <8 x float> @test_8xfloat_masked_unpack_high_mem_mask0(<8 x float> %vec1,
; GENERIC-LABEL: test_8xfloat_masked_unpack_high_mem_mask0:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vunpckhps {{.*#+}} ymm1 {%k1} = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] sched: [6:1.00]
+; GENERIC-NEXT: vunpckhps {{.*#+}} ymm1 {%k1} = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] sched: [8:1.00]
; GENERIC-NEXT: vmovaps %ymm1, %ymm0 # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -14211,7 +14211,7 @@ define <8 x float> @test_8xfloat_zero_masked_unpack_high_mem_mask0(<8 x float> %
; GENERIC-LABEL: test_8xfloat_zero_masked_unpack_high_mem_mask0:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vunpckhps {{.*#+}} ymm0 {%k1} {z} = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] sched: [6:1.00]
+; GENERIC-NEXT: vunpckhps {{.*#+}} ymm0 {%k1} {z} = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_8xfloat_zero_masked_unpack_high_mem_mask0:
@@ -14230,7 +14230,7 @@ define <8 x float> @test_8xfloat_masked_unpack_high_mem_mask1(<8 x float> %vec1,
; GENERIC-LABEL: test_8xfloat_masked_unpack_high_mem_mask1:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vunpckhps {{.*#+}} ymm1 {%k1} = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] sched: [6:1.00]
+; GENERIC-NEXT: vunpckhps {{.*#+}} ymm1 {%k1} = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] sched: [8:1.00]
; GENERIC-NEXT: vmovaps %ymm1, %ymm0 # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -14251,7 +14251,7 @@ define <8 x float> @test_8xfloat_zero_masked_unpack_high_mem_mask1(<8 x float> %
; GENERIC-LABEL: test_8xfloat_zero_masked_unpack_high_mem_mask1:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vunpckhps {{.*#+}} ymm0 {%k1} {z} = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] sched: [6:1.00]
+; GENERIC-NEXT: vunpckhps {{.*#+}} ymm0 {%k1} {z} = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_8xfloat_zero_masked_unpack_high_mem_mask1:
@@ -14270,7 +14270,7 @@ define <8 x float> @test_8xfloat_masked_unpack_high_mem_mask2(<8 x float> %vec1,
; GENERIC-LABEL: test_8xfloat_masked_unpack_high_mem_mask2:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vunpckhps {{.*#+}} ymm1 {%k1} = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] sched: [6:1.00]
+; GENERIC-NEXT: vunpckhps {{.*#+}} ymm1 {%k1} = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] sched: [8:1.00]
; GENERIC-NEXT: vmovaps %ymm1, %ymm0 # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -14291,7 +14291,7 @@ define <8 x float> @test_8xfloat_zero_masked_unpack_high_mem_mask2(<8 x float> %
; GENERIC-LABEL: test_8xfloat_zero_masked_unpack_high_mem_mask2:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vunpckhps {{.*#+}} ymm0 {%k1} {z} = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] sched: [6:1.00]
+; GENERIC-NEXT: vunpckhps {{.*#+}} ymm0 {%k1} {z} = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_8xfloat_zero_masked_unpack_high_mem_mask2:
@@ -14324,7 +14324,7 @@ define <8 x float> @test_8xfloat_masked_unpack_high_mem_mask3(<8 x float> %vec1,
; GENERIC-LABEL: test_8xfloat_masked_unpack_high_mem_mask3:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vunpckhps {{.*#+}} ymm1 {%k1} = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] sched: [6:1.00]
+; GENERIC-NEXT: vunpckhps {{.*#+}} ymm1 {%k1} = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] sched: [8:1.00]
; GENERIC-NEXT: vmovaps %ymm1, %ymm0 # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -14345,7 +14345,7 @@ define <8 x float> @test_8xfloat_zero_masked_unpack_high_mem_mask3(<8 x float> %
; GENERIC-LABEL: test_8xfloat_zero_masked_unpack_high_mem_mask3:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vunpckhps {{.*#+}} ymm0 {%k1} {z} = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] sched: [6:1.00]
+; GENERIC-NEXT: vunpckhps {{.*#+}} ymm0 {%k1} {z} = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_8xfloat_zero_masked_unpack_high_mem_mask3:
@@ -14537,7 +14537,7 @@ define <16 x float> @test_16xfloat_zero_masked_unpack_high_mask3(<16 x float> %v
define <16 x float> @test_16xfloat_unpack_high_mem_mask0(<16 x float> %vec1, <16 x float>* %vec2p) {
; GENERIC-LABEL: test_16xfloat_unpack_high_mem_mask0:
; GENERIC: # %bb.0:
-; GENERIC-NEXT: vunpckhps {{.*#+}} zmm0 = zmm0[2],mem[2],zmm0[3],mem[3],zmm0[6],mem[6],zmm0[7],mem[7],zmm0[10],mem[10],zmm0[11],mem[11],zmm0[14],mem[14],zmm0[15],mem[15] sched: [6:1.00]
+; GENERIC-NEXT: vunpckhps {{.*#+}} zmm0 = zmm0[2],mem[2],zmm0[3],mem[3],zmm0[6],mem[6],zmm0[7],mem[7],zmm0[10],mem[10],zmm0[11],mem[11],zmm0[14],mem[14],zmm0[15],mem[15] sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_16xfloat_unpack_high_mem_mask0:
@@ -14552,7 +14552,7 @@ define <16 x float> @test_16xfloat_masked_unpack_high_mem_mask0(<16 x float> %ve
; GENERIC-LABEL: test_16xfloat_masked_unpack_high_mem_mask0:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vunpckhps {{.*#+}} zmm1 {%k1} = zmm0[2],mem[2],zmm0[3],mem[3],zmm0[6],mem[6],zmm0[7],mem[7],zmm0[10],mem[10],zmm0[11],mem[11],zmm0[14],mem[14],zmm0[15],mem[15] sched: [6:1.00]
+; GENERIC-NEXT: vunpckhps {{.*#+}} zmm1 {%k1} = zmm0[2],mem[2],zmm0[3],mem[3],zmm0[6],mem[6],zmm0[7],mem[7],zmm0[10],mem[10],zmm0[11],mem[11],zmm0[14],mem[14],zmm0[15],mem[15] sched: [8:1.00]
; GENERIC-NEXT: vmovaps %zmm1, %zmm0 # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -14573,7 +14573,7 @@ define <16 x float> @test_16xfloat_zero_masked_unpack_high_mem_mask0(<16 x float
; GENERIC-LABEL: test_16xfloat_zero_masked_unpack_high_mem_mask0:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vunpckhps {{.*#+}} zmm0 {%k1} {z} = zmm0[2],mem[2],zmm0[3],mem[3],zmm0[6],mem[6],zmm0[7],mem[7],zmm0[10],mem[10],zmm0[11],mem[11],zmm0[14],mem[14],zmm0[15],mem[15] sched: [6:1.00]
+; GENERIC-NEXT: vunpckhps {{.*#+}} zmm0 {%k1} {z} = zmm0[2],mem[2],zmm0[3],mem[3],zmm0[6],mem[6],zmm0[7],mem[7],zmm0[10],mem[10],zmm0[11],mem[11],zmm0[14],mem[14],zmm0[15],mem[15] sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_16xfloat_zero_masked_unpack_high_mem_mask0:
@@ -14592,7 +14592,7 @@ define <16 x float> @test_16xfloat_masked_unpack_high_mem_mask1(<16 x float> %ve
; GENERIC-LABEL: test_16xfloat_masked_unpack_high_mem_mask1:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vunpckhps {{.*#+}} zmm1 {%k1} = zmm0[2],mem[2],zmm0[3],mem[3],zmm0[6],mem[6],zmm0[7],mem[7],zmm0[10],mem[10],zmm0[11],mem[11],zmm0[14],mem[14],zmm0[15],mem[15] sched: [6:1.00]
+; GENERIC-NEXT: vunpckhps {{.*#+}} zmm1 {%k1} = zmm0[2],mem[2],zmm0[3],mem[3],zmm0[6],mem[6],zmm0[7],mem[7],zmm0[10],mem[10],zmm0[11],mem[11],zmm0[14],mem[14],zmm0[15],mem[15] sched: [8:1.00]
; GENERIC-NEXT: vmovaps %zmm1, %zmm0 # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -14613,7 +14613,7 @@ define <16 x float> @test_16xfloat_zero_masked_unpack_high_mem_mask1(<16 x float
; GENERIC-LABEL: test_16xfloat_zero_masked_unpack_high_mem_mask1:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vunpckhps {{.*#+}} zmm0 {%k1} {z} = zmm0[2],mem[2],zmm0[3],mem[3],zmm0[6],mem[6],zmm0[7],mem[7],zmm0[10],mem[10],zmm0[11],mem[11],zmm0[14],mem[14],zmm0[15],mem[15] sched: [6:1.00]
+; GENERIC-NEXT: vunpckhps {{.*#+}} zmm0 {%k1} {z} = zmm0[2],mem[2],zmm0[3],mem[3],zmm0[6],mem[6],zmm0[7],mem[7],zmm0[10],mem[10],zmm0[11],mem[11],zmm0[14],mem[14],zmm0[15],mem[15] sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_16xfloat_zero_masked_unpack_high_mem_mask1:
@@ -14632,7 +14632,7 @@ define <16 x float> @test_16xfloat_masked_unpack_high_mem_mask2(<16 x float> %ve
; GENERIC-LABEL: test_16xfloat_masked_unpack_high_mem_mask2:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vunpckhps {{.*#+}} zmm1 {%k1} = zmm0[2],mem[2],zmm0[3],mem[3],zmm0[6],mem[6],zmm0[7],mem[7],zmm0[10],mem[10],zmm0[11],mem[11],zmm0[14],mem[14],zmm0[15],mem[15] sched: [6:1.00]
+; GENERIC-NEXT: vunpckhps {{.*#+}} zmm1 {%k1} = zmm0[2],mem[2],zmm0[3],mem[3],zmm0[6],mem[6],zmm0[7],mem[7],zmm0[10],mem[10],zmm0[11],mem[11],zmm0[14],mem[14],zmm0[15],mem[15] sched: [8:1.00]
; GENERIC-NEXT: vmovaps %zmm1, %zmm0 # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -14653,7 +14653,7 @@ define <16 x float> @test_16xfloat_zero_masked_unpack_high_mem_mask2(<16 x float
; GENERIC-LABEL: test_16xfloat_zero_masked_unpack_high_mem_mask2:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vunpckhps {{.*#+}} zmm0 {%k1} {z} = zmm0[2],mem[2],zmm0[3],mem[3],zmm0[6],mem[6],zmm0[7],mem[7],zmm0[10],mem[10],zmm0[11],mem[11],zmm0[14],mem[14],zmm0[15],mem[15] sched: [6:1.00]
+; GENERIC-NEXT: vunpckhps {{.*#+}} zmm0 {%k1} {z} = zmm0[2],mem[2],zmm0[3],mem[3],zmm0[6],mem[6],zmm0[7],mem[7],zmm0[10],mem[10],zmm0[11],mem[11],zmm0[14],mem[14],zmm0[15],mem[15] sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_16xfloat_zero_masked_unpack_high_mem_mask2:
@@ -14671,7 +14671,7 @@ define <16 x float> @test_16xfloat_zero_masked_unpack_high_mem_mask2(<16 x float
define <16 x float> @test_16xfloat_unpack_high_mem_mask3(<16 x float> %vec1, <16 x float>* %vec2p) {
; GENERIC-LABEL: test_16xfloat_unpack_high_mem_mask3:
; GENERIC: # %bb.0:
-; GENERIC-NEXT: vunpckhps {{.*#+}} zmm0 = zmm0[2],mem[2],zmm0[3],mem[3],zmm0[6],mem[6],zmm0[7],mem[7],zmm0[10],mem[10],zmm0[11],mem[11],zmm0[14],mem[14],zmm0[15],mem[15] sched: [6:1.00]
+; GENERIC-NEXT: vunpckhps {{.*#+}} zmm0 = zmm0[2],mem[2],zmm0[3],mem[3],zmm0[6],mem[6],zmm0[7],mem[7],zmm0[10],mem[10],zmm0[11],mem[11],zmm0[14],mem[14],zmm0[15],mem[15] sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_16xfloat_unpack_high_mem_mask3:
@@ -14686,7 +14686,7 @@ define <16 x float> @test_16xfloat_masked_unpack_high_mem_mask3(<16 x float> %ve
; GENERIC-LABEL: test_16xfloat_masked_unpack_high_mem_mask3:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vunpckhps {{.*#+}} zmm1 {%k1} = zmm0[2],mem[2],zmm0[3],mem[3],zmm0[6],mem[6],zmm0[7],mem[7],zmm0[10],mem[10],zmm0[11],mem[11],zmm0[14],mem[14],zmm0[15],mem[15] sched: [6:1.00]
+; GENERIC-NEXT: vunpckhps {{.*#+}} zmm1 {%k1} = zmm0[2],mem[2],zmm0[3],mem[3],zmm0[6],mem[6],zmm0[7],mem[7],zmm0[10],mem[10],zmm0[11],mem[11],zmm0[14],mem[14],zmm0[15],mem[15] sched: [8:1.00]
; GENERIC-NEXT: vmovaps %zmm1, %zmm0 # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -14707,7 +14707,7 @@ define <16 x float> @test_16xfloat_zero_masked_unpack_high_mem_mask3(<16 x float
; GENERIC-LABEL: test_16xfloat_zero_masked_unpack_high_mem_mask3:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vunpckhps {{.*#+}} zmm0 {%k1} {z} = zmm0[2],mem[2],zmm0[3],mem[3],zmm0[6],mem[6],zmm0[7],mem[7],zmm0[10],mem[10],zmm0[11],mem[11],zmm0[14],mem[14],zmm0[15],mem[15] sched: [6:1.00]
+; GENERIC-NEXT: vunpckhps {{.*#+}} zmm0 {%k1} {z} = zmm0[2],mem[2],zmm0[3],mem[3],zmm0[6],mem[6],zmm0[7],mem[7],zmm0[10],mem[10],zmm0[11],mem[11],zmm0[14],mem[14],zmm0[15],mem[15] sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_16xfloat_zero_masked_unpack_high_mem_mask3:
@@ -15095,7 +15095,7 @@ define <4 x double> @test_4xdouble_masked_unpack_high_mem_mask0(<4 x double> %ve
; GENERIC-LABEL: test_4xdouble_masked_unpack_high_mem_mask0:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vunpckhpd {{.*#+}} ymm1 {%k1} = ymm0[1],mem[1],ymm0[3],mem[3] sched: [6:1.00]
+; GENERIC-NEXT: vunpckhpd {{.*#+}} ymm1 {%k1} = ymm0[1],mem[1],ymm0[3],mem[3] sched: [8:1.00]
; GENERIC-NEXT: vmovapd %ymm1, %ymm0 # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -15116,7 +15116,7 @@ define <4 x double> @test_4xdouble_zero_masked_unpack_high_mem_mask0(<4 x double
; GENERIC-LABEL: test_4xdouble_zero_masked_unpack_high_mem_mask0:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vunpckhpd {{.*#+}} ymm0 {%k1} {z} = ymm0[1],mem[1],ymm0[3],mem[3] sched: [6:1.00]
+; GENERIC-NEXT: vunpckhpd {{.*#+}} ymm0 {%k1} {z} = ymm0[1],mem[1],ymm0[3],mem[3] sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_4xdouble_zero_masked_unpack_high_mem_mask0:
@@ -15135,7 +15135,7 @@ define <4 x double> @test_4xdouble_masked_unpack_high_mem_mask1(<4 x double> %ve
; GENERIC-LABEL: test_4xdouble_masked_unpack_high_mem_mask1:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vunpckhpd {{.*#+}} ymm1 {%k1} = ymm0[1],mem[1],ymm0[3],mem[3] sched: [6:1.00]
+; GENERIC-NEXT: vunpckhpd {{.*#+}} ymm1 {%k1} = ymm0[1],mem[1],ymm0[3],mem[3] sched: [8:1.00]
; GENERIC-NEXT: vmovapd %ymm1, %ymm0 # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -15156,7 +15156,7 @@ define <4 x double> @test_4xdouble_zero_masked_unpack_high_mem_mask1(<4 x double
; GENERIC-LABEL: test_4xdouble_zero_masked_unpack_high_mem_mask1:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vunpckhpd {{.*#+}} ymm0 {%k1} {z} = ymm0[1],mem[1],ymm0[3],mem[3] sched: [6:1.00]
+; GENERIC-NEXT: vunpckhpd {{.*#+}} ymm0 {%k1} {z} = ymm0[1],mem[1],ymm0[3],mem[3] sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_4xdouble_zero_masked_unpack_high_mem_mask1:
@@ -15175,7 +15175,7 @@ define <4 x double> @test_4xdouble_masked_unpack_high_mem_mask2(<4 x double> %ve
; GENERIC-LABEL: test_4xdouble_masked_unpack_high_mem_mask2:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vunpckhpd {{.*#+}} ymm1 {%k1} = ymm0[1],mem[1],ymm0[3],mem[3] sched: [6:1.00]
+; GENERIC-NEXT: vunpckhpd {{.*#+}} ymm1 {%k1} = ymm0[1],mem[1],ymm0[3],mem[3] sched: [8:1.00]
; GENERIC-NEXT: vmovapd %ymm1, %ymm0 # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -15196,7 +15196,7 @@ define <4 x double> @test_4xdouble_zero_masked_unpack_high_mem_mask2(<4 x double
; GENERIC-LABEL: test_4xdouble_zero_masked_unpack_high_mem_mask2:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vunpckhpd {{.*#+}} ymm0 {%k1} {z} = ymm0[1],mem[1],ymm0[3],mem[3] sched: [6:1.00]
+; GENERIC-NEXT: vunpckhpd {{.*#+}} ymm0 {%k1} {z} = ymm0[1],mem[1],ymm0[3],mem[3] sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_4xdouble_zero_masked_unpack_high_mem_mask2:
@@ -15229,7 +15229,7 @@ define <4 x double> @test_4xdouble_masked_unpack_high_mem_mask3(<4 x double> %ve
; GENERIC-LABEL: test_4xdouble_masked_unpack_high_mem_mask3:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vunpckhpd {{.*#+}} ymm1 {%k1} = ymm0[1],mem[1],ymm0[3],mem[3] sched: [6:1.00]
+; GENERIC-NEXT: vunpckhpd {{.*#+}} ymm1 {%k1} = ymm0[1],mem[1],ymm0[3],mem[3] sched: [8:1.00]
; GENERIC-NEXT: vmovapd %ymm1, %ymm0 # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -15250,7 +15250,7 @@ define <4 x double> @test_4xdouble_zero_masked_unpack_high_mem_mask3(<4 x double
; GENERIC-LABEL: test_4xdouble_zero_masked_unpack_high_mem_mask3:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vunpckhpd {{.*#+}} ymm0 {%k1} {z} = ymm0[1],mem[1],ymm0[3],mem[3] sched: [6:1.00]
+; GENERIC-NEXT: vunpckhpd {{.*#+}} ymm0 {%k1} {z} = ymm0[1],mem[1],ymm0[3],mem[3] sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_4xdouble_zero_masked_unpack_high_mem_mask3:
@@ -15442,7 +15442,7 @@ define <8 x double> @test_8xdouble_zero_masked_unpack_high_mask3(<8 x double> %v
define <8 x double> @test_8xdouble_unpack_high_mem_mask0(<8 x double> %vec1, <8 x double>* %vec2p) {
; GENERIC-LABEL: test_8xdouble_unpack_high_mem_mask0:
; GENERIC: # %bb.0:
-; GENERIC-NEXT: vunpckhpd {{.*#+}} zmm0 = zmm0[1],mem[1],zmm0[3],mem[3],zmm0[5],mem[5],zmm0[7],mem[7] sched: [6:1.00]
+; GENERIC-NEXT: vunpckhpd {{.*#+}} zmm0 = zmm0[1],mem[1],zmm0[3],mem[3],zmm0[5],mem[5],zmm0[7],mem[7] sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_8xdouble_unpack_high_mem_mask0:
@@ -15457,7 +15457,7 @@ define <8 x double> @test_8xdouble_masked_unpack_high_mem_mask0(<8 x double> %ve
; GENERIC-LABEL: test_8xdouble_masked_unpack_high_mem_mask0:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vunpckhpd {{.*#+}} zmm1 {%k1} = zmm0[1],mem[1],zmm0[3],mem[3],zmm0[5],mem[5],zmm0[7],mem[7] sched: [6:1.00]
+; GENERIC-NEXT: vunpckhpd {{.*#+}} zmm1 {%k1} = zmm0[1],mem[1],zmm0[3],mem[3],zmm0[5],mem[5],zmm0[7],mem[7] sched: [8:1.00]
; GENERIC-NEXT: vmovapd %zmm1, %zmm0 # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -15478,7 +15478,7 @@ define <8 x double> @test_8xdouble_zero_masked_unpack_high_mem_mask0(<8 x double
; GENERIC-LABEL: test_8xdouble_zero_masked_unpack_high_mem_mask0:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vunpckhpd {{.*#+}} zmm0 {%k1} {z} = zmm0[1],mem[1],zmm0[3],mem[3],zmm0[5],mem[5],zmm0[7],mem[7] sched: [6:1.00]
+; GENERIC-NEXT: vunpckhpd {{.*#+}} zmm0 {%k1} {z} = zmm0[1],mem[1],zmm0[3],mem[3],zmm0[5],mem[5],zmm0[7],mem[7] sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_8xdouble_zero_masked_unpack_high_mem_mask0:
@@ -15497,7 +15497,7 @@ define <8 x double> @test_8xdouble_masked_unpack_high_mem_mask1(<8 x double> %ve
; GENERIC-LABEL: test_8xdouble_masked_unpack_high_mem_mask1:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vunpckhpd {{.*#+}} zmm1 {%k1} = zmm0[1],mem[1],zmm0[3],mem[3],zmm0[5],mem[5],zmm0[7],mem[7] sched: [6:1.00]
+; GENERIC-NEXT: vunpckhpd {{.*#+}} zmm1 {%k1} = zmm0[1],mem[1],zmm0[3],mem[3],zmm0[5],mem[5],zmm0[7],mem[7] sched: [8:1.00]
; GENERIC-NEXT: vmovapd %zmm1, %zmm0 # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -15518,7 +15518,7 @@ define <8 x double> @test_8xdouble_zero_masked_unpack_high_mem_mask1(<8 x double
; GENERIC-LABEL: test_8xdouble_zero_masked_unpack_high_mem_mask1:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vunpckhpd {{.*#+}} zmm0 {%k1} {z} = zmm0[1],mem[1],zmm0[3],mem[3],zmm0[5],mem[5],zmm0[7],mem[7] sched: [6:1.00]
+; GENERIC-NEXT: vunpckhpd {{.*#+}} zmm0 {%k1} {z} = zmm0[1],mem[1],zmm0[3],mem[3],zmm0[5],mem[5],zmm0[7],mem[7] sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_8xdouble_zero_masked_unpack_high_mem_mask1:
@@ -15537,7 +15537,7 @@ define <8 x double> @test_8xdouble_masked_unpack_high_mem_mask2(<8 x double> %ve
; GENERIC-LABEL: test_8xdouble_masked_unpack_high_mem_mask2:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vunpckhpd {{.*#+}} zmm1 {%k1} = zmm0[1],mem[1],zmm0[3],mem[3],zmm0[5],mem[5],zmm0[7],mem[7] sched: [6:1.00]
+; GENERIC-NEXT: vunpckhpd {{.*#+}} zmm1 {%k1} = zmm0[1],mem[1],zmm0[3],mem[3],zmm0[5],mem[5],zmm0[7],mem[7] sched: [8:1.00]
; GENERIC-NEXT: vmovapd %zmm1, %zmm0 # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -15558,7 +15558,7 @@ define <8 x double> @test_8xdouble_zero_masked_unpack_high_mem_mask2(<8 x double
; GENERIC-LABEL: test_8xdouble_zero_masked_unpack_high_mem_mask2:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vunpckhpd {{.*#+}} zmm0 {%k1} {z} = zmm0[1],mem[1],zmm0[3],mem[3],zmm0[5],mem[5],zmm0[7],mem[7] sched: [6:1.00]
+; GENERIC-NEXT: vunpckhpd {{.*#+}} zmm0 {%k1} {z} = zmm0[1],mem[1],zmm0[3],mem[3],zmm0[5],mem[5],zmm0[7],mem[7] sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_8xdouble_zero_masked_unpack_high_mem_mask2:
@@ -15576,7 +15576,7 @@ define <8 x double> @test_8xdouble_zero_masked_unpack_high_mem_mask2(<8 x double
define <8 x double> @test_8xdouble_unpack_high_mem_mask3(<8 x double> %vec1, <8 x double>* %vec2p) {
; GENERIC-LABEL: test_8xdouble_unpack_high_mem_mask3:
; GENERIC: # %bb.0:
-; GENERIC-NEXT: vunpckhpd {{.*#+}} zmm0 = zmm0[1],mem[1],zmm0[3],mem[3],zmm0[5],mem[5],zmm0[7],mem[7] sched: [6:1.00]
+; GENERIC-NEXT: vunpckhpd {{.*#+}} zmm0 = zmm0[1],mem[1],zmm0[3],mem[3],zmm0[5],mem[5],zmm0[7],mem[7] sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_8xdouble_unpack_high_mem_mask3:
@@ -15591,7 +15591,7 @@ define <8 x double> @test_8xdouble_masked_unpack_high_mem_mask3(<8 x double> %ve
; GENERIC-LABEL: test_8xdouble_masked_unpack_high_mem_mask3:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vunpckhpd {{.*#+}} zmm1 {%k1} = zmm0[1],mem[1],zmm0[3],mem[3],zmm0[5],mem[5],zmm0[7],mem[7] sched: [6:1.00]
+; GENERIC-NEXT: vunpckhpd {{.*#+}} zmm1 {%k1} = zmm0[1],mem[1],zmm0[3],mem[3],zmm0[5],mem[5],zmm0[7],mem[7] sched: [8:1.00]
; GENERIC-NEXT: vmovapd %zmm1, %zmm0 # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -15612,7 +15612,7 @@ define <8 x double> @test_8xdouble_zero_masked_unpack_high_mem_mask3(<8 x double
; GENERIC-LABEL: test_8xdouble_zero_masked_unpack_high_mem_mask3:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vunpckhpd {{.*#+}} zmm0 {%k1} {z} = zmm0[1],mem[1],zmm0[3],mem[3],zmm0[5],mem[5],zmm0[7],mem[7] sched: [6:1.00]
+; GENERIC-NEXT: vunpckhpd {{.*#+}} zmm0 {%k1} {z} = zmm0[1],mem[1],zmm0[3],mem[3],zmm0[5],mem[5],zmm0[7],mem[7] sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_8xdouble_zero_masked_unpack_high_mem_mask3:
OpenPOWER on IntegriCloud