author    Craig Topper <craig.topper@intel.com>  2017-07-04 05:46:11 +0000
committer Craig Topper <craig.topper@intel.com>  2017-07-04 05:46:11 +0000
commit    ad140cfb689f1bac5bdce3c4af2f26d3fb598b9e (patch)
tree      304966a59510638ae9ac9a1b30fe76dedb815c61
parent    49fc24a8bfd838d99aeb979e0a9af345c62b3d44 (diff)
[X86] Add comment string for broadcast loads from the constant pool.
Summary:
When broadcasting from the constant pool, it's useful to print out the final vector, similar to what we do for normal moves from the constant pool.

I changed only a couple of tests that were broadcast focused. One of them had previously been hand tweaked after running the script so that it could check the constant pool declaration. I think this patch makes that unnecessary now, since we can check the comment instead.

Reviewers: spatel, RKSimon, zvi

Reviewed By: spatel

Subscribers: llvm-commits

Differential Revision: https://reviews.llvm.org/D34923

llvm-svn: 307062
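For illustration, here is a minimal standalone sketch of the idea the patch implements for the scalar-broadcast case: the scalar constant loaded from the constant pool is repeated NumElts times to build the comment that the asm printer attaches to the instruction. This is not the LLVM code itself; the function name buildBroadcastComment and the use of std::string instead of raw_string_ostream are simplifications for the example, and the real patch calls a printConstant() helper that also handles undef, wide integers, and floating-point constants.

#include <cstdint>
#include <iostream>
#include <string>

// Build a comment like "ymm1 = [2,2,2,2,2,2,2,2]" for a broadcast load of a
// single scalar constant, mirroring the loop added to
// X86AsmPrinter::EmitInstruction for the VBROADCAST*/VPBROADCAST* cases.
// (Simplified sketch; the real code uses raw_string_ostream and printConstant.)
static std::string buildBroadcastComment(const std::string &DstReg,
                                         uint64_t ScalarVal, int NumElts) {
  std::string Comment = DstReg + " = [";
  for (int i = 0; i != NumElts; ++i) {
    if (i != 0)
      Comment += ",";
    Comment += std::to_string(ScalarVal);
  }
  Comment += "]";
  return Comment;
}

int main() {
  // A vpbroadcastd of the constant 2 into a 256-bit register has 8 elements,
  // matching the "ymm1 = [2,2,2,2,2,2,2,2]" comments in the updated tests.
  std::cout << buildBroadcastComment("ymm1", 2, 8) << "\n";
}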
-rw-r--r--  llvm/lib/Target/X86/X86MCInstLower.cpp                  193
-rw-r--r--  llvm/test/CodeGen/X86/avg.ll                               6
-rw-r--r--  llvm/test/CodeGen/X86/avx2-vbroadcast.ll                  12
-rw-r--r--  llvm/test/CodeGen/X86/broadcast-elm-cross-splat-vec.ll  1991
-rw-r--r--  llvm/test/CodeGen/X86/recip-fastmath.ll                   40
-rw-r--r--  llvm/test/CodeGen/X86/recip-fastmath2.ll                  40
-rw-r--r--  llvm/test/CodeGen/X86/vec_shift6.ll                        9
7 files changed, 1615 insertions, 676 deletions
diff --git a/llvm/lib/Target/X86/X86MCInstLower.cpp b/llvm/lib/Target/X86/X86MCInstLower.cpp
index c919007013d..fd2837b7910 100644
--- a/llvm/lib/Target/X86/X86MCInstLower.cpp
+++ b/llvm/lib/Target/X86/X86MCInstLower.cpp
@@ -1332,6 +1332,32 @@ static std::string getShuffleComment(const MachineInstr *MI,
return Comment;
}
+static void printConstant(const Constant *COp, raw_ostream &CS) {
+ if (isa<UndefValue>(COp)) {
+ CS << "u";
+ } else if (auto *CI = dyn_cast<ConstantInt>(COp)) {
+ if (CI->getBitWidth() <= 64) {
+ CS << CI->getZExtValue();
+ } else {
+ // print multi-word constant as (w0,w1)
+ const auto &Val = CI->getValue();
+ CS << "(";
+ for (int i = 0, N = Val.getNumWords(); i < N; ++i) {
+ if (i > 0)
+ CS << ",";
+ CS << Val.getRawData()[i];
+ }
+ CS << ")";
+ }
+ } else if (auto *CF = dyn_cast<ConstantFP>(COp)) {
+ SmallString<32> Str;
+ CF->getValueAPF().toString(Str);
+ CS << Str;
+ } else {
+ CS << "?";
+ }
+}
+
void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) {
X86MCInstLower MCInstLowering(*MF, *this);
const X86RegisterInfo *RI = MF->getSubtarget<X86Subtarget>().getRegisterInfo();
@@ -1766,59 +1792,73 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) {
// For loads from a constant pool to a vector register, print the constant
// loaded.
CASE_ALL_MOV_RM()
+ case X86::VBROADCASTF128:
+ case X86::VBROADCASTI128:
+ case X86::VBROADCASTF32X4Z256rm:
+ case X86::VBROADCASTF32X4rm:
+ case X86::VBROADCASTF32X8rm:
+ case X86::VBROADCASTF64X2Z128rm:
+ case X86::VBROADCASTF64X2rm:
+ case X86::VBROADCASTF64X4rm:
+ case X86::VBROADCASTI32X4Z256rm:
+ case X86::VBROADCASTI32X4rm:
+ case X86::VBROADCASTI32X8rm:
+ case X86::VBROADCASTI64X2Z128rm:
+ case X86::VBROADCASTI64X2rm:
+ case X86::VBROADCASTI64X4rm:
if (!OutStreamer->isVerboseAsm())
break;
if (MI->getNumOperands() <= 4)
break;
if (auto *C = getConstantFromPool(*MI, MI->getOperand(4))) {
+ int NumLanes = 1;
+ // Override NumLanes for the broadcast instructions.
+ switch (MI->getOpcode()) {
+ case X86::VBROADCASTF128: NumLanes = 2; break;
+ case X86::VBROADCASTI128: NumLanes = 2; break;
+ case X86::VBROADCASTF32X4Z256rm: NumLanes = 2; break;
+ case X86::VBROADCASTF32X4rm: NumLanes = 4; break;
+ case X86::VBROADCASTF32X8rm: NumLanes = 2; break;
+ case X86::VBROADCASTF64X2Z128rm: NumLanes = 2; break;
+ case X86::VBROADCASTF64X2rm: NumLanes = 4; break;
+ case X86::VBROADCASTF64X4rm: NumLanes = 2; break;
+ case X86::VBROADCASTI32X4Z256rm: NumLanes = 2; break;
+ case X86::VBROADCASTI32X4rm: NumLanes = 4; break;
+ case X86::VBROADCASTI32X8rm: NumLanes = 2; break;
+ case X86::VBROADCASTI64X2Z128rm: NumLanes = 2; break;
+ case X86::VBROADCASTI64X2rm: NumLanes = 4; break;
+ case X86::VBROADCASTI64X4rm: NumLanes = 2; break;
+ }
+
std::string Comment;
raw_string_ostream CS(Comment);
const MachineOperand &DstOp = MI->getOperand(0);
CS << X86ATTInstPrinter::getRegisterName(DstOp.getReg()) << " = ";
if (auto *CDS = dyn_cast<ConstantDataSequential>(C)) {
CS << "[";
- for (int i = 0, NumElements = CDS->getNumElements(); i < NumElements; ++i) {
- if (i != 0)
- CS << ",";
- if (CDS->getElementType()->isIntegerTy())
- CS << CDS->getElementAsInteger(i);
- else if (CDS->getElementType()->isFloatTy())
- CS << CDS->getElementAsFloat(i);
- else if (CDS->getElementType()->isDoubleTy())
- CS << CDS->getElementAsDouble(i);
- else
- CS << "?";
+ for (int l = 0; l != NumLanes; ++l) {
+ for (int i = 0, NumElements = CDS->getNumElements(); i < NumElements; ++i) {
+ if (i != 0 || l != 0)
+ CS << ",";
+ if (CDS->getElementType()->isIntegerTy())
+ CS << CDS->getElementAsInteger(i);
+ else if (CDS->getElementType()->isFloatTy())
+ CS << CDS->getElementAsFloat(i);
+ else if (CDS->getElementType()->isDoubleTy())
+ CS << CDS->getElementAsDouble(i);
+ else
+ CS << "?";
+ }
}
CS << "]";
OutStreamer->AddComment(CS.str(), !EnablePrintSchedInfo);
} else if (auto *CV = dyn_cast<ConstantVector>(C)) {
CS << "<";
- for (int i = 0, NumOperands = CV->getNumOperands(); i < NumOperands; ++i) {
- if (i != 0)
- CS << ",";
- Constant *COp = CV->getOperand(i);
- if (isa<UndefValue>(COp)) {
- CS << "u";
- } else if (auto *CI = dyn_cast<ConstantInt>(COp)) {
- if (CI->getBitWidth() <= 64) {
- CS << CI->getZExtValue();
- } else {
- // print multi-word constant as (w0,w1)
- const auto &Val = CI->getValue();
- CS << "(";
- for (int i = 0, N = Val.getNumWords(); i < N; ++i) {
- if (i > 0)
- CS << ",";
- CS << Val.getRawData()[i];
- }
- CS << ")";
- }
- } else if (auto *CF = dyn_cast<ConstantFP>(COp)) {
- SmallString<32> Str;
- CF->getValueAPF().toString(Str);
- CS << Str;
- } else {
- CS << "?";
+ for (int l = 0; l != NumLanes; ++l) {
+ for (int i = 0, NumOperands = CV->getNumOperands(); i < NumOperands; ++i) {
+ if (i != 0 || l != 0)
+ CS << ",";
+ printConstant(CV->getOperand(i), CS);
}
}
CS << ">";
@@ -1826,6 +1866,85 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) {
}
}
break;
+ case X86::VBROADCASTSSrm:
+ case X86::VBROADCASTSSYrm:
+ case X86::VBROADCASTSSZ128m:
+ case X86::VBROADCASTSSZ256m:
+ case X86::VBROADCASTSSZm:
+ case X86::VBROADCASTSDYrm:
+ case X86::VBROADCASTSDZ256m:
+ case X86::VBROADCASTSDZm:
+ case X86::VPBROADCASTBrm:
+ case X86::VPBROADCASTBYrm:
+ case X86::VPBROADCASTBZ128m:
+ case X86::VPBROADCASTBZ256m:
+ case X86::VPBROADCASTBZm:
+ case X86::VPBROADCASTDrm:
+ case X86::VPBROADCASTDYrm:
+ case X86::VPBROADCASTDZ128m:
+ case X86::VPBROADCASTDZ256m:
+ case X86::VPBROADCASTDZm:
+ case X86::VPBROADCASTQrm:
+ case X86::VPBROADCASTQYrm:
+ case X86::VPBROADCASTQZ128m:
+ case X86::VPBROADCASTQZ256m:
+ case X86::VPBROADCASTQZm:
+ case X86::VPBROADCASTWrm:
+ case X86::VPBROADCASTWYrm:
+ case X86::VPBROADCASTWZ128m:
+ case X86::VPBROADCASTWZ256m:
+ case X86::VPBROADCASTWZm:
+ if (!OutStreamer->isVerboseAsm())
+ break;
+ if (MI->getNumOperands() <= 4)
+ break;
+ if (auto *C = getConstantFromPool(*MI, MI->getOperand(4))) {
+ int NumElts;
+ switch (MI->getOpcode()) {
+ default: llvm_unreachable("Invalid opcode");
+ case X86::VBROADCASTSSrm: NumElts = 4; break;
+ case X86::VBROADCASTSSYrm: NumElts = 8; break;
+ case X86::VBROADCASTSSZ128m: NumElts = 4; break;
+ case X86::VBROADCASTSSZ256m: NumElts = 8; break;
+ case X86::VBROADCASTSSZm: NumElts = 16; break;
+ case X86::VBROADCASTSDYrm: NumElts = 4; break;
+ case X86::VBROADCASTSDZ256m: NumElts = 4; break;
+ case X86::VBROADCASTSDZm: NumElts = 8; break;
+ case X86::VPBROADCASTBrm: NumElts = 16; break;
+ case X86::VPBROADCASTBYrm: NumElts = 32; break;
+ case X86::VPBROADCASTBZ128m: NumElts = 16; break;
+ case X86::VPBROADCASTBZ256m: NumElts = 32; break;
+ case X86::VPBROADCASTBZm: NumElts = 64; break;
+ case X86::VPBROADCASTDrm: NumElts = 4; break;
+ case X86::VPBROADCASTDYrm: NumElts = 8; break;
+ case X86::VPBROADCASTDZ128m: NumElts = 4; break;
+ case X86::VPBROADCASTDZ256m: NumElts = 8; break;
+ case X86::VPBROADCASTDZm: NumElts = 16; break;
+ case X86::VPBROADCASTQrm: NumElts = 2; break;
+ case X86::VPBROADCASTQYrm: NumElts = 4; break;
+ case X86::VPBROADCASTQZ128m: NumElts = 2; break;
+ case X86::VPBROADCASTQZ256m: NumElts = 4; break;
+ case X86::VPBROADCASTQZm: NumElts = 8; break;
+ case X86::VPBROADCASTWrm: NumElts = 8; break;
+ case X86::VPBROADCASTWYrm: NumElts = 16; break;
+ case X86::VPBROADCASTWZ128m: NumElts = 8; break;
+ case X86::VPBROADCASTWZ256m: NumElts = 16; break;
+ case X86::VPBROADCASTWZm: NumElts = 32; break;
+ }
+
+ std::string Comment;
+ raw_string_ostream CS(Comment);
+ const MachineOperand &DstOp = MI->getOperand(0);
+ CS << X86ATTInstPrinter::getRegisterName(DstOp.getReg()) << " = ";
+ CS << "[";
+ for (int i = 0; i != NumElts; ++i) {
+ if (i != 0)
+ CS << ",";
+ printConstant(C, CS);
+ }
+ CS << "]";
+ OutStreamer->AddComment(CS.str(), !EnablePrintSchedInfo);
+ }
}
MCInst TmpInst;
diff --git a/llvm/test/CodeGen/X86/avg.ll b/llvm/test/CodeGen/X86/avg.ll
index e5f7cc5c6dd..640b5215afe 100644
--- a/llvm/test/CodeGen/X86/avg.ll
+++ b/llvm/test/CodeGen/X86/avg.ll
@@ -2624,7 +2624,8 @@ define void @avg_v64i8_const(<64 x i8>* %a) {
; AVX512F-NEXT: vpmovzxbd {{.*#+}} zmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero,mem[8],zero,zero,zero,mem[9],zero,zero,zero,mem[10],zero,zero,zero,mem[11],zero,zero,zero,mem[12],zero,zero,zero,mem[13],zero,zero,zero,mem[14],zero,zero,zero,mem[15],zero,zero,zero
; AVX512F-NEXT: vpmovzxbd {{.*#+}} zmm2 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero,mem[8],zero,zero,zero,mem[9],zero,zero,zero,mem[10],zero,zero,zero,mem[11],zero,zero,zero,mem[12],zero,zero,zero,mem[13],zero,zero,zero,mem[14],zero,zero,zero,mem[15],zero,zero,zero
; AVX512F-NEXT: vpmovzxbd {{.*#+}} zmm3 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero,mem[8],zero,zero,zero,mem[9],zero,zero,zero,mem[10],zero,zero,zero,mem[11],zero,zero,zero,mem[12],zero,zero,zero,mem[13],zero,zero,zero,mem[14],zero,zero,zero,mem[15],zero,zero,zero
-; AVX512F-NEXT: vbroadcasti64x4 {{.*#+}} zmm4 = mem[0,1,2,3,0,1,2,3]
+; AVX512F-NEXT: vbroadcasti64x4 {{.*#+}} zmm4 = [1,2,3,4,5,6,7,8,1,2,3,4,5,6,7,8]
+; AVX512F-NEXT: # zmm4 = mem[0,1,2,3,0,1,2,3]
; AVX512F-NEXT: vpaddd %zmm4, %zmm3, %zmm3
; AVX512F-NEXT: vpaddd %zmm4, %zmm2, %zmm2
; AVX512F-NEXT: vpaddd %zmm4, %zmm1, %zmm1
@@ -2941,7 +2942,8 @@ define void @avg_v32i16_const(<32 x i16>* %a) {
; AVX512F: # BB#0:
; AVX512F-NEXT: vpmovzxwd {{.*#+}} zmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero
; AVX512F-NEXT: vpmovzxwd {{.*#+}} zmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero
-; AVX512F-NEXT: vbroadcasti64x4 {{.*#+}} zmm2 = mem[0,1,2,3,0,1,2,3]
+; AVX512F-NEXT: vbroadcasti64x4 {{.*#+}} zmm2 = [1,2,3,4,5,6,7,8,1,2,3,4,5,6,7,8]
+; AVX512F-NEXT: # zmm2 = mem[0,1,2,3,0,1,2,3]
; AVX512F-NEXT: vpaddd %zmm2, %zmm1, %zmm1
; AVX512F-NEXT: vpaddd %zmm2, %zmm0, %zmm0
; AVX512F-NEXT: vpsrld $1, %zmm0, %zmm0
diff --git a/llvm/test/CodeGen/X86/avx2-vbroadcast.ll b/llvm/test/CodeGen/X86/avx2-vbroadcast.ll
index 971d03af377..318c9cfd8a3 100644
--- a/llvm/test/CodeGen/X86/avx2-vbroadcast.ll
+++ b/llvm/test/CodeGen/X86/avx2-vbroadcast.ll
@@ -633,13 +633,13 @@ entry:
define <8 x i32> @V111(<8 x i32> %in) nounwind uwtable readnone ssp {
; X32-AVX2-LABEL: V111:
; X32-AVX2: ## BB#0: ## %entry
-; X32-AVX2-NEXT: vpbroadcastd LCPI29_0, %ymm1
+; X32-AVX2-NEXT: vpbroadcastd {{.*#+}} ymm1 = [2,2,2,2,2,2,2,2]
; X32-AVX2-NEXT: vpaddd %ymm1, %ymm0, %ymm0
; X32-AVX2-NEXT: retl
;
; X64-AVX2-LABEL: V111:
; X64-AVX2: ## BB#0: ## %entry
-; X64-AVX2-NEXT: vpbroadcastd {{.*}}(%rip), %ymm1
+; X64-AVX2-NEXT: vpbroadcastd {{.*#+}} ymm1 = [2,2,2,2,2,2,2,2]
; X64-AVX2-NEXT: vpaddd %ymm1, %ymm0, %ymm0
; X64-AVX2-NEXT: retq
;
@@ -660,13 +660,13 @@ entry:
define <8 x float> @V113(<8 x float> %in) nounwind uwtable readnone ssp {
; X32-AVX2-LABEL: V113:
; X32-AVX2: ## BB#0: ## %entry
-; X32-AVX2-NEXT: vbroadcastss LCPI30_0, %ymm1
+; X32-AVX2-NEXT: vbroadcastss {{.*#+}} ymm1 = [-0.0078125,-0.0078125,-0.0078125,-0.0078125,-0.0078125,-0.0078125,-0.0078125,-0.0078125]
; X32-AVX2-NEXT: vaddps %ymm1, %ymm0, %ymm0
; X32-AVX2-NEXT: retl
;
; X64-AVX2-LABEL: V113:
; X64-AVX2: ## BB#0: ## %entry
-; X64-AVX2-NEXT: vbroadcastss {{.*}}(%rip), %ymm1
+; X64-AVX2-NEXT: vbroadcastss {{.*#+}} ymm1 = [-0.0078125,-0.0078125,-0.0078125,-0.0078125,-0.0078125,-0.0078125,-0.0078125,-0.0078125]
; X64-AVX2-NEXT: vaddps %ymm1, %ymm0, %ymm0
; X64-AVX2-NEXT: retq
;
@@ -687,12 +687,12 @@ entry:
define <4 x float> @_e2(float* %ptr) nounwind uwtable readnone ssp {
; X32-LABEL: _e2:
; X32: ## BB#0:
-; X32-NEXT: vbroadcastss LCPI31_0, %xmm0
+; X32-NEXT: vbroadcastss {{.*#+}} xmm0 = [-0.0078125,-0.0078125,-0.0078125,-0.0078125]
; X32-NEXT: retl
;
; X64-LABEL: _e2:
; X64: ## BB#0:
-; X64-NEXT: vbroadcastss {{.*}}(%rip), %xmm0
+; X64-NEXT: vbroadcastss {{.*#+}} xmm0 = [-0.0078125,-0.0078125,-0.0078125,-0.0078125]
; X64-NEXT: retq
%vecinit.i = insertelement <4 x float> undef, float 0xbf80000000000000, i32 0
%vecinit2.i = insertelement <4 x float> %vecinit.i, float 0xbf80000000000000, i32 1
diff --git a/llvm/test/CodeGen/X86/broadcast-elm-cross-splat-vec.ll b/llvm/test/CodeGen/X86/broadcast-elm-cross-splat-vec.ll
index bbe31c5c2ac..14bdb3853b0 100644
--- a/llvm/test/CodeGen/X86/broadcast-elm-cross-splat-vec.ll
+++ b/llvm/test/CodeGen/X86/broadcast-elm-cross-splat-vec.ll
@@ -1,13 +1,12 @@
-; NOTE: Assertions have been simpilfied MANUALLY after running utils/update_llc_test_checks.py
-; Assertions for constant pools have been added MANUALLY.
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown -mattr=+avx | FileCheck %s --check-prefix=AVX
-; RUN: llc < %s -mtriple=i686-unknown -mattr=+avx2 | FileCheck %s -check-prefix=ALL -check-prefix=ALL32 -check-prefix=NO-AVX512BW -check-prefix=AVX2
-; RUN: llc < %s -mtriple=i686-unknown -mattr=+avx512f | FileCheck %s -check-prefix=ALL -check-prefix=ALL32 -check-prefix=NO-AVX512BW -check-prefix=AVX512
-; RUN: llc < %s -mtriple=i686-unknown -mattr=+avx512f,+avx512bw | FileCheck %s -check-prefix=ALL -check-prefix=ALL32 -check-prefix=AVX512BW -check-prefix=AVX512
-; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx | FileCheck %s --check-prefix=AVX
-; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx2 | FileCheck %s -check-prefix=ALL -check-prefix=ALL64 -check-prefix=NO-AVX512BW -check-prefix=AVX2 -check-prefix=AVX2-64
-; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx512f | FileCheck %s -check-prefix=ALL -check-prefix=ALL64 -check-prefix=NO-AVX512BW -check-prefix=AVX512 -check-prefix=AVX512F-64
-; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx512f,+avx512bw | FileCheck %s -check-prefix=ALL -check-prefix=ALL64 -check-prefix=AVX512BW -check-prefix=AVX512 -check-prefix=AVX512BW-64
+; RUN: llc < %s -mtriple=i686-unknown -mattr=+avx2 | FileCheck %s -check-prefix=ALL32 -check-prefix=NO-AVX512BW -check-prefix=AVX2
+; RUN: llc < %s -mtriple=i686-unknown -mattr=+avx512f | FileCheck %s -check-prefix=ALL32 -check-prefix=NO-AVX512BW -check-prefix=AVX512
+; RUN: llc < %s -mtriple=i686-unknown -mattr=+avx512f,+avx512bw | FileCheck %s -check-prefix=ALL32 -check-prefix=AVX512 -check-prefix=AVX512BW
+; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx | FileCheck %s --check-prefix=AVX-64
+; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx2 | FileCheck %s -check-prefix=ALL64 -check-prefix=NO-AVX512BW-64 -check-prefix=AVX2-64
+; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx512f | FileCheck %s -check-prefix=ALL64 -check-prefix=NO-AVX512BW-64 -check-prefix=AVX512F-64
+; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx512f,+avx512bw | FileCheck %s -check-prefix=ALL64 -check-prefix=AVX512F-64 -check-prefix=AVX512BW-64
;===-----------------------------------------------------------------------------===
; This test checks the ability to recognize a cross element pattern of
@@ -17,20 +16,31 @@
; <i32 0, i32 1, i32 0, i32 1> => broadcast of the constant vector <i32 0, i32 1>
;===-----------------------------------------------------------------------------===
-; ALL: LCPI0
-; ALL-NEXT: .short 256 # 0x100
-
define <16 x i8> @f16xi8_i16(<16 x i8> %a) {
+; AVX-LABEL: f16xi8_i16:
+; AVX: # BB#0:
+; AVX-NEXT: vmovdqa {{.*#+}} xmm1 = [0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
+; AVX-NEXT: vpaddb %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vpand %xmm1, %xmm0, %xmm0
+; AVX-NEXT: retl
+;
; ALL32-LABEL: f16xi8_i16:
; ALL32: # BB#0:
-; ALL32-NEXT: vpbroadcastw {{\.LCPI.*}}, %xmm1
+; ALL32-NEXT: vpbroadcastw {{.*#+}} xmm1 = [256,256,256,256,256,256,256,256]
; ALL32-NEXT: vpaddb %xmm1, %xmm0, %xmm0
; ALL32-NEXT: vpand %xmm1, %xmm0, %xmm0
; ALL32-NEXT: retl
;
+; AVX-64-LABEL: f16xi8_i16:
+; AVX-64: # BB#0:
+; AVX-64-NEXT: vmovdqa {{.*#+}} xmm1 = [0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
+; AVX-64-NEXT: vpaddb %xmm1, %xmm0, %xmm0
+; AVX-64-NEXT: vpand %xmm1, %xmm0, %xmm0
+; AVX-64-NEXT: retq
+;
; ALL64-LABEL: f16xi8_i16:
; ALL64: # BB#0:
-; ALL64-NEXT: vpbroadcastw {{.*}}(%rip), %xmm1
+; ALL64-NEXT: vpbroadcastw {{.*#+}} xmm1 = [256,256,256,256,256,256,256,256]
; ALL64-NEXT: vpaddb %xmm1, %xmm0, %xmm0
; ALL64-NEXT: vpand %xmm1, %xmm0, %xmm0
; ALL64-NEXT: retq
@@ -40,45 +50,48 @@ define <16 x i8> @f16xi8_i16(<16 x i8> %a) {
}
-; ALL: .LCPI1
-; ALL-NEXT: .long 50462976 # 0x3020100
-
-; AVX: .LCPI1
-; AVX-NEXT .long 50462976 # float 3.82047143E-37
-
define <16 x i8> @f16xi8_i32(<16 x i8> %a) {
+; AVX-LABEL: f16xi8_i32:
+; AVX: # BB#0:
+; AVX-NEXT: vbroadcastss {{.*#+}} xmm1 = [3.82047143E-37,3.82047143E-37,3.82047143E-37,3.82047143E-37]
+; AVX-NEXT: vpaddb %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vpand %xmm1, %xmm0, %xmm0
+; AVX-NEXT: retl
+;
; ALL32-LABEL: f16xi8_i32:
; ALL32: # BB#0:
-; ALL32-NEXT: vpbroadcastd {{\.LCPI.*}}, %xmm1
+; ALL32-NEXT: vpbroadcastd {{.*#+}} xmm1 = [50462976,50462976,50462976,50462976]
; ALL32-NEXT: vpaddb %xmm1, %xmm0, %xmm0
; ALL32-NEXT: vpand %xmm1, %xmm0, %xmm0
; ALL32-NEXT: retl
;
+; AVX-64-LABEL: f16xi8_i32:
+; AVX-64: # BB#0:
+; AVX-64-NEXT: vbroadcastss {{.*#+}} xmm1 = [3.82047143E-37,3.82047143E-37,3.82047143E-37,3.82047143E-37]
+; AVX-64-NEXT: vpaddb %xmm1, %xmm0, %xmm0
+; AVX-64-NEXT: vpand %xmm1, %xmm0, %xmm0
+; AVX-64-NEXT: retq
+;
; ALL64-LABEL: f16xi8_i32:
; ALL64: # BB#0:
-; ALL64-NEXT: vpbroadcastd {{.*}}(%rip), %xmm1
+; ALL64-NEXT: vpbroadcastd {{.*#+}} xmm1 = [50462976,50462976,50462976,50462976]
; ALL64-NEXT: vpaddb %xmm1, %xmm0, %xmm0
; ALL64-NEXT: vpand %xmm1, %xmm0, %xmm0
; ALL64-NEXT: retq
-;
-; AVX-LABEL: f16xi8_i32:
-; AVX: # BB#0:
-; AVX-NEXT: vbroadcastss {{\.LCPI.*}}, %xmm1
-; AVX-NEXT: vpaddb %xmm1, %xmm0, %xmm0
-; AVX-NEXT: vpand %xmm1, %xmm0, %xmm0
%res1 = add <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 0, i8 1, i8 2, i8 3, i8 0, i8 1, i8 2, i8 3, i8 0, i8 1, i8 2, i8 3>, %a
%res2 = and <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 0, i8 1, i8 2, i8 3, i8 0, i8 1, i8 2, i8 3, i8 0, i8 1, i8 2, i8 3>, %res1
ret <16 x i8> %res2
}
-; ALL64: .LCPI2
-; ALL64-NEXT: .quad 506097522914230528 # 0x706050403020100
-
-; AVX: .LCPI2
-; AVX-NEXT: .quad 506097522914230528 # double 7.9499288951273625E-275
-
define <16 x i8> @f16xi8_i64(<16 x i8> %a) {
+; AVX-LABEL: f16xi8_i64:
+; AVX: # BB#0:
+; AVX-NEXT: vmovddup {{.*#+}} xmm1 = mem[0,0]
+; AVX-NEXT: vpaddb %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vpand %xmm1, %xmm0, %xmm0
+; AVX-NEXT: retl
+;
; ALL32-LABEL: f16xi8_i64:
; ALL32: # BB#0:
; ALL32-NEXT: vmovddup {{.*#+}} xmm1 = mem[0,0]
@@ -86,38 +99,56 @@ define <16 x i8> @f16xi8_i64(<16 x i8> %a) {
; ALL32-NEXT: vpand %xmm1, %xmm0, %xmm0
; ALL32-NEXT: retl
;
+; AVX-64-LABEL: f16xi8_i64:
+; AVX-64: # BB#0:
+; AVX-64-NEXT: vmovddup {{.*#+}} xmm1 = mem[0,0]
+; AVX-64-NEXT: vpaddb %xmm1, %xmm0, %xmm0
+; AVX-64-NEXT: vpand %xmm1, %xmm0, %xmm0
+; AVX-64-NEXT: retq
+;
; ALL64-LABEL: f16xi8_i64:
; ALL64: # BB#0:
-; ALL64-NEXT: vpbroadcastq {{.*}}(%rip), %xmm1
+; ALL64-NEXT: vpbroadcastq {{.*#+}} xmm1 = [506097522914230528,506097522914230528]
; ALL64-NEXT: vpaddb %xmm1, %xmm0, %xmm0
; ALL64-NEXT: vpand %xmm1, %xmm0, %xmm0
; ALL64-NEXT: retq
-;
-; AVX-LABEL: f16xi8_i64:
-; AVX: # BB#0:
-; AVX-NEXT: vmovddup {{.*#+}} xmm1 = mem[0,0]
-; AVX-NEXT: vpaddb %xmm1, %xmm0, %xmm0
-; AVX-NEXT: vpand %xmm1, %xmm0, %xmm0
%res1 = add <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7>, %a
%res2 = and <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7>, %res1
ret <16 x i8> %res2
}
-; ALL: .LCPI3
-; ALL-NEXT: .short 256 # 0x100
-
define <32 x i8> @f32xi8_i16(<32 x i8> %a) {
+; AVX-LABEL: f32xi8_i16:
+; AVX: # BB#0:
+; AVX-NEXT: vextractf128 $1, %ymm0, %xmm1
+; AVX-NEXT: vmovdqa {{.*#+}} xmm2 = [0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
+; AVX-NEXT: vpaddb %xmm2, %xmm1, %xmm1
+; AVX-NEXT: vpaddb %xmm2, %xmm0, %xmm0
+; AVX-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX-NEXT: vandps {{\.LCPI.*}}, %ymm0, %ymm0
+; AVX-NEXT: retl
+;
; ALL32-LABEL: f32xi8_i16:
; ALL32: # BB#0:
-; ALL32-NEXT: vpbroadcastw {{\.LCPI.*}}, %ymm1
+; ALL32-NEXT: vpbroadcastw {{.*#+}} ymm1 = [256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256]
; ALL32-NEXT: vpaddb %ymm1, %ymm0, %ymm0
; ALL32-NEXT: vpand %ymm1, %ymm0, %ymm0
; ALL32-NEXT: retl
;
+; AVX-64-LABEL: f32xi8_i16:
+; AVX-64: # BB#0:
+; AVX-64-NEXT: vextractf128 $1, %ymm0, %xmm1
+; AVX-64-NEXT: vmovdqa {{.*#+}} xmm2 = [0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
+; AVX-64-NEXT: vpaddb %xmm2, %xmm1, %xmm1
+; AVX-64-NEXT: vpaddb %xmm2, %xmm0, %xmm0
+; AVX-64-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX-64-NEXT: vandps {{.*}}(%rip), %ymm0, %ymm0
+; AVX-64-NEXT: retq
+;
; ALL64-LABEL: f32xi8_i16:
; ALL64: # BB#0:
-; ALL64-NEXT: vpbroadcastw {{.*}}(%rip), %ymm1
+; ALL64-NEXT: vpbroadcastw {{.*#+}} ymm1 = [256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256]
; ALL64-NEXT: vpaddb %ymm1, %ymm0, %ymm0
; ALL64-NEXT: vpand %ymm1, %ymm0, %ymm0
; ALL64-NEXT: retq
@@ -127,155 +158,273 @@ define <32 x i8> @f32xi8_i16(<32 x i8> %a) {
}
-; ALL: .LCPI4
-; ALL-NEXT: .long 50462976 # 0x3020100
-
-; AVX: .LCPI4
-; AVX-NEXT: .long 50462976 # float 3.82047143E-37
-
define <32 x i8> @f32xi8_i32(<32 x i8> %a) {
+; AVX-LABEL: f32xi8_i32:
+; AVX: # BB#0:
+; AVX-NEXT: vextractf128 $1, %ymm0, %xmm1
+; AVX-NEXT: vbroadcastss {{.*#+}} xmm2 = [3.82047143E-37,3.82047143E-37,3.82047143E-37,3.82047143E-37]
+; AVX-NEXT: vpaddb %xmm2, %xmm1, %xmm1
+; AVX-NEXT: vpaddb %xmm2, %xmm0, %xmm0
+; AVX-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX-NEXT: vandps {{\.LCPI.*}}, %ymm0, %ymm0
+; AVX-NEXT: retl
+;
; ALL32-LABEL: f32xi8_i32:
; ALL32: # BB#0:
-; ALL32-NEXT: vpbroadcastd {{\.LCPI.*}}, %ymm1
+; ALL32-NEXT: vpbroadcastd {{.*#+}} ymm1 = [50462976,50462976,50462976,50462976,50462976,50462976,50462976,50462976]
; ALL32-NEXT: vpaddb %ymm1, %ymm0, %ymm0
; ALL32-NEXT: vpand %ymm1, %ymm0, %ymm0
; ALL32-NEXT: retl
;
+; AVX-64-LABEL: f32xi8_i32:
+; AVX-64: # BB#0:
+; AVX-64-NEXT: vextractf128 $1, %ymm0, %xmm1
+; AVX-64-NEXT: vbroadcastss {{.*#+}} xmm2 = [3.82047143E-37,3.82047143E-37,3.82047143E-37,3.82047143E-37]
+; AVX-64-NEXT: vpaddb %xmm2, %xmm1, %xmm1
+; AVX-64-NEXT: vpaddb %xmm2, %xmm0, %xmm0
+; AVX-64-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX-64-NEXT: vandps {{.*}}(%rip), %ymm0, %ymm0
+; AVX-64-NEXT: retq
+;
; ALL64-LABEL: f32xi8_i32:
; ALL64: # BB#0:
-; ALL64-NEXT: vpbroadcastd {{.*}}(%rip), %ymm1
+; ALL64-NEXT: vpbroadcastd {{.*#+}} ymm1 = [50462976,50462976,50462976,50462976,50462976,50462976,50462976,50462976]
; ALL64-NEXT: vpaddb %ymm1, %ymm0, %ymm0
; ALL64-NEXT: vpand %ymm1, %ymm0, %ymm0
; ALL64-NEXT: retq
-;
-; AVX-LABEL: f32xi8_i32:
-; AVX: # BB#0:
-; AVX-NEXT: vextractf128 $1, %ymm0, %xmm1
-; AVX-NEXT: vbroadcastss {{\.LCPI.*}}, %xmm2
-; AVX-NEXT: vpaddb %xmm2, %xmm1, %xmm1
-; AVX-NEXT: vpaddb %xmm2, %xmm0, %xmm0
-; AVX-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
-; AVX-NEXT: vandps {{\.LCPI.*}}, %ymm0, %ymm0
%res1 = add <32 x i8> <i8 0, i8 1, i8 2, i8 3, i8 0, i8 1, i8 2, i8 3, i8 0, i8 1, i8 2, i8 3, i8 0, i8 1, i8 2, i8 3, i8 0, i8 1, i8 2, i8 3, i8 0, i8 1, i8 2, i8 3, i8 0, i8 1, i8 2, i8 3, i8 0, i8 1, i8 2, i8 3>, %a
%res2 = and <32 x i8> <i8 0, i8 1, i8 2, i8 3, i8 0, i8 1, i8 2, i8 3, i8 0, i8 1, i8 2, i8 3, i8 0, i8 1, i8 2, i8 3, i8 0, i8 1, i8 2, i8 3, i8 0, i8 1, i8 2, i8 3, i8 0, i8 1, i8 2, i8 3, i8 0, i8 1, i8 2, i8 3>, %res1
ret <32 x i8> %res2
}
-; ALL64: .LCPI5
-; ALL64-NEXT: .quad 506097522914230528 # 0x706050403020100
-
-; AVX: .LCPI5
-; AVX-NEXT: .quad 506097522914230528 # double 7.9499288951273625E-275
-
define <32 x i8> @f32xi8_i64(<32 x i8> %a) {
+; AVX-LABEL: f32xi8_i64:
+; AVX: # BB#0:
+; AVX-NEXT: vextractf128 $1, %ymm0, %xmm1
+; AVX-NEXT: vmovddup {{.*#+}} xmm2 = mem[0,0]
+; AVX-NEXT: vpaddb %xmm2, %xmm1, %xmm1
+; AVX-NEXT: vpaddb %xmm2, %xmm0, %xmm0
+; AVX-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX-NEXT: vandps {{\.LCPI.*}}, %ymm0, %ymm0
+; AVX-NEXT: retl
+;
; ALL32-LABEL: f32xi8_i64:
; ALL32: # BB#0:
-; ALL32-NEXT: vpbroadcastq {{\.LCPI.*}}, %ymm1
+; ALL32-NEXT: vpbroadcastq {{.*#+}} ymm1 = [7.9499288951273625E-275,7.9499288951273625E-275,7.9499288951273625E-275,7.9499288951273625E-275]
; ALL32-NEXT: vpaddb %ymm1, %ymm0, %ymm0
; ALL32-NEXT: vpand %ymm1, %ymm0, %ymm0
; ALL32-NEXT: retl
;
+; AVX-64-LABEL: f32xi8_i64:
+; AVX-64: # BB#0:
+; AVX-64-NEXT: vextractf128 $1, %ymm0, %xmm1
+; AVX-64-NEXT: vmovddup {{.*#+}} xmm2 = mem[0,0]
+; AVX-64-NEXT: vpaddb %xmm2, %xmm1, %xmm1
+; AVX-64-NEXT: vpaddb %xmm2, %xmm0, %xmm0
+; AVX-64-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX-64-NEXT: vandps {{.*}}(%rip), %ymm0, %ymm0
+; AVX-64-NEXT: retq
+;
; ALL64-LABEL: f32xi8_i64:
; ALL64: # BB#0:
-; ALL64-NEXT: vpbroadcastq {{.*}}(%rip), %ymm1
+; ALL64-NEXT: vpbroadcastq {{.*#+}} ymm1 = [506097522914230528,506097522914230528,506097522914230528,506097522914230528]
; ALL64-NEXT: vpaddb %ymm1, %ymm0, %ymm0
; ALL64-NEXT: vpand %ymm1, %ymm0, %ymm0
; ALL64-NEXT: retq
-;
-; AVX-LABEL: f32xi8_i64:
-; AVX: # BB#0:
-; AVX-NEXT: vextractf128 $1, %ymm0, %xmm1
-; AVX-NEXT: vmovddup {{.*#+}} xmm2 = mem[0,0]
-; AVX-NEXT: vpaddb %xmm2, %xmm1, %xmm1
-; AVX-NEXT: vpaddb %xmm2, %xmm0, %xmm0
-; AVX-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
-; AVX-NEXT: vandps {{\.LCPI.*}}, %ymm0, %ymm0
%res1 = add <32 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7>, %a
%res2 = and <32 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7>, %res1
ret <32 x i8> %res2
}
-; ALL: .LCPI6
-; ALL-NEXT: .byte 0 # 0x0
-; ALL-NEXT: .byte 1 # 0x1
-; ALL-NEXT: .byte 2 # 0x2
-; ALL-NEXT: .byte 3 # 0x3
-; ALL-NEXT: .byte 4 # 0x4
-; ALL-NEXT: .byte 5 # 0x5
-; ALL-NEXT: .byte 6 # 0x6
-; ALL-NEXT: .byte 7 # 0x7
-; ALL-NEXT: .byte 8 # 0x8
-; ALL-NEXT: .byte 9 # 0x9
-; ALL-NEXT: .byte 10 # 0xa
-; ALL-NEXT: .byte 11 # 0xb
-; ALL-NEXT: .byte 12 # 0xc
-; ALL-NEXT: .byte 13 # 0xd
-; ALL-NEXT: .byte 14 # 0xe
-; ALL-NEXT: .byte 15 # 0xf
-; ALL-NOT: .byte
-
define <32 x i8> @f32xi8_i128(<32 x i8> %a) {
-; ALL-LABEL: f32xi8_i128:
-; ALL: # BB#0:
-; ALL-NEXT: vbroadcasti128 {{.*#+}} ymm1 = mem[0,1,0,1]
-; ALL-NEXT: vpaddb %ymm1, %ymm0, %ymm0
-; ALL-NEXT: vpand %ymm1, %ymm0, %ymm0
+; AVX-LABEL: f32xi8_i128:
+; AVX: # BB#0:
+; AVX-NEXT: vextractf128 $1, %ymm0, %xmm1
+; AVX-NEXT: vmovdqa {{.*#+}} xmm2 = [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
+; AVX-NEXT: vpaddb %xmm2, %xmm1, %xmm1
+; AVX-NEXT: vpaddb %xmm2, %xmm0, %xmm0
+; AVX-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX-NEXT: vandps {{\.LCPI.*}}, %ymm0, %ymm0
+; AVX-NEXT: retl
+;
+; ALL32-LABEL: f32xi8_i128:
+; ALL32: # BB#0:
+; ALL32-NEXT: vbroadcasti128 {{.*#+}} ymm1 = [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
+; ALL32-NEXT: # ymm1 = mem[0,1,0,1]
+; ALL32-NEXT: vpaddb %ymm1, %ymm0, %ymm0
+; ALL32-NEXT: vpand %ymm1, %ymm0, %ymm0
+; ALL32-NEXT: retl
+;
+; AVX-64-LABEL: f32xi8_i128:
+; AVX-64: # BB#0:
+; AVX-64-NEXT: vextractf128 $1, %ymm0, %xmm1
+; AVX-64-NEXT: vmovdqa {{.*#+}} xmm2 = [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
+; AVX-64-NEXT: vpaddb %xmm2, %xmm1, %xmm1
+; AVX-64-NEXT: vpaddb %xmm2, %xmm0, %xmm0
+; AVX-64-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX-64-NEXT: vandps {{.*}}(%rip), %ymm0, %ymm0
+; AVX-64-NEXT: retq
+;
+; ALL64-LABEL: f32xi8_i128:
+; ALL64: # BB#0:
+; ALL64-NEXT: vbroadcasti128 {{.*#+}} ymm1 = [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
+; ALL64-NEXT: # ymm1 = mem[0,1,0,1]
+; ALL64-NEXT: vpaddb %ymm1, %ymm0, %ymm0
+; ALL64-NEXT: vpand %ymm1, %ymm0, %ymm0
+; ALL64-NEXT: retq
%res1 = add <32 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, %a
%res2 = and <32 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, %res1
ret <32 x i8> %res2
}
-; ALL: .LCPI7
-; ALL-NEXT: .short 256 # 0x100
-
define <64 x i8> @f64xi8_i16(<64 x i8> %a) {
+; AVX-LABEL: f64xi8_i16:
+; AVX: # BB#0:
+; AVX-NEXT: vextractf128 $1, %ymm1, %xmm2
+; AVX-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
+; AVX-NEXT: vpaddb %xmm3, %xmm2, %xmm2
+; AVX-NEXT: vpaddb %xmm3, %xmm1, %xmm1
+; AVX-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1
+; AVX-NEXT: vextractf128 $1, %ymm0, %xmm2
+; AVX-NEXT: vpaddb %xmm3, %xmm2, %xmm2
+; AVX-NEXT: vpaddb %xmm3, %xmm0, %xmm0
+; AVX-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; AVX-NEXT: vmovaps {{.*#+}} ymm2 = [0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
+; AVX-NEXT: vandps %ymm2, %ymm0, %ymm0
+; AVX-NEXT: vandps %ymm2, %ymm1, %ymm1
+; AVX-NEXT: retl
+;
; NO-AVX512BW-LABEL: f64xi8_i16:
; NO-AVX512BW: # BB#0:
-; NO-AVX512BW-NEXT: vpbroadcastw {{\.LCPI.*}}, %ymm2
+; NO-AVX512BW-NEXT: vpbroadcastw {{.*#+}} ymm2 = [256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256]
; NO-AVX512BW-NEXT: vpaddb %ymm2, %ymm1, %ymm1
; NO-AVX512BW-NEXT: vpaddb %ymm2, %ymm0, %ymm0
; NO-AVX512BW-NEXT: vpand %ymm2, %ymm0, %ymm0
; NO-AVX512BW-NEXT: vpand %ymm2, %ymm1, %ymm1
+; NO-AVX512BW-NEXT: retl
;
; AVX512BW-LABEL: f64xi8_i16:
; AVX512BW: # BB#0:
-; AVX512BW-NEXT: vpbroadcastw {{\.LCPI.*}}, %zmm1
+; AVX512BW-NEXT: vpbroadcastw {{.*#+}} zmm1 = [256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256]
; AVX512BW-NEXT: vpaddb %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT: vpandq %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT: retl
+;
+; AVX-64-LABEL: f64xi8_i16:
+; AVX-64: # BB#0:
+; AVX-64-NEXT: vextractf128 $1, %ymm1, %xmm2
+; AVX-64-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
+; AVX-64-NEXT: vpaddb %xmm3, %xmm2, %xmm2
+; AVX-64-NEXT: vpaddb %xmm3, %xmm1, %xmm1
+; AVX-64-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1
+; AVX-64-NEXT: vextractf128 $1, %ymm0, %xmm2
+; AVX-64-NEXT: vpaddb %xmm3, %xmm2, %xmm2
+; AVX-64-NEXT: vpaddb %xmm3, %xmm0, %xmm0
+; AVX-64-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; AVX-64-NEXT: vmovaps {{.*#+}} ymm2 = [0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
+; AVX-64-NEXT: vandps %ymm2, %ymm0, %ymm0
+; AVX-64-NEXT: vandps %ymm2, %ymm1, %ymm1
+; AVX-64-NEXT: retq
+;
+; NO-AVX512BW-64-LABEL: f64xi8_i16:
+; NO-AVX512BW-64: # BB#0:
+; NO-AVX512BW-64-NEXT: vpbroadcastw {{.*#+}} ymm2 = [256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256]
+; NO-AVX512BW-64-NEXT: vpaddb %ymm2, %ymm1, %ymm1
+; NO-AVX512BW-64-NEXT: vpaddb %ymm2, %ymm0, %ymm0
+; NO-AVX512BW-64-NEXT: vpand %ymm2, %ymm0, %ymm0
+; NO-AVX512BW-64-NEXT: vpand %ymm2, %ymm1, %ymm1
+; NO-AVX512BW-64-NEXT: retq
+;
+; AVX512BW-64-LABEL: f64xi8_i16:
+; AVX512BW-64: # BB#0:
+; AVX512BW-64-NEXT: vpbroadcastw {{.*#+}} zmm1 = [256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256]
+; AVX512BW-64-NEXT: vpaddb %zmm1, %zmm0, %zmm0
+; AVX512BW-64-NEXT: vpandq %zmm1, %zmm0, %zmm0
+; AVX512BW-64-NEXT: retq
%res1 = add <64 x i8> <i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1>, %a
%res2 = and <64 x i8> <i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1>, %res1
ret <64 x i8> %res2
}
-; ALL: .LCPI8
-; ALL-NEXT: .long 50462976 # 0x3020100
-
-; AVX: .LCPI8
-; AVX-NEXT: .long 50462976 # float 3.82047143E-37
-
define <64 x i8> @f64i8_i32(<64 x i8> %a) {
+; AVX-LABEL: f64i8_i32:
+; AVX: # BB#0:
+; AVX-NEXT: vextractf128 $1, %ymm1, %xmm2
+; AVX-NEXT: vbroadcastss {{.*#+}} xmm3 = [3.82047143E-37,3.82047143E-37,3.82047143E-37,3.82047143E-37]
+; AVX-NEXT: vpaddb %xmm3, %xmm2, %xmm2
+; AVX-NEXT: vpaddb %xmm3, %xmm1, %xmm1
+; AVX-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1
+; AVX-NEXT: vextractf128 $1, %ymm0, %xmm2
+; AVX-NEXT: vpaddb %xmm3, %xmm2, %xmm2
+; AVX-NEXT: vpaddb %xmm3, %xmm0, %xmm0
+; AVX-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; AVX-NEXT: vmovaps {{.*#+}} ymm2 = [0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
+; AVX-NEXT: vandps %ymm2, %ymm0, %ymm0
+; AVX-NEXT: vandps %ymm2, %ymm1, %ymm1
+; AVX-NEXT: retl
+;
; NO-AVX512BW-LABEL: f64i8_i32:
; NO-AVX512BW: # BB#0:
-; NO-AVX512BW-NEXT: vpbroadcastd {{\.LCPI.*}}, %ymm2
+; NO-AVX512BW-NEXT: vpbroadcastd {{.*#+}} ymm2 = [50462976,50462976,50462976,50462976,50462976,50462976,50462976,50462976]
; NO-AVX512BW-NEXT: vpaddb %ymm2, %ymm1, %ymm1
; NO-AVX512BW-NEXT: vpaddb %ymm2, %ymm0, %ymm0
; NO-AVX512BW-NEXT: vpand %ymm2, %ymm0, %ymm0
; NO-AVX512BW-NEXT: vpand %ymm2, %ymm1, %ymm1
+; NO-AVX512BW-NEXT: retl
;
; AVX512BW-LABEL: f64i8_i32:
; AVX512BW: # BB#0:
-; AVX512BW-NEXT: vpbroadcastd {{\.LCPI.*}}, %zmm1
+; AVX512BW-NEXT: vpbroadcastd {{.*#+}} zmm1 = [50462976,50462976,50462976,50462976,50462976,50462976,50462976,50462976,50462976,50462976,50462976,50462976,50462976,50462976,50462976,50462976]
; AVX512BW-NEXT: vpaddb %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT: vpandq %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT: retl
;
-; AVX-LABEL: f64i8_i32:
+; AVX-64-LABEL: f64i8_i32:
+; AVX-64: # BB#0:
+; AVX-64-NEXT: vextractf128 $1, %ymm1, %xmm2
+; AVX-64-NEXT: vbroadcastss {{.*#+}} xmm3 = [3.82047143E-37,3.82047143E-37,3.82047143E-37,3.82047143E-37]
+; AVX-64-NEXT: vpaddb %xmm3, %xmm2, %xmm2
+; AVX-64-NEXT: vpaddb %xmm3, %xmm1, %xmm1
+; AVX-64-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1
+; AVX-64-NEXT: vextractf128 $1, %ymm0, %xmm2
+; AVX-64-NEXT: vpaddb %xmm3, %xmm2, %xmm2
+; AVX-64-NEXT: vpaddb %xmm3, %xmm0, %xmm0
+; AVX-64-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; AVX-64-NEXT: vmovaps {{.*#+}} ymm2 = [0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
+; AVX-64-NEXT: vandps %ymm2, %ymm0, %ymm0
+; AVX-64-NEXT: vandps %ymm2, %ymm1, %ymm1
+; AVX-64-NEXT: retq
+;
+; NO-AVX512BW-64-LABEL: f64i8_i32:
+; NO-AVX512BW-64: # BB#0:
+; NO-AVX512BW-64-NEXT: vpbroadcastd {{.*#+}} ymm2 = [50462976,50462976,50462976,50462976,50462976,50462976,50462976,50462976]
+; NO-AVX512BW-64-NEXT: vpaddb %ymm2, %ymm1, %ymm1
+; NO-AVX512BW-64-NEXT: vpaddb %ymm2, %ymm0, %ymm0
+; NO-AVX512BW-64-NEXT: vpand %ymm2, %ymm0, %ymm0
+; NO-AVX512BW-64-NEXT: vpand %ymm2, %ymm1, %ymm1
+; NO-AVX512BW-64-NEXT: retq
+;
+; AVX512BW-64-LABEL: f64i8_i32:
+; AVX512BW-64: # BB#0:
+; AVX512BW-64-NEXT: vpbroadcastd {{.*#+}} zmm1 = [50462976,50462976,50462976,50462976,50462976,50462976,50462976,50462976,50462976,50462976,50462976,50462976,50462976,50462976,50462976,50462976]
+; AVX512BW-64-NEXT: vpaddb %zmm1, %zmm0, %zmm0
+; AVX512BW-64-NEXT: vpandq %zmm1, %zmm0, %zmm0
+; AVX512BW-64-NEXT: retq
+ %res1 = add <64 x i8> <i8 0, i8 1, i8 2, i8 3, i8 0, i8 1, i8 2, i8 3, i8 0, i8 1, i8 2, i8 3, i8 0, i8 1, i8 2, i8 3, i8 0, i8 1, i8 2, i8 3, i8 0, i8 1, i8 2, i8 3, i8 0, i8 1, i8 2, i8 3, i8 0, i8 1, i8 2, i8 3, i8 0, i8 1, i8 2, i8 3, i8 0, i8 1, i8 2, i8 3, i8 0, i8 1, i8 2, i8 3, i8 0, i8 1, i8 2, i8 3, i8 0, i8 1, i8 2, i8 3, i8 0, i8 1, i8 2, i8 3, i8 0, i8 1, i8 2, i8 3, i8 0, i8 1, i8 2, i8 3>, %a
+ %res2 = and <64 x i8> <i8 0, i8 1, i8 2, i8 3, i8 0, i8 1, i8 2, i8 3, i8 0, i8 1, i8 2, i8 3, i8 0, i8 1, i8 2, i8 3, i8 0, i8 1, i8 2, i8 3, i8 0, i8 1, i8 2, i8 3, i8 0, i8 1, i8 2, i8 3, i8 0, i8 1, i8 2, i8 3, i8 0, i8 1, i8 2, i8 3, i8 0, i8 1, i8 2, i8 3, i8 0, i8 1, i8 2, i8 3, i8 0, i8 1, i8 2, i8 3, i8 0, i8 1, i8 2, i8 3, i8 0, i8 1, i8 2, i8 3, i8 0, i8 1, i8 2, i8 3, i8 0, i8 1, i8 2, i8 3>, %res1
+ ret <64 x i8> %res2
+}
+
+
+define <64 x i8> @f64xi8_i64(<64 x i8> %a) {
+; AVX-LABEL: f64xi8_i64:
; AVX: # BB#0:
; AVX-NEXT: vextractf128 $1, %ymm1, %xmm2
-; AVX-NEXT: vbroadcastss {{\.LCPI.*}}, %xmm3
+; AVX-NEXT: vmovddup {{.*#+}} xmm3 = mem[0,0]
; AVX-NEXT: vpaddb %xmm3, %xmm2, %xmm2
; AVX-NEXT: vpaddb %xmm3, %xmm1, %xmm1
; AVX-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1
@@ -283,43 +432,69 @@ define <64 x i8> @f64i8_i32(<64 x i8> %a) {
; AVX-NEXT: vpaddb %xmm3, %xmm2, %xmm2
; AVX-NEXT: vpaddb %xmm3, %xmm0, %xmm0
; AVX-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
-; AVX-NEXT: vmovaps {{.*#+}} ymm2 = [0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
+; AVX-NEXT: vmovaps {{.*#+}} ymm2 = [0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7]
; AVX-NEXT: vandps %ymm2, %ymm0, %ymm0
; AVX-NEXT: vandps %ymm2, %ymm1, %ymm1
- %res1 = add <64 x i8> <i8 0, i8 1, i8 2, i8 3, i8 0, i8 1, i8 2, i8 3, i8 0, i8 1, i8 2, i8 3, i8 0, i8 1, i8 2, i8 3, i8 0, i8 1, i8 2, i8 3, i8 0, i8 1, i8 2, i8 3, i8 0, i8 1, i8 2, i8 3, i8 0, i8 1, i8 2, i8 3, i8 0, i8 1, i8 2, i8 3, i8 0, i8 1, i8 2, i8 3, i8 0, i8 1, i8 2, i8 3, i8 0, i8 1, i8 2, i8 3, i8 0, i8 1, i8 2, i8 3, i8 0, i8 1, i8 2, i8 3, i8 0, i8 1, i8 2, i8 3, i8 0, i8 1, i8 2, i8 3>, %a
- %res2 = and <64 x i8> <i8 0, i8 1, i8 2, i8 3, i8 0, i8 1, i8 2, i8 3, i8 0, i8 1, i8 2, i8 3, i8 0, i8 1, i8 2, i8 3, i8 0, i8 1, i8 2, i8 3, i8 0, i8 1, i8 2, i8 3, i8 0, i8 1, i8 2, i8 3, i8 0, i8 1, i8 2, i8 3, i8 0, i8 1, i8 2, i8 3, i8 0, i8 1, i8 2, i8 3, i8 0, i8 1, i8 2, i8 3, i8 0, i8 1, i8 2, i8 3, i8 0, i8 1, i8 2, i8 3, i8 0, i8 1, i8 2, i8 3, i8 0, i8 1, i8 2, i8 3, i8 0, i8 1, i8 2, i8 3>, %res1
- ret <64 x i8> %res2
-}
-
-
-; ALL64: .LCPI9
-; ALL64-NEXT: .quad 506097522914230528 # 0x706050403020100
-
-; ALL32: .LCPI9
-; ALL32-NEXT: .quad 506097522914230528 # double 7.9499288951273625E-275
-
-; AVX: .LCPI9
-; AVX-NEXT: .quad 506097522914230528 # double 7.9499288951273625E-275
-
-define <64 x i8> @f64xi8_i64(<64 x i8> %a) {
+; AVX-NEXT: retl
+;
; NO-AVX512BW-LABEL: f64xi8_i64:
; NO-AVX512BW: # BB#0:
-; NO-AVX512BW-NEXT: vpbroadcastq {{.*}}, %ymm2
+; NO-AVX512BW-NEXT: vpbroadcastq {{.*#+}} ymm2 = [7.9499288951273625E-275,7.9499288951273625E-275,7.9499288951273625E-275,7.9499288951273625E-275]
; NO-AVX512BW-NEXT: vpaddb %ymm2, %ymm1, %ymm1
; NO-AVX512BW-NEXT: vpaddb %ymm2, %ymm0, %ymm0
; NO-AVX512BW-NEXT: vpand %ymm2, %ymm0, %ymm0
; NO-AVX512BW-NEXT: vpand %ymm2, %ymm1, %ymm1
+; NO-AVX512BW-NEXT: retl
;
; AVX512BW-LABEL: f64xi8_i64:
; AVX512BW: # BB#0:
-; AVX512BW-NEXT: vpbroadcastq {{.*}}, %zmm1
+; AVX512BW-NEXT: vpbroadcastq {{.*#+}} zmm1 = [7.9499288951273625E-275,7.9499288951273625E-275,7.9499288951273625E-275,7.9499288951273625E-275,7.9499288951273625E-275,7.9499288951273625E-275,7.9499288951273625E-275,7.9499288951273625E-275]
; AVX512BW-NEXT: vpaddb %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT: vpandq %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT: retl
;
-; AVX-LABEL: f64xi8_i64:
+; AVX-64-LABEL: f64xi8_i64:
+; AVX-64: # BB#0:
+; AVX-64-NEXT: vextractf128 $1, %ymm1, %xmm2
+; AVX-64-NEXT: vmovddup {{.*#+}} xmm3 = mem[0,0]
+; AVX-64-NEXT: vpaddb %xmm3, %xmm2, %xmm2
+; AVX-64-NEXT: vpaddb %xmm3, %xmm1, %xmm1
+; AVX-64-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1
+; AVX-64-NEXT: vextractf128 $1, %ymm0, %xmm2
+; AVX-64-NEXT: vpaddb %xmm3, %xmm2, %xmm2
+; AVX-64-NEXT: vpaddb %xmm3, %xmm0, %xmm0
+; AVX-64-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; AVX-64-NEXT: vmovaps {{.*#+}} ymm2 = [0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7]
+; AVX-64-NEXT: vandps %ymm2, %ymm0, %ymm0
+; AVX-64-NEXT: vandps %ymm2, %ymm1, %ymm1
+; AVX-64-NEXT: retq
+;
+; NO-AVX512BW-64-LABEL: f64xi8_i64:
+; NO-AVX512BW-64: # BB#0:
+; NO-AVX512BW-64-NEXT: vpbroadcastq {{.*#+}} ymm2 = [506097522914230528,506097522914230528,506097522914230528,506097522914230528]
+; NO-AVX512BW-64-NEXT: vpaddb %ymm2, %ymm1, %ymm1
+; NO-AVX512BW-64-NEXT: vpaddb %ymm2, %ymm0, %ymm0
+; NO-AVX512BW-64-NEXT: vpand %ymm2, %ymm0, %ymm0
+; NO-AVX512BW-64-NEXT: vpand %ymm2, %ymm1, %ymm1
+; NO-AVX512BW-64-NEXT: retq
+;
+; AVX512BW-64-LABEL: f64xi8_i64:
+; AVX512BW-64: # BB#0:
+; AVX512BW-64-NEXT: vpbroadcastq {{.*#+}} zmm1 = [506097522914230528,506097522914230528,506097522914230528,506097522914230528,506097522914230528,506097522914230528,506097522914230528,506097522914230528]
+; AVX512BW-64-NEXT: vpaddb %zmm1, %zmm0, %zmm0
+; AVX512BW-64-NEXT: vpandq %zmm1, %zmm0, %zmm0
+; AVX512BW-64-NEXT: retq
+ %res1 = add <64 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7>, %a
+ %res2 = and <64 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7>, %res1
+ ret <64 x i8> %res2
+}
+
+
+define <64 x i8> @f64xi8_i128(<64 x i8> %a) {
+; AVX-LABEL: f64xi8_i128:
; AVX: # BB#0:
; AVX-NEXT: vextractf128 $1, %ymm1, %xmm2
-; AVX-NEXT: vmovddup {{.*#+}} xmm3 = mem[0,0]
+; AVX-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
; AVX-NEXT: vpaddb %xmm3, %xmm2, %xmm2
; AVX-NEXT: vpaddb %xmm3, %xmm1, %xmm1
; AVX-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1
@@ -327,143 +502,184 @@ define <64 x i8> @f64xi8_i64(<64 x i8> %a) {
; AVX-NEXT: vpaddb %xmm3, %xmm2, %xmm2
; AVX-NEXT: vpaddb %xmm3, %xmm0, %xmm0
; AVX-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
-; AVX-NEXT: vmovaps {{.*#+}} ymm2 = [0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7]
+; AVX-NEXT: vmovaps {{.*#+}} ymm2 = [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
; AVX-NEXT: vandps %ymm2, %ymm0, %ymm0
; AVX-NEXT: vandps %ymm2, %ymm1, %ymm1
- %res1 = add <64 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7>, %a
- %res2 = and <64 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7>, %res1
- ret <64 x i8> %res2
-}
-
-
-; ALL: .LCPI10
-; ALL-NEXT: .byte 0 # 0x0
-; ALL-NEXT: .byte 1 # 0x1
-; ALL-NEXT: .byte 2 # 0x2
-; ALL-NEXT: .byte 3 # 0x3
-; ALL-NEXT: .byte 4 # 0x4
-; ALL-NEXT: .byte 5 # 0x5
-; ALL-NEXT: .byte 6 # 0x6
-; ALL-NEXT: .byte 7 # 0x7
-; ALL-NEXT: .byte 8 # 0x8
-; ALL-NEXT: .byte 9 # 0x9
-; ALL-NEXT: .byte 10 # 0xa
-; ALL-NEXT: .byte 11 # 0xb
-; ALL-NEXT: .byte 12 # 0xc
-; ALL-NEXT: .byte 13 # 0xd
-; ALL-NEXT: .byte 14 # 0xe
-; ALL-NEXT: .byte 15 # 0xf
-; ALL-NOT: .byte
-
-define <64 x i8> @f64xi8_i128(<64 x i8> %a) {
+; AVX-NEXT: retl
+;
; NO-AVX512BW-LABEL: f64xi8_i128:
; NO-AVX512BW: # BB#0:
-; NO-AVX512BW-NEXT: vbroadcasti128 {{.*#+}} ymm2 = mem[0,1,0,1]
+; NO-AVX512BW-NEXT: vbroadcasti128 {{.*#+}} ymm2 = [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
+; NO-AVX512BW-NEXT: # ymm2 = mem[0,1,0,1]
; NO-AVX512BW-NEXT: vpaddb %ymm2, %ymm1, %ymm1
; NO-AVX512BW-NEXT: vpaddb %ymm2, %ymm0, %ymm0
; NO-AVX512BW-NEXT: vpand %ymm2, %ymm0, %ymm0
; NO-AVX512BW-NEXT: vpand %ymm2, %ymm1, %ymm1
+; NO-AVX512BW-NEXT: retl
;
; AVX512BW-LABEL: f64xi8_i128:
; AVX512BW: # BB#0:
-; AVX512BW-NEXT: vbroadcasti32x4 {{.*#+}} zmm1 = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
+; AVX512BW-NEXT: vbroadcasti32x4 {{.*#+}} zmm1 = [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
+; AVX512BW-NEXT: # zmm1 = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
; AVX512BW-NEXT: vpaddb %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT: vpandq %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT: retl
+;
+; AVX-64-LABEL: f64xi8_i128:
+; AVX-64: # BB#0:
+; AVX-64-NEXT: vextractf128 $1, %ymm1, %xmm2
+; AVX-64-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
+; AVX-64-NEXT: vpaddb %xmm3, %xmm2, %xmm2
+; AVX-64-NEXT: vpaddb %xmm3, %xmm1, %xmm1
+; AVX-64-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1
+; AVX-64-NEXT: vextractf128 $1, %ymm0, %xmm2
+; AVX-64-NEXT: vpaddb %xmm3, %xmm2, %xmm2
+; AVX-64-NEXT: vpaddb %xmm3, %xmm0, %xmm0
+; AVX-64-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; AVX-64-NEXT: vmovaps {{.*#+}} ymm2 = [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
+; AVX-64-NEXT: vandps %ymm2, %ymm0, %ymm0
+; AVX-64-NEXT: vandps %ymm2, %ymm1, %ymm1
+; AVX-64-NEXT: retq
+;
+; NO-AVX512BW-64-LABEL: f64xi8_i128:
+; NO-AVX512BW-64: # BB#0:
+; NO-AVX512BW-64-NEXT: vbroadcasti128 {{.*#+}} ymm2 = [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
+; NO-AVX512BW-64-NEXT: # ymm2 = mem[0,1,0,1]
+; NO-AVX512BW-64-NEXT: vpaddb %ymm2, %ymm1, %ymm1
+; NO-AVX512BW-64-NEXT: vpaddb %ymm2, %ymm0, %ymm0
+; NO-AVX512BW-64-NEXT: vpand %ymm2, %ymm0, %ymm0
+; NO-AVX512BW-64-NEXT: vpand %ymm2, %ymm1, %ymm1
+; NO-AVX512BW-64-NEXT: retq
+;
+; AVX512BW-64-LABEL: f64xi8_i128:
+; AVX512BW-64: # BB#0:
+; AVX512BW-64-NEXT: vbroadcasti32x4 {{.*#+}} zmm1 = [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
+; AVX512BW-64-NEXT: # zmm1 = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
+; AVX512BW-64-NEXT: vpaddb %zmm1, %zmm0, %zmm0
+; AVX512BW-64-NEXT: vpandq %zmm1, %zmm0, %zmm0
+; AVX512BW-64-NEXT: retq
%res1 = add <64 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, %a
%res2 = and <64 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, %res1
ret <64 x i8> %res2
}
-; AVX512BW: .LCPI11
-; AVX512BW-NEXT: .byte 0 # 0x0
-; AVX512BW-NEXT: .byte 1 # 0x1
-; AVX512BW-NEXT: .byte 2 # 0x2
-; AVX512BW-NEXT: .byte 3 # 0x3
-; AVX512BW-NEXT: .byte 4 # 0x4
-; AVX512BW-NEXT: .byte 5 # 0x5
-; AVX512BW-NEXT: .byte 6 # 0x6
-; AVX512BW-NEXT: .byte 7 # 0x7
-; AVX512BW-NEXT: .byte 8 # 0x8
-; AVX512BW-NEXT: .byte 9 # 0x9
-; AVX512BW-NEXT: .byte 10 # 0xa
-; AVX512BW-NEXT: .byte 11 # 0xb
-; AVX512BW-NEXT: .byte 12 # 0xc
-; AVX512BW-NEXT: .byte 13 # 0xd
-; AVX512BW-NEXT: .byte 14 # 0xe
-; AVX512BW-NEXT: .byte 15 # 0xf
-; AVX512BW-NEXT: .byte 16 # 0x10
-; AVX512BW-NEXT: .byte 17 # 0x11
-; AVX512BW-NEXT: .byte 18 # 0x12
-; AVX512BW-NEXT: .byte 19 # 0x13
-; AVX512BW-NEXT: .byte 20 # 0x14
-; AVX512BW-NEXT: .byte 21 # 0x15
-; AVX512BW-NEXT: .byte 22 # 0x16
-; AVX512BW-NEXT: .byte 23 # 0x17
-; AVX512BW-NEXT: .byte 24 # 0x18
-; AVX512BW-NEXT: .byte 25 # 0x19
-; AVX512BW-NEXT: .byte 26 # 0x1a
-; AVX512BW-NEXT: .byte 27 # 0x1b
-; AVX512BW-NEXT: .byte 28 # 0x1c
-; AVX512BW-NEXT: .byte 29 # 0x1d
-; AVX512BW-NEXT: .byte 30 # 0x1e
-; AVX512BW-NEXT: .byte 31 # 0x1f
-; AVX512BW-NOT: .byte
-
define <64 x i8> @f64xi8_i256(<64 x i8> %a) {
+; AVX-LABEL: f64xi8_i256:
+; AVX: # BB#0:
+; AVX-NEXT: vextractf128 $1, %ymm1, %xmm2
+; AVX-NEXT: vmovdqa {{.*#+}} xmm3 = [16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31]
+; AVX-NEXT: vpaddb %xmm3, %xmm2, %xmm2
+; AVX-NEXT: vmovdqa {{.*#+}} xmm4 = [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
+; AVX-NEXT: vpaddb %xmm4, %xmm1, %xmm1
+; AVX-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1
+; AVX-NEXT: vextractf128 $1, %ymm0, %xmm2
+; AVX-NEXT: vpaddb %xmm3, %xmm2, %xmm2
+; AVX-NEXT: vpaddb %xmm4, %xmm0, %xmm0
+; AVX-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; AVX-NEXT: vmovaps {{.*#+}} ymm2 = [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31]
+; AVX-NEXT: vandps %ymm2, %ymm0, %ymm0
+; AVX-NEXT: vandps %ymm2, %ymm1, %ymm1
+; AVX-NEXT: retl
+;
+; NO-AVX512BW-LABEL: f64xi8_i256:
+; NO-AVX512BW: # BB#0:
+; NO-AVX512BW-NEXT: vmovdqa {{.*#+}} ymm2 = [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31]
+; NO-AVX512BW-NEXT: vpaddb %ymm2, %ymm1, %ymm1
+; NO-AVX512BW-NEXT: vpaddb %ymm2, %ymm0, %ymm0
+; NO-AVX512BW-NEXT: vpand %ymm2, %ymm0, %ymm0
+; NO-AVX512BW-NEXT: vpand %ymm2, %ymm1, %ymm1
+; NO-AVX512BW-NEXT: retl
+;
; AVX512BW-LABEL: f64xi8_i256:
; AVX512BW: # BB#0:
-; AVX512BW-NEXT: vbroadcasti64x4 {{.*#+}} zmm1 = mem[0,1,2,3,0,1,2,3]
+; AVX512BW-NEXT: vbroadcasti64x4 {{.*#+}} zmm1 = [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31]
+; AVX512BW-NEXT: # zmm1 = mem[0,1,2,3,0,1,2,3]
; AVX512BW-NEXT: vpaddb %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT: vpandq %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT: retl
+;
+; AVX-64-LABEL: f64xi8_i256:
+; AVX-64: # BB#0:
+; AVX-64-NEXT: vextractf128 $1, %ymm1, %xmm2
+; AVX-64-NEXT: vmovdqa {{.*#+}} xmm3 = [16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31]
+; AVX-64-NEXT: vpaddb %xmm3, %xmm2, %xmm2
+; AVX-64-NEXT: vmovdqa {{.*#+}} xmm4 = [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
+; AVX-64-NEXT: vpaddb %xmm4, %xmm1, %xmm1
+; AVX-64-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1
+; AVX-64-NEXT: vextractf128 $1, %ymm0, %xmm2
+; AVX-64-NEXT: vpaddb %xmm3, %xmm2, %xmm2
+; AVX-64-NEXT: vpaddb %xmm4, %xmm0, %xmm0
+; AVX-64-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; AVX-64-NEXT: vmovaps {{.*#+}} ymm2 = [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31]
+; AVX-64-NEXT: vandps %ymm2, %ymm0, %ymm0
+; AVX-64-NEXT: vandps %ymm2, %ymm1, %ymm1
+; AVX-64-NEXT: retq
+;
+; NO-AVX512BW-64-LABEL: f64xi8_i256:
+; NO-AVX512BW-64: # BB#0:
+; NO-AVX512BW-64-NEXT: vmovdqa {{.*#+}} ymm2 = [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31]
+; NO-AVX512BW-64-NEXT: vpaddb %ymm2, %ymm1, %ymm1
+; NO-AVX512BW-64-NEXT: vpaddb %ymm2, %ymm0, %ymm0
+; NO-AVX512BW-64-NEXT: vpand %ymm2, %ymm0, %ymm0
+; NO-AVX512BW-64-NEXT: vpand %ymm2, %ymm1, %ymm1
+; NO-AVX512BW-64-NEXT: retq
+;
+; AVX512BW-64-LABEL: f64xi8_i256:
+; AVX512BW-64: # BB#0:
+; AVX512BW-64-NEXT: vbroadcasti64x4 {{.*#+}} zmm1 = [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31]
+; AVX512BW-64-NEXT: # zmm1 = mem[0,1,2,3,0,1,2,3]
+; AVX512BW-64-NEXT: vpaddb %zmm1, %zmm0, %zmm0
+; AVX512BW-64-NEXT: vpandq %zmm1, %zmm0, %zmm0
+; AVX512BW-64-NEXT: retq
%res1 = add <64 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 16, i8 17, i8 18, i8 19, i8 20, i8 21, i8 22, i8 23, i8 24, i8 25, i8 26, i8 27, i8 28, i8 29, i8 30, i8 31, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 16, i8 17, i8 18, i8 19, i8 20, i8 21, i8 22, i8 23, i8 24, i8 25, i8 26, i8 27, i8 28, i8 29, i8 30, i8 31>, %a
%res2 = and <64 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 16, i8 17, i8 18, i8 19, i8 20, i8 21, i8 22, i8 23, i8 24, i8 25, i8 26, i8 27, i8 28, i8 29, i8 30, i8 31, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 16, i8 17, i8 18, i8 19, i8 20, i8 21, i8 22, i8 23, i8 24, i8 25, i8 26, i8 27, i8 28, i8 29, i8 30, i8 31>, %res1
ret <64 x i8> %res2
}
-; ALL: .LCPI12
-; ALL-NEXT: .long 65536 # 0x10000
-
-; AVX: .LCPI12
-; AVX-NEXT: .long 65536 # float 9.18354962E-41
-
define <8 x i16> @f8xi16_i32(<8 x i16> %a) {
+; AVX-LABEL: f8xi16_i32:
+; AVX: # BB#0:
+; AVX-NEXT: vbroadcastss {{.*#+}} xmm1 = [9.18354962E-41,9.18354962E-41,9.18354962E-41,9.18354962E-41]
+; AVX-NEXT: vpaddw %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vpand %xmm1, %xmm0, %xmm0
+; AVX-NEXT: retl
+;
; ALL32-LABEL: f8xi16_i32:
; ALL32: # BB#0:
-; ALL32-NEXT: vpbroadcastd {{\.LCPI.*}}, %xmm1
+; ALL32-NEXT: vpbroadcastd {{.*#+}} xmm1 = [65536,65536,65536,65536]
; ALL32-NEXT: vpaddw %xmm1, %xmm0, %xmm0
; ALL32-NEXT: vpand %xmm1, %xmm0, %xmm0
; ALL32-NEXT: retl
;
+; AVX-64-LABEL: f8xi16_i32:
+; AVX-64: # BB#0:
+; AVX-64-NEXT: vbroadcastss {{.*#+}} xmm1 = [9.18354962E-41,9.18354962E-41,9.18354962E-41,9.18354962E-41]
+; AVX-64-NEXT: vpaddw %xmm1, %xmm0, %xmm0
+; AVX-64-NEXT: vpand %xmm1, %xmm0, %xmm0
+; AVX-64-NEXT: retq
+;
; ALL64-LABEL: f8xi16_i32:
; ALL64: # BB#0:
-; ALL64-NEXT: vpbroadcastd {{.*}}(%rip), %xmm1
+; ALL64-NEXT: vpbroadcastd {{.*#+}} xmm1 = [65536,65536,65536,65536]
; ALL64-NEXT: vpaddw %xmm1, %xmm0, %xmm0
; ALL64-NEXT: vpand %xmm1, %xmm0, %xmm0
; ALL64-NEXT: retq
-;
-; AVX-LABEL: f8xi16_i32:
-; AVX: # BB#0:
-; AVX-NEXT: vbroadcastss {{\.LCPI.*}}, %xmm1
-; AVX-NEXT: vpaddw %xmm1, %xmm0, %xmm0
-; AVX-NEXT: vpand %xmm1, %xmm0, %xmm0
%res1 = add <8 x i16> <i16 0, i16 1, i16 0, i16 1, i16 0, i16 1, i16 0, i16 1>, %a
%res2 = and <8 x i16> <i16 0, i16 1, i16 0, i16 1, i16 0, i16 1, i16 0, i16 1>, %res1
ret <8 x i16> %res2
}
-; ALL64: .LCPI13
-; ALL64-NEXT: .quad 844433520132096 # 0x3000200010000
-
-; ALL32: .LCPI13
-; ALL32-NEXT: .quad 844433520132096 # double 4.1720559249406128E-309
-
-; AVX: .LCPI13
-; AVX-NEXT: .quad 844433520132096 # double 4.1720559249406128E-309
-
define <8 x i16> @f8xi16_i64(<8 x i16> %a) {
+; AVX-LABEL: f8xi16_i64:
+; AVX: # BB#0:
+; AVX-NEXT: vmovddup {{.*#+}} xmm1 = mem[0,0]
+; AVX-NEXT: vpaddw %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vpand %xmm1, %xmm0, %xmm0
+; AVX-NEXT: retl
+;
; ALL32-LABEL: f8xi16_i64:
; ALL32: # BB#0:
; ALL32-NEXT: vmovddup {{.*#+}} xmm1 = mem[0,0]
@@ -471,67 +687,66 @@ define <8 x i16> @f8xi16_i64(<8 x i16> %a) {
; ALL32-NEXT: vpand %xmm1, %xmm0, %xmm0
; ALL32-NEXT: retl
;
+; AVX-64-LABEL: f8xi16_i64:
+; AVX-64: # BB#0:
+; AVX-64-NEXT: vmovddup {{.*#+}} xmm1 = mem[0,0]
+; AVX-64-NEXT: vpaddw %xmm1, %xmm0, %xmm0
+; AVX-64-NEXT: vpand %xmm1, %xmm0, %xmm0
+; AVX-64-NEXT: retq
+;
; ALL64-LABEL: f8xi16_i64:
; ALL64: # BB#0:
-; ALL64-NEXT: vpbroadcastq {{.*}}(%rip), %xmm1
+; ALL64-NEXT: vpbroadcastq {{.*#+}} xmm1 = [844433520132096,844433520132096]
; ALL64-NEXT: vpaddw %xmm1, %xmm0, %xmm0
; ALL64-NEXT: vpand %xmm1, %xmm0, %xmm0
; ALL64-NEXT: retq
-;
-; AVX-LABEL: f8xi16_i64:
-; AVX: # BB#0:
-; AVX-NEXT: vmovddup {{.*#+}} xmm1 = mem[0,0]
-; AVX-NEXT: vpaddw %xmm1, %xmm0, %xmm0
-; AVX-NEXT: vpand %xmm1, %xmm0, %xmm0
%res1 = add <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 0, i16 1, i16 2, i16 3>, %a
%res2 = and <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 0, i16 1, i16 2, i16 3>, %res1
ret <8 x i16> %res2
}
-; ALL: .LCPI14
-; ALL-NEXT: .long 65536 # 0x10000
-
-; AVX: .LCPI14
-; AVX-NEXT: .long 65536 # float 9.18354962E-41
-
define <16 x i16> @f16xi16_i32(<16 x i16> %a) {
-; ALL-LABEL: f16xi16_i32:
-; ALL: # BB#0:
-; ALL-NEXT: vpbroadcastd {{\.LCPI.*}}, %ymm1
-; ALL-NEXT: vpaddw %ymm1, %ymm0, %ymm0
-; ALL-NEXT: vpand %ymm1, %ymm0, %ymm0
-;
; AVX-LABEL: f16xi16_i32:
; AVX: # BB#0:
; AVX-NEXT: vextractf128 $1, %ymm0, %xmm1
-; AVX-NEXT: vbroadcastss {{\.LCPI.*}}, %xmm2
+; AVX-NEXT: vbroadcastss {{.*#+}} xmm2 = [9.18354962E-41,9.18354962E-41,9.18354962E-41,9.18354962E-41]
; AVX-NEXT: vpaddw %xmm2, %xmm1, %xmm1
; AVX-NEXT: vpaddw %xmm2, %xmm0, %xmm0
; AVX-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX-NEXT: vandps {{\.LCPI.*}}, %ymm0, %ymm0
+; AVX-NEXT: retl
+;
+; ALL32-LABEL: f16xi16_i32:
+; ALL32: # BB#0:
+; ALL32-NEXT: vpbroadcastd {{.*#+}} ymm1 = [65536,65536,65536,65536,65536,65536,65536,65536]
+; ALL32-NEXT: vpaddw %ymm1, %ymm0, %ymm0
+; ALL32-NEXT: vpand %ymm1, %ymm0, %ymm0
+; ALL32-NEXT: retl
+;
+; AVX-64-LABEL: f16xi16_i32:
+; AVX-64: # BB#0:
+; AVX-64-NEXT: vextractf128 $1, %ymm0, %xmm1
+; AVX-64-NEXT: vbroadcastss {{.*#+}} xmm2 = [9.18354962E-41,9.18354962E-41,9.18354962E-41,9.18354962E-41]
+; AVX-64-NEXT: vpaddw %xmm2, %xmm1, %xmm1
+; AVX-64-NEXT: vpaddw %xmm2, %xmm0, %xmm0
+; AVX-64-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX-64-NEXT: vandps {{.*}}(%rip), %ymm0, %ymm0
+; AVX-64-NEXT: retq
+;
+; ALL64-LABEL: f16xi16_i32:
+; ALL64: # BB#0:
+; ALL64-NEXT: vpbroadcastd {{.*#+}} ymm1 = [65536,65536,65536,65536,65536,65536,65536,65536]
+; ALL64-NEXT: vpaddw %ymm1, %ymm0, %ymm0
+; ALL64-NEXT: vpand %ymm1, %ymm0, %ymm0
+; ALL64-NEXT: retq
%res1 = add <16 x i16> <i16 0, i16 1, i16 0, i16 1, i16 0, i16 1, i16 0, i16 1, i16 0, i16 1, i16 0, i16 1, i16 0, i16 1, i16 0, i16 1>, %a
%res2 = and <16 x i16> <i16 0, i16 1, i16 0, i16 1, i16 0, i16 1, i16 0, i16 1, i16 0, i16 1, i16 0, i16 1, i16 0, i16 1, i16 0, i16 1>, %res1
ret <16 x i16> %res2
}
-; ALL64: .LCPI15
-; ALL64-NEXT: .quad 844433520132096 # 0x3000200010000
-
-; ALL32: .LCPI15
-; ALL32-NEXT: .quad 844433520132096 # double 4.1720559249406128E-309
-
-; AVX: .LCPI15
-; AVX-NEXT: .quad 844433520132096 # double 4.1720559249406128E-309
-
define <16 x i16> @f16xi16_i64(<16 x i16> %a) {
-; ALL-LABEL: f16xi16_i64:
-; ALL: # BB#0:
-; ALL-NEXT: vpbroadcastq {{.*}}, %ymm1
-; ALL-NEXT: vpaddw %ymm1, %ymm0, %ymm0
-; ALL-NEXT: vpand %ymm1, %ymm0, %ymm0
-;
; AVX-LABEL: f16xi16_i64:
; AVX: # BB#0:
; AVX-NEXT: vextractf128 $1, %ymm0, %xmm1
@@ -540,60 +755,154 @@ define <16 x i16> @f16xi16_i64(<16 x i16> %a) {
; AVX-NEXT: vpaddw %xmm2, %xmm0, %xmm0
; AVX-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX-NEXT: vandps {{\.LCPI.*}}, %ymm0, %ymm0
+; AVX-NEXT: retl
+;
+; ALL32-LABEL: f16xi16_i64:
+; ALL32: # BB#0:
+; ALL32-NEXT: vpbroadcastq {{.*#+}} ymm1 = [4.1720559249406128E-309,4.1720559249406128E-309,4.1720559249406128E-309,4.1720559249406128E-309]
+; ALL32-NEXT: vpaddw %ymm1, %ymm0, %ymm0
+; ALL32-NEXT: vpand %ymm1, %ymm0, %ymm0
+; ALL32-NEXT: retl
+;
+; AVX-64-LABEL: f16xi16_i64:
+; AVX-64: # BB#0:
+; AVX-64-NEXT: vextractf128 $1, %ymm0, %xmm1
+; AVX-64-NEXT: vmovddup {{.*#+}} xmm2 = mem[0,0]
+; AVX-64-NEXT: vpaddw %xmm2, %xmm1, %xmm1
+; AVX-64-NEXT: vpaddw %xmm2, %xmm0, %xmm0
+; AVX-64-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX-64-NEXT: vandps {{.*}}(%rip), %ymm0, %ymm0
+; AVX-64-NEXT: retq
+;
+; ALL64-LABEL: f16xi16_i64:
+; ALL64: # BB#0:
+; ALL64-NEXT: vpbroadcastq {{.*#+}} ymm1 = [844433520132096,844433520132096,844433520132096,844433520132096]
+; ALL64-NEXT: vpaddw %ymm1, %ymm0, %ymm0
+; ALL64-NEXT: vpand %ymm1, %ymm0, %ymm0
+; ALL64-NEXT: retq
%res1 = add <16 x i16> <i16 0, i16 1, i16 2, i16 3, i16 0, i16 1, i16 2, i16 3, i16 0, i16 1, i16 2, i16 3, i16 0, i16 1, i16 2, i16 3>, %a
%res2 = and <16 x i16> <i16 0, i16 1, i16 2, i16 3, i16 0, i16 1, i16 2, i16 3, i16 0, i16 1, i16 2, i16 3, i16 0, i16 1, i16 2, i16 3>, %res1
ret <16 x i16> %res2
}
-; ALL: .LCPI16
-; ALL-NEXT: .short 0 # 0x0
-; ALL-NEXT: .short 1 # 0x1
-; ALL-NEXT: .short 2 # 0x2
-; ALL-NEXT: .short 3 # 0x3
-; ALL-NEXT: .short 4 # 0x4
-; ALL-NEXT: .short 5 # 0x5
-; ALL-NEXT: .short 6 # 0x6
-; ALL-NEXT: .short 7 # 0x7
-; ALL-NOT: .short
-
define <16 x i16> @f16xi16_i128(<16 x i16> %a) {
-; ALL-LABEL: f16xi16_i128:
-; ALL: # BB#0:
-; ALL-NEXT: vbroadcasti128 {{.*#+}} ymm1 = mem[0,1,0,1]
-; ALL-NEXT: vpaddw %ymm1, %ymm0, %ymm0
-; ALL-NEXT: vpand %ymm1, %ymm0, %ymm0
+; AVX-LABEL: f16xi16_i128:
+; AVX: # BB#0:
+; AVX-NEXT: vextractf128 $1, %ymm0, %xmm1
+; AVX-NEXT: vmovdqa {{.*#+}} xmm2 = [0,1,2,3,4,5,6,7]
+; AVX-NEXT: vpaddw %xmm2, %xmm1, %xmm1
+; AVX-NEXT: vpaddw %xmm2, %xmm0, %xmm0
+; AVX-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX-NEXT: vandps {{\.LCPI.*}}, %ymm0, %ymm0
+; AVX-NEXT: retl
+;
+; ALL32-LABEL: f16xi16_i128:
+; ALL32: # BB#0:
+; ALL32-NEXT: vbroadcasti128 {{.*#+}} ymm1 = [0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7]
+; ALL32-NEXT: # ymm1 = mem[0,1,0,1]
+; ALL32-NEXT: vpaddw %ymm1, %ymm0, %ymm0
+; ALL32-NEXT: vpand %ymm1, %ymm0, %ymm0
+; ALL32-NEXT: retl
+;
+; AVX-64-LABEL: f16xi16_i128:
+; AVX-64: # BB#0:
+; AVX-64-NEXT: vextractf128 $1, %ymm0, %xmm1
+; AVX-64-NEXT: vmovdqa {{.*#+}} xmm2 = [0,1,2,3,4,5,6,7]
+; AVX-64-NEXT: vpaddw %xmm2, %xmm1, %xmm1
+; AVX-64-NEXT: vpaddw %xmm2, %xmm0, %xmm0
+; AVX-64-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX-64-NEXT: vandps {{.*}}(%rip), %ymm0, %ymm0
+; AVX-64-NEXT: retq
+;
+; ALL64-LABEL: f16xi16_i128:
+; ALL64: # BB#0:
+; ALL64-NEXT: vbroadcasti128 {{.*#+}} ymm1 = [0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7]
+; ALL64-NEXT: # ymm1 = mem[0,1,0,1]
+; ALL64-NEXT: vpaddw %ymm1, %ymm0, %ymm0
+; ALL64-NEXT: vpand %ymm1, %ymm0, %ymm0
+; ALL64-NEXT: retq
%res1 = add <16 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, %a
%res2 = and <16 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, %res1
ret <16 x i16> %res2
}
-; ALL: .LCPI17
-; ALL-NEXT: .long 65536 # 0x10000
-
-; AVX: .LCPI17
-; AVX-NEXT: .long 65536 # float 9.18354962E-41
-
define <32 x i16> @f32xi16_i32(<32 x i16> %a) {
+; AVX-LABEL: f32xi16_i32:
+; AVX: # BB#0:
+; AVX-NEXT: vextractf128 $1, %ymm1, %xmm2
+; AVX-NEXT: vbroadcastss {{.*#+}} xmm3 = [9.18354962E-41,9.18354962E-41,9.18354962E-41,9.18354962E-41]
+; AVX-NEXT: vpaddw %xmm3, %xmm2, %xmm2
+; AVX-NEXT: vpaddw %xmm3, %xmm1, %xmm1
+; AVX-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1
+; AVX-NEXT: vextractf128 $1, %ymm0, %xmm2
+; AVX-NEXT: vpaddw %xmm3, %xmm2, %xmm2
+; AVX-NEXT: vpaddw %xmm3, %xmm0, %xmm0
+; AVX-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; AVX-NEXT: vmovaps {{.*#+}} ymm2 = [0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
+; AVX-NEXT: vandps %ymm2, %ymm0, %ymm0
+; AVX-NEXT: vandps %ymm2, %ymm1, %ymm1
+; AVX-NEXT: retl
+;
; NO-AVX512BW-LABEL: f32xi16_i32:
; NO-AVX512BW: # BB#0:
-; NO-AVX512BW-NEXT: vpbroadcastd {{\.LCPI.*}}, %ymm2
+; NO-AVX512BW-NEXT: vpbroadcastd {{.*#+}} ymm2 = [65536,65536,65536,65536,65536,65536,65536,65536]
; NO-AVX512BW-NEXT: vpaddw %ymm2, %ymm1, %ymm1
; NO-AVX512BW-NEXT: vpaddw %ymm2, %ymm0, %ymm0
; NO-AVX512BW-NEXT: vpand %ymm2, %ymm0, %ymm0
; NO-AVX512BW-NEXT: vpand %ymm2, %ymm1, %ymm1
+; NO-AVX512BW-NEXT: retl
;
; AVX512BW-LABEL: f32xi16_i32:
; AVX512BW: # BB#0:
-; AVX512BW-NEXT: vpbroadcastd {{\.LCPI.*}}, %zmm1
+; AVX512BW-NEXT: vpbroadcastd {{.*#+}} zmm1 = [65536,65536,65536,65536,65536,65536,65536,65536,65536,65536,65536,65536,65536,65536,65536,65536]
; AVX512BW-NEXT: vpaddw %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT: vpandq %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT: retl
;
-; AVX-LABEL: f32xi16_i32:
+; AVX-64-LABEL: f32xi16_i32:
+; AVX-64: # BB#0:
+; AVX-64-NEXT: vextractf128 $1, %ymm1, %xmm2
+; AVX-64-NEXT: vbroadcastss {{.*#+}} xmm3 = [9.18354962E-41,9.18354962E-41,9.18354962E-41,9.18354962E-41]
+; AVX-64-NEXT: vpaddw %xmm3, %xmm2, %xmm2
+; AVX-64-NEXT: vpaddw %xmm3, %xmm1, %xmm1
+; AVX-64-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1
+; AVX-64-NEXT: vextractf128 $1, %ymm0, %xmm2
+; AVX-64-NEXT: vpaddw %xmm3, %xmm2, %xmm2
+; AVX-64-NEXT: vpaddw %xmm3, %xmm0, %xmm0
+; AVX-64-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; AVX-64-NEXT: vmovaps {{.*#+}} ymm2 = [0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
+; AVX-64-NEXT: vandps %ymm2, %ymm0, %ymm0
+; AVX-64-NEXT: vandps %ymm2, %ymm1, %ymm1
+; AVX-64-NEXT: retq
+;
+; NO-AVX512BW-64-LABEL: f32xi16_i32:
+; NO-AVX512BW-64: # BB#0:
+; NO-AVX512BW-64-NEXT: vpbroadcastd {{.*#+}} ymm2 = [65536,65536,65536,65536,65536,65536,65536,65536]
+; NO-AVX512BW-64-NEXT: vpaddw %ymm2, %ymm1, %ymm1
+; NO-AVX512BW-64-NEXT: vpaddw %ymm2, %ymm0, %ymm0
+; NO-AVX512BW-64-NEXT: vpand %ymm2, %ymm0, %ymm0
+; NO-AVX512BW-64-NEXT: vpand %ymm2, %ymm1, %ymm1
+; NO-AVX512BW-64-NEXT: retq
+;
+; AVX512BW-64-LABEL: f32xi16_i32:
+; AVX512BW-64: # BB#0:
+; AVX512BW-64-NEXT: vpbroadcastd {{.*#+}} zmm1 = [65536,65536,65536,65536,65536,65536,65536,65536,65536,65536,65536,65536,65536,65536,65536,65536]
+; AVX512BW-64-NEXT: vpaddw %zmm1, %zmm0, %zmm0
+; AVX512BW-64-NEXT: vpandq %zmm1, %zmm0, %zmm0
+; AVX512BW-64-NEXT: retq
+ %res1 = add <32 x i16> <i16 0, i16 1, i16 0, i16 1, i16 0, i16 1, i16 0, i16 1, i16 0, i16 1, i16 0, i16 1, i16 0, i16 1, i16 0, i16 1, i16 0, i16 1, i16 0, i16 1, i16 0, i16 1, i16 0, i16 1, i16 0, i16 1, i16 0, i16 1, i16 0, i16 1, i16 0, i16 1>, %a
+ %res2 = and <32 x i16> <i16 0, i16 1, i16 0, i16 1, i16 0, i16 1, i16 0, i16 1, i16 0, i16 1, i16 0, i16 1, i16 0, i16 1, i16 0, i16 1, i16 0, i16 1, i16 0, i16 1, i16 0, i16 1, i16 0, i16 1, i16 0, i16 1, i16 0, i16 1, i16 0, i16 1, i16 0, i16 1>, %res1
+ ret <32 x i16> %res2
+}
+
+
+define <32 x i16> @f32xi16_i64(<32 x i16> %a) {
+; AVX-LABEL: f32xi16_i64:
; AVX: # BB#0:
; AVX-NEXT: vextractf128 $1, %ymm1, %xmm2
-; AVX-NEXT: vbroadcastss {{\.LCPI.*}}, %xmm3
+; AVX-NEXT: vmovddup {{.*#+}} xmm3 = mem[0,0]
; AVX-NEXT: vpaddw %xmm3, %xmm2, %xmm2
; AVX-NEXT: vpaddw %xmm3, %xmm1, %xmm1
; AVX-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1
@@ -601,43 +910,69 @@ define <32 x i16> @f32xi16_i32(<32 x i16> %a) {
; AVX-NEXT: vpaddw %xmm3, %xmm2, %xmm2
; AVX-NEXT: vpaddw %xmm3, %xmm0, %xmm0
; AVX-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
-; AVX-NEXT: vmovaps {{.*#+}} ymm2 = [0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
+; AVX-NEXT: vmovaps {{.*#+}} ymm2 = [0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
; AVX-NEXT: vandps %ymm2, %ymm0, %ymm0
; AVX-NEXT: vandps %ymm2, %ymm1, %ymm1
- %res1 = add <32 x i16> <i16 0, i16 1, i16 0, i16 1, i16 0, i16 1, i16 0, i16 1, i16 0, i16 1, i16 0, i16 1, i16 0, i16 1, i16 0, i16 1, i16 0, i16 1, i16 0, i16 1, i16 0, i16 1, i16 0, i16 1, i16 0, i16 1, i16 0, i16 1, i16 0, i16 1, i16 0, i16 1>, %a
- %res2 = and <32 x i16> <i16 0, i16 1, i16 0, i16 1, i16 0, i16 1, i16 0, i16 1, i16 0, i16 1, i16 0, i16 1, i16 0, i16 1, i16 0, i16 1, i16 0, i16 1, i16 0, i16 1, i16 0, i16 1, i16 0, i16 1, i16 0, i16 1, i16 0, i16 1, i16 0, i16 1, i16 0, i16 1>, %res1
- ret <32 x i16> %res2
-}
-
-
-; ALL64: .LCPI18
-; ALL64-NEXT: .quad 844433520132096 # 0x3000200010000
-
-; ALL32: .LCPI18
-; ALL32-NEXT: .quad 844433520132096 # double 4.1720559249406128E-309
-
-; AVX: .LCPI18
-; AVX-NEXT: .quad 844433520132096 # double 4.1720559249406128E-309
-
-define <32 x i16> @f32xi16_i64(<32 x i16> %a) {
+; AVX-NEXT: retl
+;
; NO-AVX512BW-LABEL: f32xi16_i64:
; NO-AVX512BW: # BB#0:
-; NO-AVX512BW-NEXT: vpbroadcastq {{.*}}, %ymm2
+; NO-AVX512BW-NEXT: vpbroadcastq {{.*#+}} ymm2 = [4.1720559249406128E-309,4.1720559249406128E-309,4.1720559249406128E-309,4.1720559249406128E-309]
; NO-AVX512BW-NEXT: vpaddw %ymm2, %ymm1, %ymm1
; NO-AVX512BW-NEXT: vpaddw %ymm2, %ymm0, %ymm0
; NO-AVX512BW-NEXT: vpand %ymm2, %ymm0, %ymm0
; NO-AVX512BW-NEXT: vpand %ymm2, %ymm1, %ymm1
+; NO-AVX512BW-NEXT: retl
;
; AVX512BW-LABEL: f32xi16_i64:
; AVX512BW: # BB#0:
-; AVX512BW-NEXT: vpbroadcastq {{.*}}, %zmm1
+; AVX512BW-NEXT: vpbroadcastq {{.*#+}} zmm1 = [4.1720559249406128E-309,4.1720559249406128E-309,4.1720559249406128E-309,4.1720559249406128E-309,4.1720559249406128E-309,4.1720559249406128E-309,4.1720559249406128E-309,4.1720559249406128E-309]
; AVX512BW-NEXT: vpaddw %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT: vpandq %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT: retl
;
-; AVX-LABEL: f32xi16_i64:
+; AVX-64-LABEL: f32xi16_i64:
+; AVX-64: # BB#0:
+; AVX-64-NEXT: vextractf128 $1, %ymm1, %xmm2
+; AVX-64-NEXT: vmovddup {{.*#+}} xmm3 = mem[0,0]
+; AVX-64-NEXT: vpaddw %xmm3, %xmm2, %xmm2
+; AVX-64-NEXT: vpaddw %xmm3, %xmm1, %xmm1
+; AVX-64-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1
+; AVX-64-NEXT: vextractf128 $1, %ymm0, %xmm2
+; AVX-64-NEXT: vpaddw %xmm3, %xmm2, %xmm2
+; AVX-64-NEXT: vpaddw %xmm3, %xmm0, %xmm0
+; AVX-64-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; AVX-64-NEXT: vmovaps {{.*#+}} ymm2 = [0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
+; AVX-64-NEXT: vandps %ymm2, %ymm0, %ymm0
+; AVX-64-NEXT: vandps %ymm2, %ymm1, %ymm1
+; AVX-64-NEXT: retq
+;
+; NO-AVX512BW-64-LABEL: f32xi16_i64:
+; NO-AVX512BW-64: # BB#0:
+; NO-AVX512BW-64-NEXT: vpbroadcastq {{.*#+}} ymm2 = [844433520132096,844433520132096,844433520132096,844433520132096]
+; NO-AVX512BW-64-NEXT: vpaddw %ymm2, %ymm1, %ymm1
+; NO-AVX512BW-64-NEXT: vpaddw %ymm2, %ymm0, %ymm0
+; NO-AVX512BW-64-NEXT: vpand %ymm2, %ymm0, %ymm0
+; NO-AVX512BW-64-NEXT: vpand %ymm2, %ymm1, %ymm1
+; NO-AVX512BW-64-NEXT: retq
+;
+; AVX512BW-64-LABEL: f32xi16_i64:
+; AVX512BW-64: # BB#0:
+; AVX512BW-64-NEXT: vpbroadcastq {{.*#+}} zmm1 = [844433520132096,844433520132096,844433520132096,844433520132096,844433520132096,844433520132096,844433520132096,844433520132096]
+; AVX512BW-64-NEXT: vpaddw %zmm1, %zmm0, %zmm0
+; AVX512BW-64-NEXT: vpandq %zmm1, %zmm0, %zmm0
+; AVX512BW-64-NEXT: retq
+ %res1 = add <32 x i16> <i16 0, i16 1, i16 2, i16 3, i16 0, i16 1, i16 2, i16 3, i16 0, i16 1, i16 2, i16 3, i16 0, i16 1, i16 2, i16 3, i16 0, i16 1, i16 2, i16 3, i16 0, i16 1, i16 2, i16 3, i16 0, i16 1, i16 2, i16 3, i16 0, i16 1, i16 2, i16 3>, %a
+ %res2 = and <32 x i16> <i16 0, i16 1, i16 2, i16 3, i16 0, i16 1, i16 2, i16 3, i16 0, i16 1, i16 2, i16 3, i16 0, i16 1, i16 2, i16 3, i16 0, i16 1, i16 2, i16 3, i16 0, i16 1, i16 2, i16 3, i16 0, i16 1, i16 2, i16 3, i16 0, i16 1, i16 2, i16 3>, %res1
+ ret <32 x i16> %res2
+}
+
+
+define <32 x i16> @f32xi16_i128(<32 x i16> %a) {
+; AVX-LABEL: f32xi16_i128:
; AVX: # BB#0:
; AVX-NEXT: vextractf128 $1, %ymm1, %xmm2
-; AVX-NEXT: vmovddup {{.*#+}} xmm3 = mem[0,0]
+; AVX-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,2,3,4,5,6,7]
; AVX-NEXT: vpaddw %xmm3, %xmm2, %xmm2
; AVX-NEXT: vpaddw %xmm3, %xmm1, %xmm1
; AVX-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1
@@ -645,87 +980,151 @@ define <32 x i16> @f32xi16_i64(<32 x i16> %a) {
; AVX-NEXT: vpaddw %xmm3, %xmm2, %xmm2
; AVX-NEXT: vpaddw %xmm3, %xmm0, %xmm0
; AVX-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
-; AVX-NEXT: vmovaps {{.*#+}} ymm2 = [0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
+; AVX-NEXT: vmovaps {{.*#+}} ymm2 = [0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7]
; AVX-NEXT: vandps %ymm2, %ymm0, %ymm0
; AVX-NEXT: vandps %ymm2, %ymm1, %ymm1
- %res1 = add <32 x i16> <i16 0, i16 1, i16 2, i16 3, i16 0, i16 1, i16 2, i16 3, i16 0, i16 1, i16 2, i16 3, i16 0, i16 1, i16 2, i16 3, i16 0, i16 1, i16 2, i16 3, i16 0, i16 1, i16 2, i16 3, i16 0, i16 1, i16 2, i16 3, i16 0, i16 1, i16 2, i16 3>, %a
- %res2 = and <32 x i16> <i16 0, i16 1, i16 2, i16 3, i16 0, i16 1, i16 2, i16 3, i16 0, i16 1, i16 2, i16 3, i16 0, i16 1, i16 2, i16 3, i16 0, i16 1, i16 2, i16 3, i16 0, i16 1, i16 2, i16 3, i16 0, i16 1, i16 2, i16 3, i16 0, i16 1, i16 2, i16 3>, %res1
- ret <32 x i16> %res2
-}
-
-
-; ALL: .LCPI19
-; ALL-NEXT: .short 0 # 0x0
-; ALL-NEXT: .short 1 # 0x1
-; ALL-NEXT: .short 2 # 0x2
-; ALL-NEXT: .short 3 # 0x3
-; ALL-NEXT: .short 4 # 0x4
-; ALL-NEXT: .short 5 # 0x5
-; ALL-NEXT: .short 6 # 0x6
-; ALL-NEXT: .short 7 # 0x7
-; ALL-NOT: .short
-
-define <32 x i16> @f32xi16_i128(<32 x i16> %a) {
+; AVX-NEXT: retl
+;
; NO-AVX512BW-LABEL: f32xi16_i128:
; NO-AVX512BW: # BB#0:
-; NO-AVX512BW-NEXT: vbroadcasti128 {{.*#+}} ymm2 = mem[0,1,0,1]
+; NO-AVX512BW-NEXT: vbroadcasti128 {{.*#+}} ymm2 = [0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7]
+; NO-AVX512BW-NEXT: # ymm2 = mem[0,1,0,1]
; NO-AVX512BW-NEXT: vpaddw %ymm2, %ymm1, %ymm1
; NO-AVX512BW-NEXT: vpaddw %ymm2, %ymm0, %ymm0
; NO-AVX512BW-NEXT: vpand %ymm2, %ymm0, %ymm0
; NO-AVX512BW-NEXT: vpand %ymm2, %ymm1, %ymm1
+; NO-AVX512BW-NEXT: retl
;
; AVX512BW-LABEL: f32xi16_i128:
; AVX512BW: # BB#0:
-; AVX512BW-NEXT: vbroadcasti32x4 {{.*#+}} zmm1 = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
+; AVX512BW-NEXT: vbroadcasti32x4 {{.*#+}} zmm1 = [0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7]
+; AVX512BW-NEXT: # zmm1 = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
; AVX512BW-NEXT: vpaddw %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT: vpandq %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT: retl
+;
+; AVX-64-LABEL: f32xi16_i128:
+; AVX-64: # BB#0:
+; AVX-64-NEXT: vextractf128 $1, %ymm1, %xmm2
+; AVX-64-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,2,3,4,5,6,7]
+; AVX-64-NEXT: vpaddw %xmm3, %xmm2, %xmm2
+; AVX-64-NEXT: vpaddw %xmm3, %xmm1, %xmm1
+; AVX-64-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1
+; AVX-64-NEXT: vextractf128 $1, %ymm0, %xmm2
+; AVX-64-NEXT: vpaddw %xmm3, %xmm2, %xmm2
+; AVX-64-NEXT: vpaddw %xmm3, %xmm0, %xmm0
+; AVX-64-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; AVX-64-NEXT: vmovaps {{.*#+}} ymm2 = [0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7]
+; AVX-64-NEXT: vandps %ymm2, %ymm0, %ymm0
+; AVX-64-NEXT: vandps %ymm2, %ymm1, %ymm1
+; AVX-64-NEXT: retq
+;
+; NO-AVX512BW-64-LABEL: f32xi16_i128:
+; NO-AVX512BW-64: # BB#0:
+; NO-AVX512BW-64-NEXT: vbroadcasti128 {{.*#+}} ymm2 = [0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7]
+; NO-AVX512BW-64-NEXT: # ymm2 = mem[0,1,0,1]
+; NO-AVX512BW-64-NEXT: vpaddw %ymm2, %ymm1, %ymm1
+; NO-AVX512BW-64-NEXT: vpaddw %ymm2, %ymm0, %ymm0
+; NO-AVX512BW-64-NEXT: vpand %ymm2, %ymm0, %ymm0
+; NO-AVX512BW-64-NEXT: vpand %ymm2, %ymm1, %ymm1
+; NO-AVX512BW-64-NEXT: retq
+;
+; AVX512BW-64-LABEL: f32xi16_i128:
+; AVX512BW-64: # BB#0:
+; AVX512BW-64-NEXT: vbroadcasti32x4 {{.*#+}} zmm1 = [0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7]
+; AVX512BW-64-NEXT: # zmm1 = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
+; AVX512BW-64-NEXT: vpaddw %zmm1, %zmm0, %zmm0
+; AVX512BW-64-NEXT: vpandq %zmm1, %zmm0, %zmm0
+; AVX512BW-64-NEXT: retq
%res1 = add <32 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, %a
%res2 = and <32 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, %res1
ret <32 x i16> %res2
}
-; AVX512BW: .LCPI20
-; AVX512BW-NEXT: .short 0 # 0x0
-; AVX512BW-NEXT: .short 1 # 0x1
-; AVX512BW-NEXT: .short 2 # 0x2
-; AVX512BW-NEXT: .short 3 # 0x3
-; AVX512BW-NEXT: .short 4 # 0x4
-; AVX512BW-NEXT: .short 5 # 0x5
-; AVX512BW-NEXT: .short 6 # 0x6
-; AVX512BW-NEXT: .short 7 # 0x7
-; AVX512BW-NEXT: .short 8 # 0x8
-; AVX512BW-NEXT: .short 9 # 0x9
-; AVX512BW-NEXT: .short 10 # 0xa
-; AVX512BW-NEXT: .short 11 # 0xb
-; AVX512BW-NEXT: .short 12 # 0xc
-; AVX512BW-NEXT: .short 13 # 0xd
-; AVX512BW-NEXT: .short 14 # 0xe
-; AVX512BW-NEXT: .short 15 # 0xf
-; AVX512BW-NOT: .short
-
define <32 x i16> @f32xi16_i256(<32 x i16> %a) {
+; AVX-LABEL: f32xi16_i256:
+; AVX: # BB#0:
+; AVX-NEXT: vextractf128 $1, %ymm1, %xmm2
+; AVX-NEXT: vmovdqa {{.*#+}} xmm3 = [8,9,10,11,12,13,14,15]
+; AVX-NEXT: vpaddw %xmm3, %xmm2, %xmm2
+; AVX-NEXT: vmovdqa {{.*#+}} xmm4 = [0,1,2,3,4,5,6,7]
+; AVX-NEXT: vpaddw %xmm4, %xmm1, %xmm1
+; AVX-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1
+; AVX-NEXT: vextractf128 $1, %ymm0, %xmm2
+; AVX-NEXT: vpaddw %xmm3, %xmm2, %xmm2
+; AVX-NEXT: vpaddw %xmm4, %xmm0, %xmm0
+; AVX-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; AVX-NEXT: vmovaps {{.*#+}} ymm2 = [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
+; AVX-NEXT: vandps %ymm2, %ymm0, %ymm0
+; AVX-NEXT: vandps %ymm2, %ymm1, %ymm1
+; AVX-NEXT: retl
+;
+; NO-AVX512BW-LABEL: f32xi16_i256:
+; NO-AVX512BW: # BB#0:
+; NO-AVX512BW-NEXT: vmovdqa {{.*#+}} ymm2 = [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
+; NO-AVX512BW-NEXT: vpaddw %ymm2, %ymm1, %ymm1
+; NO-AVX512BW-NEXT: vpaddw %ymm2, %ymm0, %ymm0
+; NO-AVX512BW-NEXT: vpand %ymm2, %ymm0, %ymm0
+; NO-AVX512BW-NEXT: vpand %ymm2, %ymm1, %ymm1
+; NO-AVX512BW-NEXT: retl
+;
; AVX512BW-LABEL: f32xi16_i256:
; AVX512BW: # BB#0:
-; AVX512BW-NEXT: vbroadcasti64x4 {{.*#+}} zmm1 = mem[0,1,2,3,0,1,2,3]
+; AVX512BW-NEXT: vbroadcasti64x4 {{.*#+}} zmm1 = [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
+; AVX512BW-NEXT: # zmm1 = mem[0,1,2,3,0,1,2,3]
; AVX512BW-NEXT: vpaddw %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT: vpandq %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT: retl
+;
+; AVX-64-LABEL: f32xi16_i256:
+; AVX-64: # BB#0:
+; AVX-64-NEXT: vextractf128 $1, %ymm1, %xmm2
+; AVX-64-NEXT: vmovdqa {{.*#+}} xmm3 = [8,9,10,11,12,13,14,15]
+; AVX-64-NEXT: vpaddw %xmm3, %xmm2, %xmm2
+; AVX-64-NEXT: vmovdqa {{.*#+}} xmm4 = [0,1,2,3,4,5,6,7]
+; AVX-64-NEXT: vpaddw %xmm4, %xmm1, %xmm1
+; AVX-64-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1
+; AVX-64-NEXT: vextractf128 $1, %ymm0, %xmm2
+; AVX-64-NEXT: vpaddw %xmm3, %xmm2, %xmm2
+; AVX-64-NEXT: vpaddw %xmm4, %xmm0, %xmm0
+; AVX-64-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; AVX-64-NEXT: vmovaps {{.*#+}} ymm2 = [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
+; AVX-64-NEXT: vandps %ymm2, %ymm0, %ymm0
+; AVX-64-NEXT: vandps %ymm2, %ymm1, %ymm1
+; AVX-64-NEXT: retq
+;
+; NO-AVX512BW-64-LABEL: f32xi16_i256:
+; NO-AVX512BW-64: # BB#0:
+; NO-AVX512BW-64-NEXT: vmovdqa {{.*#+}} ymm2 = [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
+; NO-AVX512BW-64-NEXT: vpaddw %ymm2, %ymm1, %ymm1
+; NO-AVX512BW-64-NEXT: vpaddw %ymm2, %ymm0, %ymm0
+; NO-AVX512BW-64-NEXT: vpand %ymm2, %ymm0, %ymm0
+; NO-AVX512BW-64-NEXT: vpand %ymm2, %ymm1, %ymm1
+; NO-AVX512BW-64-NEXT: retq
+;
+; AVX512BW-64-LABEL: f32xi16_i256:
+; AVX512BW-64: # BB#0:
+; AVX512BW-64-NEXT: vbroadcasti64x4 {{.*#+}} zmm1 = [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
+; AVX512BW-64-NEXT: # zmm1 = mem[0,1,2,3,0,1,2,3]
+; AVX512BW-64-NEXT: vpaddw %zmm1, %zmm0, %zmm0
+; AVX512BW-64-NEXT: vpandq %zmm1, %zmm0, %zmm0
+; AVX512BW-64-NEXT: retq
%res1 = add <32 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>, %a
%res2 = and <32 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>, %res1
ret <32 x i16> %res2
}
-; ALL64: .LCPI21
-; ALL64-NEXT: .quad 4294967296 # 0x100000000
-
-; ALL32: .LCPI21
-; ALL32-NEXT: .quad 4294967296 # double 2.1219957909652723E-314
-
-; AVX: .LCPI21
-; AVX-NEXT: .quad 4294967296 # double 2.1219957909652723E-314
define <4 x i32> @f4xi32_i64(<4 x i32> %a) {
+; AVX-LABEL: f4xi32_i64:
+; AVX: # BB#0:
+; AVX-NEXT: vmovddup {{.*#+}} xmm1 = mem[0,0]
+; AVX-NEXT: vpaddd %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vpand %xmm1, %xmm0, %xmm0
+; AVX-NEXT: retl
+;
; ALL32-LABEL: f4xi32_i64:
; ALL32: # BB#0:
; ALL32-NEXT: vmovddup {{.*#+}} xmm1 = mem[0,0]
@@ -733,40 +1132,26 @@ define <4 x i32> @f4xi32_i64(<4 x i32> %a) {
; ALL32-NEXT: vpand %xmm1, %xmm0, %xmm0
; ALL32-NEXT: retl
;
+; AVX-64-LABEL: f4xi32_i64:
+; AVX-64: # BB#0:
+; AVX-64-NEXT: vmovddup {{.*#+}} xmm1 = mem[0,0]
+; AVX-64-NEXT: vpaddd %xmm1, %xmm0, %xmm0
+; AVX-64-NEXT: vpand %xmm1, %xmm0, %xmm0
+; AVX-64-NEXT: retq
+;
; ALL64-LABEL: f4xi32_i64:
; ALL64: # BB#0:
-; ALL64-NEXT: vpbroadcastq {{.*}}(%rip), %xmm1
+; ALL64-NEXT: vpbroadcastq {{.*#+}} xmm1 = [4294967296,4294967296]
; ALL64-NEXT: vpaddd %xmm1, %xmm0, %xmm0
; ALL64-NEXT: vpand %xmm1, %xmm0, %xmm0
; ALL64-NEXT: retq
-;
-; AVX-LABEL: f4xi32_i64:
-; AVX: # BB#0:
-; AVX-NEXT: vmovddup {{.*#+}} xmm1 = mem[0,0]
-; AVX-NEXT: vpaddd %xmm1, %xmm0, %xmm0
-; AVX-NEXT: vpand %xmm1, %xmm0, %xmm0
%res1 = add <4 x i32> <i32 0, i32 1, i32 0, i32 1>, %a
%res2 = and <4 x i32> <i32 0, i32 1, i32 0, i32 1>, %res1
ret <4 x i32> %res2
}
-; ALL64: .LCPI22
-; ALL64-NEXT: .quad 4294967296 # 0x100000000
-
-; ALL32: .LCPI22
-; ALL32-NEXT: .quad 4294967296 # double 2.1219957909652723E-314
-
-; AVX: .LCPI22
-; AVX-NEXT: .quad 4294967296 # double 2.1219957909652723E-314
-
define <8 x i32> @f8xi32_i64(<8 x i32> %a) {
-; ALL-LABEL: f8xi32_i64:
-; ALL: # BB#0:
-; ALL-NEXT: vpbroadcastq {{.*}}, %ymm1
-; ALL-NEXT: vpaddd %ymm1, %ymm0, %ymm0
-; ALL-NEXT: vpand %ymm1, %ymm0, %ymm0
-;
; AVX-LABEL: f8xi32_i64:
; AVX: # BB#0:
; AVX-NEXT: vextractf128 $1, %ymm0, %xmm1
@@ -775,59 +1160,154 @@ define <8 x i32> @f8xi32_i64(<8 x i32> %a) {
; AVX-NEXT: vpaddd %xmm2, %xmm0, %xmm0
; AVX-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX-NEXT: vandps {{\.LCPI.*}}, %ymm0, %ymm0
+; AVX-NEXT: retl
+;
+; ALL32-LABEL: f8xi32_i64:
+; ALL32: # BB#0:
+; ALL32-NEXT: vpbroadcastq {{.*#+}} ymm1 = [2.1219957909652723E-314,2.1219957909652723E-314,2.1219957909652723E-314,2.1219957909652723E-314]
+; ALL32-NEXT: vpaddd %ymm1, %ymm0, %ymm0
+; ALL32-NEXT: vpand %ymm1, %ymm0, %ymm0
+; ALL32-NEXT: retl
+;
+; AVX-64-LABEL: f8xi32_i64:
+; AVX-64: # BB#0:
+; AVX-64-NEXT: vextractf128 $1, %ymm0, %xmm1
+; AVX-64-NEXT: vmovddup {{.*#+}} xmm2 = mem[0,0]
+; AVX-64-NEXT: vpaddd %xmm2, %xmm1, %xmm1
+; AVX-64-NEXT: vpaddd %xmm2, %xmm0, %xmm0
+; AVX-64-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX-64-NEXT: vandps {{.*}}(%rip), %ymm0, %ymm0
+; AVX-64-NEXT: retq
+;
+; ALL64-LABEL: f8xi32_i64:
+; ALL64: # BB#0:
+; ALL64-NEXT: vpbroadcastq {{.*#+}} ymm1 = [4294967296,4294967296,4294967296,4294967296]
+; ALL64-NEXT: vpaddd %ymm1, %ymm0, %ymm0
+; ALL64-NEXT: vpand %ymm1, %ymm0, %ymm0
+; ALL64-NEXT: retq
%res1 = add <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>, %a
%res2 = and <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>, %res1
ret <8 x i32> %res2
}
-; ALL: .LCPI23
-; ALL-NEXT: .long 0 # 0x0
-; ALL-NEXT: .long 1 # 0x1
-; ALL-NEXT: .long 2 # 0x2
-; ALL-NEXT: .long 3 # 0x3
-; ALL-NOT: .long
-
define <8 x i32> @f8xi32_i128(<8 x i32> %a) {
-; ALL-LABEL: f8xi32_i128:
-; ALL: # BB#0:
-; ALL-NEXT: vbroadcasti128 {{.*#+}} ymm1 = mem[0,1,0,1]
-; ALL-NEXT: vpaddd %ymm1, %ymm0, %ymm0
-; ALL-NEXT: vpand %ymm1, %ymm0, %ymm0
+; AVX-LABEL: f8xi32_i128:
+; AVX: # BB#0:
+; AVX-NEXT: vextractf128 $1, %ymm0, %xmm1
+; AVX-NEXT: vmovdqa {{.*#+}} xmm2 = [0,1,2,3]
+; AVX-NEXT: vpaddd %xmm2, %xmm1, %xmm1
+; AVX-NEXT: vpaddd %xmm2, %xmm0, %xmm0
+; AVX-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX-NEXT: vandps {{\.LCPI.*}}, %ymm0, %ymm0
+; AVX-NEXT: retl
+;
+; ALL32-LABEL: f8xi32_i128:
+; ALL32: # BB#0:
+; ALL32-NEXT: vbroadcasti128 {{.*#+}} ymm1 = [0,1,2,3,0,1,2,3]
+; ALL32-NEXT: # ymm1 = mem[0,1,0,1]
+; ALL32-NEXT: vpaddd %ymm1, %ymm0, %ymm0
+; ALL32-NEXT: vpand %ymm1, %ymm0, %ymm0
+; ALL32-NEXT: retl
+;
+; AVX-64-LABEL: f8xi32_i128:
+; AVX-64: # BB#0:
+; AVX-64-NEXT: vextractf128 $1, %ymm0, %xmm1
+; AVX-64-NEXT: vmovdqa {{.*#+}} xmm2 = [0,1,2,3]
+; AVX-64-NEXT: vpaddd %xmm2, %xmm1, %xmm1
+; AVX-64-NEXT: vpaddd %xmm2, %xmm0, %xmm0
+; AVX-64-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX-64-NEXT: vandps {{.*}}(%rip), %ymm0, %ymm0
+; AVX-64-NEXT: retq
+;
+; ALL64-LABEL: f8xi32_i128:
+; ALL64: # BB#0:
+; ALL64-NEXT: vbroadcasti128 {{.*#+}} ymm1 = [0,1,2,3,0,1,2,3]
+; ALL64-NEXT: # ymm1 = mem[0,1,0,1]
+; ALL64-NEXT: vpaddd %ymm1, %ymm0, %ymm0
+; ALL64-NEXT: vpand %ymm1, %ymm0, %ymm0
+; ALL64-NEXT: retq
%res1 = add <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>, %a
%res2 = and <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>, %res1
ret <8 x i32> %res2
}
-; ALL64: .LCPI24
-; ALL64-NEXT: .quad 4294967296 # 0x100000000
-
-; ALL32: .LCPI24
-; ALL32-NEXT: .quad 4294967296 # double 2.1219957909652723E-314
-
-; AVX: .LCPI24
-; AVX-NEXT: .quad 4294967296 # double 2.1219957909652723E-314
-
define <16 x i32> @f16xi32_i64(<16 x i32> %a) {
+; AVX-LABEL: f16xi32_i64:
+; AVX: # BB#0:
+; AVX-NEXT: vextractf128 $1, %ymm1, %xmm2
+; AVX-NEXT: vmovddup {{.*#+}} xmm3 = mem[0,0]
+; AVX-NEXT: vpaddd %xmm3, %xmm2, %xmm2
+; AVX-NEXT: vpaddd %xmm3, %xmm1, %xmm1
+; AVX-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1
+; AVX-NEXT: vextractf128 $1, %ymm0, %xmm2
+; AVX-NEXT: vpaddd %xmm3, %xmm2, %xmm2
+; AVX-NEXT: vpaddd %xmm3, %xmm0, %xmm0
+; AVX-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; AVX-NEXT: vmovaps {{.*#+}} ymm2 = [0,1,0,1,0,1,0,1]
+; AVX-NEXT: vandps %ymm2, %ymm0, %ymm0
+; AVX-NEXT: vandps %ymm2, %ymm1, %ymm1
+; AVX-NEXT: retl
+;
; AVX2-LABEL: f16xi32_i64:
; AVX2: # BB#0:
-; AVX2-NEXT: vpbroadcastq {{.*}}, %ymm2
+; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm2 = [2.1219957909652723E-314,2.1219957909652723E-314,2.1219957909652723E-314,2.1219957909652723E-314]
; AVX2-NEXT: vpaddd %ymm2, %ymm1, %ymm1
; AVX2-NEXT: vpaddd %ymm2, %ymm0, %ymm0
; AVX2-NEXT: vpand %ymm2, %ymm0, %ymm0
; AVX2-NEXT: vpand %ymm2, %ymm1, %ymm1
+; AVX2-NEXT: retl
;
; AVX512-LABEL: f16xi32_i64:
; AVX512: # BB#0:
-; AVX512-NEXT: vpbroadcastq {{.*}}, %zmm1
+; AVX512-NEXT: vpbroadcastq {{.*#+}} zmm1 = [2.1219957909652723E-314,2.1219957909652723E-314,2.1219957909652723E-314,2.1219957909652723E-314,2.1219957909652723E-314,2.1219957909652723E-314,2.1219957909652723E-314,2.1219957909652723E-314]
; AVX512-NEXT: vpaddd %zmm1, %zmm0, %zmm0
; AVX512-NEXT: vpandq %zmm1, %zmm0, %zmm0
+; AVX512-NEXT: retl
;
-; AVX-LABEL: f16xi32_i64:
+; AVX-64-LABEL: f16xi32_i64:
+; AVX-64: # BB#0:
+; AVX-64-NEXT: vextractf128 $1, %ymm1, %xmm2
+; AVX-64-NEXT: vmovddup {{.*#+}} xmm3 = mem[0,0]
+; AVX-64-NEXT: vpaddd %xmm3, %xmm2, %xmm2
+; AVX-64-NEXT: vpaddd %xmm3, %xmm1, %xmm1
+; AVX-64-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1
+; AVX-64-NEXT: vextractf128 $1, %ymm0, %xmm2
+; AVX-64-NEXT: vpaddd %xmm3, %xmm2, %xmm2
+; AVX-64-NEXT: vpaddd %xmm3, %xmm0, %xmm0
+; AVX-64-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; AVX-64-NEXT: vmovaps {{.*#+}} ymm2 = [0,1,0,1,0,1,0,1]
+; AVX-64-NEXT: vandps %ymm2, %ymm0, %ymm0
+; AVX-64-NEXT: vandps %ymm2, %ymm1, %ymm1
+; AVX-64-NEXT: retq
+;
+; AVX2-64-LABEL: f16xi32_i64:
+; AVX2-64: # BB#0:
+; AVX2-64-NEXT: vpbroadcastq {{.*#+}} ymm2 = [4294967296,4294967296,4294967296,4294967296]
+; AVX2-64-NEXT: vpaddd %ymm2, %ymm1, %ymm1
+; AVX2-64-NEXT: vpaddd %ymm2, %ymm0, %ymm0
+; AVX2-64-NEXT: vpand %ymm2, %ymm0, %ymm0
+; AVX2-64-NEXT: vpand %ymm2, %ymm1, %ymm1
+; AVX2-64-NEXT: retq
+;
+; AVX512F-64-LABEL: f16xi32_i64:
+; AVX512F-64: # BB#0:
+; AVX512F-64-NEXT: vpbroadcastq {{.*#+}} zmm1 = [4294967296,4294967296,4294967296,4294967296,4294967296,4294967296,4294967296,4294967296]
+; AVX512F-64-NEXT: vpaddd %zmm1, %zmm0, %zmm0
+; AVX512F-64-NEXT: vpandq %zmm1, %zmm0, %zmm0
+; AVX512F-64-NEXT: retq
+ %res1 = add <16 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>, %a
+ %res2 = and <16 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>, %res1
+ ret <16 x i32> %res2
+}
+
+
+define <16 x i32> @f16xi32_i128(<16 x i32> %a) {
+; AVX-LABEL: f16xi32_i128:
; AVX: # BB#0:
; AVX-NEXT: vextractf128 $1, %ymm1, %xmm2
-; AVX-NEXT: vmovddup {{.*#+}} xmm3 = mem[0,0]
+; AVX-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,2,3]
; AVX-NEXT: vpaddd %xmm3, %xmm2, %xmm2
; AVX-NEXT: vpaddd %xmm3, %xmm1, %xmm1
; AVX-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1
@@ -835,51 +1315,103 @@ define <16 x i32> @f16xi32_i64(<16 x i32> %a) {
; AVX-NEXT: vpaddd %xmm3, %xmm2, %xmm2
; AVX-NEXT: vpaddd %xmm3, %xmm0, %xmm0
; AVX-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
-; AVX-NEXT: vmovaps {{.*#+}} ymm2 = [0,1,0,1,0,1,0,1]
+; AVX-NEXT: vmovaps {{.*#+}} ymm2 = [0,1,2,3,0,1,2,3]
; AVX-NEXT: vandps %ymm2, %ymm0, %ymm0
; AVX-NEXT: vandps %ymm2, %ymm1, %ymm1
- %res1 = add <16 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>, %a
- %res2 = and <16 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>, %res1
- ret <16 x i32> %res2
-}
-
-
-; ALL: .LCPI25
-; ALL-NEXT: .long 0 # 0x0
-; ALL-NEXT: .long 1 # 0x1
-; ALL-NEXT: .long 2 # 0x2
-; ALL-NEXT: .long 3 # 0x3
-; ALL-NOT: .long
-
-define <16 x i32> @f16xi32_i128(<16 x i32> %a) {
+; AVX-NEXT: retl
+;
; AVX2-LABEL: f16xi32_i128:
; AVX2: # BB#0:
-; AVX2-NEXT: vbroadcasti128 {{.*#+}} ymm2 = mem[0,1,0,1]
+; AVX2-NEXT: vbroadcasti128 {{.*#+}} ymm2 = [0,1,2,3,0,1,2,3]
+; AVX2-NEXT: # ymm2 = mem[0,1,0,1]
; AVX2-NEXT: vpaddd %ymm2, %ymm1, %ymm1
; AVX2-NEXT: vpaddd %ymm2, %ymm0, %ymm0
; AVX2-NEXT: vpand %ymm2, %ymm0, %ymm0
; AVX2-NEXT: vpand %ymm2, %ymm1, %ymm1
+; AVX2-NEXT: retl
;
; AVX512-LABEL: f16xi32_i128:
; AVX512: # BB#0:
-; AVX512-NEXT: vbroadcasti32x4 {{.*#+}} zmm1 = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
+; AVX512-NEXT: vbroadcasti32x4 {{.*#+}} zmm1 = [0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
+; AVX512-NEXT: # zmm1 = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
; AVX512-NEXT: vpaddd %zmm1, %zmm0, %zmm0
; AVX512-NEXT: vpandd %zmm1, %zmm0, %zmm0
+; AVX512-NEXT: retl
+;
+; AVX-64-LABEL: f16xi32_i128:
+; AVX-64: # BB#0:
+; AVX-64-NEXT: vextractf128 $1, %ymm1, %xmm2
+; AVX-64-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,2,3]
+; AVX-64-NEXT: vpaddd %xmm3, %xmm2, %xmm2
+; AVX-64-NEXT: vpaddd %xmm3, %xmm1, %xmm1
+; AVX-64-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1
+; AVX-64-NEXT: vextractf128 $1, %ymm0, %xmm2
+; AVX-64-NEXT: vpaddd %xmm3, %xmm2, %xmm2
+; AVX-64-NEXT: vpaddd %xmm3, %xmm0, %xmm0
+; AVX-64-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; AVX-64-NEXT: vmovaps {{.*#+}} ymm2 = [0,1,2,3,0,1,2,3]
+; AVX-64-NEXT: vandps %ymm2, %ymm0, %ymm0
+; AVX-64-NEXT: vandps %ymm2, %ymm1, %ymm1
+; AVX-64-NEXT: retq
+;
+; AVX2-64-LABEL: f16xi32_i128:
+; AVX2-64: # BB#0:
+; AVX2-64-NEXT: vbroadcasti128 {{.*#+}} ymm2 = [0,1,2,3,0,1,2,3]
+; AVX2-64-NEXT: # ymm2 = mem[0,1,0,1]
+; AVX2-64-NEXT: vpaddd %ymm2, %ymm1, %ymm1
+; AVX2-64-NEXT: vpaddd %ymm2, %ymm0, %ymm0
+; AVX2-64-NEXT: vpand %ymm2, %ymm0, %ymm0
+; AVX2-64-NEXT: vpand %ymm2, %ymm1, %ymm1
+; AVX2-64-NEXT: retq
+;
+; AVX512F-64-LABEL: f16xi32_i128:
+; AVX512F-64: # BB#0:
+; AVX512F-64-NEXT: vbroadcasti32x4 {{.*#+}} zmm1 = [0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
+; AVX512F-64-NEXT: # zmm1 = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
+; AVX512F-64-NEXT: vpaddd %zmm1, %zmm0, %zmm0
+; AVX512F-64-NEXT: vpandd %zmm1, %zmm0, %zmm0
+; AVX512F-64-NEXT: retq
%res1 = add <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>, %a
%res2 = and <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>, %res1
ret <16 x i32> %res2
}
-; ALL64: .LCPI26
-; ALL64-NEXT: .quad 0 # 0x0
-; ALL64-NEXT: .quad 1 # 0x1
-; ALL64-NOT: .quad
-
define <4 x i64> @f4xi64_i128(<4 x i64> %a) {
+; AVX-LABEL: f4xi64_i128:
+; AVX: # BB#0:
+; AVX-NEXT: vmovdqa {{.*#+}} ymm1 = [0,0,1,0,0,0,1,0]
+; AVX-NEXT: vextractf128 $1, %ymm1, %xmm2
+; AVX-NEXT: vextractf128 $1, %ymm0, %xmm3
+; AVX-NEXT: vpaddq %xmm2, %xmm3, %xmm2
+; AVX-NEXT: vpaddq %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; AVX-NEXT: vandps %ymm1, %ymm0, %ymm0
+; AVX-NEXT: retl
+;
+; ALL32-LABEL: f4xi64_i128:
+; ALL32: # BB#0:
+; ALL32-NEXT: vmovdqa {{.*#+}} ymm1 = [0,0,1,0,0,0,1,0]
+; ALL32-NEXT: vpaddq %ymm1, %ymm0, %ymm0
+; ALL32-NEXT: vpand %ymm1, %ymm0, %ymm0
+; ALL32-NEXT: retl
+;
+; AVX-64-LABEL: f4xi64_i128:
+; AVX-64: # BB#0:
+; AVX-64-NEXT: vextractf128 $1, %ymm0, %xmm1
+; AVX-64-NEXT: movl $1, %eax
+; AVX-64-NEXT: vmovq %rax, %xmm2
+; AVX-64-NEXT: vpslldq {{.*#+}} xmm2 = zero,zero,zero,zero,zero,zero,zero,zero,xmm2[0,1,2,3,4,5,6,7]
+; AVX-64-NEXT: vpaddq %xmm2, %xmm1, %xmm1
+; AVX-64-NEXT: vpaddq %xmm2, %xmm0, %xmm0
+; AVX-64-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX-64-NEXT: vandps {{.*}}(%rip), %ymm0, %ymm0
+; AVX-64-NEXT: retq
+;
; ALL64-LABEL: f4xi64_i128:
; ALL64: # BB#0:
-; ALL64-NEXT: vbroadcasti128 {{.*#+}} ymm1 = mem[0,1,0,1]
+; ALL64-NEXT: vbroadcasti128 {{.*#+}} ymm1 = [0,1,0,1]
+; ALL64-NEXT: # ymm1 = mem[0,1,0,1]
; ALL64-NEXT: vpaddq %ymm1, %ymm0, %ymm0
; ALL64-NEXT: vpand %ymm1, %ymm0, %ymm0
; ALL64-NEXT: retq
@@ -889,15 +1421,62 @@ define <4 x i64> @f4xi64_i128(<4 x i64> %a) {
}
-; ALL64: .LCPI27
-; ALL64-NEXT: .quad 0 # 0x0
-; ALL64-NEXT: .quad 1 # 0x1
-; ALL64-NOT: .quad
-
define <8 x i64> @f8xi64_i128(<8 x i64> %a) {
+; AVX-LABEL: f8xi64_i128:
+; AVX: # BB#0:
+; AVX-NEXT: vmovdqa {{.*#+}} ymm2 = [0,0,1,0,0,0,1,0]
+; AVX-NEXT: vextractf128 $1, %ymm2, %xmm3
+; AVX-NEXT: vextractf128 $1, %ymm1, %xmm4
+; AVX-NEXT: vpaddq %xmm3, %xmm4, %xmm4
+; AVX-NEXT: vpaddq %xmm2, %xmm1, %xmm1
+; AVX-NEXT: vinsertf128 $1, %xmm4, %ymm1, %ymm1
+; AVX-NEXT: vextractf128 $1, %ymm0, %xmm4
+; AVX-NEXT: vpaddq %xmm3, %xmm4, %xmm3
+; AVX-NEXT: vpaddq %xmm2, %xmm0, %xmm0
+; AVX-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0
+; AVX-NEXT: vandps %ymm2, %ymm0, %ymm0
+; AVX-NEXT: vandps %ymm2, %ymm1, %ymm1
+; AVX-NEXT: retl
+;
+; AVX2-LABEL: f8xi64_i128:
+; AVX2: # BB#0:
+; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [0,0,1,0,0,0,1,0]
+; AVX2-NEXT: vpaddq %ymm2, %ymm1, %ymm1
+; AVX2-NEXT: vpaddq %ymm2, %ymm0, %ymm0
+; AVX2-NEXT: vpand %ymm2, %ymm0, %ymm0
+; AVX2-NEXT: vpand %ymm2, %ymm1, %ymm1
+; AVX2-NEXT: retl
+;
+; AVX512-LABEL: f8xi64_i128:
+; AVX512: # BB#0:
+; AVX512-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,0,1,0,0,0,1,0,0,0,1,0,0,0,1,0]
+; AVX512-NEXT: vpaddq %zmm1, %zmm0, %zmm0
+; AVX512-NEXT: vpandq %zmm1, %zmm0, %zmm0
+; AVX512-NEXT: retl
+;
+; AVX-64-LABEL: f8xi64_i128:
+; AVX-64: # BB#0:
+; AVX-64-NEXT: vextractf128 $1, %ymm1, %xmm2
+; AVX-64-NEXT: movl $1, %eax
+; AVX-64-NEXT: vmovq %rax, %xmm3
+; AVX-64-NEXT: vpslldq {{.*#+}} xmm3 = zero,zero,zero,zero,zero,zero,zero,zero,xmm3[0,1,2,3,4,5,6,7]
+; AVX-64-NEXT: vpaddq %xmm3, %xmm2, %xmm2
+; AVX-64-NEXT: vpaddq %xmm3, %xmm1, %xmm1
+; AVX-64-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1
+; AVX-64-NEXT: vextractf128 $1, %ymm0, %xmm2
+; AVX-64-NEXT: vpaddq %xmm3, %xmm2, %xmm2
+; AVX-64-NEXT: vpaddq %xmm3, %xmm0, %xmm0
+; AVX-64-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; AVX-64-NEXT: vbroadcastf128 {{.*#+}} ymm2 = [0,1,0,1]
+; AVX-64-NEXT: # ymm2 = mem[0,1,0,1]
+; AVX-64-NEXT: vandps %ymm2, %ymm0, %ymm0
+; AVX-64-NEXT: vandps %ymm2, %ymm1, %ymm1
+; AVX-64-NEXT: retq
+;
; AVX2-64-LABEL: f8xi64_i128:
; AVX2-64: # BB#0:
-; AVX2-64-NEXT: vbroadcasti128 {{.*#+}} ymm2 = mem[0,1,0,1]
+; AVX2-64-NEXT: vbroadcasti128 {{.*#+}} ymm2 = [0,1,0,1]
+; AVX2-64-NEXT: # ymm2 = mem[0,1,0,1]
; AVX2-64-NEXT: vpaddq %ymm2, %ymm1, %ymm1
; AVX2-64-NEXT: vpaddq %ymm2, %ymm0, %ymm0
; AVX2-64-NEXT: vpand %ymm2, %ymm0, %ymm0
@@ -906,57 +1485,99 @@ define <8 x i64> @f8xi64_i128(<8 x i64> %a) {
;
; AVX512F-64-LABEL: f8xi64_i128:
; AVX512F-64: # BB#0:
-; AVX512F-64-NEXT: vbroadcasti32x4 {{.*#+}} zmm1 = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
+; AVX512F-64-NEXT: vbroadcasti32x4 {{.*#+}} zmm1 = [0,1,0,1,0,1,0,1]
+; AVX512F-64-NEXT: # zmm1 = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
; AVX512F-64-NEXT: vpaddq %zmm1, %zmm0, %zmm0
; AVX512F-64-NEXT: vpandq %zmm1, %zmm0, %zmm0
; AVX512F-64-NEXT: retq
-;
-; AVX512BW-64-LABEL: f8xi64_i128:
-; AVX512BW-64: # BB#0:
-; AVX512BW-64-NEXT: vbroadcasti32x4 {{.*#+}} zmm1 = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
-; AVX512BW-64-NEXT: vpaddq %zmm1, %zmm0, %zmm0
-; AVX512BW-64-NEXT: vpandq %zmm1, %zmm0, %zmm0
-; AVX512BW-64-NEXT: retq
%res1 = add <8 x i64> <i64 0, i64 1, i64 0, i64 1, i64 0, i64 1, i64 0, i64 1>, %a
%res2 = and <8 x i64> <i64 0, i64 1, i64 0, i64 1, i64 0, i64 1, i64 0, i64 1>, %res1
ret <8 x i64> %res2
}
-; ALL64: .LCPI28
-; ALL64-NEXT: .quad 0 # 0x0
-; ALL64-NEXT: .quad 1 # 0x1
-; ALL64-NEXT: .quad 2 # 0x2
-; ALL64-NEXT: .quad 3 # 0x3
-; ALL64-NOT: .quad
-
define <8 x i64> @f8xi64_i256(<8 x i64> %a) {
+; AVX-LABEL: f8xi64_i256:
+; AVX: # BB#0:
+; AVX-NEXT: vmovdqa {{.*#+}} ymm2 = [0,0,1,0,2,0,3,0]
+; AVX-NEXT: vextractf128 $1, %ymm2, %xmm3
+; AVX-NEXT: vextractf128 $1, %ymm1, %xmm4
+; AVX-NEXT: vpaddq %xmm3, %xmm4, %xmm4
+; AVX-NEXT: vpaddq %xmm2, %xmm1, %xmm1
+; AVX-NEXT: vinsertf128 $1, %xmm4, %ymm1, %ymm1
+; AVX-NEXT: vextractf128 $1, %ymm0, %xmm4
+; AVX-NEXT: vpaddq %xmm3, %xmm4, %xmm3
+; AVX-NEXT: vpaddq %xmm2, %xmm0, %xmm0
+; AVX-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0
+; AVX-NEXT: vandps %ymm2, %ymm0, %ymm0
+; AVX-NEXT: vandps %ymm2, %ymm1, %ymm1
+; AVX-NEXT: retl
+;
+; AVX2-LABEL: f8xi64_i256:
+; AVX2: # BB#0:
+; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [0,0,1,0,2,0,3,0]
+; AVX2-NEXT: vpaddq %ymm2, %ymm1, %ymm1
+; AVX2-NEXT: vpaddq %ymm2, %ymm0, %ymm0
+; AVX2-NEXT: vpand %ymm2, %ymm0, %ymm0
+; AVX2-NEXT: vpand %ymm2, %ymm1, %ymm1
+; AVX2-NEXT: retl
+;
+; AVX512-LABEL: f8xi64_i256:
+; AVX512: # BB#0:
+; AVX512-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,0,1,0,2,0,3,0,0,0,1,0,2,0,3,0]
+; AVX512-NEXT: vpaddq %zmm1, %zmm0, %zmm0
+; AVX512-NEXT: vpandq %zmm1, %zmm0, %zmm0
+; AVX512-NEXT: retl
+;
+; AVX-64-LABEL: f8xi64_i256:
+; AVX-64: # BB#0:
+; AVX-64-NEXT: vextractf128 $1, %ymm1, %xmm2
+; AVX-64-NEXT: vmovdqa {{.*#+}} xmm3 = [2,3]
+; AVX-64-NEXT: vpaddq %xmm3, %xmm2, %xmm2
+; AVX-64-NEXT: movl $1, %eax
+; AVX-64-NEXT: vmovq %rax, %xmm4
+; AVX-64-NEXT: vpslldq {{.*#+}} xmm4 = zero,zero,zero,zero,zero,zero,zero,zero,xmm4[0,1,2,3,4,5,6,7]
+; AVX-64-NEXT: vpaddq %xmm4, %xmm1, %xmm1
+; AVX-64-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1
+; AVX-64-NEXT: vextractf128 $1, %ymm0, %xmm2
+; AVX-64-NEXT: vpaddq %xmm3, %xmm2, %xmm2
+; AVX-64-NEXT: vpaddq %xmm4, %xmm0, %xmm0
+; AVX-64-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; AVX-64-NEXT: vmovaps {{.*#+}} ymm2 = [0,1,2,3]
+; AVX-64-NEXT: vandps %ymm2, %ymm0, %ymm0
+; AVX-64-NEXT: vandps %ymm2, %ymm1, %ymm1
+; AVX-64-NEXT: retq
+;
+; AVX2-64-LABEL: f8xi64_i256:
+; AVX2-64: # BB#0:
+; AVX2-64-NEXT: vmovdqa {{.*#+}} ymm2 = [0,1,2,3]
+; AVX2-64-NEXT: vpaddq %ymm2, %ymm1, %ymm1
+; AVX2-64-NEXT: vpaddq %ymm2, %ymm0, %ymm0
+; AVX2-64-NEXT: vpand %ymm2, %ymm0, %ymm0
+; AVX2-64-NEXT: vpand %ymm2, %ymm1, %ymm1
+; AVX2-64-NEXT: retq
+;
; AVX512F-64-LABEL: f8xi64_i256:
; AVX512F-64: # BB#0:
-; AVX512F-64-NEXT: vbroadcasti64x4 {{.*#+}} zmm1 = mem[0,1,2,3,0,1,2,3]
+; AVX512F-64-NEXT: vbroadcasti64x4 {{.*#+}} zmm1 = [0,1,2,3,0,1,2,3]
+; AVX512F-64-NEXT: # zmm1 = mem[0,1,2,3,0,1,2,3]
; AVX512F-64-NEXT: vpaddq %zmm1, %zmm0, %zmm0
; AVX512F-64-NEXT: vpandq %zmm1, %zmm0, %zmm0
; AVX512F-64-NEXT: retq
-;
-; AVX512BW-64-LABEL: f8xi64_i256:
-; AVX512BW-64: # BB#0:
-; AVX512BW-64-NEXT: vbroadcasti64x4 {{.*#+}} zmm1 = mem[0,1,2,3,0,1,2,3]
-; AVX512BW-64-NEXT: vpaddq %zmm1, %zmm0, %zmm0
-; AVX512BW-64-NEXT: vpandq %zmm1, %zmm0, %zmm0
-; AVX512BW-64-NEXT: retq
%res1 = add <8 x i64> <i64 0, i64 1, i64 2, i64 3, i64 0, i64 1, i64 2, i64 3>, %a
%res2 = and <8 x i64> <i64 0, i64 1, i64 2, i64 3, i64 0, i64 1, i64 2, i64 3>, %res1
ret <8 x i64> %res2
}
-; ALL: .LCPI29
-; ALL-NEXT: .quad 4575657222482165760
-
-; AVX: .LCPI29
-; AVX-NEXT: .quad 4575657222482165760 # double 0.0078125018626451492
-
define <4 x float> @f4xf32_f64(<4 x float> %a) {
+; AVX-LABEL: f4xf32_f64:
+; AVX: # BB#0:
+; AVX-NEXT: vmovddup {{.*#+}} xmm1 = mem[0,0]
+; AVX-NEXT: vaddps %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vdivps %xmm0, %xmm1, %xmm0
+; AVX-NEXT: retl
+;
; ALL32-LABEL: f4xf32_f64:
; ALL32: # BB#0:
; ALL32-NEXT: vmovddup {{.*#+}} xmm1 = mem[0,0]
@@ -964,221 +1585,367 @@ define <4 x float> @f4xf32_f64(<4 x float> %a) {
; ALL32-NEXT: vdivps %xmm0, %xmm1, %xmm0
; ALL32-NEXT: retl
;
+; AVX-64-LABEL: f4xf32_f64:
+; AVX-64: # BB#0:
+; AVX-64-NEXT: vmovddup {{.*#+}} xmm1 = mem[0,0]
+; AVX-64-NEXT: vaddps %xmm1, %xmm0, %xmm0
+; AVX-64-NEXT: vdivps %xmm0, %xmm1, %xmm0
+; AVX-64-NEXT: retq
+;
; ALL64-LABEL: f4xf32_f64:
; ALL64: # BB#0:
-; ALL64-NEXT: vpbroadcastq {{.*}}(%rip), %xmm1
+; ALL64-NEXT: vpbroadcastq {{.*#+}} xmm1 = [4575657222482165760,4575657222482165760]
; ALL64-NEXT: vaddps %xmm1, %xmm0, %xmm0
; ALL64-NEXT: vdivps %xmm0, %xmm1, %xmm0
; ALL64-NEXT: retq
-;
-; AVX-LABEL: f4xf32_f64:
-; AVX: # BB#0:
-; AVX-NEXT: vmovddup {{.*#+}} xmm1 = mem[0,0]
-; AVX-NEXT: vaddps %xmm1, %xmm0, %xmm0
-; AVX-NEXT: vdivps %xmm0, %xmm1, %xmm0
%res1 = fadd <4 x float> <float 2.0, float 1.0, float 2.0, float 1.0>, %a
%res2 = fdiv <4 x float> <float 2.0, float 1.0, float 2.0, float 1.0>, %res1
ret <4 x float> %res2
}
-; ALL64: .LCPI30
-; ALL64-NEXT: .quad 4575657222482165760 # 0x3f80000040000000
-
-; ALL32: .LCPI30
-; ALL32-NEXT: .quad 4575657222482165760 # double 0.0078125018626451492
-
-; AVX: .LCPI30
-; AVX-NEXT: .quad 4575657222482165760 # double 0.0078125018626451492
-
define <8 x float> @f8xf32_f64(<8 x float> %a) {
-; ALL-LABEL: f8xf32_f64:
-; ALL: # BB#0:
-; ALL-NEXT: vbroadcastsd {{.*}}, %ymm1
-; ALL-NEXT: vaddps %ymm1, %ymm0, %ymm0
-; ALL-NEXT: vdivps %ymm0, %ymm1, %ymm0
-;
; AVX-LABEL: f8xf32_f64:
; AVX: # BB#0:
-; AVX-NEXT: vbroadcastsd {{\.LCPI.*}}, %ymm1
+; AVX-NEXT: vbroadcastsd {{.*#+}} ymm1 = [0.0078125018626451492,0.0078125018626451492,0.0078125018626451492,0.0078125018626451492]
; AVX-NEXT: vaddps %ymm1, %ymm0, %ymm0
; AVX-NEXT: vdivps %ymm0, %ymm1, %ymm0
+; AVX-NEXT: retl
+;
+; ALL32-LABEL: f8xf32_f64:
+; ALL32: # BB#0:
+; ALL32-NEXT: vbroadcastsd {{.*#+}} ymm1 = [0.0078125018626451492,0.0078125018626451492,0.0078125018626451492,0.0078125018626451492]
+; ALL32-NEXT: vaddps %ymm1, %ymm0, %ymm0
+; ALL32-NEXT: vdivps %ymm0, %ymm1, %ymm0
+; ALL32-NEXT: retl
+;
+; AVX-64-LABEL: f8xf32_f64:
+; AVX-64: # BB#0:
+; AVX-64-NEXT: vbroadcastsd {{.*#+}} ymm1 = [0.0078125018626451492,0.0078125018626451492,0.0078125018626451492,0.0078125018626451492]
+; AVX-64-NEXT: vaddps %ymm1, %ymm0, %ymm0
+; AVX-64-NEXT: vdivps %ymm0, %ymm1, %ymm0
+; AVX-64-NEXT: retq
+;
+; ALL64-LABEL: f8xf32_f64:
+; ALL64: # BB#0:
+; ALL64-NEXT: vbroadcastsd {{.*#+}} ymm1 = [4575657222482165760,4575657222482165760,4575657222482165760,4575657222482165760]
+; ALL64-NEXT: vaddps %ymm1, %ymm0, %ymm0
+; ALL64-NEXT: vdivps %ymm0, %ymm1, %ymm0
+; ALL64-NEXT: retq
%res1 = fadd <8 x float> <float 2.0, float 1.0, float 2.0, float 1.0, float 2.0, float 1.0, float 2.0, float 1.0>, %a
%res2 = fdiv <8 x float> <float 2.0, float 1.0, float 2.0, float 1.0, float 2.0, float 1.0, float 2.0, float 1.0>, %res1
ret <8 x float> %res2
}
-; ALL: .LCPI31
-; ALL-NEXT: .long 1082130432 # float 4
-; ALL-NEXT: .long 1065353216 # float 1
-; ALL-NEXT: .long 1073741824 # float 2
-; ALL-NEXT: .long 1077936128 # float 3
-; ALL-NOT: .long
-
define <8 x float> @f8xf32_f128(<8 x float> %a) {
-; ALL-LABEL: f8xf32_f128:
-; ALL: # BB#0:
-; ALL-NEXT: vbroadcastf128 {{.*#+}} ymm1 = mem[0,1,0,1]
-; ALL-NEXT: vaddps %ymm1, %ymm0, %ymm0
-; ALL-NEXT: vdivps %ymm0, %ymm1, %ymm0
-;
; AVX-LABEL: f8xf32_f128:
; AVX: # BB#0:
-; AVX-NEXT: vbroadcastf128 {{.*#+}} ymm1 = mem[0,1,0,1]
+; AVX-NEXT: vbroadcastf128 {{.*#+}} ymm1 = [4.000000e+00,1.000000e+00,2.000000e+00,3.000000e+00,4.000000e+00,1.000000e+00,2.000000e+00,3.000000e+00]
+; AVX-NEXT: # ymm1 = mem[0,1,0,1]
; AVX-NEXT: vaddps %ymm1, %ymm0, %ymm0
; AVX-NEXT: vdivps %ymm0, %ymm1, %ymm0
+; AVX-NEXT: retl
+;
+; ALL32-LABEL: f8xf32_f128:
+; ALL32: # BB#0:
+; ALL32-NEXT: vbroadcastf128 {{.*#+}} ymm1 = [4.000000e+00,1.000000e+00,2.000000e+00,3.000000e+00,4.000000e+00,1.000000e+00,2.000000e+00,3.000000e+00]
+; ALL32-NEXT: # ymm1 = mem[0,1,0,1]
+; ALL32-NEXT: vaddps %ymm1, %ymm0, %ymm0
+; ALL32-NEXT: vdivps %ymm0, %ymm1, %ymm0
+; ALL32-NEXT: retl
+;
+; AVX-64-LABEL: f8xf32_f128:
+; AVX-64: # BB#0:
+; AVX-64-NEXT: vbroadcastf128 {{.*#+}} ymm1 = [4.000000e+00,1.000000e+00,2.000000e+00,3.000000e+00,4.000000e+00,1.000000e+00,2.000000e+00,3.000000e+00]
+; AVX-64-NEXT: # ymm1 = mem[0,1,0,1]
+; AVX-64-NEXT: vaddps %ymm1, %ymm0, %ymm0
+; AVX-64-NEXT: vdivps %ymm0, %ymm1, %ymm0
+; AVX-64-NEXT: retq
+;
+; ALL64-LABEL: f8xf32_f128:
+; ALL64: # BB#0:
+; ALL64-NEXT: vbroadcastf128 {{.*#+}} ymm1 = [4.000000e+00,1.000000e+00,2.000000e+00,3.000000e+00,4.000000e+00,1.000000e+00,2.000000e+00,3.000000e+00]
+; ALL64-NEXT: # ymm1 = mem[0,1,0,1]
+; ALL64-NEXT: vaddps %ymm1, %ymm0, %ymm0
+; ALL64-NEXT: vdivps %ymm0, %ymm1, %ymm0
+; ALL64-NEXT: retq
%res1 = fadd <8 x float> <float 4.0, float 1.0, float 2.0, float 3.0, float 4.0, float 1.0, float 2.0, float 3.0>, %a
%res2 = fdiv <8 x float> <float 4.0, float 1.0, float 2.0, float 3.0, float 4.0, float 1.0, float 2.0, float 3.0>, %res1
ret <8 x float> %res2
}
-; ALL64: .LCPI32
-; ALL64-NEXT: .quad 4575657222482165760 # 0x3f80000040000000
-
-; ALL32: .LCPI32
-; ALL32-NEXT: .quad 4575657222482165760 # double 0.0078125018626451492
-
-; AVX: .LCPI32
-; AVX-NEXT: .quad 4575657222482165760 # double 0.0078125018626451492
-
define <16 x float> @f16xf32_f64(<16 x float> %a) {
+; AVX-LABEL: f16xf32_f64:
+; AVX: # BB#0:
+; AVX-NEXT: vbroadcastsd {{.*#+}} ymm2 = [0.0078125018626451492,0.0078125018626451492,0.0078125018626451492,0.0078125018626451492]
+; AVX-NEXT: vaddps %ymm2, %ymm1, %ymm1
+; AVX-NEXT: vaddps %ymm2, %ymm0, %ymm0
+; AVX-NEXT: vdivps %ymm0, %ymm2, %ymm0
+; AVX-NEXT: vdivps %ymm1, %ymm2, %ymm1
+; AVX-NEXT: retl
+;
; AVX2-LABEL: f16xf32_f64:
; AVX2: # BB#0:
-; AVX2-NEXT: vbroadcastsd {{.*}}, %ymm2
+; AVX2-NEXT: vbroadcastsd {{.*#+}} ymm2 = [0.0078125018626451492,0.0078125018626451492,0.0078125018626451492,0.0078125018626451492]
; AVX2-NEXT: vaddps %ymm2, %ymm1, %ymm1
; AVX2-NEXT: vaddps %ymm2, %ymm0, %ymm0
; AVX2-NEXT: vdivps %ymm0, %ymm2, %ymm0
; AVX2-NEXT: vdivps %ymm1, %ymm2, %ymm1
+; AVX2-NEXT: retl
;
; AVX512-LABEL: f16xf32_f64:
; AVX512: # BB#0:
-; AVX512-NEXT: vbroadcastsd {{.*}}, %zmm1
+; AVX512-NEXT: vbroadcastsd {{.*#+}} zmm1 = [0.0078125018626451492,0.0078125018626451492,0.0078125018626451492,0.0078125018626451492,0.0078125018626451492,0.0078125018626451492,0.0078125018626451492,0.0078125018626451492]
; AVX512-NEXT: vaddps %zmm1, %zmm0, %zmm0
; AVX512-NEXT: vdivps %zmm0, %zmm1, %zmm0
+; AVX512-NEXT: retl
;
-; AVX-LABEL: f16xf32_f64:
-; AVX: # BB#0:
-; AVX-NEXT: vbroadcastsd {{\.LCPI.*}}, %ymm2
-; AVX-NEXT: vaddps %ymm2, %ymm1, %ymm1
-; AVX-NEXT: vaddps %ymm2, %ymm0, %ymm0
-; AVX-NEXT: vdivps %ymm0, %ymm2, %ymm0
-; AVX-NEXT: vdivps %ymm1, %ymm2, %ymm1
+; AVX-64-LABEL: f16xf32_f64:
+; AVX-64: # BB#0:
+; AVX-64-NEXT: vbroadcastsd {{.*#+}} ymm2 = [0.0078125018626451492,0.0078125018626451492,0.0078125018626451492,0.0078125018626451492]
+; AVX-64-NEXT: vaddps %ymm2, %ymm1, %ymm1
+; AVX-64-NEXT: vaddps %ymm2, %ymm0, %ymm0
+; AVX-64-NEXT: vdivps %ymm0, %ymm2, %ymm0
+; AVX-64-NEXT: vdivps %ymm1, %ymm2, %ymm1
+; AVX-64-NEXT: retq
+;
+; AVX2-64-LABEL: f16xf32_f64:
+; AVX2-64: # BB#0:
+; AVX2-64-NEXT: vbroadcastsd {{.*#+}} ymm2 = [4575657222482165760,4575657222482165760,4575657222482165760,4575657222482165760]
+; AVX2-64-NEXT: vaddps %ymm2, %ymm1, %ymm1
+; AVX2-64-NEXT: vaddps %ymm2, %ymm0, %ymm0
+; AVX2-64-NEXT: vdivps %ymm0, %ymm2, %ymm0
+; AVX2-64-NEXT: vdivps %ymm1, %ymm2, %ymm1
+; AVX2-64-NEXT: retq
+;
+; AVX512F-64-LABEL: f16xf32_f64:
+; AVX512F-64: # BB#0:
+; AVX512F-64-NEXT: vbroadcastsd {{.*#+}} zmm1 = [4575657222482165760,4575657222482165760,4575657222482165760,4575657222482165760,4575657222482165760,4575657222482165760,4575657222482165760,4575657222482165760]
+; AVX512F-64-NEXT: vaddps %zmm1, %zmm0, %zmm0
+; AVX512F-64-NEXT: vdivps %zmm0, %zmm1, %zmm0
+; AVX512F-64-NEXT: retq
%res1 = fadd <16 x float> <float 2.0, float 1.0, float 2.0, float 1.0, float 2.0, float 1.0, float 2.0, float 1.0, float 2.0, float 1.0, float 2.0, float 1.0, float 2.0, float 1.0, float 2.0, float 1.0>, %a
%res2 = fdiv <16 x float> <float 2.0, float 1.0, float 2.0, float 1.0, float 2.0, float 1.0, float 2.0, float 1.0, float 2.0, float 1.0, float 2.0, float 1.0, float 2.0, float 1.0, float 2.0, float 1.0>, %res1
ret <16 x float> %res2
}
-; ALL: .LCPI33
-; ALL-NEXT: .long 1082130432 # float 4
-; ALL-NEXT: .long 1065353216 # float 1
-; ALL-NEXT: .long 1073741824 # float 2
-; ALL-NEXT: .long 1077936128 # float 3
-; ALL-NOT: .long
-
define <16 x float> @f16xf32_f128(<16 x float> %a) {
+; AVX-LABEL: f16xf32_f128:
+; AVX: # BB#0:
+; AVX-NEXT: vbroadcastf128 {{.*#+}} ymm2 = [4.000000e+00,1.000000e+00,2.000000e+00,3.000000e+00,4.000000e+00,1.000000e+00,2.000000e+00,3.000000e+00]
+; AVX-NEXT: # ymm2 = mem[0,1,0,1]
+; AVX-NEXT: vaddps %ymm2, %ymm1, %ymm1
+; AVX-NEXT: vaddps %ymm2, %ymm0, %ymm0
+; AVX-NEXT: vdivps %ymm0, %ymm2, %ymm0
+; AVX-NEXT: vdivps %ymm1, %ymm2, %ymm1
+; AVX-NEXT: retl
+;
; AVX2-LABEL: f16xf32_f128:
; AVX2: # BB#0:
-; AVX2-NEXT: vbroadcastf128 {{.*#+}} ymm2 = mem[0,1,0,1]
+; AVX2-NEXT: vbroadcastf128 {{.*#+}} ymm2 = [4.000000e+00,1.000000e+00,2.000000e+00,3.000000e+00,4.000000e+00,1.000000e+00,2.000000e+00,3.000000e+00]
+; AVX2-NEXT: # ymm2 = mem[0,1,0,1]
; AVX2-NEXT: vaddps %ymm2, %ymm1, %ymm1
; AVX2-NEXT: vaddps %ymm2, %ymm0, %ymm0
; AVX2-NEXT: vdivps %ymm0, %ymm2, %ymm0
; AVX2-NEXT: vdivps %ymm1, %ymm2, %ymm1
+; AVX2-NEXT: retl
;
; AVX512-LABEL: f16xf32_f128:
; AVX512: # BB#0:
-; AVX512-NEXT: vbroadcastf32x4 {{.*#+}} zmm1 = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
+; AVX512-NEXT: vbroadcastf32x4 {{.*#+}} zmm1 = [4.000000e+00,1.000000e+00,2.000000e+00,3.000000e+00,4.000000e+00,1.000000e+00,2.000000e+00,3.000000e+00,4.000000e+00,1.000000e+00,2.000000e+00,3.000000e+00,4.000000e+00,1.000000e+00,2.000000e+00,3.000000e+00]
+; AVX512-NEXT: # zmm1 = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
; AVX512-NEXT: vaddps %zmm1, %zmm0, %zmm0
; AVX512-NEXT: vdivps %zmm0, %zmm1, %zmm0
+; AVX512-NEXT: retl
;
-; AVX-LABEL: f16xf32_f128:
-; AVX: # BB#0:
-; AVX-NEXT: vbroadcastf128 {{.*#+}} ymm2 = mem[0,1,0,1]
-; AVX-NEXT: vaddps %ymm2, %ymm1, %ymm1
-; AVX-NEXT: vaddps %ymm2, %ymm0, %ymm0
-; AVX-NEXT: vdivps %ymm0, %ymm2, %ymm0
-; AVX-NEXT: vdivps %ymm1, %ymm2, %ymm1
+; AVX-64-LABEL: f16xf32_f128:
+; AVX-64: # BB#0:
+; AVX-64-NEXT: vbroadcastf128 {{.*#+}} ymm2 = [4.000000e+00,1.000000e+00,2.000000e+00,3.000000e+00,4.000000e+00,1.000000e+00,2.000000e+00,3.000000e+00]
+; AVX-64-NEXT: # ymm2 = mem[0,1,0,1]
+; AVX-64-NEXT: vaddps %ymm2, %ymm1, %ymm1
+; AVX-64-NEXT: vaddps %ymm2, %ymm0, %ymm0
+; AVX-64-NEXT: vdivps %ymm0, %ymm2, %ymm0
+; AVX-64-NEXT: vdivps %ymm1, %ymm2, %ymm1
+; AVX-64-NEXT: retq
+;
+; AVX2-64-LABEL: f16xf32_f128:
+; AVX2-64: # BB#0:
+; AVX2-64-NEXT: vbroadcastf128 {{.*#+}} ymm2 = [4.000000e+00,1.000000e+00,2.000000e+00,3.000000e+00,4.000000e+00,1.000000e+00,2.000000e+00,3.000000e+00]
+; AVX2-64-NEXT: # ymm2 = mem[0,1,0,1]
+; AVX2-64-NEXT: vaddps %ymm2, %ymm1, %ymm1
+; AVX2-64-NEXT: vaddps %ymm2, %ymm0, %ymm0
+; AVX2-64-NEXT: vdivps %ymm0, %ymm2, %ymm0
+; AVX2-64-NEXT: vdivps %ymm1, %ymm2, %ymm1
+; AVX2-64-NEXT: retq
+;
+; AVX512F-64-LABEL: f16xf32_f128:
+; AVX512F-64: # BB#0:
+; AVX512F-64-NEXT: vbroadcastf32x4 {{.*#+}} zmm1 = [4.000000e+00,1.000000e+00,2.000000e+00,3.000000e+00,4.000000e+00,1.000000e+00,2.000000e+00,3.000000e+00,4.000000e+00,1.000000e+00,2.000000e+00,3.000000e+00,4.000000e+00,1.000000e+00,2.000000e+00,3.000000e+00]
+; AVX512F-64-NEXT: # zmm1 = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
+; AVX512F-64-NEXT: vaddps %zmm1, %zmm0, %zmm0
+; AVX512F-64-NEXT: vdivps %zmm0, %zmm1, %zmm0
+; AVX512F-64-NEXT: retq
%res1 = fadd <16 x float> <float 4.0, float 1.0, float 2.0, float 3.0, float 4.0, float 1.0, float 2.0, float 3.0, float 4.0, float 1.0, float 2.0, float 3.0, float 4.0, float 1.0, float 2.0, float 3.0>, %a
%res2 = fdiv <16 x float> <float 4.0, float 1.0, float 2.0, float 3.0, float 4.0, float 1.0, float 2.0, float 3.0, float 4.0, float 1.0, float 2.0, float 3.0, float 4.0, float 1.0, float 2.0, float 3.0>, %res1
ret <16 x float> %res2
}
-; AVX512: .LCPI34
-; AVX512-NEXT: .long 1090519040 # float 8
-; AVX512-NEXT: .long 1065353216 # float 1
-; AVX512-NEXT: .long 1073741824 # float 2
-; AVX512-NEXT: .long 1077936128 # float 3
-; AVX512-NEXT: .long 1082130432 # float 4
-; AVX512-NEXT: .long 1084227584 # float 5
-; AVX512-NEXT: .long 1086324736 # float 6
-; AVX512-NEXT: .long 1088421888 # float 7
-; AVX512-NOT: .long
-
define <16 x float> @f16xf32_f256(<16 x float> %a) {
+; AVX-LABEL: f16xf32_f256:
+; AVX: # BB#0:
+; AVX-NEXT: vmovaps {{.*#+}} ymm2 = [8.000000e+00,1.000000e+00,2.000000e+00,3.000000e+00,4.000000e+00,5.000000e+00,6.000000e+00,7.000000e+00]
+; AVX-NEXT: vaddps %ymm2, %ymm1, %ymm1
+; AVX-NEXT: vaddps %ymm2, %ymm0, %ymm0
+; AVX-NEXT: vdivps %ymm0, %ymm2, %ymm0
+; AVX-NEXT: vdivps %ymm1, %ymm2, %ymm1
+; AVX-NEXT: retl
+;
+; AVX2-LABEL: f16xf32_f256:
+; AVX2: # BB#0:
+; AVX2-NEXT: vmovaps {{.*#+}} ymm2 = [8.000000e+00,1.000000e+00,2.000000e+00,3.000000e+00,4.000000e+00,5.000000e+00,6.000000e+00,7.000000e+00]
+; AVX2-NEXT: vaddps %ymm2, %ymm1, %ymm1
+; AVX2-NEXT: vaddps %ymm2, %ymm0, %ymm0
+; AVX2-NEXT: vdivps %ymm0, %ymm2, %ymm0
+; AVX2-NEXT: vdivps %ymm1, %ymm2, %ymm1
+; AVX2-NEXT: retl
+;
; AVX512-LABEL: f16xf32_f256:
; AVX512: # BB#0:
-; AVX512-NEXT: vbroadcastf64x4 {{.*#+}} zmm1 = mem[0,1,2,3,0,1,2,3]
+; AVX512-NEXT: vbroadcastf64x4 {{.*#+}} zmm1 = [8.000000e+00,1.000000e+00,2.000000e+00,3.000000e+00,4.000000e+00,5.000000e+00,6.000000e+00,7.000000e+00,8.000000e+00,1.000000e+00,2.000000e+00,3.000000e+00,4.000000e+00,5.000000e+00,6.000000e+00,7.000000e+00]
+; AVX512-NEXT: # zmm1 = mem[0,1,2,3,0,1,2,3]
; AVX512-NEXT: vaddps %zmm1, %zmm0, %zmm0
; AVX512-NEXT: vdivps %zmm0, %zmm1, %zmm0
+; AVX512-NEXT: retl
+;
+; AVX-64-LABEL: f16xf32_f256:
+; AVX-64: # BB#0:
+; AVX-64-NEXT: vmovaps {{.*#+}} ymm2 = [8.000000e+00,1.000000e+00,2.000000e+00,3.000000e+00,4.000000e+00,5.000000e+00,6.000000e+00,7.000000e+00]
+; AVX-64-NEXT: vaddps %ymm2, %ymm1, %ymm1
+; AVX-64-NEXT: vaddps %ymm2, %ymm0, %ymm0
+; AVX-64-NEXT: vdivps %ymm0, %ymm2, %ymm0
+; AVX-64-NEXT: vdivps %ymm1, %ymm2, %ymm1
+; AVX-64-NEXT: retq
+;
+; AVX2-64-LABEL: f16xf32_f256:
+; AVX2-64: # BB#0:
+; AVX2-64-NEXT: vmovaps {{.*#+}} ymm2 = [8.000000e+00,1.000000e+00,2.000000e+00,3.000000e+00,4.000000e+00,5.000000e+00,6.000000e+00,7.000000e+00]
+; AVX2-64-NEXT: vaddps %ymm2, %ymm1, %ymm1
+; AVX2-64-NEXT: vaddps %ymm2, %ymm0, %ymm0
+; AVX2-64-NEXT: vdivps %ymm0, %ymm2, %ymm0
+; AVX2-64-NEXT: vdivps %ymm1, %ymm2, %ymm1
+; AVX2-64-NEXT: retq
+;
+; AVX512F-64-LABEL: f16xf32_f256:
+; AVX512F-64: # BB#0:
+; AVX512F-64-NEXT: vbroadcastf64x4 {{.*#+}} zmm1 = [8.000000e+00,1.000000e+00,2.000000e+00,3.000000e+00,4.000000e+00,5.000000e+00,6.000000e+00,7.000000e+00,8.000000e+00,1.000000e+00,2.000000e+00,3.000000e+00,4.000000e+00,5.000000e+00,6.000000e+00,7.000000e+00]
+; AVX512F-64-NEXT: # zmm1 = mem[0,1,2,3,0,1,2,3]
+; AVX512F-64-NEXT: vaddps %zmm1, %zmm0, %zmm0
+; AVX512F-64-NEXT: vdivps %zmm0, %zmm1, %zmm0
+; AVX512F-64-NEXT: retq
%res1 = fadd <16 x float> <float 8.0, float 1.0, float 2.0, float 3.0, float 4.0, float 5.0, float 6.0, float 7.0, float 8.0, float 1.0, float 2.0, float 3.0, float 4.0, float 5.0, float 6.0, float 7.0>, %a
%res2 = fdiv <16 x float> <float 8.0, float 1.0, float 2.0, float 3.0, float 4.0, float 5.0, float 6.0, float 7.0, float 8.0, float 1.0, float 2.0, float 3.0, float 4.0, float 5.0, float 6.0, float 7.0>, %res1
ret <16 x float> %res2
}
-; ALL: .LCPI35
-; ALL-NEXT: .quad 4611686018427387904 # double 2
-; ALL-NEXT: .quad 4607182418800017408 # double 1
-; ALL-NOT: .quad
-
define <4 x double> @f4xf64_f128(<4 x double> %a) {
-; ALL-LABEL: f4xf64_f128:
-; ALL: # BB#0:
-; ALL-NEXT: vbroadcastf128 {{.*#+}} ymm1 = mem[0,1,0,1]
-; ALL-NEXT: vaddpd %ymm1, %ymm0, %ymm0
-; ALL-NEXT: vdivpd %ymm0, %ymm1, %ymm0
-;
; AVX-LABEL: f4xf64_f128:
; AVX: # BB#0:
-; AVX-NEXT: vbroadcastf128 {{.*#+}} ymm1 = mem[0,1,0,1]
+; AVX-NEXT: vbroadcastf128 {{.*#+}} ymm1 = [2.000000e+00,1.000000e+00,2.000000e+00,1.000000e+00]
+; AVX-NEXT: # ymm1 = mem[0,1,0,1]
; AVX-NEXT: vaddpd %ymm1, %ymm0, %ymm0
; AVX-NEXT: vdivpd %ymm0, %ymm1, %ymm0
+; AVX-NEXT: retl
+;
+; ALL32-LABEL: f4xf64_f128:
+; ALL32: # BB#0:
+; ALL32-NEXT: vbroadcastf128 {{.*#+}} ymm1 = [2.000000e+00,1.000000e+00,2.000000e+00,1.000000e+00]
+; ALL32-NEXT: # ymm1 = mem[0,1,0,1]
+; ALL32-NEXT: vaddpd %ymm1, %ymm0, %ymm0
+; ALL32-NEXT: vdivpd %ymm0, %ymm1, %ymm0
+; ALL32-NEXT: retl
+;
+; AVX-64-LABEL: f4xf64_f128:
+; AVX-64: # BB#0:
+; AVX-64-NEXT: vbroadcastf128 {{.*#+}} ymm1 = [2.000000e+00,1.000000e+00,2.000000e+00,1.000000e+00]
+; AVX-64-NEXT: # ymm1 = mem[0,1,0,1]
+; AVX-64-NEXT: vaddpd %ymm1, %ymm0, %ymm0
+; AVX-64-NEXT: vdivpd %ymm0, %ymm1, %ymm0
+; AVX-64-NEXT: retq
+;
+; ALL64-LABEL: f4xf64_f128:
+; ALL64: # BB#0:
+; ALL64-NEXT: vbroadcastf128 {{.*#+}} ymm1 = [2.000000e+00,1.000000e+00,2.000000e+00,1.000000e+00]
+; ALL64-NEXT: # ymm1 = mem[0,1,0,1]
+; ALL64-NEXT: vaddpd %ymm1, %ymm0, %ymm0
+; ALL64-NEXT: vdivpd %ymm0, %ymm1, %ymm0
+; ALL64-NEXT: retq
%res1 = fadd <4 x double> <double 2.0, double 1.0, double 2.0, double 1.0>, %a
%res2 = fdiv <4 x double> <double 2.0, double 1.0, double 2.0, double 1.0>, %res1
ret <4 x double> %res2
}
-; ALL: .LCPI36
-; ALL-NEXT: .quad 4611686018427387904 # double 2
-; ALL-NEXT: .quad 4607182418800017408 # double 1
-; ALL-NOT: .quad
-
define <8 x double> @f8xf64_f128(<8 x double> %a) {
+; AVX-LABEL: f8xf64_f128:
+; AVX: # BB#0:
+; AVX-NEXT: vbroadcastf128 {{.*#+}} ymm2 = [2.000000e+00,1.000000e+00,2.000000e+00,1.000000e+00]
+; AVX-NEXT: # ymm2 = mem[0,1,0,1]
+; AVX-NEXT: vaddpd %ymm2, %ymm1, %ymm1
+; AVX-NEXT: vaddpd %ymm2, %ymm0, %ymm0
+; AVX-NEXT: vdivpd %ymm0, %ymm2, %ymm0
+; AVX-NEXT: vdivpd %ymm1, %ymm2, %ymm1
+; AVX-NEXT: retl
+;
; AVX2-LABEL: f8xf64_f128:
; AVX2: # BB#0:
-; AVX2-NEXT: vbroadcastf128 {{.*#+}} ymm2 = mem[0,1,0,1]
+; AVX2-NEXT: vbroadcastf128 {{.*#+}} ymm2 = [2.000000e+00,1.000000e+00,2.000000e+00,1.000000e+00]
+; AVX2-NEXT: # ymm2 = mem[0,1,0,1]
; AVX2-NEXT: vaddpd %ymm2, %ymm1, %ymm1
; AVX2-NEXT: vaddpd %ymm2, %ymm0, %ymm0
; AVX2-NEXT: vdivpd %ymm0, %ymm2, %ymm0
; AVX2-NEXT: vdivpd %ymm1, %ymm2, %ymm1
+; AVX2-NEXT: retl
;
; AVX512-LABEL: f8xf64_f128:
; AVX512: # BB#0:
-; AVX512-NEXT: vbroadcastf32x4 {{.*#+}} zmm1 = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
+; AVX512-NEXT: vbroadcastf32x4 {{.*#+}} zmm1 = [2.000000e+00,1.000000e+00,2.000000e+00,1.000000e+00,2.000000e+00,1.000000e+00,2.000000e+00,1.000000e+00]
+; AVX512-NEXT: # zmm1 = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
; AVX512-NEXT: vaddpd %zmm1, %zmm0, %zmm0
; AVX512-NEXT: vdivpd %zmm0, %zmm1, %zmm0
+; AVX512-NEXT: retl
;
-; AVX-LABEL: f8xf64_f128:
-; AVX: # BB#0:
-; AVX-NEXT: vbroadcastf128 {{.*#+}} ymm2 = mem[0,1,0,1]
-; AVX-NEXT: vaddpd %ymm2, %ymm1, %ymm1
-; AVX-NEXT: vaddpd %ymm2, %ymm0, %ymm0
-; AVX-NEXT: vdivpd %ymm0, %ymm2, %ymm0
-; AVX-NEXT: vdivpd %ymm1, %ymm2, %ymm1
+; AVX-64-LABEL: f8xf64_f128:
+; AVX-64: # BB#0:
+; AVX-64-NEXT: vbroadcastf128 {{.*#+}} ymm2 = [2.000000e+00,1.000000e+00,2.000000e+00,1.000000e+00]
+; AVX-64-NEXT: # ymm2 = mem[0,1,0,1]
+; AVX-64-NEXT: vaddpd %ymm2, %ymm1, %ymm1
+; AVX-64-NEXT: vaddpd %ymm2, %ymm0, %ymm0
+; AVX-64-NEXT: vdivpd %ymm0, %ymm2, %ymm0
+; AVX-64-NEXT: vdivpd %ymm1, %ymm2, %ymm1
+; AVX-64-NEXT: retq
+;
+; AVX2-64-LABEL: f8xf64_f128:
+; AVX2-64: # BB#0:
+; AVX2-64-NEXT: vbroadcastf128 {{.*#+}} ymm2 = [2.000000e+00,1.000000e+00,2.000000e+00,1.000000e+00]
+; AVX2-64-NEXT: # ymm2 = mem[0,1,0,1]
+; AVX2-64-NEXT: vaddpd %ymm2, %ymm1, %ymm1
+; AVX2-64-NEXT: vaddpd %ymm2, %ymm0, %ymm0
+; AVX2-64-NEXT: vdivpd %ymm0, %ymm2, %ymm0
+; AVX2-64-NEXT: vdivpd %ymm1, %ymm2, %ymm1
+; AVX2-64-NEXT: retq
+;
+; AVX512F-64-LABEL: f8xf64_f128:
+; AVX512F-64: # BB#0:
+; AVX512F-64-NEXT: vbroadcastf32x4 {{.*#+}} zmm1 = [2.000000e+00,1.000000e+00,2.000000e+00,1.000000e+00,2.000000e+00,1.000000e+00,2.000000e+00,1.000000e+00]
+; AVX512F-64-NEXT: # zmm1 = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
+; AVX512F-64-NEXT: vaddpd %zmm1, %zmm0, %zmm0
+; AVX512F-64-NEXT: vdivpd %zmm0, %zmm1, %zmm0
+; AVX512F-64-NEXT: retq
%res1 = fadd <8 x double> <double 2.0, double 1.0, double 2.0, double 1.0, double 2.0, double 1.0, double 2.0, double 1.0>, %a
%res2 = fdiv <8 x double> <double 2.0, double 1.0, double 2.0, double 1.0, double 2.0, double 1.0, double 2.0, double 1.0>, %res1
ret <8 x double> %res2
@@ -1193,11 +1960,57 @@ define <8 x double> @f8xf64_f128(<8 x double> %a) {
; AVX512-NOT: .quad
define <8 x double> @f8xf64_f256(<8 x double> %a) {
+; AVX-LABEL: f8xf64_f256:
+; AVX: # BB#0:
+; AVX-NEXT: vmovapd {{.*#+}} ymm2 = [4.000000e+00,1.000000e+00,2.000000e+00,3.000000e+00]
+; AVX-NEXT: vaddpd %ymm2, %ymm1, %ymm1
+; AVX-NEXT: vaddpd %ymm2, %ymm0, %ymm0
+; AVX-NEXT: vdivpd %ymm0, %ymm2, %ymm0
+; AVX-NEXT: vdivpd %ymm1, %ymm2, %ymm1
+; AVX-NEXT: retl
+;
+; AVX2-LABEL: f8xf64_f256:
+; AVX2: # BB#0:
+; AVX2-NEXT: vmovapd {{.*#+}} ymm2 = [4.000000e+00,1.000000e+00,2.000000e+00,3.000000e+00]
+; AVX2-NEXT: vaddpd %ymm2, %ymm1, %ymm1
+; AVX2-NEXT: vaddpd %ymm2, %ymm0, %ymm0
+; AVX2-NEXT: vdivpd %ymm0, %ymm2, %ymm0
+; AVX2-NEXT: vdivpd %ymm1, %ymm2, %ymm1
+; AVX2-NEXT: retl
+;
; AVX512-LABEL: f8xf64_f256:
; AVX512: # BB#0:
-; AVX512-NEXT: vbroadcastf64x4 {{.*#+}} zmm1 = mem[0,1,2,3,0,1,2,3]
+; AVX512-NEXT: vbroadcastf64x4 {{.*#+}} zmm1 = [4.000000e+00,1.000000e+00,2.000000e+00,3.000000e+00,4.000000e+00,1.000000e+00,2.000000e+00,3.000000e+00]
+; AVX512-NEXT: # zmm1 = mem[0,1,2,3,0,1,2,3]
; AVX512-NEXT: vaddpd %zmm1, %zmm0, %zmm0
; AVX512-NEXT: vdivpd %zmm0, %zmm1, %zmm0
+; AVX512-NEXT: retl
+;
+; AVX-64-LABEL: f8xf64_f256:
+; AVX-64: # BB#0:
+; AVX-64-NEXT: vmovapd {{.*#+}} ymm2 = [4.000000e+00,1.000000e+00,2.000000e+00,3.000000e+00]
+; AVX-64-NEXT: vaddpd %ymm2, %ymm1, %ymm1
+; AVX-64-NEXT: vaddpd %ymm2, %ymm0, %ymm0
+; AVX-64-NEXT: vdivpd %ymm0, %ymm2, %ymm0
+; AVX-64-NEXT: vdivpd %ymm1, %ymm2, %ymm1
+; AVX-64-NEXT: retq
+;
+; AVX2-64-LABEL: f8xf64_f256:
+; AVX2-64: # BB#0:
+; AVX2-64-NEXT: vmovapd {{.*#+}} ymm2 = [4.000000e+00,1.000000e+00,2.000000e+00,3.000000e+00]
+; AVX2-64-NEXT: vaddpd %ymm2, %ymm1, %ymm1
+; AVX2-64-NEXT: vaddpd %ymm2, %ymm0, %ymm0
+; AVX2-64-NEXT: vdivpd %ymm0, %ymm2, %ymm0
+; AVX2-64-NEXT: vdivpd %ymm1, %ymm2, %ymm1
+; AVX2-64-NEXT: retq
+;
+; AVX512F-64-LABEL: f8xf64_f256:
+; AVX512F-64: # BB#0:
+; AVX512F-64-NEXT: vbroadcastf64x4 {{.*#+}} zmm1 = [4.000000e+00,1.000000e+00,2.000000e+00,3.000000e+00,4.000000e+00,1.000000e+00,2.000000e+00,3.000000e+00]
+; AVX512F-64-NEXT: # zmm1 = mem[0,1,2,3,0,1,2,3]
+; AVX512F-64-NEXT: vaddpd %zmm1, %zmm0, %zmm0
+; AVX512F-64-NEXT: vdivpd %zmm0, %zmm1, %zmm0
+; AVX512F-64-NEXT: retq
%res1 = fadd <8 x double> <double 4.0, double 1.0, double 2.0, double 3.0, double 4.0, double 1.0, double 2.0, double 3.0>, %a
%res2 = fdiv <8 x double> <double 4.0, double 1.0, double 2.0, double 3.0, double 4.0, double 1.0, double 2.0, double 3.0>, %res1
ret <8 x double> %res2
@@ -1205,32 +2018,34 @@ define <8 x double> @f8xf64_f256(<8 x double> %a) {
-; ALL: .LCPI38
-; ALL-NEXT: .long 4290379776 # 0xffba0000
-
-; AVX: .LCPI38
-; AVX-NEXT: .long 4290379776 # float NaN
-
define <8 x i16> @f8xi16_i32_NaN(<8 x i16> %a) {
+; AVX-LABEL: f8xi16_i32_NaN:
+; AVX: # BB#0:
+; AVX-NEXT: vbroadcastss {{.*#+}} xmm1 = [NaN,NaN,NaN,NaN]
+; AVX-NEXT: vpaddw %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vpand %xmm1, %xmm0, %xmm0
+; AVX-NEXT: retl
+;
; ALL32-LABEL: f8xi16_i32_NaN:
; ALL32: # BB#0:
-; ALL32-NEXT: vpbroadcastd {{\.LCPI.*}}, %xmm1
+; ALL32-NEXT: vpbroadcastd {{.*#+}} xmm1 = [4290379776,4290379776,4290379776,4290379776]
; ALL32-NEXT: vpaddw %xmm1, %xmm0, %xmm0
; ALL32-NEXT: vpand %xmm1, %xmm0, %xmm0
; ALL32-NEXT: retl
;
+; AVX-64-LABEL: f8xi16_i32_NaN:
+; AVX-64: # BB#0:
+; AVX-64-NEXT: vbroadcastss {{.*#+}} xmm1 = [NaN,NaN,NaN,NaN]
+; AVX-64-NEXT: vpaddw %xmm1, %xmm0, %xmm0
+; AVX-64-NEXT: vpand %xmm1, %xmm0, %xmm0
+; AVX-64-NEXT: retq
+;
; ALL64-LABEL: f8xi16_i32_NaN:
; ALL64: # BB#0:
-; ALL64-NEXT: vpbroadcastd {{.*}}(%rip), %xmm1
+; ALL64-NEXT: vpbroadcastd {{.*#+}} xmm1 = [4290379776,4290379776,4290379776,4290379776]
; ALL64-NEXT: vpaddw %xmm1, %xmm0, %xmm0
; ALL64-NEXT: vpand %xmm1, %xmm0, %xmm0
; ALL64-NEXT: retq
-;
-; AVX-LABEL: f8xi16_i32_NaN:
-; AVX: # BB#0:
-; AVX-NEXT: vbroadcastss {{\.LCPI.*}}, %xmm1
-; AVX-NEXT: vpaddw %xmm1, %xmm0, %xmm0
-; AVX-NEXT: vpand %xmm1, %xmm0, %xmm0
%res1 = add <8 x i16> <i16 0, i16 -70, i16 0, i16 -70, i16 0, i16 -70, i16 0, i16 -70>, %a
%res2 = and <8 x i16> <i16 0, i16 -70, i16 0, i16 -70, i16 0, i16 -70, i16 0, i16 -70>, %res1
ret <8 x i16> %res2
diff --git a/llvm/test/CodeGen/X86/recip-fastmath.ll b/llvm/test/CodeGen/X86/recip-fastmath.ll
index 16e261bf3c5..cd4b02ca833 100644
--- a/llvm/test/CodeGen/X86/recip-fastmath.ll
+++ b/llvm/test/CodeGen/X86/recip-fastmath.ll
@@ -290,19 +290,19 @@ define <4 x float> @v4f32_no_estimate(<4 x float> %x) #0 {
;
; HASWELL-LABEL: v4f32_no_estimate:
; HASWELL: # BB#0:
-; HASWELL-NEXT: vbroadcastss {{.*}}(%rip), %xmm1 # sched: [4:0.50]
+; HASWELL-NEXT: vbroadcastss {{.*#+}} xmm1 = [1,1,1,1] sched: [4:0.50]
; HASWELL-NEXT: vdivps %xmm0, %xmm1, %xmm0 # sched: [12:1.00]
; HASWELL-NEXT: retq # sched: [1:1.00]
;
; HASWELL-NO-FMA-LABEL: v4f32_no_estimate:
; HASWELL-NO-FMA: # BB#0:
-; HASWELL-NO-FMA-NEXT: vbroadcastss {{.*}}(%rip), %xmm1
+; HASWELL-NO-FMA-NEXT: vbroadcastss {{.*#+}} xmm1 = [1,1,1,1]
; HASWELL-NO-FMA-NEXT: vdivps %xmm0, %xmm1, %xmm0
; HASWELL-NO-FMA-NEXT: retq
;
; AVX512-LABEL: v4f32_no_estimate:
; AVX512: # BB#0:
-; AVX512-NEXT: vbroadcastss {{.*}}(%rip), %xmm1 # sched: [4:0.50]
+; AVX512-NEXT: vbroadcastss {{.*#+}} xmm1 = [1,1,1,1] sched: [4:0.50]
; AVX512-NEXT: vdivps %xmm0, %xmm1, %xmm0 # sched: [12:1.00]
; AVX512-NEXT: retq # sched: [1:1.00]
%div = fdiv fast <4 x float> <float 1.0, float 1.0, float 1.0, float 1.0>, %x
@@ -361,7 +361,7 @@ define <4 x float> @v4f32_one_step(<4 x float> %x) #1 {
; HASWELL-LABEL: v4f32_one_step:
; HASWELL: # BB#0:
; HASWELL-NEXT: vrcpps %xmm0, %xmm1 # sched: [5:1.00]
-; HASWELL-NEXT: vbroadcastss {{.*}}(%rip), %xmm2 # sched: [4:0.50]
+; HASWELL-NEXT: vbroadcastss {{.*#+}} xmm2 = [1,1,1,1] sched: [4:0.50]
; HASWELL-NEXT: vfnmadd213ps %xmm2, %xmm1, %xmm0
; HASWELL-NEXT: vfmadd132ps %xmm1, %xmm1, %xmm0
; HASWELL-NEXT: retq # sched: [1:1.00]
@@ -370,7 +370,7 @@ define <4 x float> @v4f32_one_step(<4 x float> %x) #1 {
; HASWELL-NO-FMA: # BB#0:
; HASWELL-NO-FMA-NEXT: vrcpps %xmm0, %xmm1
; HASWELL-NO-FMA-NEXT: vmulps %xmm1, %xmm0, %xmm0
-; HASWELL-NO-FMA-NEXT: vbroadcastss {{.*}}(%rip), %xmm2
+; HASWELL-NO-FMA-NEXT: vbroadcastss {{.*#+}} xmm2 = [1,1,1,1]
; HASWELL-NO-FMA-NEXT: vsubps %xmm0, %xmm2, %xmm0
; HASWELL-NO-FMA-NEXT: vmulps %xmm0, %xmm1, %xmm0
; HASWELL-NO-FMA-NEXT: vaddps %xmm0, %xmm1, %xmm0
@@ -379,7 +379,7 @@ define <4 x float> @v4f32_one_step(<4 x float> %x) #1 {
; KNL-LABEL: v4f32_one_step:
; KNL: # BB#0:
; KNL-NEXT: vrcpps %xmm0, %xmm1 # sched: [5:1.00]
-; KNL-NEXT: vbroadcastss {{.*}}(%rip), %xmm2 # sched: [4:0.50]
+; KNL-NEXT: vbroadcastss {{.*#+}} xmm2 = [1,1,1,1] sched: [4:0.50]
; KNL-NEXT: vfnmadd213ps %xmm2, %xmm1, %xmm0
; KNL-NEXT: vfmadd132ps %xmm1, %xmm1, %xmm0
; KNL-NEXT: retq # sched: [1:1.00]
@@ -468,7 +468,7 @@ define <4 x float> @v4f32_two_step(<4 x float> %x) #2 {
; HASWELL-LABEL: v4f32_two_step:
; HASWELL: # BB#0:
; HASWELL-NEXT: vrcpps %xmm0, %xmm1 # sched: [5:1.00]
-; HASWELL-NEXT: vbroadcastss {{.*}}(%rip), %xmm2 # sched: [4:0.50]
+; HASWELL-NEXT: vbroadcastss {{.*#+}} xmm2 = [1,1,1,1] sched: [4:0.50]
; HASWELL-NEXT: vmovaps %xmm1, %xmm3 # sched: [1:1.00]
; HASWELL-NEXT: vfnmadd213ps %xmm2, %xmm0, %xmm3
; HASWELL-NEXT: vfmadd132ps %xmm1, %xmm1, %xmm3
@@ -480,7 +480,7 @@ define <4 x float> @v4f32_two_step(<4 x float> %x) #2 {
; HASWELL-NO-FMA: # BB#0:
; HASWELL-NO-FMA-NEXT: vrcpps %xmm0, %xmm1
; HASWELL-NO-FMA-NEXT: vmulps %xmm1, %xmm0, %xmm2
-; HASWELL-NO-FMA-NEXT: vbroadcastss {{.*}}(%rip), %xmm3
+; HASWELL-NO-FMA-NEXT: vbroadcastss {{.*#+}} xmm3 = [1,1,1,1]
; HASWELL-NO-FMA-NEXT: vsubps %xmm2, %xmm3, %xmm2
; HASWELL-NO-FMA-NEXT: vmulps %xmm2, %xmm1, %xmm2
; HASWELL-NO-FMA-NEXT: vaddps %xmm2, %xmm1, %xmm1
@@ -493,7 +493,7 @@ define <4 x float> @v4f32_two_step(<4 x float> %x) #2 {
; KNL-LABEL: v4f32_two_step:
; KNL: # BB#0:
; KNL-NEXT: vrcpps %xmm0, %xmm1 # sched: [5:1.00]
-; KNL-NEXT: vbroadcastss {{.*}}(%rip), %xmm2 # sched: [4:0.50]
+; KNL-NEXT: vbroadcastss {{.*#+}} xmm2 = [1,1,1,1] sched: [4:0.50]
; KNL-NEXT: vmovaps %xmm1, %xmm3 # sched: [1:1.00]
; KNL-NEXT: vfnmadd213ps %xmm2, %xmm0, %xmm3
; KNL-NEXT: vfmadd132ps %xmm1, %xmm1, %xmm3
@@ -504,7 +504,7 @@ define <4 x float> @v4f32_two_step(<4 x float> %x) #2 {
; SKX-LABEL: v4f32_two_step:
; SKX: # BB#0:
; SKX-NEXT: vrcp14ps %xmm0, %xmm1
-; SKX-NEXT: vbroadcastss {{.*}}(%rip), %xmm2 # sched: [4:0.50]
+; SKX-NEXT: vbroadcastss {{.*#+}} xmm2 = [1,1,1,1] sched: [4:0.50]
; SKX-NEXT: vmovaps %xmm1, %xmm3 # sched: [1:1.00]
; SKX-NEXT: vfnmadd213ps %xmm2, %xmm0, %xmm3
; SKX-NEXT: vfmadd132ps %xmm1, %xmm1, %xmm3
@@ -552,19 +552,19 @@ define <8 x float> @v8f32_no_estimate(<8 x float> %x) #0 {
;
; HASWELL-LABEL: v8f32_no_estimate:
; HASWELL: # BB#0:
-; HASWELL-NEXT: vbroadcastss {{.*}}(%rip), %ymm1 # sched: [5:1.00]
+; HASWELL-NEXT: vbroadcastss {{.*#+}} ymm1 = [1,1,1,1,1,1,1,1] sched: [5:1.00]
; HASWELL-NEXT: vdivps %ymm0, %ymm1, %ymm0 # sched: [19:2.00]
; HASWELL-NEXT: retq # sched: [1:1.00]
;
; HASWELL-NO-FMA-LABEL: v8f32_no_estimate:
; HASWELL-NO-FMA: # BB#0:
-; HASWELL-NO-FMA-NEXT: vbroadcastss {{.*}}(%rip), %ymm1
+; HASWELL-NO-FMA-NEXT: vbroadcastss {{.*#+}} ymm1 = [1,1,1,1,1,1,1,1]
; HASWELL-NO-FMA-NEXT: vdivps %ymm0, %ymm1, %ymm0
; HASWELL-NO-FMA-NEXT: retq
;
; AVX512-LABEL: v8f32_no_estimate:
; AVX512: # BB#0:
-; AVX512-NEXT: vbroadcastss {{.*}}(%rip), %ymm1 # sched: [5:1.00]
+; AVX512-NEXT: vbroadcastss {{.*#+}} ymm1 = [1,1,1,1,1,1,1,1] sched: [5:1.00]
; AVX512-NEXT: vdivps %ymm0, %ymm1, %ymm0 # sched: [19:2.00]
; AVX512-NEXT: retq # sched: [1:1.00]
%div = fdiv fast <8 x float> <float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0>, %x
@@ -630,7 +630,7 @@ define <8 x float> @v8f32_one_step(<8 x float> %x) #1 {
; HASWELL-LABEL: v8f32_one_step:
; HASWELL: # BB#0:
; HASWELL-NEXT: vrcpps %ymm0, %ymm1 # sched: [7:2.00]
-; HASWELL-NEXT: vbroadcastss {{.*}}(%rip), %ymm2 # sched: [5:1.00]
+; HASWELL-NEXT: vbroadcastss {{.*#+}} ymm2 = [1,1,1,1,1,1,1,1] sched: [5:1.00]
; HASWELL-NEXT: vfnmadd213ps %ymm2, %ymm1, %ymm0
; HASWELL-NEXT: vfmadd132ps %ymm1, %ymm1, %ymm0
; HASWELL-NEXT: retq # sched: [1:1.00]
@@ -639,7 +639,7 @@ define <8 x float> @v8f32_one_step(<8 x float> %x) #1 {
; HASWELL-NO-FMA: # BB#0:
; HASWELL-NO-FMA-NEXT: vrcpps %ymm0, %ymm1
; HASWELL-NO-FMA-NEXT: vmulps %ymm1, %ymm0, %ymm0
-; HASWELL-NO-FMA-NEXT: vbroadcastss {{.*}}(%rip), %ymm2
+; HASWELL-NO-FMA-NEXT: vbroadcastss {{.*#+}} ymm2 = [1,1,1,1,1,1,1,1]
; HASWELL-NO-FMA-NEXT: vsubps %ymm0, %ymm2, %ymm0
; HASWELL-NO-FMA-NEXT: vmulps %ymm0, %ymm1, %ymm0
; HASWELL-NO-FMA-NEXT: vaddps %ymm0, %ymm1, %ymm0
@@ -648,7 +648,7 @@ define <8 x float> @v8f32_one_step(<8 x float> %x) #1 {
; KNL-LABEL: v8f32_one_step:
; KNL: # BB#0:
; KNL-NEXT: vrcpps %ymm0, %ymm1 # sched: [7:2.00]
-; KNL-NEXT: vbroadcastss {{.*}}(%rip), %ymm2 # sched: [5:1.00]
+; KNL-NEXT: vbroadcastss {{.*#+}} ymm2 = [1,1,1,1,1,1,1,1] sched: [5:1.00]
; KNL-NEXT: vfnmadd213ps %ymm2, %ymm1, %ymm0
; KNL-NEXT: vfmadd132ps %ymm1, %ymm1, %ymm0
; KNL-NEXT: retq # sched: [1:1.00]
@@ -750,7 +750,7 @@ define <8 x float> @v8f32_two_step(<8 x float> %x) #2 {
; HASWELL-LABEL: v8f32_two_step:
; HASWELL: # BB#0:
; HASWELL-NEXT: vrcpps %ymm0, %ymm1 # sched: [7:2.00]
-; HASWELL-NEXT: vbroadcastss {{.*}}(%rip), %ymm2 # sched: [5:1.00]
+; HASWELL-NEXT: vbroadcastss {{.*#+}} ymm2 = [1,1,1,1,1,1,1,1] sched: [5:1.00]
; HASWELL-NEXT: vmovaps %ymm1, %ymm3 # sched: [1:1.00]
; HASWELL-NEXT: vfnmadd213ps %ymm2, %ymm0, %ymm3
; HASWELL-NEXT: vfmadd132ps %ymm1, %ymm1, %ymm3
@@ -762,7 +762,7 @@ define <8 x float> @v8f32_two_step(<8 x float> %x) #2 {
; HASWELL-NO-FMA: # BB#0:
; HASWELL-NO-FMA-NEXT: vrcpps %ymm0, %ymm1
; HASWELL-NO-FMA-NEXT: vmulps %ymm1, %ymm0, %ymm2
-; HASWELL-NO-FMA-NEXT: vbroadcastss {{.*}}(%rip), %ymm3
+; HASWELL-NO-FMA-NEXT: vbroadcastss {{.*#+}} ymm3 = [1,1,1,1,1,1,1,1]
; HASWELL-NO-FMA-NEXT: vsubps %ymm2, %ymm3, %ymm2
; HASWELL-NO-FMA-NEXT: vmulps %ymm2, %ymm1, %ymm2
; HASWELL-NO-FMA-NEXT: vaddps %ymm2, %ymm1, %ymm1
@@ -775,7 +775,7 @@ define <8 x float> @v8f32_two_step(<8 x float> %x) #2 {
; KNL-LABEL: v8f32_two_step:
; KNL: # BB#0:
; KNL-NEXT: vrcpps %ymm0, %ymm1 # sched: [7:2.00]
-; KNL-NEXT: vbroadcastss {{.*}}(%rip), %ymm2 # sched: [5:1.00]
+; KNL-NEXT: vbroadcastss {{.*#+}} ymm2 = [1,1,1,1,1,1,1,1] sched: [5:1.00]
; KNL-NEXT: vmovaps %ymm1, %ymm3 # sched: [1:1.00]
; KNL-NEXT: vfnmadd213ps %ymm2, %ymm0, %ymm3
; KNL-NEXT: vfmadd132ps %ymm1, %ymm1, %ymm3
@@ -786,7 +786,7 @@ define <8 x float> @v8f32_two_step(<8 x float> %x) #2 {
; SKX-LABEL: v8f32_two_step:
; SKX: # BB#0:
; SKX-NEXT: vrcp14ps %ymm0, %ymm1
-; SKX-NEXT: vbroadcastss {{.*}}(%rip), %ymm2 # sched: [5:1.00]
+; SKX-NEXT: vbroadcastss {{.*#+}} ymm2 = [1,1,1,1,1,1,1,1] sched: [5:1.00]
; SKX-NEXT: vmovaps %ymm1, %ymm3 # sched: [1:1.00]
; SKX-NEXT: vfnmadd213ps %ymm2, %ymm0, %ymm3
; SKX-NEXT: vfmadd132ps %ymm1, %ymm1, %ymm3
diff --git a/llvm/test/CodeGen/X86/recip-fastmath2.ll b/llvm/test/CodeGen/X86/recip-fastmath2.ll
index 440a6f0bef1..c3c8fa3016a 100644
--- a/llvm/test/CodeGen/X86/recip-fastmath2.ll
+++ b/llvm/test/CodeGen/X86/recip-fastmath2.ll
@@ -415,7 +415,7 @@ define <4 x float> @v4f32_one_step2(<4 x float> %x) #1 {
; HASWELL-LABEL: v4f32_one_step2:
; HASWELL: # BB#0:
; HASWELL-NEXT: vrcpps %xmm0, %xmm1 # sched: [5:1.00]
-; HASWELL-NEXT: vbroadcastss {{.*}}(%rip), %xmm2 # sched: [4:0.50]
+; HASWELL-NEXT: vbroadcastss {{.*#+}} xmm2 = [1,1,1,1] sched: [4:0.50]
; HASWELL-NEXT: vfnmadd213ps %xmm2, %xmm1, %xmm0
; HASWELL-NEXT: vfmadd132ps %xmm1, %xmm1, %xmm0
; HASWELL-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm0 # sched: [9:0.50]
@@ -425,7 +425,7 @@ define <4 x float> @v4f32_one_step2(<4 x float> %x) #1 {
; HASWELL-NO-FMA: # BB#0:
; HASWELL-NO-FMA-NEXT: vrcpps %xmm0, %xmm1 # sched: [5:1.00]
; HASWELL-NO-FMA-NEXT: vmulps %xmm1, %xmm0, %xmm0 # sched: [5:0.50]
-; HASWELL-NO-FMA-NEXT: vbroadcastss {{.*}}(%rip), %xmm2 # sched: [4:0.50]
+; HASWELL-NO-FMA-NEXT: vbroadcastss {{.*#+}} xmm2 = [1,1,1,1] sched: [4:0.50]
; HASWELL-NO-FMA-NEXT: vsubps %xmm0, %xmm2, %xmm0 # sched: [3:1.00]
; HASWELL-NO-FMA-NEXT: vmulps %xmm0, %xmm1, %xmm0 # sched: [5:0.50]
; HASWELL-NO-FMA-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
@@ -435,7 +435,7 @@ define <4 x float> @v4f32_one_step2(<4 x float> %x) #1 {
; KNL-LABEL: v4f32_one_step2:
; KNL: # BB#0:
; KNL-NEXT: vrcpps %xmm0, %xmm1 # sched: [5:1.00]
-; KNL-NEXT: vbroadcastss {{.*}}(%rip), %xmm2 # sched: [4:0.50]
+; KNL-NEXT: vbroadcastss {{.*#+}} xmm2 = [1,1,1,1] sched: [4:0.50]
; KNL-NEXT: vfnmadd213ps %xmm2, %xmm1, %xmm0
; KNL-NEXT: vfmadd132ps %xmm1, %xmm1, %xmm0
; KNL-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm0 # sched: [9:0.50]
@@ -514,7 +514,7 @@ define <4 x float> @v4f32_one_step_2_divs(<4 x float> %x) #1 {
; HASWELL-LABEL: v4f32_one_step_2_divs:
; HASWELL: # BB#0:
; HASWELL-NEXT: vrcpps %xmm0, %xmm1 # sched: [5:1.00]
-; HASWELL-NEXT: vbroadcastss {{.*}}(%rip), %xmm2 # sched: [4:0.50]
+; HASWELL-NEXT: vbroadcastss {{.*#+}} xmm2 = [1,1,1,1] sched: [4:0.50]
; HASWELL-NEXT: vfnmadd213ps %xmm2, %xmm1, %xmm0
; HASWELL-NEXT: vfmadd132ps %xmm1, %xmm1, %xmm0
; HASWELL-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm1 # sched: [9:0.50]
@@ -525,7 +525,7 @@ define <4 x float> @v4f32_one_step_2_divs(<4 x float> %x) #1 {
; HASWELL-NO-FMA: # BB#0:
; HASWELL-NO-FMA-NEXT: vrcpps %xmm0, %xmm1 # sched: [5:1.00]
; HASWELL-NO-FMA-NEXT: vmulps %xmm1, %xmm0, %xmm0 # sched: [5:0.50]
-; HASWELL-NO-FMA-NEXT: vbroadcastss {{.*}}(%rip), %xmm2 # sched: [4:0.50]
+; HASWELL-NO-FMA-NEXT: vbroadcastss {{.*#+}} xmm2 = [1,1,1,1] sched: [4:0.50]
; HASWELL-NO-FMA-NEXT: vsubps %xmm0, %xmm2, %xmm0 # sched: [3:1.00]
; HASWELL-NO-FMA-NEXT: vmulps %xmm0, %xmm1, %xmm0 # sched: [5:0.50]
; HASWELL-NO-FMA-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
@@ -536,7 +536,7 @@ define <4 x float> @v4f32_one_step_2_divs(<4 x float> %x) #1 {
; KNL-LABEL: v4f32_one_step_2_divs:
; KNL: # BB#0:
; KNL-NEXT: vrcpps %xmm0, %xmm1 # sched: [5:1.00]
-; KNL-NEXT: vbroadcastss {{.*}}(%rip), %xmm2 # sched: [4:0.50]
+; KNL-NEXT: vbroadcastss {{.*#+}} xmm2 = [1,1,1,1] sched: [4:0.50]
; KNL-NEXT: vfnmadd213ps %xmm2, %xmm1, %xmm0
; KNL-NEXT: vfmadd132ps %xmm1, %xmm1, %xmm0
; KNL-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm1 # sched: [9:0.50]
@@ -635,7 +635,7 @@ define <4 x float> @v4f32_two_step2(<4 x float> %x) #2 {
; HASWELL-LABEL: v4f32_two_step2:
; HASWELL: # BB#0:
; HASWELL-NEXT: vrcpps %xmm0, %xmm1 # sched: [5:1.00]
-; HASWELL-NEXT: vbroadcastss {{.*}}(%rip), %xmm2 # sched: [4:0.50]
+; HASWELL-NEXT: vbroadcastss {{.*#+}} xmm2 = [1,1,1,1] sched: [4:0.50]
; HASWELL-NEXT: vmovaps %xmm1, %xmm3 # sched: [1:1.00]
; HASWELL-NEXT: vfnmadd213ps %xmm2, %xmm0, %xmm3
; HASWELL-NEXT: vfmadd132ps %xmm1, %xmm1, %xmm3
@@ -648,7 +648,7 @@ define <4 x float> @v4f32_two_step2(<4 x float> %x) #2 {
; HASWELL-NO-FMA: # BB#0:
; HASWELL-NO-FMA-NEXT: vrcpps %xmm0, %xmm1 # sched: [5:1.00]
; HASWELL-NO-FMA-NEXT: vmulps %xmm1, %xmm0, %xmm2 # sched: [5:0.50]
-; HASWELL-NO-FMA-NEXT: vbroadcastss {{.*}}(%rip), %xmm3 # sched: [4:0.50]
+; HASWELL-NO-FMA-NEXT: vbroadcastss {{.*#+}} xmm3 = [1,1,1,1] sched: [4:0.50]
; HASWELL-NO-FMA-NEXT: vsubps %xmm2, %xmm3, %xmm2 # sched: [3:1.00]
; HASWELL-NO-FMA-NEXT: vmulps %xmm2, %xmm1, %xmm2 # sched: [5:0.50]
; HASWELL-NO-FMA-NEXT: vaddps %xmm2, %xmm1, %xmm1 # sched: [3:1.00]
@@ -662,7 +662,7 @@ define <4 x float> @v4f32_two_step2(<4 x float> %x) #2 {
; KNL-LABEL: v4f32_two_step2:
; KNL: # BB#0:
; KNL-NEXT: vrcpps %xmm0, %xmm1 # sched: [5:1.00]
-; KNL-NEXT: vbroadcastss {{.*}}(%rip), %xmm2 # sched: [4:0.50]
+; KNL-NEXT: vbroadcastss {{.*#+}} xmm2 = [1,1,1,1] sched: [4:0.50]
; KNL-NEXT: vmovaps %xmm1, %xmm3 # sched: [1:1.00]
; KNL-NEXT: vfnmadd213ps %xmm2, %xmm0, %xmm3
; KNL-NEXT: vfmadd132ps %xmm1, %xmm1, %xmm3
@@ -674,7 +674,7 @@ define <4 x float> @v4f32_two_step2(<4 x float> %x) #2 {
; SKX-LABEL: v4f32_two_step2:
; SKX: # BB#0:
; SKX-NEXT: vrcp14ps %xmm0, %xmm1
-; SKX-NEXT: vbroadcastss {{.*}}(%rip), %xmm2 # sched: [4:0.50]
+; SKX-NEXT: vbroadcastss {{.*#+}} xmm2 = [1,1,1,1] sched: [4:0.50]
; SKX-NEXT: vmovaps %xmm1, %xmm3 # sched: [1:1.00]
; SKX-NEXT: vfnmadd213ps %xmm2, %xmm0, %xmm3
; SKX-NEXT: vfmadd132ps %xmm1, %xmm1, %xmm3
@@ -751,7 +751,7 @@ define <8 x float> @v8f32_one_step2(<8 x float> %x) #1 {
; HASWELL-LABEL: v8f32_one_step2:
; HASWELL: # BB#0:
; HASWELL-NEXT: vrcpps %ymm0, %ymm1 # sched: [7:2.00]
-; HASWELL-NEXT: vbroadcastss {{.*}}(%rip), %ymm2 # sched: [5:1.00]
+; HASWELL-NEXT: vbroadcastss {{.*#+}} ymm2 = [1,1,1,1,1,1,1,1] sched: [5:1.00]
; HASWELL-NEXT: vfnmadd213ps %ymm2, %ymm1, %ymm0
; HASWELL-NEXT: vfmadd132ps %ymm1, %ymm1, %ymm0
; HASWELL-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [9:1.00]
@@ -761,7 +761,7 @@ define <8 x float> @v8f32_one_step2(<8 x float> %x) #1 {
; HASWELL-NO-FMA: # BB#0:
; HASWELL-NO-FMA-NEXT: vrcpps %ymm0, %ymm1 # sched: [7:2.00]
; HASWELL-NO-FMA-NEXT: vmulps %ymm1, %ymm0, %ymm0 # sched: [5:1.00]
-; HASWELL-NO-FMA-NEXT: vbroadcastss {{.*}}(%rip), %ymm2 # sched: [5:1.00]
+; HASWELL-NO-FMA-NEXT: vbroadcastss {{.*#+}} ymm2 = [1,1,1,1,1,1,1,1] sched: [5:1.00]
; HASWELL-NO-FMA-NEXT: vsubps %ymm0, %ymm2, %ymm0 # sched: [3:1.00]
; HASWELL-NO-FMA-NEXT: vmulps %ymm0, %ymm1, %ymm0 # sched: [5:1.00]
; HASWELL-NO-FMA-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
@@ -771,7 +771,7 @@ define <8 x float> @v8f32_one_step2(<8 x float> %x) #1 {
; KNL-LABEL: v8f32_one_step2:
; KNL: # BB#0:
; KNL-NEXT: vrcpps %ymm0, %ymm1 # sched: [7:2.00]
-; KNL-NEXT: vbroadcastss {{.*}}(%rip), %ymm2 # sched: [5:1.00]
+; KNL-NEXT: vbroadcastss {{.*#+}} ymm2 = [1,1,1,1,1,1,1,1] sched: [5:1.00]
; KNL-NEXT: vfnmadd213ps %ymm2, %ymm1, %ymm0
; KNL-NEXT: vfmadd132ps %ymm1, %ymm1, %ymm0
; KNL-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [9:1.00]
@@ -859,7 +859,7 @@ define <8 x float> @v8f32_one_step_2_divs(<8 x float> %x) #1 {
; HASWELL-LABEL: v8f32_one_step_2_divs:
; HASWELL: # BB#0:
; HASWELL-NEXT: vrcpps %ymm0, %ymm1 # sched: [7:2.00]
-; HASWELL-NEXT: vbroadcastss {{.*}}(%rip), %ymm2 # sched: [5:1.00]
+; HASWELL-NEXT: vbroadcastss {{.*#+}} ymm2 = [1,1,1,1,1,1,1,1] sched: [5:1.00]
; HASWELL-NEXT: vfnmadd213ps %ymm2, %ymm1, %ymm0
; HASWELL-NEXT: vfmadd132ps %ymm1, %ymm1, %ymm0
; HASWELL-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm1 # sched: [9:1.00]
@@ -870,7 +870,7 @@ define <8 x float> @v8f32_one_step_2_divs(<8 x float> %x) #1 {
; HASWELL-NO-FMA: # BB#0:
; HASWELL-NO-FMA-NEXT: vrcpps %ymm0, %ymm1 # sched: [7:2.00]
; HASWELL-NO-FMA-NEXT: vmulps %ymm1, %ymm0, %ymm0 # sched: [5:1.00]
-; HASWELL-NO-FMA-NEXT: vbroadcastss {{.*}}(%rip), %ymm2 # sched: [5:1.00]
+; HASWELL-NO-FMA-NEXT: vbroadcastss {{.*#+}} ymm2 = [1,1,1,1,1,1,1,1] sched: [5:1.00]
; HASWELL-NO-FMA-NEXT: vsubps %ymm0, %ymm2, %ymm0 # sched: [3:1.00]
; HASWELL-NO-FMA-NEXT: vmulps %ymm0, %ymm1, %ymm0 # sched: [5:1.00]
; HASWELL-NO-FMA-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
@@ -881,7 +881,7 @@ define <8 x float> @v8f32_one_step_2_divs(<8 x float> %x) #1 {
; KNL-LABEL: v8f32_one_step_2_divs:
; KNL: # BB#0:
; KNL-NEXT: vrcpps %ymm0, %ymm1 # sched: [7:2.00]
-; KNL-NEXT: vbroadcastss {{.*}}(%rip), %ymm2 # sched: [5:1.00]
+; KNL-NEXT: vbroadcastss {{.*#+}} ymm2 = [1,1,1,1,1,1,1,1] sched: [5:1.00]
; KNL-NEXT: vfnmadd213ps %ymm2, %ymm1, %ymm0
; KNL-NEXT: vfmadd132ps %ymm1, %ymm1, %ymm0
; KNL-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm1 # sched: [9:1.00]
@@ -994,7 +994,7 @@ define <8 x float> @v8f32_two_step2(<8 x float> %x) #2 {
; HASWELL-LABEL: v8f32_two_step2:
; HASWELL: # BB#0:
; HASWELL-NEXT: vrcpps %ymm0, %ymm1 # sched: [7:2.00]
-; HASWELL-NEXT: vbroadcastss {{.*}}(%rip), %ymm2 # sched: [5:1.00]
+; HASWELL-NEXT: vbroadcastss {{.*#+}} ymm2 = [1,1,1,1,1,1,1,1] sched: [5:1.00]
; HASWELL-NEXT: vmovaps %ymm1, %ymm3 # sched: [1:1.00]
; HASWELL-NEXT: vfnmadd213ps %ymm2, %ymm0, %ymm3
; HASWELL-NEXT: vfmadd132ps %ymm1, %ymm1, %ymm3
@@ -1007,7 +1007,7 @@ define <8 x float> @v8f32_two_step2(<8 x float> %x) #2 {
; HASWELL-NO-FMA: # BB#0:
; HASWELL-NO-FMA-NEXT: vrcpps %ymm0, %ymm1 # sched: [7:2.00]
; HASWELL-NO-FMA-NEXT: vmulps %ymm1, %ymm0, %ymm2 # sched: [5:1.00]
-; HASWELL-NO-FMA-NEXT: vbroadcastss {{.*}}(%rip), %ymm3 # sched: [5:1.00]
+; HASWELL-NO-FMA-NEXT: vbroadcastss {{.*#+}} ymm3 = [1,1,1,1,1,1,1,1] sched: [5:1.00]
; HASWELL-NO-FMA-NEXT: vsubps %ymm2, %ymm3, %ymm2 # sched: [3:1.00]
; HASWELL-NO-FMA-NEXT: vmulps %ymm2, %ymm1, %ymm2 # sched: [5:1.00]
; HASWELL-NO-FMA-NEXT: vaddps %ymm2, %ymm1, %ymm1 # sched: [3:1.00]
@@ -1021,7 +1021,7 @@ define <8 x float> @v8f32_two_step2(<8 x float> %x) #2 {
; KNL-LABEL: v8f32_two_step2:
; KNL: # BB#0:
; KNL-NEXT: vrcpps %ymm0, %ymm1 # sched: [7:2.00]
-; KNL-NEXT: vbroadcastss {{.*}}(%rip), %ymm2 # sched: [5:1.00]
+; KNL-NEXT: vbroadcastss {{.*#+}} ymm2 = [1,1,1,1,1,1,1,1] sched: [5:1.00]
; KNL-NEXT: vmovaps %ymm1, %ymm3 # sched: [1:1.00]
; KNL-NEXT: vfnmadd213ps %ymm2, %ymm0, %ymm3
; KNL-NEXT: vfmadd132ps %ymm1, %ymm1, %ymm3
@@ -1033,7 +1033,7 @@ define <8 x float> @v8f32_two_step2(<8 x float> %x) #2 {
; SKX-LABEL: v8f32_two_step2:
; SKX: # BB#0:
; SKX-NEXT: vrcp14ps %ymm0, %ymm1
-; SKX-NEXT: vbroadcastss {{.*}}(%rip), %ymm2 # sched: [5:1.00]
+; SKX-NEXT: vbroadcastss {{.*#+}} ymm2 = [1,1,1,1,1,1,1,1] sched: [5:1.00]
; SKX-NEXT: vmovaps %ymm1, %ymm3 # sched: [1:1.00]
; SKX-NEXT: vfnmadd213ps %ymm2, %ymm0, %ymm3
; SKX-NEXT: vfmadd132ps %ymm1, %ymm1, %ymm3
diff --git a/llvm/test/CodeGen/X86/vec_shift6.ll b/llvm/test/CodeGen/X86/vec_shift6.ll
index b4a58deff2f..731760a4ea5 100644
--- a/llvm/test/CodeGen/X86/vec_shift6.ll
+++ b/llvm/test/CodeGen/X86/vec_shift6.ll
@@ -153,14 +153,16 @@ define <32 x i16> @test7(<32 x i16> %a) {
;
; AVX2-LABEL: test7:
; AVX2: # BB#0:
-; AVX2-NEXT: vbroadcasti128 {{.*#+}} ymm2 = mem[0,1,0,1]
+; AVX2-NEXT: vbroadcasti128 {{.*#+}} ymm2 = [2,2,4,8,128,1,512,2048,2,2,4,8,128,1,512,2048]
+; AVX2-NEXT: # ymm2 = mem[0,1,0,1]
; AVX2-NEXT: vpmullw %ymm2, %ymm0, %ymm0
; AVX2-NEXT: vpmullw %ymm2, %ymm1, %ymm1
; AVX2-NEXT: retq
;
; AVX512-LABEL: test7:
; AVX512: # BB#0:
-; AVX512-NEXT: vbroadcasti128 {{.*#+}} ymm2 = mem[0,1,0,1]
+; AVX512-NEXT: vbroadcasti128 {{.*#+}} ymm2 = [2,2,4,8,128,1,512,2048,2,2,4,8,128,1,512,2048]
+; AVX512-NEXT: # ymm2 = mem[0,1,0,1]
; AVX512-NEXT: vpmullw %ymm2, %ymm0, %ymm0
; AVX512-NEXT: vpmullw %ymm2, %ymm1, %ymm1
; AVX512-NEXT: retq
@@ -183,7 +185,8 @@ define <16 x i32> @test8(<16 x i32> %a) {
;
; AVX2-LABEL: test8:
; AVX2: # BB#0:
-; AVX2-NEXT: vbroadcasti128 {{.*#+}} ymm2 = mem[0,1,0,1]
+; AVX2-NEXT: vbroadcasti128 {{.*#+}} ymm2 = [1,1,2,3,1,1,2,3]
+; AVX2-NEXT: # ymm2 = mem[0,1,0,1]
; AVX2-NEXT: vpsllvd %ymm2, %ymm0, %ymm0
; AVX2-NEXT: vpsllvd %ymm2, %ymm1, %ymm1
; AVX2-NEXT: retq