diff options
author | Igor Breger <igor.breger@intel.com> | 2016-01-25 10:17:11 +0000 |
---|---|---|
committer | Igor Breger <igor.breger@intel.com> | 2016-01-25 10:17:11 +0000 |
commit | 6d421419db33a98c52a05a2aefd74492473d0512 (patch) | |
tree | b50bc5f7f492b410e42b20659e75768ec53f0047 /llvm/lib/Target | |
parent | d3e1dede4a28dfffedc8b49177c2abb79a0649e7 (diff) | |
download | bcm5719-llvm-6d421419db33a98c52a05a2aefd74492473d0512.tar.gz bcm5719-llvm-6d421419db33a98c52a05a2aefd74492473d0512.zip |
AVX1: Enable vector masked_load/store for AVX1 targets.
Use AVX1 FP instructions (vmaskmovps/pd) in place of the AVX2 int instructions (vpmaskmovd/q).
Differential Revision: http://reviews.llvm.org/D16528
llvm-svn: 258675
Diffstat (limited to 'llvm/lib/Target')
-rw-r--r-- | llvm/lib/Target/X86/X86InstrSSE.td | 149 | ||||
-rw-r--r-- | llvm/lib/Target/X86/X86TargetTransformInfo.cpp | 2 |
2 files changed, 41 insertions, 110 deletions
diff --git a/llvm/lib/Target/X86/X86InstrSSE.td b/llvm/lib/Target/X86/X86InstrSSE.td index 6a7c45665e9..345e8d0ba21 100644 --- a/llvm/lib/Target/X86/X86InstrSSE.td +++ b/llvm/lib/Target/X86/X86InstrSSE.td @@ -8703,116 +8703,47 @@ defm VPMASKMOVQ : avx2_pmovmask<"vpmaskmovq", int_x86_avx2_maskstore_q, int_x86_avx2_maskstore_q_256>, VEX_W; -def: Pat<(X86mstore addr:$ptr, (v8i32 VR256:$mask), (v8f32 VR256:$src)), - (VMASKMOVPSYmr addr:$ptr, VR256:$mask, VR256:$src)>; - -def: Pat<(X86mstore addr:$ptr, (v8i32 VR256:$mask), (v8i32 VR256:$src)), - (VPMASKMOVDYmr addr:$ptr, VR256:$mask, VR256:$src)>; - -def: Pat<(X86mstore addr:$ptr, (v4i32 VR128:$mask), (v4f32 VR128:$src)), - (VMASKMOVPSmr addr:$ptr, VR128:$mask, VR128:$src)>; - -def: Pat<(X86mstore addr:$ptr, (v4i32 VR128:$mask), (v4i32 VR128:$src)), - (VPMASKMOVDmr addr:$ptr, VR128:$mask, VR128:$src)>; - -def: Pat<(v8f32 (masked_load addr:$ptr, (v8i32 VR256:$mask), undef)), - (VMASKMOVPSYrm VR256:$mask, addr:$ptr)>; - -def: Pat<(v8f32 (masked_load addr:$ptr, (v8i32 VR256:$mask), - (bc_v8f32 (v8i32 immAllZerosV)))), - (VMASKMOVPSYrm VR256:$mask, addr:$ptr)>; - -def: Pat<(v8f32 (masked_load addr:$ptr, (v8i32 VR256:$mask), (v8f32 VR256:$src0))), - (VBLENDVPSYrr VR256:$src0, (VMASKMOVPSYrm VR256:$mask, addr:$ptr), - VR256:$mask)>; - -def: Pat<(v8i32 (masked_load addr:$ptr, (v8i32 VR256:$mask), undef)), - (VPMASKMOVDYrm VR256:$mask, addr:$ptr)>; - -def: Pat<(v8i32 (masked_load addr:$ptr, (v8i32 VR256:$mask), (v8i32 immAllZerosV))), - (VPMASKMOVDYrm VR256:$mask, addr:$ptr)>; - -def: Pat<(v8i32 (masked_load addr:$ptr, (v8i32 VR256:$mask), (v8i32 VR256:$src0))), - (VBLENDVPSYrr VR256:$src0, (VPMASKMOVDYrm VR256:$mask, addr:$ptr), - VR256:$mask)>; - -def: Pat<(v4f32 (masked_load addr:$ptr, (v4i32 VR128:$mask), undef)), - (VMASKMOVPSrm VR128:$mask, addr:$ptr)>; - -def: Pat<(v4f32 (masked_load addr:$ptr, (v4i32 VR128:$mask), - (bc_v4f32 (v4i32 immAllZerosV)))), - (VMASKMOVPSrm VR128:$mask, addr:$ptr)>; - -def: Pat<(v4f32 
(masked_load addr:$ptr, (v4i32 VR128:$mask), (v4f32 VR128:$src0))), - (VBLENDVPSrr VR128:$src0, (VMASKMOVPSrm VR128:$mask, addr:$ptr), - VR128:$mask)>; - -def: Pat<(v4i32 (masked_load addr:$ptr, (v4i32 VR128:$mask), undef)), - (VPMASKMOVDrm VR128:$mask, addr:$ptr)>; - -def: Pat<(v4i32 (masked_load addr:$ptr, (v4i32 VR128:$mask), (v4i32 immAllZerosV))), - (VPMASKMOVDrm VR128:$mask, addr:$ptr)>; - -def: Pat<(v4i32 (masked_load addr:$ptr, (v4i32 VR128:$mask), (v4i32 VR128:$src0))), - (VBLENDVPSrr VR128:$src0, (VPMASKMOVDrm VR128:$mask, addr:$ptr), - VR128:$mask)>; - -def: Pat<(X86mstore addr:$ptr, (v4i64 VR256:$mask), (v4f64 VR256:$src)), - (VMASKMOVPDYmr addr:$ptr, VR256:$mask, VR256:$src)>; - -def: Pat<(X86mstore addr:$ptr, (v4i64 VR256:$mask), (v4i64 VR256:$src)), - (VPMASKMOVQYmr addr:$ptr, VR256:$mask, VR256:$src)>; - -def: Pat<(v4f64 (masked_load addr:$ptr, (v4i64 VR256:$mask), undef)), - (VMASKMOVPDYrm VR256:$mask, addr:$ptr)>; - -def: Pat<(v4f64 (masked_load addr:$ptr, (v4i64 VR256:$mask), - (v4f64 immAllZerosV))), - (VMASKMOVPDYrm VR256:$mask, addr:$ptr)>; - -def: Pat<(v4f64 (masked_load addr:$ptr, (v4i64 VR256:$mask), (v4f64 VR256:$src0))), - (VBLENDVPDYrr VR256:$src0, (VMASKMOVPDYrm VR256:$mask, addr:$ptr), - VR256:$mask)>; - -def: Pat<(v4i64 (masked_load addr:$ptr, (v4i64 VR256:$mask), undef)), - (VPMASKMOVQYrm VR256:$mask, addr:$ptr)>; - -def: Pat<(v4i64 (masked_load addr:$ptr, (v4i64 VR256:$mask), - (bc_v4i64 (v8i32 immAllZerosV)))), - (VPMASKMOVQYrm VR256:$mask, addr:$ptr)>; - -def: Pat<(v4i64 (masked_load addr:$ptr, (v4i64 VR256:$mask), (v4i64 VR256:$src0))), - (VBLENDVPDYrr VR256:$src0, (VPMASKMOVQYrm VR256:$mask, addr:$ptr), - VR256:$mask)>; - -def: Pat<(X86mstore addr:$ptr, (v2i64 VR128:$mask), (v2f64 VR128:$src)), - (VMASKMOVPDmr addr:$ptr, VR128:$mask, VR128:$src)>; - -def: Pat<(X86mstore addr:$ptr, (v2i64 VR128:$mask), (v2i64 VR128:$src)), - (VPMASKMOVQmr addr:$ptr, VR128:$mask, VR128:$src)>; - -def: Pat<(v2f64 (masked_load addr:$ptr, (v2i64 
VR128:$mask), undef)), - (VMASKMOVPDrm VR128:$mask, addr:$ptr)>; - -def: Pat<(v2f64 (masked_load addr:$ptr, (v2i64 VR128:$mask), - (v2f64 immAllZerosV))), - (VMASKMOVPDrm VR128:$mask, addr:$ptr)>; - -def: Pat<(v2f64 (masked_load addr:$ptr, (v2i64 VR128:$mask), (v2f64 VR128:$src0))), - (VBLENDVPDrr VR128:$src0, (VMASKMOVPDrm VR128:$mask, addr:$ptr), - VR128:$mask)>; - -def: Pat<(v2i64 (masked_load addr:$ptr, (v2i64 VR128:$mask), undef)), - (VPMASKMOVQrm VR128:$mask, addr:$ptr)>; - -def: Pat<(v2i64 (masked_load addr:$ptr, (v2i64 VR128:$mask), - (bc_v2i64 (v4i32 immAllZerosV)))), - (VPMASKMOVQrm VR128:$mask, addr:$ptr)>; - -def: Pat<(v2i64 (masked_load addr:$ptr, (v2i64 VR128:$mask), (v2i64 VR128:$src0))), - (VBLENDVPDrr VR128:$src0, (VPMASKMOVQrm VR128:$mask, addr:$ptr), - VR128:$mask)>; +multiclass maskmov_lowering<string InstrStr, RegisterClass RC, ValueType VT, + ValueType MaskVT, string BlendStr, ValueType ZeroVT> { + // masked store + def: Pat<(X86mstore addr:$ptr, (MaskVT RC:$mask), (VT RC:$src)), + (!cast<Instruction>(InstrStr#"mr") addr:$ptr, RC:$mask, RC:$src)>; + // masked load + def: Pat<(VT (masked_load addr:$ptr, (MaskVT RC:$mask), undef)), + (!cast<Instruction>(InstrStr#"rm") RC:$mask, addr:$ptr)>; + def: Pat<(VT (masked_load addr:$ptr, (MaskVT RC:$mask), + (VT (bitconvert (ZeroVT immAllZerosV))))), + (!cast<Instruction>(InstrStr#"rm") RC:$mask, addr:$ptr)>; + def: Pat<(VT (masked_load addr:$ptr, (MaskVT RC:$mask), (VT RC:$src0))), + (!cast<Instruction>(BlendStr#"rr") + RC:$src0, + (!cast<Instruction>(InstrStr#"rm") RC:$mask, addr:$ptr), + RC:$mask)>; +} +let Predicates = [HasAVX] in { + defm : maskmov_lowering<"VMASKMOVPS", VR128, v4f32, v4i32, "VBLENDVPS", v4i32>; + defm : maskmov_lowering<"VMASKMOVPD", VR128, v2f64, v2i64, "VBLENDVPD", v4i32>; +} +let Predicates = [HasAVX1Only] in { + // zero vector created as v8f32 (base on X86TargetLowering::LowerBUILD_VECTOR) + defm : maskmov_lowering<"VMASKMOVPSY", VR256, v8f32, v8i32, "VBLENDVPSY", v8f32>; + 
defm : maskmov_lowering<"VMASKMOVPDY", VR256, v4f64, v4i64, "VBLENDVPDY", v8f32>; + // load/store i32/i64 not supported use ps/pd version + defm : maskmov_lowering<"VMASKMOVPSY", VR256, v8i32, v8i32, "VBLENDVPSY", v8i32>; + defm : maskmov_lowering<"VMASKMOVPDY", VR256, v4i64, v4i64, "VBLENDVPDY", v8f32>; + defm : maskmov_lowering<"VMASKMOVPS", VR128, v4i32, v4i32, "VBLENDVPS", v4i32>; + defm : maskmov_lowering<"VMASKMOVPD", VR128, v2i64, v2i64, "VBLENDVPD", v4i32>; +} +let Predicates = [HasAVX2] in { + // zero vector created as v8i32 (base on X86TargetLowering::LowerBUILD_VECTOR) + defm : maskmov_lowering<"VMASKMOVPSY", VR256, v8f32, v8i32, "VBLENDVPSY", v8i32>; + defm : maskmov_lowering<"VMASKMOVPDY", VR256, v4f64, v4i64, "VBLENDVPDY", v8i32>; + defm : maskmov_lowering<"VPMASKMOVDY", VR256, v8i32, v8i32, "VBLENDVPSY", v8i32>; + defm : maskmov_lowering<"VPMASKMOVQY", VR256, v4i64, v4i64, "VBLENDVPDY", v8i32>; + defm : maskmov_lowering<"VPMASKMOVD", VR128, v4i32, v4i32, "VBLENDVPS", v4i32>; + defm : maskmov_lowering<"VPMASKMOVQ", VR128, v2i64, v2i64, "VBLENDVPD", v4i32>; +} //===----------------------------------------------------------------------===// // Variable Bit Shifts // diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp index 2e7bbb20874..397a0f2dba2 100644 --- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp +++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp @@ -1438,7 +1438,7 @@ bool X86TTIImpl::isLegalMaskedLoad(Type *DataTy) { int DataWidth = isa<PointerType>(ScalarTy) ? DL.getPointerSizeInBits() : ScalarTy->getPrimitiveSizeInBits(); - return (DataWidth >= 32 && ST->hasAVX2()); + return (DataWidth >= 32 && ST->hasAVX()); } bool X86TTIImpl::isLegalMaskedStore(Type *DataType) { |