author    Craig Topper <craig.topper@intel.com>  2019-05-22 21:00:18 +0000
committer Craig Topper <craig.topper@intel.com>  2019-05-22 21:00:18 +0000
commit    93f38e1f1ae23623a03d456b5b46f19f5c500036 (patch)
tree      1e49cf98e47629cc5960381bf3ba1f32c7e129f0 /llvm/lib
parent    ed49f6d8e65a4d8c26621624252c5f5588d549a1 (diff)
[X86] Explicitly disable VEXTRACT instruction matching for an immediate of 0. Remove a bunch of isel patterns that become unnecessary.
We effectively had a second set of isel patterns that tried to use a regular store instruction and an extract_subreg instruction, or a masked move and an extract_subreg. These patterns were intended to override the matching of VEXTRACT instructions by taking advantage of the priority of the explicit immediate 0 for the index.

This patch instead just disables immediate-0 matching in the VEXTRACT patterns. This way each of the component pieces of the larger patterns will match by themselves.

This found a bug of sorts where we didn't use a 128-bit store for a 512->128 extract on KNL. It's unclear what the right thing to do here is: using the vextract avoids constraining the register allocator to xmm0-15, but it always results in a longer encoding if the register allocator ends up choosing xmm0-15 anyway.

llvm-svn: 361431
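For context, the mechanism of the fix is the predicate added to the PatFrags in X86InstrFragmentsSIMD.td below: it rejects a constant-0 index so the VEXTRACT patterns no longer claim that case. A minimal C++ sketch of that check, assuming the usual SelectionDAG headers; the wrapper name ExtractIndexIsNonZero is hypothetical (the patch writes the body inline in the PatFrag):

#include "llvm/CodeGen/SelectionDAGNodes.h"
using namespace llvm;

// The second operand of an extract_subvector node is the (iPTR)
// subvector index. Returning false for index 0 keeps the VEXTRACT
// patterns from matching it, so the plain subregister-based store
// and masked-move lowerings apply instead.
static bool ExtractIndexIsNonZero(SDNode *N) {
  return !isNullConstant(N->getOperand(1));
}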
Diffstat (limited to 'llvm/lib')
-rw-r--r--  llvm/lib/Target/X86/X86InstrAVX512.td         |  69
-rw-r--r--  llvm/lib/Target/X86/X86InstrFragmentsSIMD.td  |  12
-rw-r--r--  llvm/lib/Target/X86/X86InstrVecCompiler.td    |  70
3 files changed, 8 insertions, 143 deletions
diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td
index 7c7c27340cd..97e696981b1 100644
--- a/llvm/lib/Target/X86/X86InstrAVX512.td
+++ b/llvm/lib/Target/X86/X86InstrAVX512.td
@@ -3739,75 +3739,6 @@ let Predicates = [HasVLX] in {
(VMOVDQU64Z256mr addr:$dst, VR256X:$src)>;
}
-multiclass masked_move_for_extract<string InstrStr, X86VectorVTInfo From,
- X86VectorVTInfo To, X86VectorVTInfo Cast> {
- def : Pat<(Cast.VT (vselect Cast.KRCWM:$mask,
- (bitconvert
- (To.VT (extract_subvector
- (From.VT From.RC:$src), (iPTR 0)))),
- To.RC:$src0)),
- (Cast.VT (!cast<Instruction>(InstrStr#"rrk")
- Cast.RC:$src0, Cast.KRCWM:$mask,
- (To.VT (EXTRACT_SUBREG From.RC:$src, To.SubRegIdx))))>;
-
- def : Pat<(Cast.VT (vselect Cast.KRCWM:$mask,
- (bitconvert
- (To.VT (extract_subvector
- (From.VT From.RC:$src), (iPTR 0)))),
- Cast.ImmAllZerosV)),
- (Cast.VT (!cast<Instruction>(InstrStr#"rrkz")
- Cast.KRCWM:$mask,
- (To.VT (EXTRACT_SUBREG From.RC:$src, To.SubRegIdx))))>;
-}
-
-
-let Predicates = [HasVLX] in {
-// A masked extract from the first 128-bits of a 256-bit vector can be
-// implemented with masked move.
-defm : masked_move_for_extract<"VMOVDQA64Z128", v4i64x_info, v2i64x_info, v2i64x_info>;
-defm : masked_move_for_extract<"VMOVDQA64Z128", v8i32x_info, v4i32x_info, v2i64x_info>;
-defm : masked_move_for_extract<"VMOVDQA64Z128", v16i16x_info, v8i16x_info, v2i64x_info>;
-defm : masked_move_for_extract<"VMOVDQA64Z128", v32i8x_info, v16i8x_info, v2i64x_info>;
-defm : masked_move_for_extract<"VMOVDQA32Z128", v4i64x_info, v2i64x_info, v4i32x_info>;
-defm : masked_move_for_extract<"VMOVDQA32Z128", v8i32x_info, v4i32x_info, v4i32x_info>;
-defm : masked_move_for_extract<"VMOVDQA32Z128", v16i16x_info, v8i16x_info, v4i32x_info>;
-defm : masked_move_for_extract<"VMOVDQA32Z128", v32i8x_info, v16i8x_info, v4i32x_info>;
-defm : masked_move_for_extract<"VMOVAPDZ128", v4f64x_info, v2f64x_info, v2f64x_info>;
-defm : masked_move_for_extract<"VMOVAPDZ128", v8f32x_info, v4f32x_info, v2f64x_info>;
-defm : masked_move_for_extract<"VMOVAPSZ128", v4f64x_info, v2f64x_info, v4f32x_info>;
-defm : masked_move_for_extract<"VMOVAPSZ128", v8f32x_info, v4f32x_info, v4f32x_info>;
-
-// A masked extract from the first 128-bits of a 512-bit vector can be
-// implemented with masked move.
-defm : masked_move_for_extract<"VMOVDQA64Z128", v8i64_info, v2i64x_info, v2i64x_info>;
-defm : masked_move_for_extract<"VMOVDQA64Z128", v16i32_info, v4i32x_info, v2i64x_info>;
-defm : masked_move_for_extract<"VMOVDQA64Z128", v32i16_info, v8i16x_info, v2i64x_info>;
-defm : masked_move_for_extract<"VMOVDQA64Z128", v64i8_info, v16i8x_info, v2i64x_info>;
-defm : masked_move_for_extract<"VMOVDQA32Z128", v8i64_info, v2i64x_info, v4i32x_info>;
-defm : masked_move_for_extract<"VMOVDQA32Z128", v16i32_info, v4i32x_info, v4i32x_info>;
-defm : masked_move_for_extract<"VMOVDQA32Z128", v32i16_info, v8i16x_info, v4i32x_info>;
-defm : masked_move_for_extract<"VMOVDQA32Z128", v64i8_info, v16i8x_info, v4i32x_info>;
-defm : masked_move_for_extract<"VMOVAPDZ128", v8f64_info, v2f64x_info, v2f64x_info>;
-defm : masked_move_for_extract<"VMOVAPDZ128", v16f32_info, v4f32x_info, v2f64x_info>;
-defm : masked_move_for_extract<"VMOVAPSZ128", v8f64_info, v2f64x_info, v4f32x_info>;
-defm : masked_move_for_extract<"VMOVAPSZ128", v16f32_info, v4f32x_info, v4f32x_info>;
-
-// A masked extract from the first 256-bits of a 512-bit vector can be
-// implemented with masked move.
-defm : masked_move_for_extract<"VMOVDQA64Z256", v8i64_info, v4i64x_info, v4i64x_info>;
-defm : masked_move_for_extract<"VMOVDQA64Z256", v16i32_info, v8i32x_info, v4i64x_info>;
-defm : masked_move_for_extract<"VMOVDQA64Z256", v32i16_info, v16i16x_info, v4i64x_info>;
-defm : masked_move_for_extract<"VMOVDQA64Z256", v64i8_info, v32i8x_info, v4i64x_info>;
-defm : masked_move_for_extract<"VMOVDQA32Z256", v8i64_info, v4i64x_info, v8i32x_info>;
-defm : masked_move_for_extract<"VMOVDQA32Z256", v16i32_info, v8i32x_info, v8i32x_info>;
-defm : masked_move_for_extract<"VMOVDQA32Z256", v32i16_info, v16i16x_info, v8i32x_info>;
-defm : masked_move_for_extract<"VMOVDQA32Z256", v64i8_info, v32i8x_info, v8i32x_info>;
-defm : masked_move_for_extract<"VMOVAPDZ256", v8f64_info, v4f64x_info, v4f64x_info>;
-defm : masked_move_for_extract<"VMOVAPDZ256", v16f32_info, v8f32x_info, v4f64x_info>;
-defm : masked_move_for_extract<"VMOVAPSZ256", v8f64_info, v4f64x_info, v8f32x_info>;
-defm : masked_move_for_extract<"VMOVAPSZ256", v16f32_info, v8f32x_info, v8f32x_info>;
-}
-
// Move Int Doubleword to Packed Double Int
//
let ExeDomain = SSEPackedInt in {
diff --git a/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td b/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td
index e3c75481f2e..c7f1021b87b 100644
--- a/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td
+++ b/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td
@@ -980,8 +980,10 @@ def INSERT_get_vinsert256_imm : SDNodeXForm<insert_subvector, [{
def vextract128_extract : PatFrag<(ops node:$bigvec, node:$index),
(extract_subvector node:$bigvec,
- node:$index), [{}],
- EXTRACT_get_vextract128_imm>;
+ node:$index), [{
+ // Index 0 can be handled via extract_subreg.
+ return !isNullConstant(N->getOperand(1));
+}], EXTRACT_get_vextract128_imm>;
def vinsert128_insert : PatFrag<(ops node:$bigvec, node:$smallvec,
node:$index),
@@ -991,8 +993,10 @@ def vinsert128_insert : PatFrag<(ops node:$bigvec, node:$smallvec,
def vextract256_extract : PatFrag<(ops node:$bigvec, node:$index),
(extract_subvector node:$bigvec,
- node:$index), [{}],
- EXTRACT_get_vextract256_imm>;
+ node:$index), [{
+ // Index 0 can be handled via extract_subreg.
+ return !isNullConstant(N->getOperand(1));
+}], EXTRACT_get_vextract256_imm>;
def vinsert256_insert : PatFrag<(ops node:$bigvec, node:$smallvec,
node:$index),
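The predicates above use llvm::isNullConstant from llvm/CodeGen/SelectionDAGNodes.h, which reports whether an SDValue is a constant zero. A minimal sketch of the equivalent logic, assuming the same headers (the name isNullConstantSketch is illustrative, not the library symbol):

#include "llvm/CodeGen/SelectionDAGNodes.h"
using namespace llvm;

// True when V is a ConstantSDNode whose value is 0, i.e. exactly the
// extract index that this patch now excludes from VEXTRACT matching.
static bool isNullConstantSketch(SDValue V) {
  ConstantSDNode *C = dyn_cast<ConstantSDNode>(V);
  return C && C->isNullValue();
}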
diff --git a/llvm/lib/Target/X86/X86InstrVecCompiler.td b/llvm/lib/Target/X86/X86InstrVecCompiler.td
index 7cb0ec03d12..e6e9a92af32 100644
--- a/llvm/lib/Target/X86/X86InstrVecCompiler.td
+++ b/llvm/lib/Target/X86/X86InstrVecCompiler.td
@@ -98,76 +98,6 @@ defm : subvector_subreg_lowering<VR256, v16i16, VR512, v32i16, sub_ymm>;
defm : subvector_subreg_lowering<VR256, v32i8, VR512, v64i8, sub_ymm>;
-multiclass subvector_store_lowering<string AlignedStr, string UnalignedStr,
- RegisterClass RC, ValueType DstTy,
- ValueType SrcTy, SubRegIndex SubIdx> {
- def : Pat<(alignedstore (DstTy (extract_subvector
- (SrcTy RC:$src), (iPTR 0))), addr:$dst),
- (!cast<Instruction>("VMOV"#AlignedStr#"mr") addr:$dst,
- (DstTy (EXTRACT_SUBREG RC:$src, SubIdx)))>;
-
- def : Pat<(store (DstTy (extract_subvector
- (SrcTy RC:$src), (iPTR 0))), addr:$dst),
- (!cast<Instruction>("VMOV"#UnalignedStr#"mr") addr:$dst,
- (DstTy (EXTRACT_SUBREG RC:$src, SubIdx)))>;
-}
-
-let Predicates = [HasAVX, NoVLX] in {
- defm : subvector_store_lowering<"APD", "UPD", VR256X, v2f64, v4f64, sub_xmm>;
- defm : subvector_store_lowering<"APS", "UPS", VR256X, v4f32, v8f32, sub_xmm>;
- defm : subvector_store_lowering<"DQA", "DQU", VR256X, v2i64, v4i64, sub_xmm>;
- defm : subvector_store_lowering<"DQA", "DQU", VR256X, v4i32, v8i32, sub_xmm>;
- defm : subvector_store_lowering<"DQA", "DQU", VR256X, v8i16, v16i16, sub_xmm>;
- defm : subvector_store_lowering<"DQA", "DQU", VR256X, v16i8, v32i8, sub_xmm>;
-}
-
-let Predicates = [HasVLX] in {
- // Special patterns for storing subvector extracts of lower 128-bits
- // Its cheaper to just use VMOVAPS/VMOVUPS instead of VEXTRACTF128mr
- defm : subvector_store_lowering<"APDZ128", "UPDZ128", VR256X, v2f64, v4f64,
- sub_xmm>;
- defm : subvector_store_lowering<"APSZ128", "UPSZ128", VR256X, v4f32, v8f32,
- sub_xmm>;
- defm : subvector_store_lowering<"DQA64Z128", "DQU64Z128", VR256X, v2i64,
- v4i64, sub_xmm>;
- defm : subvector_store_lowering<"DQA64Z128", "DQU64Z128", VR256X, v4i32,
- v8i32, sub_xmm>;
- defm : subvector_store_lowering<"DQA64Z128", "DQU64Z128", VR256X, v8i16,
- v16i16, sub_xmm>;
- defm : subvector_store_lowering<"DQA64Z128", "DQU64Z128", VR256X, v16i8,
- v32i8, sub_xmm>;
-
- // Special patterns for storing subvector extracts of lower 128-bits of 512.
- // Its cheaper to just use VMOVAPS/VMOVUPS instead of VEXTRACTF128mr
- defm : subvector_store_lowering<"APDZ128", "UPDZ128", VR512, v2f64, v8f64,
- sub_xmm>;
- defm : subvector_store_lowering<"APSZ128", "UPSZ128", VR512, v4f32, v16f32,
- sub_xmm>;
- defm : subvector_store_lowering<"DQA64Z128", "DQU64Z128", VR512, v2i64,
- v8i64, sub_xmm>;
- defm : subvector_store_lowering<"DQA64Z128", "DQU64Z128", VR512, v4i32,
- v16i32, sub_xmm>;
- defm : subvector_store_lowering<"DQA64Z128", "DQU64Z128", VR512, v8i16,
- v32i16, sub_xmm>;
- defm : subvector_store_lowering<"DQA64Z128", "DQU64Z128", VR512, v16i8,
- v64i8, sub_xmm>;
-
- // Special patterns for storing subvector extracts of lower 256-bits of 512.
- // Its cheaper to just use VMOVAPS/VMOVUPS instead of VEXTRACTF128mr
- defm : subvector_store_lowering<"APDZ256", "UPDZ256", VR512, v4f64, v8f64,
- sub_ymm>;
- defm : subvector_store_lowering<"APSZ256", "UPSZ256", VR512, v8f32, v16f32,
- sub_ymm>;
- defm : subvector_store_lowering<"DQA64Z256", "DQU64Z256", VR512, v4i64,
- v8i64, sub_ymm>;
- defm : subvector_store_lowering<"DQA64Z256", "DQU64Z256", VR512, v8i32,
- v16i32, sub_ymm>;
- defm : subvector_store_lowering<"DQA64Z256", "DQU64Z256", VR512, v16i16,
- v32i16, sub_ymm>;
- defm : subvector_store_lowering<"DQA64Z256", "DQU64Z256", VR512, v32i8,
- v64i8, sub_ymm>;
-}
-
// If we're inserting into an all zeros vector, just use a plain move which
// will zero the upper bits. A post-isel hook will take care of removing
// any moves that we can prove are unnecessary.