summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Craig Topper <craig.topper@gmail.com> 2016-12-19 00:42:28 +0000
committer: Craig Topper <craig.topper@gmail.com> 2016-12-19 00:42:28 +0000
commit: 375aa90291275bf6f85f0f3ef1e82e5dce0472d1 (patch)
tree: a793a5bae1f0cc5e122acb7f4c22f609fbbf1149
parent: 4eeaec46f7de16898a2feb440d064cd1eac1dc2f (diff)
download: bcm5719-llvm-375aa90291275bf6f85f0f3ef1e82e5dce0472d1.tar.gz
bcm5719-llvm-375aa90291275bf6f85f0f3ef1e82e5dce0472d1.zip
[X86] Remove all of the patterns that use X86ISD::FAND/FXOR/FOR/FANDN except for the ones needed for SSE1. Anything SSE2 or above uses the integer ISD opcode.
This removes 11721 bytes from the DAG isel table, or 2.2%.
llvm-svn: 290073
-rw-r--r-- llvm/lib/Target/X86/X86InstrAVX512.td 44
-rw-r--r-- llvm/lib/Target/X86/X86InstrSSE.td 129
2 files changed, 42 insertions, 131 deletions
diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td
index 0fd95cdedd0..ad95ce9cc38 100644
--- a/llvm/lib/Target/X86/X86InstrAVX512.td
+++ b/llvm/lib/Target/X86/X86InstrAVX512.td
@@ -4383,31 +4383,33 @@ defm VMAXCSDZ : avx512_comutable_binop_s<0x5F, "vmaxsd", f64x_info, X86fmaxc,
SSE_ALU_ITINS_S.d>, XD, VEX_W, EVEX_4V, VEX_LIG,
EVEX_CD8<64, CD8VT1>;
-multiclass avx512_fp_packed<bits<8> opc, string OpcodeStr, SDNode OpNode,
+multiclass avx512_fp_packed<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
X86VectorVTInfo _, OpndItins itins,
bit IsCommutable> {
- let ExeDomain = _.ExeDomain in {
+ let ExeDomain = _.ExeDomain, hasSideEffects = 0 in {
defm rr: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.RC:$src2), OpcodeStr##_.Suffix,
"$src2, $src1", "$src1, $src2",
(_.VT (OpNode _.RC:$src1, _.RC:$src2)), itins.rr,
IsCommutable>, EVEX_4V;
- defm rm: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
- (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr##_.Suffix,
- "$src2, $src1", "$src1, $src2",
- (OpNode _.RC:$src1, (_.LdFrag addr:$src2)), itins.rm>,
- EVEX_4V;
- defm rmb: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
- (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr##_.Suffix,
- "${src2}"##_.BroadcastStr##", $src1",
- "$src1, ${src2}"##_.BroadcastStr,
- (OpNode _.RC:$src1, (_.VT (X86VBroadcast
- (_.ScalarLdFrag addr:$src2)))),
- itins.rm>, EVEX_4V, EVEX_B;
+ let mayLoad = 1 in {
+ defm rm: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
+ (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr##_.Suffix,
+ "$src2, $src1", "$src1, $src2",
+ (OpNode _.RC:$src1, (_.LdFrag addr:$src2)), itins.rm>,
+ EVEX_4V;
+ defm rmb: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
+ (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr##_.Suffix,
+ "${src2}"##_.BroadcastStr##", $src1",
+ "$src1, ${src2}"##_.BroadcastStr,
+ (OpNode _.RC:$src1, (_.VT (X86VBroadcast
+ (_.ScalarLdFrag addr:$src2)))),
+ itins.rm>, EVEX_4V, EVEX_B;
+ }
}
}
-multiclass avx512_fp_round_packed<bits<8> opc, string OpcodeStr, SDNode OpNodeRnd,
+multiclass avx512_fp_round_packed<bits<8> opc, string OpcodeStr, SDPatternOperator OpNodeRnd,
X86VectorVTInfo _> {
let ExeDomain = _.ExeDomain in
defm rb: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
@@ -4418,7 +4420,7 @@ multiclass avx512_fp_round_packed<bits<8> opc, string OpcodeStr, SDNode OpNodeRn
}
-multiclass avx512_fp_sae_packed<bits<8> opc, string OpcodeStr, SDNode OpNodeRnd,
+multiclass avx512_fp_sae_packed<bits<8> opc, string OpcodeStr, SDPatternOperator OpNodeRnd,
X86VectorVTInfo _> {
let ExeDomain = _.ExeDomain in
defm rb: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
@@ -4428,7 +4430,7 @@ multiclass avx512_fp_sae_packed<bits<8> opc, string OpcodeStr, SDNode OpNodeRnd,
EVEX_4V, EVEX_B;
}
-multiclass avx512_fp_binop_p<bits<8> opc, string OpcodeStr, SDNode OpNode,
+multiclass avx512_fp_binop_p<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
Predicate prd, SizeItins itins,
bit IsCommutable = 0> {
let Predicates = [prd] in {
@@ -4493,13 +4495,13 @@ let isCodeGenOnly = 1 in {
defm VMAXC : avx512_fp_binop_p<0x5F, "vmax", X86fmaxc, HasAVX512,
SSE_ALU_ITINS_P, 1>;
}
-defm VAND : avx512_fp_binop_p<0x54, "vand", X86fand, HasDQI,
+defm VAND : avx512_fp_binop_p<0x54, "vand", null_frag, HasDQI,
SSE_ALU_ITINS_P, 1>;
-defm VANDN : avx512_fp_binop_p<0x55, "vandn", X86fandn, HasDQI,
+defm VANDN : avx512_fp_binop_p<0x55, "vandn", null_frag, HasDQI,
SSE_ALU_ITINS_P, 0>;
-defm VOR : avx512_fp_binop_p<0x56, "vor", X86for, HasDQI,
+defm VOR : avx512_fp_binop_p<0x56, "vor", null_frag, HasDQI,
SSE_ALU_ITINS_P, 1>;
-defm VXOR : avx512_fp_binop_p<0x57, "vxor", X86fxor, HasDQI,
+defm VXOR : avx512_fp_binop_p<0x57, "vxor", null_frag, HasDQI,
SSE_ALU_ITINS_P, 1>;
// Patterns catch floating point selects with bitcasted integer logic ops.
diff --git a/llvm/lib/Target/X86/X86InstrSSE.td b/llvm/lib/Target/X86/X86InstrSSE.td
index 0e82a1eed31..4e9c72587e1 100644
--- a/llvm/lib/Target/X86/X86InstrSSE.td
+++ b/llvm/lib/Target/X86/X86InstrSSE.td
@@ -2859,80 +2859,6 @@ let Predicates = [HasAVX1Only] in {
}
let Predicates = [HasAVX, NoVLX_Or_NoDQI] in {
- def : Pat<(v4f32 (X86fand VR128:$src1, VR128:$src2)),
- (VANDPSrr VR128:$src1, VR128:$src2)>;
- def : Pat<(v4f32 (X86for VR128:$src1, VR128:$src2)),
- (VORPSrr VR128:$src1, VR128:$src2)>;
- def : Pat<(v4f32 (X86fxor VR128:$src1, VR128:$src2)),
- (VXORPSrr VR128:$src1, VR128:$src2)>;
- def : Pat<(v4f32 (X86fandn VR128:$src1, VR128:$src2)),
- (VANDNPSrr VR128:$src1, VR128:$src2)>;
-
- def : Pat<(X86fand VR128:$src1, (loadv4f32 addr:$src2)),
- (VANDPSrm VR128:$src1, addr:$src2)>;
- def : Pat<(X86for VR128:$src1, (loadv4f32 addr:$src2)),
- (VORPSrm VR128:$src1, addr:$src2)>;
- def : Pat<(X86fxor VR128:$src1, (loadv4f32 addr:$src2)),
- (VXORPSrm VR128:$src1, addr:$src2)>;
- def : Pat<(X86fandn VR128:$src1, (loadv4f32 addr:$src2)),
- (VANDNPSrm VR128:$src1, addr:$src2)>;
-
- def : Pat<(v2f64 (X86fand VR128:$src1, VR128:$src2)),
- (VANDPDrr VR128:$src1, VR128:$src2)>;
- def : Pat<(v2f64 (X86for VR128:$src1, VR128:$src2)),
- (VORPDrr VR128:$src1, VR128:$src2)>;
- def : Pat<(v2f64 (X86fxor VR128:$src1, VR128:$src2)),
- (VXORPDrr VR128:$src1, VR128:$src2)>;
- def : Pat<(v2f64 (X86fandn VR128:$src1, VR128:$src2)),
- (VANDNPDrr VR128:$src1, VR128:$src2)>;
-
- def : Pat<(X86fand VR128:$src1, (loadv2f64 addr:$src2)),
- (VANDPDrm VR128:$src1, addr:$src2)>;
- def : Pat<(X86for VR128:$src1, (loadv2f64 addr:$src2)),
- (VORPDrm VR128:$src1, addr:$src2)>;
- def : Pat<(X86fxor VR128:$src1, (loadv2f64 addr:$src2)),
- (VXORPDrm VR128:$src1, addr:$src2)>;
- def : Pat<(X86fandn VR128:$src1, (loadv2f64 addr:$src2)),
- (VANDNPDrm VR128:$src1, addr:$src2)>;
-
- def : Pat<(v8f32 (X86fand VR256:$src1, VR256:$src2)),
- (VANDPSYrr VR256:$src1, VR256:$src2)>;
- def : Pat<(v8f32 (X86for VR256:$src1, VR256:$src2)),
- (VORPSYrr VR256:$src1, VR256:$src2)>;
- def : Pat<(v8f32 (X86fxor VR256:$src1, VR256:$src2)),
- (VXORPSYrr VR256:$src1, VR256:$src2)>;
- def : Pat<(v8f32 (X86fandn VR256:$src1, VR256:$src2)),
- (VANDNPSYrr VR256:$src1, VR256:$src2)>;
-
- def : Pat<(X86fand VR256:$src1, (loadv8f32 addr:$src2)),
- (VANDPSYrm VR256:$src1, addr:$src2)>;
- def : Pat<(X86for VR256:$src1, (loadv8f32 addr:$src2)),
- (VORPSYrm VR256:$src1, addr:$src2)>;
- def : Pat<(X86fxor VR256:$src1, (loadv8f32 addr:$src2)),
- (VXORPSYrm VR256:$src1, addr:$src2)>;
- def : Pat<(X86fandn VR256:$src1, (loadv8f32 addr:$src2)),
- (VANDNPSYrm VR256:$src1, addr:$src2)>;
-
- def : Pat<(v4f64 (X86fand VR256:$src1, VR256:$src2)),
- (VANDPDYrr VR256:$src1, VR256:$src2)>;
- def : Pat<(v4f64 (X86for VR256:$src1, VR256:$src2)),
- (VORPDYrr VR256:$src1, VR256:$src2)>;
- def : Pat<(v4f64 (X86fxor VR256:$src1, VR256:$src2)),
- (VXORPDYrr VR256:$src1, VR256:$src2)>;
- def : Pat<(v4f64 (X86fandn VR256:$src1, VR256:$src2)),
- (VANDNPDYrr VR256:$src1, VR256:$src2)>;
-
- def : Pat<(X86fand VR256:$src1, (loadv4f64 addr:$src2)),
- (VANDPDYrm VR256:$src1, addr:$src2)>;
- def : Pat<(X86for VR256:$src1, (loadv4f64 addr:$src2)),
- (VORPDYrm VR256:$src1, addr:$src2)>;
- def : Pat<(X86fxor VR256:$src1, (loadv4f64 addr:$src2)),
- (VXORPDYrm VR256:$src1, addr:$src2)>;
- def : Pat<(X86fandn VR256:$src1, (loadv4f64 addr:$src2)),
- (VANDNPDYrm VR256:$src1, addr:$src2)>;
-}
-
-let Predicates = [HasAVX, NoVLX_Or_NoDQI] in {
// Use packed logical operations for scalar ops.
def : Pat<(f64 (X86fand FR64:$src1, FR64:$src2)),
(COPY_TO_REGCLASS (VANDPDrr
@@ -2970,24 +2896,6 @@ let Predicates = [HasAVX, NoVLX_Or_NoDQI] in {
}
let Predicates = [UseSSE1] in {
- def : Pat<(v4f32 (X86fand VR128:$src1, VR128:$src2)),
- (ANDPSrr VR128:$src1, VR128:$src2)>;
- def : Pat<(v4f32 (X86for VR128:$src1, VR128:$src2)),
- (ORPSrr VR128:$src1, VR128:$src2)>;
- def : Pat<(v4f32 (X86fxor VR128:$src1, VR128:$src2)),
- (XORPSrr VR128:$src1, VR128:$src2)>;
- def : Pat<(v4f32 (X86fandn VR128:$src1, VR128:$src2)),
- (ANDNPSrr VR128:$src1, VR128:$src2)>;
-
- def : Pat<(X86fand VR128:$src1, (memopv4f32 addr:$src2)),
- (ANDPSrm VR128:$src1, addr:$src2)>;
- def : Pat<(X86for VR128:$src1, (memopv4f32 addr:$src2)),
- (ORPSrm VR128:$src1, addr:$src2)>;
- def : Pat<(X86fxor VR128:$src1, (memopv4f32 addr:$src2)),
- (XORPSrm VR128:$src1, addr:$src2)>;
- def : Pat<(X86fandn VR128:$src1, (memopv4f32 addr:$src2)),
- (ANDNPSrm VR128:$src1, addr:$src2)>;
-
// Use packed logical operations for scalar ops.
def : Pat<(f32 (X86fand FR32:$src1, FR32:$src2)),
(COPY_TO_REGCLASS (ANDPSrr
@@ -3008,24 +2916,6 @@ let Predicates = [UseSSE1] in {
}
let Predicates = [UseSSE2] in {
- def : Pat<(v2f64 (X86fand VR128:$src1, VR128:$src2)),
- (ANDPDrr VR128:$src1, VR128:$src2)>;
- def : Pat<(v2f64 (X86for VR128:$src1, VR128:$src2)),
- (ORPDrr VR128:$src1, VR128:$src2)>;
- def : Pat<(v2f64 (X86fxor VR128:$src1, VR128:$src2)),
- (XORPDrr VR128:$src1, VR128:$src2)>;
- def : Pat<(v2f64 (X86fandn VR128:$src1, VR128:$src2)),
- (ANDNPDrr VR128:$src1, VR128:$src2)>;
-
- def : Pat<(X86fand VR128:$src1, (memopv2f64 addr:$src2)),
- (ANDPDrm VR128:$src1, addr:$src2)>;
- def : Pat<(X86for VR128:$src1, (memopv2f64 addr:$src2)),
- (ORPDrm VR128:$src1, addr:$src2)>;
- def : Pat<(X86fxor VR128:$src1, (memopv2f64 addr:$src2)),
- (XORPDrm VR128:$src1, addr:$src2)>;
- def : Pat<(X86fandn VR128:$src1, (memopv2f64 addr:$src2)),
- (ANDNPDrm VR128:$src1, addr:$src2)>;
-
// Use packed logical operations for scalar ops.
def : Pat<(f64 (X86fand FR64:$src1, FR64:$src2)),
(COPY_TO_REGCLASS (ANDPDrr
@@ -3045,6 +2935,25 @@ let Predicates = [UseSSE2] in {
(COPY_TO_REGCLASS FR64:$src2, VR128)), FR64)>;
}
+// Patterns for packed operations when we don't have integer type available.
+def : Pat<(v4f32 (X86fand VR128:$src1, VR128:$src2)),
+ (ANDPSrr VR128:$src1, VR128:$src2)>;
+def : Pat<(v4f32 (X86for VR128:$src1, VR128:$src2)),
+ (ORPSrr VR128:$src1, VR128:$src2)>;
+def : Pat<(v4f32 (X86fxor VR128:$src1, VR128:$src2)),
+ (XORPSrr VR128:$src1, VR128:$src2)>;
+def : Pat<(v4f32 (X86fandn VR128:$src1, VR128:$src2)),
+ (ANDNPSrr VR128:$src1, VR128:$src2)>;
+
+def : Pat<(X86fand VR128:$src1, (memopv4f32 addr:$src2)),
+ (ANDPSrm VR128:$src1, addr:$src2)>;
+def : Pat<(X86for VR128:$src1, (memopv4f32 addr:$src2)),
+ (ORPSrm VR128:$src1, addr:$src2)>;
+def : Pat<(X86fxor VR128:$src1, (memopv4f32 addr:$src2)),
+ (XORPSrm VR128:$src1, addr:$src2)>;
+def : Pat<(X86fandn VR128:$src1, (memopv4f32 addr:$src2)),
+ (ANDNPSrm VR128:$src1, addr:$src2)>;
+
//===----------------------------------------------------------------------===//
// SSE 1 & 2 - Arithmetic Instructions
//===----------------------------------------------------------------------===//
OpenPOWER on IntegriCloud