diff options
author | Craig Topper <craig.topper@gmail.com> | 2016-12-19 00:42:28 +0000 |
---|---|---|
committer | Craig Topper <craig.topper@gmail.com> | 2016-12-19 00:42:28 +0000 |
commit | 375aa90291275bf6f85f0f3ef1e82e5dce0472d1 (patch) | |
tree | a793a5bae1f0cc5e122acb7f4c22f609fbbf1149 | |
parent | 4eeaec46f7de16898a2feb440d064cd1eac1dc2f (diff) | |
download | bcm5719-llvm-375aa90291275bf6f85f0f3ef1e82e5dce0472d1.tar.gz bcm5719-llvm-375aa90291275bf6f85f0f3ef1e82e5dce0472d1.zip |
[X86] Remove all of the patterns that use X86ISD::FAND/FXOR/FOR/FANDN except for the ones needed for SSE1. Anything SSE2 or above uses the integer ISD opcode.
This removes 11721 bytes from the DAG isel table, or 2.2%.
llvm-svn: 290073
-rw-r--r-- | llvm/lib/Target/X86/X86InstrAVX512.td | 44 | ||||
-rw-r--r-- | llvm/lib/Target/X86/X86InstrSSE.td | 129 |
2 files changed, 42 insertions, 131 deletions
diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td index 0fd95cdedd0..ad95ce9cc38 100644 --- a/llvm/lib/Target/X86/X86InstrAVX512.td +++ b/llvm/lib/Target/X86/X86InstrAVX512.td @@ -4383,31 +4383,33 @@ defm VMAXCSDZ : avx512_comutable_binop_s<0x5F, "vmaxsd", f64x_info, X86fmaxc, SSE_ALU_ITINS_S.d>, XD, VEX_W, EVEX_4V, VEX_LIG, EVEX_CD8<64, CD8VT1>; -multiclass avx512_fp_packed<bits<8> opc, string OpcodeStr, SDNode OpNode, +multiclass avx512_fp_packed<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode, X86VectorVTInfo _, OpndItins itins, bit IsCommutable> { - let ExeDomain = _.ExeDomain in { + let ExeDomain = _.ExeDomain, hasSideEffects = 0 in { defm rr: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst), (ins _.RC:$src1, _.RC:$src2), OpcodeStr##_.Suffix, "$src2, $src1", "$src1, $src2", (_.VT (OpNode _.RC:$src1, _.RC:$src2)), itins.rr, IsCommutable>, EVEX_4V; - defm rm: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst), - (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr##_.Suffix, - "$src2, $src1", "$src1, $src2", - (OpNode _.RC:$src1, (_.LdFrag addr:$src2)), itins.rm>, - EVEX_4V; - defm rmb: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst), - (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr##_.Suffix, - "${src2}"##_.BroadcastStr##", $src1", - "$src1, ${src2}"##_.BroadcastStr, - (OpNode _.RC:$src1, (_.VT (X86VBroadcast - (_.ScalarLdFrag addr:$src2)))), - itins.rm>, EVEX_4V, EVEX_B; + let mayLoad = 1 in { + defm rm: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst), + (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr##_.Suffix, + "$src2, $src1", "$src1, $src2", + (OpNode _.RC:$src1, (_.LdFrag addr:$src2)), itins.rm>, + EVEX_4V; + defm rmb: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst), + (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr##_.Suffix, + "${src2}"##_.BroadcastStr##", $src1", + "$src1, ${src2}"##_.BroadcastStr, + (OpNode _.RC:$src1, (_.VT (X86VBroadcast + (_.ScalarLdFrag addr:$src2)))), + itins.rm>, EVEX_4V, 
EVEX_B; + } } } -multiclass avx512_fp_round_packed<bits<8> opc, string OpcodeStr, SDNode OpNodeRnd, +multiclass avx512_fp_round_packed<bits<8> opc, string OpcodeStr, SDPatternOperator OpNodeRnd, X86VectorVTInfo _> { let ExeDomain = _.ExeDomain in defm rb: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst), @@ -4418,7 +4420,7 @@ multiclass avx512_fp_round_packed<bits<8> opc, string OpcodeStr, SDNode OpNodeRn } -multiclass avx512_fp_sae_packed<bits<8> opc, string OpcodeStr, SDNode OpNodeRnd, +multiclass avx512_fp_sae_packed<bits<8> opc, string OpcodeStr, SDPatternOperator OpNodeRnd, X86VectorVTInfo _> { let ExeDomain = _.ExeDomain in defm rb: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst), @@ -4428,7 +4430,7 @@ multiclass avx512_fp_sae_packed<bits<8> opc, string OpcodeStr, SDNode OpNodeRnd, EVEX_4V, EVEX_B; } -multiclass avx512_fp_binop_p<bits<8> opc, string OpcodeStr, SDNode OpNode, +multiclass avx512_fp_binop_p<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode, Predicate prd, SizeItins itins, bit IsCommutable = 0> { let Predicates = [prd] in { @@ -4493,13 +4495,13 @@ let isCodeGenOnly = 1 in { defm VMAXC : avx512_fp_binop_p<0x5F, "vmax", X86fmaxc, HasAVX512, SSE_ALU_ITINS_P, 1>; } -defm VAND : avx512_fp_binop_p<0x54, "vand", X86fand, HasDQI, +defm VAND : avx512_fp_binop_p<0x54, "vand", null_frag, HasDQI, SSE_ALU_ITINS_P, 1>; -defm VANDN : avx512_fp_binop_p<0x55, "vandn", X86fandn, HasDQI, +defm VANDN : avx512_fp_binop_p<0x55, "vandn", null_frag, HasDQI, SSE_ALU_ITINS_P, 0>; -defm VOR : avx512_fp_binop_p<0x56, "vor", X86for, HasDQI, +defm VOR : avx512_fp_binop_p<0x56, "vor", null_frag, HasDQI, SSE_ALU_ITINS_P, 1>; -defm VXOR : avx512_fp_binop_p<0x57, "vxor", X86fxor, HasDQI, +defm VXOR : avx512_fp_binop_p<0x57, "vxor", null_frag, HasDQI, SSE_ALU_ITINS_P, 1>; // Patterns catch floating point selects with bitcasted integer logic ops. 
diff --git a/llvm/lib/Target/X86/X86InstrSSE.td b/llvm/lib/Target/X86/X86InstrSSE.td index 0e82a1eed31..4e9c72587e1 100644 --- a/llvm/lib/Target/X86/X86InstrSSE.td +++ b/llvm/lib/Target/X86/X86InstrSSE.td @@ -2859,80 +2859,6 @@ let Predicates = [HasAVX1Only] in { } let Predicates = [HasAVX, NoVLX_Or_NoDQI] in { - def : Pat<(v4f32 (X86fand VR128:$src1, VR128:$src2)), - (VANDPSrr VR128:$src1, VR128:$src2)>; - def : Pat<(v4f32 (X86for VR128:$src1, VR128:$src2)), - (VORPSrr VR128:$src1, VR128:$src2)>; - def : Pat<(v4f32 (X86fxor VR128:$src1, VR128:$src2)), - (VXORPSrr VR128:$src1, VR128:$src2)>; - def : Pat<(v4f32 (X86fandn VR128:$src1, VR128:$src2)), - (VANDNPSrr VR128:$src1, VR128:$src2)>; - - def : Pat<(X86fand VR128:$src1, (loadv4f32 addr:$src2)), - (VANDPSrm VR128:$src1, addr:$src2)>; - def : Pat<(X86for VR128:$src1, (loadv4f32 addr:$src2)), - (VORPSrm VR128:$src1, addr:$src2)>; - def : Pat<(X86fxor VR128:$src1, (loadv4f32 addr:$src2)), - (VXORPSrm VR128:$src1, addr:$src2)>; - def : Pat<(X86fandn VR128:$src1, (loadv4f32 addr:$src2)), - (VANDNPSrm VR128:$src1, addr:$src2)>; - - def : Pat<(v2f64 (X86fand VR128:$src1, VR128:$src2)), - (VANDPDrr VR128:$src1, VR128:$src2)>; - def : Pat<(v2f64 (X86for VR128:$src1, VR128:$src2)), - (VORPDrr VR128:$src1, VR128:$src2)>; - def : Pat<(v2f64 (X86fxor VR128:$src1, VR128:$src2)), - (VXORPDrr VR128:$src1, VR128:$src2)>; - def : Pat<(v2f64 (X86fandn VR128:$src1, VR128:$src2)), - (VANDNPDrr VR128:$src1, VR128:$src2)>; - - def : Pat<(X86fand VR128:$src1, (loadv2f64 addr:$src2)), - (VANDPDrm VR128:$src1, addr:$src2)>; - def : Pat<(X86for VR128:$src1, (loadv2f64 addr:$src2)), - (VORPDrm VR128:$src1, addr:$src2)>; - def : Pat<(X86fxor VR128:$src1, (loadv2f64 addr:$src2)), - (VXORPDrm VR128:$src1, addr:$src2)>; - def : Pat<(X86fandn VR128:$src1, (loadv2f64 addr:$src2)), - (VANDNPDrm VR128:$src1, addr:$src2)>; - - def : Pat<(v8f32 (X86fand VR256:$src1, VR256:$src2)), - (VANDPSYrr VR256:$src1, VR256:$src2)>; - def : Pat<(v8f32 (X86for 
VR256:$src1, VR256:$src2)), - (VORPSYrr VR256:$src1, VR256:$src2)>; - def : Pat<(v8f32 (X86fxor VR256:$src1, VR256:$src2)), - (VXORPSYrr VR256:$src1, VR256:$src2)>; - def : Pat<(v8f32 (X86fandn VR256:$src1, VR256:$src2)), - (VANDNPSYrr VR256:$src1, VR256:$src2)>; - - def : Pat<(X86fand VR256:$src1, (loadv8f32 addr:$src2)), - (VANDPSYrm VR256:$src1, addr:$src2)>; - def : Pat<(X86for VR256:$src1, (loadv8f32 addr:$src2)), - (VORPSYrm VR256:$src1, addr:$src2)>; - def : Pat<(X86fxor VR256:$src1, (loadv8f32 addr:$src2)), - (VXORPSYrm VR256:$src1, addr:$src2)>; - def : Pat<(X86fandn VR256:$src1, (loadv8f32 addr:$src2)), - (VANDNPSYrm VR256:$src1, addr:$src2)>; - - def : Pat<(v4f64 (X86fand VR256:$src1, VR256:$src2)), - (VANDPDYrr VR256:$src1, VR256:$src2)>; - def : Pat<(v4f64 (X86for VR256:$src1, VR256:$src2)), - (VORPDYrr VR256:$src1, VR256:$src2)>; - def : Pat<(v4f64 (X86fxor VR256:$src1, VR256:$src2)), - (VXORPDYrr VR256:$src1, VR256:$src2)>; - def : Pat<(v4f64 (X86fandn VR256:$src1, VR256:$src2)), - (VANDNPDYrr VR256:$src1, VR256:$src2)>; - - def : Pat<(X86fand VR256:$src1, (loadv4f64 addr:$src2)), - (VANDPDYrm VR256:$src1, addr:$src2)>; - def : Pat<(X86for VR256:$src1, (loadv4f64 addr:$src2)), - (VORPDYrm VR256:$src1, addr:$src2)>; - def : Pat<(X86fxor VR256:$src1, (loadv4f64 addr:$src2)), - (VXORPDYrm VR256:$src1, addr:$src2)>; - def : Pat<(X86fandn VR256:$src1, (loadv4f64 addr:$src2)), - (VANDNPDYrm VR256:$src1, addr:$src2)>; -} - -let Predicates = [HasAVX, NoVLX_Or_NoDQI] in { // Use packed logical operations for scalar ops. 
def : Pat<(f64 (X86fand FR64:$src1, FR64:$src2)), (COPY_TO_REGCLASS (VANDPDrr @@ -2970,24 +2896,6 @@ let Predicates = [HasAVX, NoVLX_Or_NoDQI] in { } let Predicates = [UseSSE1] in { - def : Pat<(v4f32 (X86fand VR128:$src1, VR128:$src2)), - (ANDPSrr VR128:$src1, VR128:$src2)>; - def : Pat<(v4f32 (X86for VR128:$src1, VR128:$src2)), - (ORPSrr VR128:$src1, VR128:$src2)>; - def : Pat<(v4f32 (X86fxor VR128:$src1, VR128:$src2)), - (XORPSrr VR128:$src1, VR128:$src2)>; - def : Pat<(v4f32 (X86fandn VR128:$src1, VR128:$src2)), - (ANDNPSrr VR128:$src1, VR128:$src2)>; - - def : Pat<(X86fand VR128:$src1, (memopv4f32 addr:$src2)), - (ANDPSrm VR128:$src1, addr:$src2)>; - def : Pat<(X86for VR128:$src1, (memopv4f32 addr:$src2)), - (ORPSrm VR128:$src1, addr:$src2)>; - def : Pat<(X86fxor VR128:$src1, (memopv4f32 addr:$src2)), - (XORPSrm VR128:$src1, addr:$src2)>; - def : Pat<(X86fandn VR128:$src1, (memopv4f32 addr:$src2)), - (ANDNPSrm VR128:$src1, addr:$src2)>; - // Use packed logical operations for scalar ops. def : Pat<(f32 (X86fand FR32:$src1, FR32:$src2)), (COPY_TO_REGCLASS (ANDPSrr @@ -3008,24 +2916,6 @@ let Predicates = [UseSSE1] in { } let Predicates = [UseSSE2] in { - def : Pat<(v2f64 (X86fand VR128:$src1, VR128:$src2)), - (ANDPDrr VR128:$src1, VR128:$src2)>; - def : Pat<(v2f64 (X86for VR128:$src1, VR128:$src2)), - (ORPDrr VR128:$src1, VR128:$src2)>; - def : Pat<(v2f64 (X86fxor VR128:$src1, VR128:$src2)), - (XORPDrr VR128:$src1, VR128:$src2)>; - def : Pat<(v2f64 (X86fandn VR128:$src1, VR128:$src2)), - (ANDNPDrr VR128:$src1, VR128:$src2)>; - - def : Pat<(X86fand VR128:$src1, (memopv2f64 addr:$src2)), - (ANDPDrm VR128:$src1, addr:$src2)>; - def : Pat<(X86for VR128:$src1, (memopv2f64 addr:$src2)), - (ORPDrm VR128:$src1, addr:$src2)>; - def : Pat<(X86fxor VR128:$src1, (memopv2f64 addr:$src2)), - (XORPDrm VR128:$src1, addr:$src2)>; - def : Pat<(X86fandn VR128:$src1, (memopv2f64 addr:$src2)), - (ANDNPDrm VR128:$src1, addr:$src2)>; - // Use packed logical operations for scalar ops. 
def : Pat<(f64 (X86fand FR64:$src1, FR64:$src2)), (COPY_TO_REGCLASS (ANDPDrr @@ -3045,6 +2935,25 @@ let Predicates = [UseSSE2] in { (COPY_TO_REGCLASS FR64:$src2, VR128)), FR64)>; } +// Patterns for packed operations when we don't have integer type available. +def : Pat<(v4f32 (X86fand VR128:$src1, VR128:$src2)), + (ANDPSrr VR128:$src1, VR128:$src2)>; +def : Pat<(v4f32 (X86for VR128:$src1, VR128:$src2)), + (ORPSrr VR128:$src1, VR128:$src2)>; +def : Pat<(v4f32 (X86fxor VR128:$src1, VR128:$src2)), + (XORPSrr VR128:$src1, VR128:$src2)>; +def : Pat<(v4f32 (X86fandn VR128:$src1, VR128:$src2)), + (ANDNPSrr VR128:$src1, VR128:$src2)>; + +def : Pat<(X86fand VR128:$src1, (memopv4f32 addr:$src2)), + (ANDPSrm VR128:$src1, addr:$src2)>; +def : Pat<(X86for VR128:$src1, (memopv4f32 addr:$src2)), + (ORPSrm VR128:$src1, addr:$src2)>; +def : Pat<(X86fxor VR128:$src1, (memopv4f32 addr:$src2)), + (XORPSrm VR128:$src1, addr:$src2)>; +def : Pat<(X86fandn VR128:$src1, (memopv4f32 addr:$src2)), + (ANDNPSrm VR128:$src1, addr:$src2)>; + //===----------------------------------------------------------------------===// // SSE 1 & 2 - Arithmetic Instructions //===----------------------------------------------------------------------===// |