diff options
author | Lei Huang <lei@ca.ibm.com> | 2018-10-26 18:09:36 +0000 |
---|---|---|
committer | Lei Huang <lei@ca.ibm.com> | 2018-10-26 18:09:36 +0000 |
commit | de20843f6fe53795e27f4ddfc2a550b3497aee02 (patch) | |
tree | 28d7ce9844ff737cb9e88596c2c513ebf67a8cde /llvm/lib | |
parent | 3cc0e935c4d82836a8476a7aac3d51845755d8d0 (diff) | |
download | bcm5719-llvm-de20843f6fe53795e27f4ddfc2a550b3497aee02.tar.gz bcm5719-llvm-de20843f6fe53795e27f4ddfc2a550b3497aee02.zip |
[PowerPC] Improve BUILD_VECTOR of 4 i32s
Currently, for the BUILD_VECTOR node produced by this function:
vector int test(int a, int b, int c, int d) {
return (vector int) { a, b, c, d };
}
we get this on Power9:
mtvsrdd 34, 5, 3
mtvsrdd 35, 6, 4
vmrgow 2, 3, 2
and this on Power8:
mtvsrwz 0, 3
mtvsrwz 1, 5
mtvsrwz 2, 4
mtvsrwz 3, 6
xxmrghd 34, 1, 0
xxmrghd 35, 3, 2
vmrgow 2, 3, 2
This can be improved to this on LE Power9:
rldimi 3, 4, 32, 0
rldimi 5, 6, 32, 0
mtvsrdd 34, 5, 3
and this on LE Power8:
rldimi 3, 4, 32, 0
rldimi 5, 6, 32, 0
mtvsrd 34, 3
mtvsrd 35, 5
xxpermdi 34, 35, 34, 0
This patch updates the TableGen (.td) patterns to generate the optimized sequence for both
Power8 and Power9 on LE and BE.
Differential Revision: https://reviews.llvm.org/D53494
llvm-svn: 345414
Diffstat (limited to 'llvm/lib')
-rw-r--r-- | llvm/lib/Target/PowerPC/PPCInstrVSX.td | 32 |
1 file changed, 16 insertions, 16 deletions
diff --git a/llvm/lib/Target/PowerPC/PPCInstrVSX.td b/llvm/lib/Target/PowerPC/PPCInstrVSX.td index 7a3141abc1b..6a4586002b2 100644 --- a/llvm/lib/Target/PowerPC/PPCInstrVSX.td +++ b/llvm/lib/Target/PowerPC/PPCInstrVSX.td @@ -3873,10 +3873,11 @@ let AddedComplexity = 400 in { (COPY_TO_REGCLASS (MTVSRD $A), VSRC), (COPY_TO_REGCLASS (MTVSRD $B), VSRC), 0))>; def : Pat<(v4i32 (build_vector i32:$A, i32:$B, i32:$C, i32:$D)), - (VMRGOW (XXPERMDI (COPY_TO_REGCLASS (MTVSRWZ $A), VSRC), - (COPY_TO_REGCLASS (MTVSRWZ $C), VSRC), 0), - (XXPERMDI (COPY_TO_REGCLASS (MTVSRWZ $B), VSRC), - (COPY_TO_REGCLASS (MTVSRWZ $D), VSRC), 0))>; + (XXPERMDI + (COPY_TO_REGCLASS + (MTVSRD (RLDIMI AnyExts.B, AnyExts.A, 32, 0)), VSRC), + (COPY_TO_REGCLASS + (MTVSRD (RLDIMI AnyExts.D, AnyExts.C, 32, 0)), VSRC), 0)>; def : Pat<(v4i32 (build_vector i32:$A, i32:$A, i32:$A, i32:$A)), (XXSPLTW (COPY_TO_REGCLASS (MTVSRWZ $A), VSRC), 1)>; } @@ -3888,10 +3889,11 @@ let AddedComplexity = 400 in { (COPY_TO_REGCLASS (MTVSRD $B), VSRC), (COPY_TO_REGCLASS (MTVSRD $A), VSRC), 0))>; def : Pat<(v4i32 (build_vector i32:$A, i32:$B, i32:$C, i32:$D)), - (VMRGOW (XXPERMDI (COPY_TO_REGCLASS (MTVSRWZ $D), VSRC), - (COPY_TO_REGCLASS (MTVSRWZ $B), VSRC), 0), - (XXPERMDI (COPY_TO_REGCLASS (MTVSRWZ $C), VSRC), - (COPY_TO_REGCLASS (MTVSRWZ $A), VSRC), 0))>; + (XXPERMDI + (COPY_TO_REGCLASS + (MTVSRD (RLDIMI AnyExts.C, AnyExts.D, 32, 0)), VSRC), + (COPY_TO_REGCLASS + (MTVSRD (RLDIMI AnyExts.A, AnyExts.B, 32, 0)), VSRC), 0)>; def : Pat<(v4i32 (build_vector i32:$A, i32:$A, i32:$A, i32:$A)), (XXSPLTW (COPY_TO_REGCLASS (MTVSRWZ $A), VSRC), 1)>; } @@ -3944,10 +3946,9 @@ let AddedComplexity = 400 in { def : Pat<(v2i64 (build_vector i64:$rB, i64:$rA)), (v2i64 (MTVSRDD $rB, $rA))>; def : Pat<(v4i32 (build_vector i32:$A, i32:$B, i32:$C, i32:$D)), - (VMRGOW - (v4i32 (COPY_TO_REGCLASS (MTVSRDD AnyExts.A, AnyExts.C), VSRC)), - (v4i32 - (COPY_TO_REGCLASS (MTVSRDD AnyExts.B, AnyExts.D), VSRC)))>; + (MTVSRDD + (RLDIMI AnyExts.B, AnyExts.A, 
32, 0), + (RLDIMI AnyExts.D, AnyExts.C, 32, 0))>; } let Predicates = [IsISA3_0, HasDirectMove, IsLittleEndian] in { @@ -3957,10 +3958,9 @@ let AddedComplexity = 400 in { def : Pat<(v2i64 (build_vector i64:$rA, i64:$rB)), (v2i64 (MTVSRDD $rB, $rA))>; def : Pat<(v4i32 (build_vector i32:$A, i32:$B, i32:$C, i32:$D)), - (VMRGOW - (v4i32 (COPY_TO_REGCLASS (MTVSRDD AnyExts.D, AnyExts.B), VSRC)), - (v4i32 - (COPY_TO_REGCLASS (MTVSRDD AnyExts.C, AnyExts.A), VSRC)))>; + (MTVSRDD + (RLDIMI AnyExts.C, AnyExts.D, 32, 0), + (RLDIMI AnyExts.A, AnyExts.B, 32, 0))>; } // P9 Altivec instructions that can be used to build vectors. // Adding them to PPCInstrVSX.td rather than PPCAltivecVSX.td to compete |