diff options
| author | Simon Pilgrim <llvm-dev@redking.me.uk> | 2016-07-22 16:46:25 +0000 |
|---|---|---|
| committer | Simon Pilgrim <llvm-dev@redking.me.uk> | 2016-07-22 16:46:25 +0000 |
| commit | 820f87a72d872987a95d1e5931f1dd7656f2ec0a (patch) | |
| tree | 1d46ae62cb0fac56b5c103fd081e48c45f31423b /llvm/test/CodeGen/AArch64/bitreverse.ll | |
| parent | c27f1b7182b41a262b8415afc03d21aa20326317 (diff) | |
| download | bcm5719-llvm-820f87a72d872987a95d1e5931f1dd7656f2ec0a.tar.gz bcm5719-llvm-820f87a72d872987a95d1e5931f1dd7656f2ec0a.zip | |
[SelectionDAG] Optimization of BITREVERSE legalization for power-of-2 integer scalar/vector types
As an extension of D19978, this patch replaces the default BITREVERSE evaluation, which masks and shifts each bit individually, with block masks and shifts whenever the integer elements are a power-of-2 number of bits in size.
After calling BSWAP to reverse the order of the constituent bytes (which typically follows a similar approach), each pair of neighbouring 4-bit blocks, then 2-bit blocks, and finally single bits is masked off and swapped over with shifts.
In doing so we can significantly reduce the number of operations required.
Differential Revision: https://reviews.llvm.org/D21578
llvm-svn: 276432
Diffstat (limited to 'llvm/test/CodeGen/AArch64/bitreverse.ll')
| -rw-r--r-- | llvm/test/CodeGen/AArch64/bitreverse.ll | 96 |
1 files changed, 41 insertions, 55 deletions
diff --git a/llvm/test/CodeGen/AArch64/bitreverse.ll b/llvm/test/CodeGen/AArch64/bitreverse.ll index 2eee7cfd8b9..135bce3bdb6 100644 --- a/llvm/test/CodeGen/AArch64/bitreverse.ll +++ b/llvm/test/CodeGen/AArch64/bitreverse.ll @@ -15,29 +15,28 @@ define <2 x i16> @f(<2 x i16> %a) { declare i8 @llvm.bitreverse.i8(i8) readnone -; Unfortunately some of the shift-and-inserts become BFIs, and some do not :( define i8 @g(i8 %a) { ; CHECK-LABEL: g: -; CHECK-DAG: lsr [[S5:w.*]], w0, #5 -; CHECK-DAG: lsr [[S4:w.*]], w0, #4 -; CHECK-DAG: lsr [[S3:w.*]], w0, #3 -; CHECK-DAG: lsr [[S2:w.*]], w0, #2 -; CHECK-DAG: lsl [[L1:w.*]], w0, #29 -; CHECK-DAG: lsl [[L2:w.*]], w0, #19 -; CHECK-DAG: lsl [[L3:w.*]], w0, #17 +; CHECK-DAG: rev [[RV:w.*]], w0 +; CHECK-DAG: and [[L4:w.*]], [[RV]], #0xf0f0f0f +; CHECK-DAG: and [[H4:w.*]], [[RV]], #0xf0f0f0f0 +; CHECK-DAG: lsr [[S4:w.*]], [[H4]], #4 +; CHECK-DAG: orr [[R4:w.*]], [[S4]], [[L4]], lsl #4 -; CHECK-DAG: and [[T1:w.*]], [[L1]], #0x40000000 -; CHECK-DAG: bfi [[T1]], w0, #31, #1 -; CHECK-DAG: bfi [[T1]], [[S2]], #29, #1 -; CHECK-DAG: bfi [[T1]], [[S3]], #28, #1 -; CHECK-DAG: bfi [[T1]], [[S4]], #27, #1 -; CHECK-DAG: bfi [[T1]], [[S5]], #26, #1 -; CHECK-DAG: and [[T2:w.*]], [[L2]], #0x2000000 -; CHECK-DAG: and [[T3:w.*]], [[L3]], #0x1000000 -; CHECK-DAG: orr [[T4:w.*]], [[T1]], [[T2]] -; CHECK-DAG: orr [[T5:w.*]], [[T4]], [[T3]] -; CHECK: lsr w0, [[T5]], #24 +; CHECK-DAG: and [[L2:w.*]], [[R4]], #0x33333333 +; CHECK-DAG: and [[H2:w.*]], [[R4]], #0xcccccccc +; CHECK-DAG: lsr [[S2:w.*]], [[H2]], #2 +; CHECK-DAG: orr [[R2:w.*]], [[S2]], [[L2]], lsl #2 +; CHECK-DAG: mov [[P1:w.*]], #1426063360 +; CHECK-DAG: mov [[N1:w.*]], #-1442840576 +; CHECK-DAG: and [[L1:w.*]], [[R2]], [[P1]] +; CHECK-DAG: and [[H1:w.*]], [[R2]], [[N1]] +; CHECK-DAG: lsr [[S1:w.*]], [[H1]], #1 +; CHECK-DAG: orr [[R1:w.*]], [[S1]], [[L1]], lsl #1 + +; CHECK-DAG: lsr w0, [[R1]], #24 +; CHECK-DAG: ret %b = call i8 @llvm.bitreverse.i8(i8 %a) ret i8 %b } @@ -45,44 +44,31 @@ 
define i8 @g(i8 %a) { declare <8 x i8> @llvm.bitreverse.v8i8(<8 x i8>) readnone define <8 x i8> @g_vec(<8 x i8> %a) { -; Try and match as much of the sequence as precisely as possible. +; CHECK-DAG: movi [[M1:v.*]], #15 +; CHECK-DAG: movi [[M2:v.*]], #240 +; CHECK: and [[A1:v.*]], v0.8b, [[M1]] +; CHECK: and [[A2:v.*]], v0.8b, [[M2]] +; CHECK-DAG: shl [[L4:v.*]], [[A1]], #4 +; CHECK-DAG: ushr [[R4:v.*]], [[A2]], #4 +; CHECK-DAG: orr [[V4:v.*]], [[R4]], [[L4]] + +; CHECK-DAG: movi [[M3:v.*]], #51 +; CHECK-DAG: movi [[M4:v.*]], #204 +; CHECK: and [[A3:v.*]], [[V4]], [[M3]] +; CHECK: and [[A4:v.*]], [[V4]], [[M4]] +; CHECK-DAG: shl [[L2:v.*]], [[A3]], #2 +; CHECK-DAG: ushr [[R2:v.*]], [[A4]], #2 +; CHECK-DAG: orr [[V2:v.*]], [[R2]], [[L2]] -; CHECK-LABEL: g_vec: -; CHECK-DAG: movi [[M1:v.*]], #128 -; CHECK-DAG: movi [[M2:v.*]], #64 -; CHECK-DAG: movi [[M3:v.*]], #32 -; CHECK-DAG: movi [[M4:v.*]], #16 -; CHECK-DAG: movi [[M5:v.*]], #8{{$}} -; CHECK-DAG: movi [[M6:v.*]], #4{{$}} -; CHECK-DAG: movi [[M7:v.*]], #2{{$}} -; CHECK-DAG: movi [[M8:v.*]], #1{{$}} -; CHECK-DAG: shl [[S1:v.*]], v0.8b, #7 -; CHECK-DAG: shl [[S2:v.*]], v0.8b, #5 -; CHECK-DAG: shl [[S3:v.*]], v0.8b, #3 -; CHECK-DAG: shl [[S4:v.*]], v0.8b, #1 -; CHECK-DAG: ushr [[S5:v.*]], v0.8b, #1 -; CHECK-DAG: ushr [[S6:v.*]], v0.8b, #3 -; CHECK-DAG: ushr [[S7:v.*]], v0.8b, #5 -; CHECK-DAG: ushr [[S8:v.*]], v0.8b, #7 -; CHECK-DAG: and [[A1:v.*]], [[S1]], [[M1]] -; CHECK-DAG: and [[A2:v.*]], [[S2]], [[M2]] -; CHECK-DAG: and [[A3:v.*]], [[S3]], [[M3]] -; CHECK-DAG: and [[A4:v.*]], [[S4]], [[M4]] -; CHECK-DAG: and [[A5:v.*]], [[S5]], [[M5]] -; CHECK-DAG: and [[A6:v.*]], [[S6]], [[M6]] -; CHECK-DAG: and [[A7:v.*]], [[S7]], [[M7]] -; CHECK-DAG: and [[A8:v.*]], [[S8]], [[M8]] +; CHECK-DAG: movi [[M5:v.*]], #85 +; CHECK-DAG: movi [[M6:v.*]], #170 +; CHECK: and [[A5:v.*]], [[V2]], [[M5]] +; CHECK: and [[A6:v.*]], [[V2]], [[M6]] +; CHECK-DAG: shl [[L1:v.*]], [[A5]], #1 +; CHECK-DAG: ushr [[R1:v.*]], [[A6]], #1 +; CHECK: 
orr [[V1:v.*]], [[R1]], [[L1]] -; The rest can be ORRed together in any order; it's not worth the test -; maintenance to match them precisely. -; CHECK-DAG: orr -; CHECK-DAG: orr -; CHECK-DAG: orr -; CHECK-DAG: orr -; CHECK-DAG: orr -; CHECK-DAG: orr -; CHECK-DAG: orr -; CHECK: ret +; CHECK: ret %b = call <8 x i8> @llvm.bitreverse.v8i8(<8 x i8> %a) ret <8 x i8> %b } |

