diff options
| author | wschmidt <wschmidt@138bc75d-0d04-0410-961f-82ee72b054a4> | 2014-10-13 01:40:11 +0000 |
|---|---|---|
| committer | wschmidt <wschmidt@138bc75d-0d04-0410-961f-82ee72b054a4> | 2014-10-13 01:40:11 +0000 |
| commit | 26036fbf93f291e4b743e8bce605bd82c7d6fdc3 (patch) | |
| tree | 0ee36b2ce1c384a06f10fe408d475cc394d82913 /libcpp | |
| parent | 4d773dc31e2c8f7dc20688fa5e01cfd1f8062082 (diff) | |
| download | ppe42-gcc-26036fbf93f291e4b743e8bce605bd82c7d6fdc3.tar.gz ppe42-gcc-26036fbf93f291e4b743e8bce605bd82c7d6fdc3.zip | |
2014-10-12 Bill Schmidt <wschmidt@linux.vnet.ibm.com>
Backport from mainline r215873
2014-10-03 Bill Schmidt <wschmidt@linux.vnet.ibm.com>
* lex.c (search_line_fast): Add new version to be used for Power8
and later targets when Altivec is enabled. Restrict the existing
Altivec version to big-endian systems so that lvsr is not used on
little endian, where it is deprecated. Remove LE-specific code
from the now-BE-only version.
git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/branches/gcc-4_9-branch@216132 138bc75d-0d04-0410-961f-82ee72b054a4
Diffstat (limited to 'libcpp')
| -rw-r--r-- | libcpp/ChangeLog | 11 | ||||
| -rw-r--r-- | libcpp/lex.c | 115 |
2 files changed, 114 insertions, 12 deletions
diff --git a/libcpp/ChangeLog b/libcpp/ChangeLog index b85cb67aecb..5ab8c13484b 100644 --- a/libcpp/ChangeLog +++ b/libcpp/ChangeLog @@ -1,4 +1,13 @@ -libcpp/ +2014-10-12 Bill Schmidt <wschmidt@linux.vnet.ibm.com> + + Backport from mainline r215873 + 2014-10-03 Bill Schmidt <wschmidt@linux.vnet.ibm.com> + + * lex.c (search_line_fast): Add new version to be used for Power8 + and later targets when Altivec is enabled. Restrict the existing + Altivec version to big-endian systems so that lvsr is not used on + little endian, where it is deprecated. Remove LE-specific code + from the now-BE-only version. 2014-10-08 Edward Smith-Rowland <3dw4rd@verizon.net> diff --git a/libcpp/lex.c b/libcpp/lex.c index 6d69b591fec..ec07c6e653f 100644 --- a/libcpp/lex.c +++ b/libcpp/lex.c @@ -515,9 +515,111 @@ init_vectorized_lexer (void) search_line_fast = impl; } -#elif (GCC_VERSION >= 4005) && defined(__ALTIVEC__) +#elif defined(_ARCH_PWR8) && defined(__ALTIVEC__) -/* A vection of the fast scanner using AltiVec vectorized byte compares. */ +/* A vection of the fast scanner using AltiVec vectorized byte compares + and VSX unaligned loads (when VSX is available). This is otherwise + the same as the pre-GCC 5 version. */ + +static const uchar * +search_line_fast (const uchar *s, const uchar *end ATTRIBUTE_UNUSED) +{ + typedef __attribute__((altivec(vector))) unsigned char vc; + + const vc repl_nl = { + '\n', '\n', '\n', '\n', '\n', '\n', '\n', '\n', + '\n', '\n', '\n', '\n', '\n', '\n', '\n', '\n' + }; + const vc repl_cr = { + '\r', '\r', '\r', '\r', '\r', '\r', '\r', '\r', + '\r', '\r', '\r', '\r', '\r', '\r', '\r', '\r' + }; + const vc repl_bs = { + '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', + '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\' + }; + const vc repl_qm = { + '?', '?', '?', '?', '?', '?', '?', '?', + '?', '?', '?', '?', '?', '?', '?', '?', + }; + const vc zero = { 0 }; + + vc data, t; + + /* Main loop processing 16 bytes at a time. */ + do + { + vc m_nl, m_cr, m_bs, m_qm; + + data = *((const vc *)s); + s += 16; + + m_nl = (vc) __builtin_vec_cmpeq(data, repl_nl); + m_cr = (vc) __builtin_vec_cmpeq(data, repl_cr); + m_bs = (vc) __builtin_vec_cmpeq(data, repl_bs); + m_qm = (vc) __builtin_vec_cmpeq(data, repl_qm); + t = (m_nl | m_cr) | (m_bs | m_qm); + + /* T now contains 0xff in bytes for which we matched one of the relevant + characters. We want to exit the loop if any byte in T is non-zero. + Below is the expansion of vec_any_ne(t, zero). */ + } + while (!__builtin_vec_vcmpeq_p(/*__CR6_LT_REV*/3, t, zero)); + + /* Restore s to to point to the 16 bytes we just processed. */ + s -= 16; + + { +#define N (sizeof(vc) / sizeof(long)) + + union { + vc v; + /* Statically assert that N is 2 or 4. */ + unsigned long l[(N == 2 || N == 4) ? N : -1]; + } u; + unsigned long l, i = 0; + + u.v = t; + + /* Find the first word of T that is non-zero. */ + switch (N) + { + case 4: + l = u.l[i++]; + if (l != 0) + break; + s += sizeof(unsigned long); + l = u.l[i++]; + if (l != 0) + break; + s += sizeof(unsigned long); + case 2: + l = u.l[i++]; + if (l != 0) + break; + s += sizeof(unsigned long); + l = u.l[i]; + } + + /* L now contains 0xff in bytes for which we matched one of the + relevant characters. We can find the byte index by finding + its bit index and dividing by 8. */ +#ifdef __BIG_ENDIAN__ + l = __builtin_clzl(l) >> 3; +#else + l = __builtin_ctzl(l) >> 3; +#endif + return s + l; + +#undef N + } +} + +#elif (GCC_VERSION >= 4005) && defined(__ALTIVEC__) && defined (__BIG_ENDIAN__) + +/* A vection of the fast scanner using AltiVec vectorized byte compares. + This cannot be used for little endian because vec_lvsl/lvsr are + deprecated for little endian and the code won't work properly. */ /* ??? Unfortunately, attribute(target("altivec")) is not yet supported, so we can't compile this function without -maltivec on the command line (or implied by some other switch). */ @@ -559,13 +661,8 @@ search_line_fast (const uchar *s, const uchar *end ATTRIBUTE_UNUSED) beginning with all ones and shifting in zeros according to the mis-alignment. The LVSR instruction pulls the exact shift we want from the address. */ -#ifdef __BIG_ENDIAN__ mask = __builtin_vec_lvsr(0, s); mask = __builtin_vec_perm(zero, ones, mask); -#else - mask = __builtin_vec_lvsl(0, s); - mask = __builtin_vec_perm(ones, zero, mask); -#endif data &= mask; /* While altivec loads mask addresses, we still need to align S so @@ -629,11 +726,7 @@ search_line_fast (const uchar *s, const uchar *end ATTRIBUTE_UNUSED) /* L now contains 0xff in bytes for which we matched one of the relevant characters. We can find the byte index by finding its bit index and dividing by 8. */ -#ifdef __BIG_ENDIAN__ l = __builtin_clzl(l) >> 3; -#else - l = __builtin_ctzl(l) >> 3; -#endif return s + l; #undef N |

