diff options
| author | rth <rth@138bc75d-0d04-0410-961f-82ee72b054a4> | 2011-11-09 22:54:55 +0000 |
|---|---|---|
| committer | rth <rth@138bc75d-0d04-0410-961f-82ee72b054a4> | 2011-11-09 22:54:55 +0000 |
| commit | ba2f8f6bb45dad549b9ba03546ff5d1f2ff4bad7 (patch) | |
| tree | b83ad479a56bee69877750fe78fab6e95bcec8c3 /libitm/config | |
| parent | 0cae34681778c193b7a391ab38f468626e191687 (diff) | |
| download | ppe42-gcc-ba2f8f6bb45dad549b9ba03546ff5d1f2ff4bad7.tar.gz ppe42-gcc-ba2f8f6bb45dad549b9ba03546ff5d1f2ff4bad7.zip | |
libitm: Remove unused code.
In particular, unused code that's presenting portability problems.
git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@181241 138bc75d-0d04-0410-961f-82ee72b054a4
Diffstat (limited to 'libitm/config')
| -rw-r--r-- | libitm/config/alpha/cacheline.h | 86 | ||||
| -rw-r--r-- | libitm/config/alpha/unaligned.h | 118 | ||||
| -rw-r--r-- | libitm/config/generic/cacheline.cc | 49 | ||||
| -rw-r--r-- | libitm/config/generic/cacheline.h | 49 | ||||
| -rw-r--r-- | libitm/config/generic/cachepage.h | 77 | ||||
| -rw-r--r-- | libitm/config/generic/tls.cc | 47 | ||||
| -rw-r--r-- | libitm/config/generic/unaligned.h | 228 | ||||
| -rw-r--r-- | libitm/config/posix/cachepage.cc | 183 | ||||
| -rw-r--r-- | libitm/config/x86/cacheline.cc | 73 | ||||
| -rw-r--r-- | libitm/config/x86/cacheline.h | 123 | ||||
| -rw-r--r-- | libitm/config/x86/unaligned.h | 237 | ||||
| -rw-r--r-- | libitm/config/x86/x86_avx.cc | 59 | ||||
| -rw-r--r-- | libitm/config/x86/x86_sse.cc | 79 |
13 files changed, 3 insertions, 1405 deletions
diff --git a/libitm/config/alpha/cacheline.h b/libitm/config/alpha/cacheline.h index 5e38486b713..611a1c9a26e 100644 --- a/libitm/config/alpha/cacheline.h +++ b/libitm/config/alpha/cacheline.h @@ -33,90 +33,6 @@ // modification mask, below. #define CACHELINE_SIZE 64 -#ifdef __alpha_bwx__ -# include "config/generic/cacheline.h" -#else -// If we don't have byte-word stores, then we'll never be able to -// adjust *all* of the byte loads/stores to be truely atomic. So -// only guarantee 4-byte aligned values atomicly stored, exactly -// like the native system. Use byte zap instructions to accelerate -// sub-word masked stores. +#include "config/generic/cacheline.h" -namespace GTM HIDDEN { - -// A gtm_cacheline_mask stores a modified bit for every modified byte -// in the cacheline with which it is associated. -typedef sized_integral<CACHELINE_SIZE / 8>::type gtm_cacheline_mask; - -union gtm_cacheline -{ - // Byte access to the cacheline. - unsigned char b[CACHELINE_SIZE] __attribute__((aligned(CACHELINE_SIZE))); - - // Larger sized access to the cacheline. - uint16_t u16[CACHELINE_SIZE / sizeof(uint16_t)]; - uint32_t u32[CACHELINE_SIZE / sizeof(uint32_t)]; - uint64_t u64[CACHELINE_SIZE / sizeof(uint64_t)]; - gtm_word w[CACHELINE_SIZE / sizeof(gtm_word)]; - - // Store S into D, but only the bytes specified by M. - static void store_mask(uint32_t *d, uint32_t s, uint8_t m); - static void store_mask(uint64_t *d, uint64_t s, uint8_t m); - - // Copy S to D, but only the bytes specified by M. - static void copy_mask (gtm_cacheline * __restrict d, - const gtm_cacheline * __restrict s, - gtm_cacheline_mask m); - - // A write barrier to emit after (a series of) copy_mask. - static void copy_mask_wb () { atomic_write_barrier(); } -}; - -inline void ALWAYS_INLINE -gtm_cacheline::store_mask (uint32_t *d, uint32_t s, uint8_t m) -{ - const uint8_t tm = (1 << sizeof(uint32_t)) - 1; - - m &= tm; - if (__builtin_expect (m, tm)) - { - if (__builtin_expect (m == tm, 1)) - *d = s; - else - *d = __builtin_alpha_zap (*d, m) | __builtin_alpha_zapnot (s, m); - } -} - -inline void ALWAYS_INLINE -gtm_cacheline::store_mask (uint64_t *d, uint64_t s, uint8_t m) -{ - if (__builtin_expect (m, 0xff)) - { - if (__builtin_expect (m == 0xff, 1)) - *d = s; - else - { - typedef uint32_t *p32 __attribute__((may_alias)); - p32 d32 = reinterpret_cast<p32>(d); - - if ((m & 0x0f) == 0x0f) - { - d32[0] = s; - m &= 0xf0; - } - else if ((m & 0xf0) == 0xf0) - { - d32[1] = s >> 32; - m &= 0x0f; - } - - if (m) - *d = __builtin_alpha_zap (*d, m) | __builtin_alpha_zapnot (s, m); - } - } -} - -} // namespace GTM - -#endif // __alpha_bwx__ #endif // LIBITM_ALPHA_CACHELINE_H diff --git a/libitm/config/alpha/unaligned.h b/libitm/config/alpha/unaligned.h deleted file mode 100644 index 3d091aee228..00000000000 --- a/libitm/config/alpha/unaligned.h +++ /dev/null @@ -1,118 +0,0 @@ -/* Copyright (C) 2009, 2011 Free Software Foundation, Inc. - Contributed by Richard Henderson <rth@redhat.com>. - - This file is part of the GNU Transactional Memory Library (libitm). - - Libitm is free software; you can redistribute it and/or modify it - under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 3 of the License, or - (at your option) any later version. - - Libitm is distributed in the hope that it will be useful, but WITHOUT ANY - WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS - FOR A PARTICULAR PURPOSE. See the GNU General Public License for - more details. - - Under Section 7 of GPL version 3, you are granted additional - permissions described in the GCC Runtime Library Exception, version - 3.1, as published by the Free Software Foundation. - - You should have received a copy of the GNU General Public License and - a copy of the GCC Runtime Library Exception along with this program; - see the files COPYING3 and COPYING.RUNTIME respectively. If not, see - <http://www.gnu.org/licenses/>. */ - -#ifndef LIBITM_ALPHA_UNALIGNED_H -#define LIBITM_ALPHA_UNALIGNED_H 1 - -#define HAVE_ARCH_UNALIGNED_LOAD2_U2 1 -#define HAVE_ARCH_UNALIGNED_LOAD2_U4 1 -#define HAVE_ARCH_UNALIGNED_LOAD2_U8 1 - -#ifndef __alpha_bwx__ -#define HAVE_ARCH_UNALIGNED_STORE2_U2 1 -#endif -#define HAVE_ARCH_UNALIGNED_STORE2_U4 1 -#define HAVE_ARCH_UNALIGNED_STORE2_U8 1 - -#include "config/generic/unaligned.h" - -namespace GTM HIDDEN { - -template<> -inline uint16_t ALWAYS_INLINE -unaligned_load2<uint16_t>(const gtm_cacheline *c1, - const gtm_cacheline *c2, size_t ofs) -{ - uint64_t v1 = c1->u64[CACHELINE_SIZE / sizeof(uint64_t) - 1]; - uint64_t v2 = c2->u64[0]; - - return __builtin_alpha_extwl (v1, ofs) | __builtin_alpha_extwh (v2, ofs); -} - -template<> -inline uint32_t ALWAYS_INLINE -unaligned_load2<uint32_t>(const gtm_cacheline *c1, - const gtm_cacheline *c2, size_t ofs) -{ - uint64_t v1 = c1->u64[CACHELINE_SIZE / sizeof(uint64_t) - 1]; - uint64_t v2 = c2->u64[0]; - - return __builtin_alpha_extll (v1, ofs) + __builtin_alpha_extlh (v2, ofs); -} - -template<> -inline uint64_t ALWAYS_INLINE -unaligned_load2<uint64_t>(const gtm_cacheline *c1, - const gtm_cacheline *c2, size_t ofs) -{ - uint64_t v1 = c1->u64[CACHELINE_SIZE / sizeof(uint64_t) - 1]; - uint64_t v2 = c2->u64[0]; - - return __builtin_alpha_extql (v1, ofs) | __builtin_alpha_extqh (v2, ofs); -} - -#ifndef __alpha_bwx__ -template<> -inline void -unaligned_store2<uint16_t>(gtm_cacheline *c1, gtm_cacheline *c2, - size_t ofs, uint16_t val) -{ - uint32_t vl = (uint32_t)val << 24, vh = val >> 8; - - gtm_cacheline::store_mask (&c1->u32[CACHELINE_SIZE / 4 - 1], vl, 4); - gtm_cacheline::store_mask (&c2->u32[0], vh, 1); -} -#endif - -template<> -inline void -unaligned_store2<uint32_t>(gtm_cacheline *c1, gtm_cacheline *c2, - size_t ofs, uint32_t val) -{ - uint64_t vl = __builtin_alpha_insll (val, ofs); - uint64_t ml = __builtin_alpha_insll (~0u, ofs); - uint64_t vh = __builtin_alpha_inslh (val, ofs); - uint64_t mh = __builtin_alpha_inslh (~0u, ofs); - - gtm_cacheline::store_mask (&c1->u64[CACHELINE_SIZE / 8 - 1], vl, ml); - gtm_cacheline::store_mask (&c2->u64[0], vh, mh); -} - -template<> -inline void -unaligned_store2<uint64_t>(gtm_cacheline *c1, gtm_cacheline *c2, - size_t ofs, uint64_t val) -{ - uint64_t vl = __builtin_alpha_insql (val, ofs); - uint64_t ml = __builtin_alpha_insql (~0u, ofs); - uint64_t vh = __builtin_alpha_insqh (val, ofs); - uint64_t mh = __builtin_alpha_insqh (~0u, ofs); - - gtm_cacheline::store_mask (&c1->u64[CACHELINE_SIZE / 8 - 1], vl, ml); - gtm_cacheline::store_mask (&c2->u64[0], vh, mh); -} - -} // namespace GTM - -#endif // LIBITM_ALPHA_UNALIGNED_H diff --git a/libitm/config/generic/cacheline.cc b/libitm/config/generic/cacheline.cc deleted file mode 100644 index 108ffba3037..00000000000 --- a/libitm/config/generic/cacheline.cc +++ /dev/null @@ -1,49 +0,0 @@ -/* Copyright (C) 2009, 2011 Free Software Foundation, Inc. - Contributed by Richard Henderson <rth@redhat.com>. - - This file is part of the GNU Transactional Memory Library (libitm). - - Libitm is free software; you can redistribute it and/or modify it - under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 3 of the License, or - (at your option) any later version. - - Libitm is distributed in the hope that it will be useful, but WITHOUT ANY - WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS - FOR A PARTICULAR PURPOSE. See the GNU General Public License for - more details. - - Under Section 7 of GPL version 3, you are granted additional - permissions described in the GCC Runtime Library Exception, version - 3.1, as published by the Free Software Foundation. - - You should have received a copy of the GNU General Public License and - a copy of the GCC Runtime Library Exception along with this program; - see the files COPYING3 and COPYING.RUNTIME respectively. If not, see - <http://www.gnu.org/licenses/>. */ - -#include "libitm_i.h" - - -namespace GTM HIDDEN { - -void -gtm_cacheline::copy_mask (gtm_cacheline * __restrict d, - const gtm_cacheline * __restrict s, - gtm_cacheline_mask m) -{ - const size_t n = sizeof (gtm_word); - - if (m == (gtm_cacheline_mask) -1) - { - *d = *s; - return; - } - if (__builtin_expect (m == 0, 0)) - return; - - for (size_t i = 0; i < CACHELINE_SIZE / n; ++i, m >>= n) - store_mask (&d->w[i], s->w[i], m); -} - -} // namespace GTM diff --git a/libitm/config/generic/cacheline.h b/libitm/config/generic/cacheline.h index 0a5af761d6e..dd7d877d1d1 100644 --- a/libitm/config/generic/cacheline.h +++ b/libitm/config/generic/cacheline.h @@ -51,57 +51,8 @@ union gtm_cacheline uint32_t u32[CACHELINE_SIZE / sizeof(uint32_t)]; uint64_t u64[CACHELINE_SIZE / sizeof(uint64_t)]; gtm_word w[CACHELINE_SIZE / sizeof(gtm_word)]; - - // Store S into D, but only the bytes specified by M. - template<typename T> static void store_mask (T *d, T s, uint8_t m); - - // Copy S to D, but only the bytes specified by M. - static void copy_mask (gtm_cacheline * __restrict d, - const gtm_cacheline * __restrict s, - gtm_cacheline_mask m); - - // A write barrier to emit after (a series of) copy_mask. - // When we're emitting non-temporal stores, the normal strong - // ordering of the machine doesn't apply. - static void copy_mask_wb () { atomic_write_barrier(); } }; -template<typename T> -inline void -gtm_cacheline::store_mask (T *d, T s, uint8_t m) -{ - const uint8_t tm = (1 << sizeof(T)) - 1; - - if (__builtin_expect (m & tm, tm)) - { - if (__builtin_expect ((m & tm) == tm, 1)) - *d = s; - else - { - const int half = sizeof(T) / 2; - typedef typename sized_integral<half>::type half_t; - half_t *dhalf = reinterpret_cast<half_t *>(d); - half_t s1, s2; - - if (WORDS_BIGENDIAN) - s1 = s >> half*8, s2 = s; - else - s1 = s, s2 = s >> half*8; - - store_mask (dhalf, s1, m); - store_mask (dhalf + 1, s2, m >> half); - } - } -} - -template<> -inline void ALWAYS_INLINE -gtm_cacheline::store_mask<uint8_t> (uint8_t *d, uint8_t s, uint8_t m) -{ - if (m & 1) - *d = s; -} - } // namespace GTM #endif // LIBITM_CACHELINE_H diff --git a/libitm/config/generic/cachepage.h b/libitm/config/generic/cachepage.h deleted file mode 100644 index a5472f3831b..00000000000 --- a/libitm/config/generic/cachepage.h +++ /dev/null @@ -1,77 +0,0 @@ -/* Copyright (C) 2009, 2011 Free Software Foundation, Inc. - Contributed by Richard Henderson <rth@redhat.com>. - - This file is part of the GNU Transactional Memory Library (libitm). - - Libitm is free software; you can redistribute it and/or modify it - under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 3 of the License, or - (at your option) any later version. - - Libitm is distributed in the hope that it will be useful, but WITHOUT ANY - WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS - FOR A PARTICULAR PURPOSE. See the GNU General Public License for - more details. - - Under Section 7 of GPL version 3, you are granted additional - permissions described in the GCC Runtime Library Exception, version - 3.1, as published by the Free Software Foundation. - - You should have received a copy of the GNU General Public License and - a copy of the GCC Runtime Library Exception along with this program; - see the files COPYING3 and COPYING.RUNTIME respectively. If not, see - <http://www.gnu.org/licenses/>. */ - -#ifndef LIBITM_CACHEPAGE_H -#define LIBITM_CACHEPAGE_H 1 - -namespace GTM HIDDEN { - -// A "page" worth of saved cachelines plus modification masks. This -// arrangement is intended to minimize the overhead of alignment. The -// PAGE_SIZE defined by the target must be a constant for this to work, -// which means that this definition may not be the same as the real -// system page size. An additional define of FIXED_PAGE_SIZE by the -// target indicates that PAGE_SIZE exactly matches the system page size. - -#ifndef PAGE_SIZE -#define PAGE_SIZE 4096 -#endif - -struct gtm_cacheline_page -{ - static const size_t LINES - = ((PAGE_SIZE - sizeof(gtm_cacheline_page *)) - / (CACHELINE_SIZE + sizeof(gtm_cacheline_mask))); - - gtm_cacheline lines[LINES] __attribute__((aligned(PAGE_SIZE))); - gtm_cacheline_mask masks[LINES]; - gtm_cacheline_page *prev; - - static gtm_cacheline_page * - page_for_line (gtm_cacheline *c) - { - return (gtm_cacheline_page *)((uintptr_t)c & -PAGE_SIZE); - } - - gtm_cacheline_mask * - mask_for_line (gtm_cacheline *c) - { - size_t index = c - &this->lines[0]; - return &this->masks[index]; - } - - static gtm_cacheline_mask * - mask_for_page_line (gtm_cacheline *c) - { - gtm_cacheline_page *p = page_for_line (c); - return p->mask_for_line (c); - } - - static void *operator new (size_t); - static void operator delete (void *); -}; - -} // namespace GTM - -#endif // LIBITM_CACHEPAGE_H diff --git a/libitm/config/generic/tls.cc b/libitm/config/generic/tls.cc index c6421113c86..e502e50869b 100644 --- a/libitm/config/generic/tls.cc +++ b/libitm/config/generic/tls.cc @@ -30,51 +30,4 @@ namespace GTM HIDDEN { __thread gtm_thread_tls _gtm_thr_tls; #endif -// Filter out any updates that overlap the libitm stack, as defined by -// TOP (entry point to library) and BOT (below current function). This -// definition should be fine for all stack-grows-down architectures. - -gtm_cacheline_mask __attribute__((noinline)) -gtm_mask_stack(gtm_cacheline *line, gtm_cacheline_mask mask) -{ - void *top = gtm_thr()->jb.cfa; - void *bot = __builtin_dwarf_cfa(); - - // We must have come through an entry point that set TOP. - assert (top != NULL); - - if (line + 1 < bot) - { - // Since we don't have the REAL stack boundaries for this thread, - // we cannot know if this is a dead write to a stack address below - // the current function or if it is write to another VMA. In either - // case allowing the write should not affect correctness. - } - else if (line >= top) - { - // A valid write to an address in an outer stack frame, or a write - // to another VMA. - } - else - { - uintptr_t diff = (uintptr_t)top - (uintptr_t)line; - if (diff >= CACHELINE_SIZE) - { - // The write is either fully within the proscribed area, or the tail - // of the cacheline overlaps the proscribed area. Assume that all - // stacks are at least cacheline aligned and declare the head of the - // cacheline dead. - mask = 0; - } - else - { - // The head of the cacheline is within the proscribed area, but the - // tail of the cacheline is live. Eliminate the dead writes. - mask &= (gtm_cacheline_mask)-1 << diff; - } - } - - return mask; -} - } // namespace GTM diff --git a/libitm/config/generic/unaligned.h b/libitm/config/generic/unaligned.h deleted file mode 100644 index 50cb13bd277..00000000000 --- a/libitm/config/generic/unaligned.h +++ /dev/null @@ -1,228 +0,0 @@ -/* Copyright (C) 2009, 2011 Free Software Foundation, Inc. - Contributed by Richard Henderson <rth@redhat.com>. - - This file is part of the GNU Transactional Memory Library (libitm). - - Libitm is free software; you can redistribute it and/or modify it - under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 3 of the License, or - (at your option) any later version. - - Libitm is distributed in the hope that it will be useful, but WITHOUT ANY - WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS - FOR A PARTICULAR PURPOSE. See the GNU General Public License for - more details. - - Under Section 7 of GPL version 3, you are granted additional - permissions described in the GCC Runtime Library Exception, version - 3.1, as published by the Free Software Foundation. - - You should have received a copy of the GNU General Public License and - a copy of the GCC Runtime Library Exception along with this program; - see the files COPYING3 and COPYING.RUNTIME respectively. If not, see - <http://www.gnu.org/licenses/>. */ - -#ifndef LIBITM_UNALIGNED_H -#define LIBITM_UNALIGNED_H 1 - -namespace GTM HIDDEN { - -#ifndef STRICT_ALIGNMENT -#define STRICT_ALIGNMENT 1 -#endif - -// A type trait for whether type T requires strict alignment. -// The generic types are assumed to all be the same; specializations -// for target-specific types should be done in config/cpu/unaligned.h. -template<typename T> - struct strict_alignment - : public std::integral_constant<bool, STRICT_ALIGNMENT> - { }; - -// A helper template for accessing an integral type the same size as T -template<typename T> - struct make_integral - : public sized_integral<sizeof(T)> - { }; - -// A helper class for accessing T as an unaligned value. -template<typename T> -struct __attribute__((packed)) unaligned_helper - { T x; }; - -// A helper class for view-converting T as an integer. -template<typename T> -union view_convert_helper -{ - typedef T type; - typedef make_integral<T> itype; - - type t; - itype i; -}; - -// Generate an unaligned load sequence. -// The compiler knows how to do this for any specific type. -template<typename T> -inline T ALWAYS_INLINE -unaligned_load(const void *t) -{ - typedef unaligned_helper<T> UT; - const UT *ut = reinterpret_cast<const UT *>(t); - return ut->x; -} - -// Generate an unaligned store sequence. -template<typename T> -inline void ALWAYS_INLINE -unaligned_store(void *t, T val) -{ - typedef unaligned_helper<T> UT; - UT *ut = reinterpret_cast<UT *>(t); - ut->x = val; -} - -// Generate an unaligned load from two different cachelines. -// It is known that OFS + SIZEOF(T) > CACHELINE_SIZE. -template<typename T> -inline T ALWAYS_INLINE -unaligned_load2(const gtm_cacheline *c1, const gtm_cacheline *c2, size_t ofs) -{ - size_t left = CACHELINE_SIZE - ofs; - T ret; - - memcpy (&ret, &c1->b[ofs], left); - memcpy ((char *)&ret + ofs, c2, sizeof(T) - left); - - return ret; -} - -// Generate an unaligned store into two different cachelines. -// It is known that OFS + SIZEOF(T) > CACHELINE_SIZE. -template<typename T> -inline void ALWAYS_INLINE -unaligned_store2(gtm_cacheline *c1, gtm_cacheline *c2, size_t ofs, T val) -{ - size_t left = CACHELINE_SIZE - ofs; - memcpy (&c1->b[ofs], &val, left); - memcpy (c2, (char *)&val + left, sizeof(T) - left); -} - -#ifndef HAVE_ARCH_UNALIGNED_LOAD2_U2 -template<> -inline uint16_t ALWAYS_INLINE -unaligned_load2<uint16_t>(const gtm_cacheline *c1, - const gtm_cacheline *c2, size_t ofs) -{ - uint16_t v1 = c1->b[CACHELINE_SIZE - 1]; - uint16_t v2 = c2->b[0]; - - if (WORDS_BIGENDIAN) - return v1 << 8 | v2; - else - return v2 << 8 | v1; -} -#endif - -#ifndef HAVE_ARCH_UNALIGNED_LOAD2_U4 -template<> -inline uint32_t ALWAYS_INLINE -unaligned_load2<uint32_t>(const gtm_cacheline *c1, - const gtm_cacheline *c2, size_t ofs) -{ - uint32_t v1 = c1->u32[CACHELINE_SIZE / sizeof(uint32_t) - 1]; - uint32_t v2 = c2->u32[0]; - int s2 = (ofs & (sizeof(uint32_t) - 1)) * 8; - int s1 = sizeof(uint32_t) * 8 - s2; - - if (WORDS_BIGENDIAN) - return v1 << s2 | v2 >> s1; - else - return v2 << s2 | v1 >> s1; -} -#endif - -#ifndef HAVE_ARCH_UNALIGNED_LOAD2_U8 -template<> -inline uint64_t ALWAYS_INLINE -unaligned_load2<uint64_t>(const gtm_cacheline *c1, - const gtm_cacheline *c2, size_t ofs) -{ - uint64_t v1 = c1->u64[CACHELINE_SIZE / sizeof(uint64_t) - 1]; - uint64_t v2 = c2->u64[0]; - int s2 = (ofs & (sizeof(uint64_t) - 1)) * 8; - int s1 = sizeof(uint64_t) * 8 - s2; - - if (WORDS_BIGENDIAN) - return v1 << s2 | v2 >> s1; - else - return v2 << s2 | v1 >> s1; -} -#endif - -template<> -inline float ALWAYS_INLINE -unaligned_load2<float>(const gtm_cacheline *c1, - const gtm_cacheline *c2, size_t ofs) -{ - typedef view_convert_helper<float> VC; VC vc; - vc.i = unaligned_load2<VC::itype>(c1, c2, ofs); - return vc.t; -} - -template<> -inline double ALWAYS_INLINE -unaligned_load2<double>(const gtm_cacheline *c1, - const gtm_cacheline *c2, size_t ofs) -{ - typedef view_convert_helper<double> VC; VC vc; - vc.i = unaligned_load2<VC::itype>(c1, c2, ofs); - return vc.t; -} - -#ifndef HAVE_ARCH_UNALIGNED_STORE2_U2 -template<> -inline void ALWAYS_INLINE -unaligned_store2<uint16_t>(gtm_cacheline *c1, gtm_cacheline *c2, - size_t ofs, uint16_t val) -{ - uint8_t vl = val, vh = val >> 8; - - if (WORDS_BIGENDIAN) - { - c1->b[CACHELINE_SIZE - 1] = vh; - c2->b[0] = vl; - } - else - { - c1->b[CACHELINE_SIZE - 1] = vl; - c2->b[0] = vh; - } -} -#endif - -#if 0 -#ifndef HAVE_ARCH_UNALIGNED_STORE2_U4 -template<> -inline void ALWAYS_INLINE -unaligned_store2<uint32_t>(gtm_cacheline *c1, gtm_cacheline *c2, - size_t ofs, uint32_t val) -{ - // ??? We could reuse the store_mask stuff here. -} -#endif - -template<> -inline void ALWAYS_INLINE -unaligned_store2<float>(gtm_cacheline *c1, gtm_cacheline *c2, - size_t ofs, float val) -{ - typedef view_convert_helper<float> VC; VC vc; - vc.t = val; - unaligned_store2(c1, c2, ofs, vc.i); -} -#endif - -} // namespace GTM - -#endif // LIBITM_UNALIGNED_H diff --git a/libitm/config/posix/cachepage.cc b/libitm/config/posix/cachepage.cc deleted file mode 100644 index 128cd5435ae..00000000000 --- a/libitm/config/posix/cachepage.cc +++ /dev/null @@ -1,183 +0,0 @@ -/* Copyright (C) 2009, 2011 Free Software Foundation, Inc. - Contributed by Richard Henderson <rth@redhat.com>. - - This file is part of the GNU Transactional Memory Library (libitm). - - Libitm is free software; you can redistribute it and/or modify it - under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 3 of the License, or - (at your option) any later version. - - Libitm is distributed in the hope that it will be useful, but WITHOUT ANY - WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS - FOR A PARTICULAR PURPOSE. See the GNU General Public License for - more details. - - Under Section 7 of GPL version 3, you are granted additional - permissions described in the GCC Runtime Library Exception, version - 3.1, as published by the Free Software Foundation. - - You should have received a copy of the GNU General Public License and - a copy of the GCC Runtime Library Exception along with this program; - see the files COPYING3 and COPYING.RUNTIME respectively. If not, see - <http://www.gnu.org/licenses/>. */ - -#include "libitm_i.h" -#include <pthread.h> - -// -// We have three possibilities for alloction: mmap, memalign, posix_memalign -// - -#if defined(HAVE_MMAP_ANON) || defined(HAVE_MMAP_DEV_ZERO) -#include <sys/mman.h> -#include <fcntl.h> -#endif -#ifdef HAVE_MALLOC_H -#include <malloc.h> -#endif - -namespace GTM HIDDEN { - -#if defined(HAVE_MMAP_ANON) -# if !defined(MAP_ANONYMOUS) && defined(MAP_ANON) -# define MAP_ANONYMOUS MAP_ANON -# endif -# define dev_zero -1 -#elif defined(HAVE_MMAP_DEV_ZERO) -# ifndef MAP_ANONYMOUS -# define MAP_ANONYMOUS 0 -# endif -static int dev_zero = -1; -#endif - -#if defined(HAVE_MMAP_ANON) || defined(HAVE_MMAP_DEV_ZERO) -/* If we get here, we've already opened /dev/zero and verified that - PAGE_SIZE is valid for the system. */ -static gtm_cacheline_page * alloc_mmap (void) UNUSED; -static gtm_cacheline_page * -alloc_mmap (void) -{ - gtm_cacheline_page *r; - r = (gtm_cacheline_page *) mmap (NULL, PAGE_SIZE, PROT_READ | PROT_WRITE, - MAP_PRIVATE | MAP_ANONYMOUS, dev_zero, 0); - if (r == (gtm_cacheline_page *) MAP_FAILED) - abort (); - return r; -} -#endif /* MMAP_ANON | MMAP_DEV_ZERO */ - -#ifdef HAVE_MEMALIGN -static gtm_cacheline_page * alloc_memalign (void) UNUSED; -static gtm_cacheline_page * -alloc_memalign (void) -{ - gtm_cacheline_page *r; - r = (gtm_cacheline_page *) memalign (PAGE_SIZE, PAGE_SIZE); - if (r == NULL) - abort (); - return r; -} -#endif /* MEMALIGN */ - -#ifdef HAVE_POSIX_MEMALIGN -static gtm_cacheline_page *alloc_posix_memalign (void) UNUSED; -static gtm_cacheline_page * -alloc_posix_memalign (void) -{ - void *r; - if (posix_memalign (&r, PAGE_SIZE, PAGE_SIZE)) - abort (); - return (gtm_cacheline_page *) r; -} -#endif /* POSIX_MEMALIGN */ - -#if defined(HAVE_MMAP_ANON) && defined(FIXED_PAGE_SIZE) -# define alloc_page alloc_mmap -#elif defined(HAVE_MMAP_DEV_ZERO) && defined(FIXED_PAGE_SIZE) -static gtm_cacheline_page * -alloc_page (void) -{ - if (dev_zero < 0) - { - dev_zero = open ("/dev/zero", O_RDWR); - assert (dev_zero >= 0); - } - return alloc_mmap (); -} -#elif defined(HAVE_MMAP_ANON) || defined(HAVE_MMAP_DEV_ZERO) -static gtm_cacheline_page * (*alloc_page) (void); -static void __attribute__((constructor)) -init_alloc_page (void) -{ - size_t page_size = getpagesize (); - if (page_size <= PAGE_SIZE && PAGE_SIZE % page_size == 0) - { -# ifndef HAVE_MMAP_ANON - dev_zero = open ("/dev/zero", O_RDWR); - assert (dev_zero >= 0); -# endif - alloc_page = alloc_mmap; - return; - } -# ifdef HAVE_MEMALIGN - alloc_page = alloc_memalign; -# elif defined(HAVE_POSIX_MEMALIGN) - alloc_page = alloc_posix_memalign; -# else -# error "No fallback aligned memory allocation method" -# endif -} -#elif defined(HAVE_MEMALIGN) -# define alloc_page alloc_memalign -#elif defined(HAVE_POSIX_MEMALIGN) -# define alloc_page alloc_posix_memalign -#else -# error "No aligned memory allocation method" -#endif - -static gtm_cacheline_page *free_pages; -static pthread_mutex_t free_page_lock = PTHREAD_MUTEX_INITIALIZER; - -void * -gtm_cacheline_page::operator new (size_t size) -{ - assert (size == sizeof (gtm_cacheline_page)); - assert (size <= PAGE_SIZE); - - pthread_mutex_lock(&free_page_lock); - - gtm_cacheline_page *r = free_pages; - free_pages = r ? r->prev : NULL; - - pthread_mutex_unlock(&free_page_lock); - - if (r == NULL) - r = alloc_page (); - - return r; -} - -void -gtm_cacheline_page::operator delete (void *xhead) -{ - gtm_cacheline_page *head = static_cast<gtm_cacheline_page *>(xhead); - gtm_cacheline_page *tail; - - if (head == 0) - return; - - /* ??? We should eventually really free some of these. */ - - for (tail = head; tail->prev != 0; tail = tail->prev) - continue; - - pthread_mutex_lock(&free_page_lock); - - tail->prev = free_pages; - free_pages = head; - - pthread_mutex_unlock(&free_page_lock); -} - -} // namespace GTM diff --git a/libitm/config/x86/cacheline.cc b/libitm/config/x86/cacheline.cc deleted file mode 100644 index 2e49a355953..00000000000 --- a/libitm/config/x86/cacheline.cc +++ /dev/null @@ -1,73 +0,0 @@ -/* Copyright (C) 2009, 2011 Free Software Foundation, Inc. - Contributed by Richard Henderson <rth@redhat.com>. - - This file is part of the GNU Transactional Memory Library (libitm). - - Libitm is free software; you can redistribute it and/or modify it - under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 3 of the License, or - (at your option) any later version. - - Libitm is distributed in the hope that it will be useful, but WITHOUT ANY - WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS - FOR A PARTICULAR PURPOSE. See the GNU General Public License for - more details. - - Under Section 7 of GPL version 3, you are granted additional - permissions described in the GCC Runtime Library Exception, version - 3.1, as published by the Free Software Foundation. - - You should have received a copy of the GNU General Public License and - a copy of the GCC Runtime Library Exception along with this program; - see the files COPYING3 and COPYING.RUNTIME respectively. If not, see - <http://www.gnu.org/licenses/>. */ - -#include "libitm_i.h" - -namespace GTM HIDDEN { - -uint32_t const gtm_bit_to_byte_mask[16] = -{ - 0x00000000, - 0x000000ff, - 0x0000ff00, - 0x0000ffff, - 0x00ff0000, - 0x00ff00ff, - 0x00ffff00, - 0x00ffffff, - 0xff000000, - 0xff0000ff, - 0xff00ff00, - 0xff00ffff, - 0xffff0000, - 0xffff00ff, - 0xffffff00, - 0xffffffff -}; - -#ifdef __SSE2__ -# define MEMBER m128i -#else -# define MEMBER w -#endif - -void -gtm_cacheline::copy_mask (gtm_cacheline * __restrict d, - const gtm_cacheline * __restrict s, - gtm_cacheline_mask m) -{ - if (m == (gtm_cacheline_mask)-1) - { - *d = *s; - return; - } - if (__builtin_expect (m == 0, 0)) - return; - - size_t n = sizeof(d->MEMBER[0]); - for (size_t i = 0; i < CACHELINE_SIZE / n; ++i, m >>= n) - store_mask (&d->MEMBER[i], s->MEMBER[i], m); -} - -} // namespace GTM diff --git a/libitm/config/x86/cacheline.h b/libitm/config/x86/cacheline.h index f91d7ccb802..337c9995c17 100644 --- a/libitm/config/x86/cacheline.h +++ b/libitm/config/x86/cacheline.h @@ -40,8 +40,6 @@ namespace GTM HIDDEN { // in the cacheline with which it is associated. typedef sized_integral<CACHELINE_SIZE / 8>::type gtm_cacheline_mask; -extern uint32_t const gtm_bit_to_byte_mask[16]; - union gtm_cacheline { // Byte access to the cacheline. @@ -67,23 +65,6 @@ union gtm_cacheline __m256i m256i[CACHELINE_SIZE / sizeof(__m256i)]; #endif - // Store S into D, but only the bytes specified by M. - static void store_mask (uint32_t *d, uint32_t s, uint8_t m); - static void store_mask (uint64_t *d, uint64_t s, uint8_t m); -#ifdef __SSE2__ - static void store_mask (__m128i *d, __m128i s, uint16_t m); -#endif - - // Copy S to D, but only the bytes specified by M. - static void copy_mask (gtm_cacheline * __restrict d, - const gtm_cacheline * __restrict s, - gtm_cacheline_mask m); - - // A write barrier to emit after (a series of) copy_mask. - // When we're emitting non-temporal stores, the normal strong - // ordering of the machine doesn't apply. - static void copy_mask_wb (); - #if defined(__SSE__) || defined(__AVX__) // Copy S to D; only bother defining if we can do this more efficiently // than the compiler-generated default implementation. @@ -91,14 +72,6 @@ union gtm_cacheline #endif // SSE, AVX }; -inline void -gtm_cacheline::copy_mask_wb () -{ -#ifdef __SSE2__ - _mm_sfence (); -#endif -} - #if defined(__SSE__) || defined(__AVX__) inline gtm_cacheline& ALWAYS_INLINE gtm_cacheline::operator= (const gtm_cacheline & __restrict s) @@ -141,103 +114,11 @@ gtm_cacheline::operator= (const gtm_cacheline & __restrict s) } return *this; -} -#endif -// Support masked integer stores more efficiently with an unlocked cmpxchg -// insn. My reasoning is that while we write to locations that we do not wish -// to modify, we do it in an uninterruptable insn, and so we either truely -// write back the original data or the insn fails -- unlike with a -// load/and/or/write sequence which can be interrupted either by a kernel -// task switch or an unlucky cacheline steal by another processor. Avoiding -// the LOCK prefix improves performance by a factor of 10, and we don't need -// the memory barrier semantics implied by that prefix. - -inline void ALWAYS_INLINE -gtm_cacheline::store_mask (uint32_t *d, uint32_t s, uint8_t m) -{ - gtm_cacheline_mask tm = (1 << sizeof (s)) - 1; - if (__builtin_expect (m & tm, tm)) - { - if (__builtin_expect ((m & tm) == tm, 1)) - *d = s; - else - { - gtm_cacheline_mask bm = gtm_bit_to_byte_mask[m & 15]; - gtm_word n, o = *d; - - __asm("\n0:\t" - "mov %[o], %[n]\n\t" - "and %[m], %[n]\n\t" - "or %[s], %[n]\n\t" - "cmpxchg %[n], %[d]\n\t" - ".byte 0x2e\n\t" // predict not-taken, aka jnz,pn - "jnz 0b" - : [d] "+m"(*d), [n] "=&r" (n), [o] "+a"(o) - : [s] "r" (s & bm), [m] "r" (~bm)); - } - } +#undef CP +#undef TYPE } - -inline void ALWAYS_INLINE -gtm_cacheline::store_mask (uint64_t *d, uint64_t s, uint8_t m) -{ - gtm_cacheline_mask tm = (1 << sizeof (s)) - 1; - if (__builtin_expect (m & tm, tm)) - { - if (__builtin_expect ((m & tm) == tm, 1)) - *d = s; - else - { -#ifdef __x86_64__ - uint32_t bl = gtm_bit_to_byte_mask[m & 15]; - uint32_t bh = gtm_bit_to_byte_mask[(m >> 4) & 15]; - gtm_cacheline_mask bm = bl | ((gtm_cacheline_mask)bh << 31 << 1); - uint64_t n, o = *d; - __asm("\n0:\t" - "mov %[o], %[n]\n\t" - "and %[m], %[n]\n\t" - "or %[s], %[n]\n\t" - "cmpxchg %[n], %[d]\n\t" - ".byte 0x2e\n\t" // predict not-taken, aka jnz,pn - "jnz 0b" - : [d] "+m"(*d), [n] "=&r" (n), [o] "+a"(o) - : [s] "r" (s & bm), [m] "r" (~bm)); -#else - /* ??? While it's possible to perform this operation with - cmpxchg8b, the sequence requires all 7 general registers - and thus cannot be performed with -fPIC. Don't even try. */ - uint32_t *d32 = reinterpret_cast<uint32_t *>(d); - store_mask (d32, s, m); - store_mask (d32 + 1, s >> 32, m >> 4); #endif - } - } -} - -#ifdef __SSE2__ -inline void ALWAYS_INLINE -gtm_cacheline::store_mask (__m128i *d, __m128i s, uint16_t m) -{ - if (__builtin_expect (m == 0, 0)) - return; - if (__builtin_expect (m == 0xffff, 1)) - *d = s; - else - { - __m128i bm0, bm1, bm2, bm3; - bm0 = _mm_set_epi32 (0, 0, 0, gtm_bit_to_byte_mask[m & 15]); m >>= 4; - bm1 = _mm_set_epi32 (0, 0, 0, gtm_bit_to_byte_mask[m & 15]); m >>= 4; - bm2 = _mm_set_epi32 (0, 0, 0, gtm_bit_to_byte_mask[m & 15]); m >>= 4; - bm3 = _mm_set_epi32 (0, 0, 0, gtm_bit_to_byte_mask[m & 15]); m >>= 4; - bm0 = _mm_unpacklo_epi32 (bm0, bm1); - bm2 = _mm_unpacklo_epi32 (bm2, bm3); - bm0 = _mm_unpacklo_epi64 (bm0, bm2); - - _mm_maskmoveu_si128 (s, bm0, (char *)d); - } -} -#endif // SSE2 } // namespace GTM diff --git a/libitm/config/x86/unaligned.h b/libitm/config/x86/unaligned.h deleted file mode 100644 index 01abc47dccb..00000000000 --- a/libitm/config/x86/unaligned.h +++ /dev/null @@ -1,237 +0,0 @@ -/* Copyright (C) 2009, 2011 Free Software Foundation, Inc. - Contributed by Richard Henderson <rth@redhat.com>. - - This file is part of the GNU Transactional Memory Library (libitm). - - Libitm is free software; you can redistribute it and/or modify it - under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 3 of the License, or - (at your option) any later version. - - Libitm is distributed in the hope that it will be useful, but WITHOUT ANY - WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS - FOR A PARTICULAR PURPOSE. See the GNU General Public License for - more details. - - Under Section 7 of GPL version 3, you are granted additional - permissions described in the GCC Runtime Library Exception, version - 3.1, as published by the Free Software Foundation. - - You should have received a copy of the GNU General Public License and - a copy of the GCC Runtime Library Exception along with this program; - see the files COPYING3 and COPYING.RUNTIME respectively. If not, see - <http://www.gnu.org/licenses/>. */ - -#ifndef LIBITM_X86_UNALIGNED_H -#define LIBITM_X86_UNALIGNED_H 1 - -#define HAVE_ARCH_UNALIGNED_LOAD2_U4 1 -#define HAVE_ARCH_UNALIGNED_LOAD2_U8 1 - -#include "config/generic/unaligned.h" - -namespace GTM HIDDEN { - -template<> -inline uint32_t -unaligned_load2<uint32_t>(const gtm_cacheline *c1, - const gtm_cacheline *c2, size_t ofs) -{ - uint32_t r, lo, hi; - lo = c1->u32[CACHELINE_SIZE / sizeof(uint32_t) - 1]; - hi = c2->u32[0]; - asm("shrd %b2, %1, %0" : "=r"(r) : "r"(hi), "c"((ofs & 3) * 8), "0"(lo)); - return r; -} - -template<> -inline uint64_t -unaligned_load2<uint64_t>(const gtm_cacheline *c1, - const gtm_cacheline *c2, size_t ofs) -{ -#ifdef __x86_64__ - uint64_t r, lo, hi; - lo = c1->u64[CACHELINE_SIZE / sizeof(uint64_t) - 1]; - hi = c2->u64[0]; - asm("shrd %b2, %1, %0" : "=r"(r) : "r"(hi), "c"((ofs & 3) * 8), "0"(lo)); - return r; -#else - uint32_t v0, v1, v2; - uint64_t r; - - if (ofs < CACHELINE_SIZE - 4) - { - v0 = c1->u32[CACHELINE_SIZE / sizeof(uint32_t) - 2]; - v1 = c1->u32[CACHELINE_SIZE / sizeof(uint32_t) - 1]; - v2 = c2->u32[0]; - } - else - { - v0 = c1->u32[CACHELINE_SIZE / sizeof(uint32_t) - 1]; - v1 = c2->u32[0]; - v2 = c2->u32[1]; - } - ofs = (ofs & 3) * 8; - asm("shrd %%cl, %[v1], %[v0]; shrd %%cl, %[v2], %[v1]" - : "=A"(r) : "c"(ofs), [v0] "a"(v0), [v1] "d"(v1), [v2] "r"(v2)); - - return r; -#endif -} - -#if defined(__SSE2__) || defined(__MMX__) -template<> -inline _ITM_TYPE_M64 -unaligned_load2<_ITM_TYPE_M64>(const gtm_cacheline *c1, - const gtm_cacheline *c2, size_t ofs) -{ -# ifdef __x86_64__ - __m128i lo = _mm_movpi64_epi64 (c1->m64[CACHELINE_SIZE / 8 - 1]); - __m128i hi = _mm_movpi64_epi64 (c2->m64[0]); - - ofs = (ofs & 7) * 8; - lo = _mm_srli_epi64 (lo, ofs); - hi = _mm_slli_epi64 (hi, 64 - ofs); - lo = lo | hi; - return _mm_movepi64_pi64 (lo); -# else - // On 32-bit we're about to return the result in an MMX register, so go - // ahead and do the computation in that unit, even if SSE2 is available. - __m64 lo = c1->m64[CACHELINE_SIZE / 8 - 1]; - __m64 hi = c2->m64[0]; - - ofs = (ofs & 7) * 8; - lo = _mm_srli_si64 (lo, ofs); - hi = _mm_slli_si64 (hi, 64 - ofs); - return lo | hi; -# endif -} -#endif // SSE2 or MMX - -// The SSE types are strictly aligned. -#ifdef __SSE__ -template<> - struct strict_alignment<_ITM_TYPE_M128> - : public std::true_type - { }; - -// Expand the unaligned SSE move instructions. -template<> -inline _ITM_TYPE_M128 -unaligned_load<_ITM_TYPE_M128>(const void *t) -{ - return _mm_loadu_ps (static_cast<const float *>(t)); -} - -template<> -inline void -unaligned_store<_ITM_TYPE_M128>(void *t, _ITM_TYPE_M128 val) -{ - _mm_storeu_ps (static_cast<float *>(t), val); -} -#endif // SSE - -#ifdef __AVX__ -// The AVX types are strictly aligned when it comes to vmovaps vs vmovups. -template<> - struct strict_alignment<_ITM_TYPE_M256> - : public std::true_type - { }; - -template<> -inline _ITM_TYPE_M256 -unaligned_load<_ITM_TYPE_M256>(const void *t) -{ - return _mm256_loadu_ps (static_cast<const float *>(t)); -} - -template<> -inline void -unaligned_store<_ITM_TYPE_M256>(void *t, _ITM_TYPE_M256 val) -{ - _mm256_storeu_ps (static_cast<float *>(t), val); -} -#endif // AVX - -#ifdef __XOP__ -# define HAVE_ARCH_REALIGN_M128I 1 -extern const __v16qi GTM_vpperm_shift[16]; -inline __m128i -realign_m128i (__m128i lo, __m128i hi, unsigned byte_count) -{ - return _mm_perm_epi8 (lo, hi, GTM_vpperm_shift[byte_count]); -} -#elif defined(__AVX__) -# define HAVE_ARCH_REALIGN_M128I 1 -extern "C" const uint64_t GTM_vpalignr_table[16]; -inline __m128i -realign_m128i (__m128i lo, __m128i hi, unsigned byte_count) -{ - register __m128i xmm0 __asm__("xmm0") = hi; - register __m128i xmm1 __asm__("xmm1") = lo; - __asm("call *%2" : "+x"(xmm0) : "x"(xmm1), - "r"(>M_vpalignr_table[byte_count])); - return xmm0; -} -#elif defined(__SSSE3__) -# define HAVE_ARCH_REALIGN_M128I 1 -extern "C" const uint64_t GTM_palignr_table[16]; -inline __m128i -realign_m128i (__m128i lo, __m128i hi, unsigned byte_count) -{ - register __m128i xmm0 __asm__("xmm0") = hi; - register __m128i xmm1 __asm__("xmm1") = lo; - __asm("call *%2" : "+x"(xmm0) : "x"(xmm1), - "r"(>M_palignr_table[byte_count])); - return xmm0; -} -#elif defined(__SSE2__) -# define HAVE_ARCH_REALIGN_M128I 1 -extern "C" const char GTM_pshift_table[16 * 16]; -inline __m128i -realign_m128i (__m128i lo, __m128i hi, unsigned byte_count) -{ - register __m128i xmm0 __asm__("xmm0") = lo; - register __m128i xmm1 __asm__("xmm1") = hi; - __asm("call *%2" : "+x"(xmm0), "+x"(xmm1) - : "r"(GTM_pshift_table + byte_count*16)); - return xmm0; -} -#endif // XOP, AVX, SSSE3, SSE2 - -#ifdef HAVE_ARCH_REALIGN_M128I -template<> -inline _ITM_TYPE_M128 -unaligned_load2<_ITM_TYPE_M128>(const gtm_cacheline *c1, - const gtm_cacheline *c2, size_t ofs) -{ - return (_ITM_TYPE_M128) - realign_m128i (c1->m128i[CACHELINE_SIZE / 16 - 1], - c2->m128i[0], ofs & 15); -} -#endif // HAVE_ARCH_REALIGN_M128I - -#ifdef __AVX__ -template<> -inline _ITM_TYPE_M256 -unaligned_load2<_ITM_TYPE_M256>(const gtm_cacheline *c1, - const gtm_cacheline *c2, size_t ofs) -{ - __m128i v0, v1; - __m256i r; - - v0 = (__m128i) unaligned_load2<_ITM_TYPE_M128>(c1, c2, ofs); - if (ofs < CACHELINE_SIZE - 16) - v1 = v0, v0 = _mm_loadu_si128 ((const __m128i *) &c1->b[ofs]); - else - v1 = _mm_loadu_si128((const __m128i *)&c2->b[ofs + 16 - CACHELINE_SIZE]); - - r = _mm256_castsi128_si256 ((__m128i)v0); - r = _mm256_insertf128_si256 (r, (__m128i)v1, 1); - return (_ITM_TYPE_M256) r; -} -#endif // AVX - -} // namespace GTM - -#endif // LIBITM_X86_UNALIGNED_H diff --git a/libitm/config/x86/x86_avx.cc b/libitm/config/x86/x86_avx.cc index 9d1ddfb3ee8..30420aa87f5 100644 --- a/libitm/config/x86/x86_avx.cc +++ b/libitm/config/x86/x86_avx.cc @@ -34,62 +34,3 @@ _ITM_LM256 (const _ITM_TYPE_M256 *ptr) { GTM::GTM_LB (ptr, sizeof (*ptr)); } - -// Helpers for re-aligning two 128-bit values. -#ifdef __XOP__ -const __v16qi GTM::GTM_vpperm_shift[16] = -{ - { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 }, - { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 }, - { 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17 }, - { 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18 }, - { 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19 }, - { 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20 }, - { 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21 }, - { 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22 }, - { 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23 }, - { 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24 }, - { 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25 }, - { 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26 }, - { 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27 }, - { 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28 }, - { 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29 }, - { 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30 }, -}; -#else -# define INSN0 "movdqa %xmm1, %xmm0" -# define INSN(N) "vpalignr $" #N ", %xmm0, %xmm1, %xmm0" -# define TABLE_ENT_0 INSN0 "\n\tret\n\t" -# define TABLE_ENT(N) ".balign 8\n\t" INSN(N) "\n\tret\n\t" - -asm(".pushsection .text\n\ - .balign 16\n\ - .globl GTM_vpalignr_table\n\ - .hidden GTM_vpalignr_table\n\ - .type GTM_vpalignr_table, @function\n\ -GTM_vpalignr_table:\n\t" - TABLE_ENT_0 - TABLE_ENT(1) - TABLE_ENT(2) - TABLE_ENT(3) - TABLE_ENT(4) - TABLE_ENT(5) - TABLE_ENT(6) - TABLE_ENT(7) - TABLE_ENT(8) - TABLE_ENT(9) - TABLE_ENT(10) - TABLE_ENT(11) - TABLE_ENT(12) - TABLE_ENT(13) - TABLE_ENT(14) - TABLE_ENT(15) - ".balign 8\n\ - .size GTM_vpalignr_table, .-GTM_vpalignr_table\n\ - .popsection"); - -# undef INSN0 -# undef INSN -# undef TABLE_ENT_0 -# undef TABLE_ENT -#endif diff --git a/libitm/config/x86/x86_sse.cc b/libitm/config/x86/x86_sse.cc index 7440c949cb7..5a1c67ac8b1 100644 --- a/libitm/config/x86/x86_sse.cc +++ b/libitm/config/x86/x86_sse.cc @@ -41,82 +41,3 @@ _ITM_LM128 (const _ITM_TYPE_M128 *ptr) { GTM::GTM_LB (ptr, sizeof (*ptr)); } - -// Helpers for re-aligning two 128-bit values. -#ifdef __SSSE3__ -# define INSN0 "movdqa %xmm1, %xmm0" -# define INSN(N) "palignr $" #N ", %xmm1, %xmm0" -# define TABLE_ENT_0 INSN0 "\n\tret\n\t" -# define TABLE_ENT(N) ".balign 8\n\t" INSN(N) "\n\tret\n\t" - -asm(".pushsection .text\n\ - .balign 16\n\ - .globl GTM_palignr_table\n\ - .hidden GTM_palignr_table\n\ - .type GTM_palignr_table, @function\n\ -GTM_palignr_table:\n\t" - TABLE_ENT_0 - TABLE_ENT(1) - TABLE_ENT(2) - TABLE_ENT(3) - TABLE_ENT(4) - TABLE_ENT(5) - TABLE_ENT(6) - TABLE_ENT(7) - TABLE_ENT(8) - TABLE_ENT(9) - TABLE_ENT(10) - TABLE_ENT(11) - TABLE_ENT(12) - TABLE_ENT(13) - TABLE_ENT(14) - TABLE_ENT(15) - ".balign 8\n\ - .size GTM_palignr_table, .-GTM_palignr_table\n\ - .popsection"); - -# undef INSN0 -# undef INSN -# undef TABLE_ENT_0 -# undef TABLE_ENT -#elif defined(__SSE2__) -# define INSNS_8 "punpcklqdq %xmm1, %xmm0" -# define INSNS(N) "psrldq $"#N", %xmm0\n\t" \ - "pslldq $(16-"#N"), %xmm1\n\t" \ - "por %xmm1, %xmm0" -# define TABLE_ENT_0 "ret\n\t" -# define TABLE_ENT_8 ".balign 16\n\t" INSNS_8 "\n\tret\n\t" -# define TABLE_ENT(N) ".balign 16\n\t" INSNS(N) "\n\tret\n\t" - -asm(".pushsection .text\n\ - .balign 16\n\ - .globl GTM_pshift_table\n\ - .hidden GTM_pshift_table\n\ - .type GTM_pshift_table, @function\n\ -GTM_pshift_table:\n\t" - TABLE_ENT_0 - TABLE_ENT(1) - TABLE_ENT(2) - TABLE_ENT(3) - TABLE_ENT(4) - TABLE_ENT(5) - TABLE_ENT(6) - TABLE_ENT(7) - TABLE_ENT_8 - TABLE_ENT(9) - TABLE_ENT(10) - TABLE_ENT(11) - TABLE_ENT(12) - TABLE_ENT(13) - TABLE_ENT(14) - TABLE_ENT(15) - ".balign 8\n\ - .size GTM_pshift_table, .-GTM_pshift_table\n\ - .popsection"); - -# undef INSNS_8 -# undef INSNS -# undef TABLE_ENT_0 -# undef TABLE_ENT_8 -# undef TABLE_ENT -#endif |

