summaryrefslogtreecommitdiffstats
path: root/arch/tile/lib
diff options
context:
space:
mode:
authorChris Metcalf <cmetcalf@tilera.com>2010-08-13 08:52:19 -0400
committerChris Metcalf <cmetcalf@tilera.com>2010-08-13 08:52:19 -0400
commitc745a8a11fa1df6078bfc61fc29492ed43f71c2b (patch)
tree2db1cdf9cd0d0e892f4f92de1fd2700ac319f04a /arch/tile/lib
parent1fcbe027b5d29ec9cd0eeb753c14fb366ae852ac (diff)
downloadblackbird-op-linux-c745a8a11fa1df6078bfc61fc29492ed43f71c2b.tar.gz
blackbird-op-linux-c745a8a11fa1df6078bfc61fc29492ed43f71c2b.zip
arch/tile: Various cleanups.
This change rolls up random cleanups not representing any actual bugs. - Remove a stale CONFIG_ value from the default tile_defconfig - Remove unused tns_atomic_xxx() family of methods from <asm/atomic.h> - Optimize get_order() using Tile's "clz" instruction - Fix a bad hypervisor upcall name (not currently used in Linux anyway) - Use __copy_in_user_inatomic() name for consistency, and export it - Export some additional hypervisor driver I/O upcalls and some homecache calls - Remove the obfuscating MEMCPY_TEST_WH64 support code - Other stray comment cleanups, #if 0 removal, etc. Signed-off-by: Chris Metcalf <cmetcalf@tilera.com>
Diffstat (limited to 'arch/tile/lib')
-rw-r--r--arch/tile/lib/Makefile4
-rw-r--r--arch/tile/lib/exports.c16
-rw-r--r--arch/tile/lib/memcpy_32.S20
-rw-r--r--arch/tile/lib/memset_32.c25
4 files changed, 18 insertions, 47 deletions
diff --git a/arch/tile/lib/Makefile b/arch/tile/lib/Makefile
index 438af38bc9eb..746dc81ed3c4 100644
--- a/arch/tile/lib/Makefile
+++ b/arch/tile/lib/Makefile
@@ -7,7 +7,9 @@ lib-y = cacheflush.o checksum.o cpumask.o delay.o \
memcpy_$(BITS).o memchr_$(BITS).o memmove_$(BITS).o memset_$(BITS).o \
strchr_$(BITS).o strlen_$(BITS).o
-ifneq ($(CONFIG_TILEGX),y)
+ifeq ($(CONFIG_TILEGX),y)
+lib-y += memcpy_user_64.o
+else
lib-y += atomic_32.o atomic_asm_32.o memcpy_tile64.o
endif
diff --git a/arch/tile/lib/exports.c b/arch/tile/lib/exports.c
index 6bc7b52b4aa0..ce5dbf56578f 100644
--- a/arch/tile/lib/exports.c
+++ b/arch/tile/lib/exports.c
@@ -36,21 +36,29 @@ EXPORT_SYMBOL(clear_user_asm);
EXPORT_SYMBOL(current_text_addr);
EXPORT_SYMBOL(dump_stack);
-/* arch/tile/lib/__memcpy.S */
-/* NOTE: on TILE64, these symbols appear in arch/tile/lib/memcpy_tile64.c */
+/* arch/tile/lib/, various memcpy files */
EXPORT_SYMBOL(memcpy);
EXPORT_SYMBOL(__copy_to_user_inatomic);
EXPORT_SYMBOL(__copy_from_user_inatomic);
EXPORT_SYMBOL(__copy_from_user_zeroing);
+#ifdef __tilegx__
+EXPORT_SYMBOL(__copy_in_user_inatomic);
+#endif
/* hypervisor glue */
#include <hv/hypervisor.h>
EXPORT_SYMBOL(hv_dev_open);
EXPORT_SYMBOL(hv_dev_pread);
EXPORT_SYMBOL(hv_dev_pwrite);
+EXPORT_SYMBOL(hv_dev_preada);
+EXPORT_SYMBOL(hv_dev_pwritea);
+EXPORT_SYMBOL(hv_dev_poll);
+EXPORT_SYMBOL(hv_dev_poll_cancel);
EXPORT_SYMBOL(hv_dev_close);
+EXPORT_SYMBOL(hv_sysconf);
+EXPORT_SYMBOL(hv_confstr);
-/* -ltile-cc */
+/* libgcc.a */
uint32_t __udivsi3(uint32_t dividend, uint32_t divisor);
EXPORT_SYMBOL(__udivsi3);
int32_t __divsi3(int32_t dividend, int32_t divisor);
@@ -70,8 +78,6 @@ EXPORT_SYMBOL(__moddi3);
#ifndef __tilegx__
uint64_t __ll_mul(uint64_t n0, uint64_t n1);
EXPORT_SYMBOL(__ll_mul);
-#endif
-#ifndef __tilegx__
int64_t __muldi3(int64_t, int64_t);
EXPORT_SYMBOL(__muldi3);
uint64_t __lshrdi3(uint64_t, unsigned int);
diff --git a/arch/tile/lib/memcpy_32.S b/arch/tile/lib/memcpy_32.S
index f92984bf60ec..30c3b7ebb55d 100644
--- a/arch/tile/lib/memcpy_32.S
+++ b/arch/tile/lib/memcpy_32.S
@@ -17,10 +17,6 @@
#include <arch/chip.h>
-#if CHIP_HAS_WH64() || defined(MEMCPY_TEST_WH64)
-#define MEMCPY_USE_WH64
-#endif
-
#include <linux/linkage.h>
@@ -160,7 +156,7 @@ EX: { sw r0, r3; addi r0, r0, 4; addi r2, r2, -4 }
{ addi r3, r1, 60; andi r9, r9, -64 }
-#ifdef MEMCPY_USE_WH64
+#if CHIP_HAS_WH64()
/* No need to prefetch dst, we'll just do the wh64
* right before we copy a line.
*/
@@ -173,7 +169,7 @@ EX: { lw r6, r3; addi r3, r3, 64 }
/* Intentionally stall for a few cycles to leave L2 cache alone. */
{ bnzt zero, . }
EX: { lw r7, r3; addi r3, r3, 64 }
-#ifndef MEMCPY_USE_WH64
+#if !CHIP_HAS_WH64()
/* Prefetch the dest */
/* Intentionally stall for a few cycles to leave L2 cache alone. */
{ bnzt zero, . }
@@ -288,15 +284,7 @@ EX: { lw r7, r3; addi r3, r3, 64 }
/* Fill second L1D line. */
EX: { lw r17, r17; addi r1, r1, 48; mvz r3, r13, r1 } /* r17 = WORD_4 */
-#ifdef MEMCPY_TEST_WH64
- /* Issue a fake wh64 that clobbers the destination words
- * with random garbage, for testing.
- */
- { movei r19, 64; crc32_32 r10, r2, r9 }
-.Lwh64_test_loop:
-EX: { sw r9, r10; addi r9, r9, 4; addi r19, r19, -4 }
- { bnzt r19, .Lwh64_test_loop; crc32_32 r10, r10, r19 }
-#elif CHIP_HAS_WH64()
+#if CHIP_HAS_WH64()
/* Prepare destination line for writing. */
EX: { wh64 r9; addi r9, r9, 64 }
#else
@@ -340,7 +328,7 @@ EX: { lw r18, r1; addi r1, r1, 4 } /* r18 = WORD_8 */
EX: { sw r0, r16; addi r0, r0, 4; add r16, r0, r2 } /* store(WORD_0) */
EX: { sw r0, r13; addi r0, r0, 4; andi r16, r16, -64 } /* store(WORD_1) */
EX: { sw r0, r14; addi r0, r0, 4; slt_u r16, r9, r16 } /* store(WORD_2) */
-#ifdef MEMCPY_USE_WH64
+#if CHIP_HAS_WH64()
EX: { sw r0, r15; addi r0, r0, 4; addi r13, sp, -64 } /* store(WORD_3) */
#else
/* Back up the r9 to a cache line we are already storing to
diff --git a/arch/tile/lib/memset_32.c b/arch/tile/lib/memset_32.c
index bfde5d864df1..d014c1fbcbc2 100644
--- a/arch/tile/lib/memset_32.c
+++ b/arch/tile/lib/memset_32.c
@@ -141,7 +141,6 @@ void *memset(void *s, int c, size_t n)
*/
__insn_prefetch(&out32[ahead32]);
-#if 1
#if CACHE_LINE_SIZE_IN_WORDS % 4 != 0
#error "Unhandled CACHE_LINE_SIZE_IN_WORDS"
#endif
@@ -157,30 +156,6 @@ void *memset(void *s, int c, size_t n)
*out32++ = v32;
*out32++ = v32;
}
-#else
- /* Unfortunately, due to a code generator flaw this
- * allocates a separate register for each of these
- * stores, which requires a large number of spills,
- * which makes this procedure enormously bigger
- * (something like 70%)
- */
- *out32++ = v32;
- *out32++ = v32;
- *out32++ = v32;
- *out32++ = v32;
- *out32++ = v32;
- *out32++ = v32;
- *out32++ = v32;
- *out32++ = v32;
- *out32++ = v32;
- *out32++ = v32;
- *out32++ = v32;
- *out32++ = v32;
- *out32++ = v32;
- *out32++ = v32;
- *out32++ = v32;
- n32 -= 16;
-#endif
/* To save compiled code size, reuse this loop even
* when we run out of prefetching to do by dropping
OpenPOWER on IntegriCloud