diff options
Diffstat (limited to 'lib')
-rw-r--r-- | lib/Kconfig | 4 | ||||
-rw-r--r-- | lib/Makefile | 2 | ||||
-rw-r--r-- | lib/cpumask.c | 28 | ||||
-rw-r--r-- | lib/devres.c | 28 | ||||
-rw-r--r-- | lib/iommu-common.c | 270 | ||||
-rw-r--r-- | lib/raid6/algos.c | 41 | ||||
-rw-r--r-- | lib/raid6/altivec.uc | 1 | ||||
-rw-r--r-- | lib/raid6/avx2.c | 3 | ||||
-rw-r--r-- | lib/raid6/int.uc | 41 | ||||
-rw-r--r-- | lib/raid6/mmx.c | 2 | ||||
-rw-r--r-- | lib/raid6/neon.c | 1 | ||||
-rw-r--r-- | lib/raid6/sse1.c | 2 | ||||
-rw-r--r-- | lib/raid6/sse2.c | 227 | ||||
-rw-r--r-- | lib/raid6/test/test.c | 51 | ||||
-rw-r--r-- | lib/raid6/tilegx.uc | 1 | ||||
-rw-r--r-- | lib/test-hexdump.c | 2 |
16 files changed, 647 insertions, 57 deletions
diff --git a/lib/Kconfig b/lib/Kconfig index f5440221d929..601965a948e8 100644 --- a/lib/Kconfig +++ b/lib/Kconfig @@ -396,10 +396,6 @@ config CPUMASK_OFFSTACK them on the stack. This is a bit more expensive, but avoids stack overflow. -config DISABLE_OBSOLETE_CPUMASK_FUNCTIONS - bool "Disable obsolete cpumask functions" if DEBUG_PER_CPU_MAPS - depends on BROKEN - config CPU_RMAP bool depends on SMP diff --git a/lib/Makefile b/lib/Makefile index da6116b21555..6c37933336a0 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -106,7 +106,7 @@ obj-$(CONFIG_AUDIT_GENERIC) += audit.o obj-$(CONFIG_AUDIT_COMPAT_GENERIC) += compat_audit.o obj-$(CONFIG_SWIOTLB) += swiotlb.o -obj-$(CONFIG_IOMMU_HELPER) += iommu-helper.o +obj-$(CONFIG_IOMMU_HELPER) += iommu-helper.o iommu-common.o obj-$(CONFIG_FAULT_INJECTION) += fault-inject.o obj-$(CONFIG_NOTIFIER_ERROR_INJECTION) += notifier-error-inject.o obj-$(CONFIG_CPU_NOTIFIER_ERROR_INJECT) += cpu-notifier-error-inject.o diff --git a/lib/cpumask.c b/lib/cpumask.c index 5ab1553fd076..830dd5dec40f 100644 --- a/lib/cpumask.c +++ b/lib/cpumask.c @@ -5,27 +5,6 @@ #include <linux/export.h> #include <linux/bootmem.h> -int __first_cpu(const cpumask_t *srcp) -{ - return min_t(int, NR_CPUS, find_first_bit(srcp->bits, NR_CPUS)); -} -EXPORT_SYMBOL(__first_cpu); - -int __next_cpu(int n, const cpumask_t *srcp) -{ - return min_t(int, NR_CPUS, find_next_bit(srcp->bits, NR_CPUS, n+1)); -} -EXPORT_SYMBOL(__next_cpu); - -#if NR_CPUS > 64 -int __next_cpu_nr(int n, const cpumask_t *srcp) -{ - return min_t(int, nr_cpu_ids, - find_next_bit(srcp->bits, nr_cpu_ids, n+1)); -} -EXPORT_SYMBOL(__next_cpu_nr); -#endif - /** * cpumask_next_and - get the next cpu in *src1p & *src2p * @n: the cpu prior to the place to search (ie. return will be > @n) @@ -90,13 +69,6 @@ bool alloc_cpumask_var_node(cpumask_var_t *mask, gfp_t flags, int node) dump_stack(); } #endif - /* FIXME: Bandaid to save us from old primitives which go to NR_CPUS. */ - if (*mask) { - unsigned char *ptr = (unsigned char *)cpumask_bits(*mask); - unsigned int tail; - tail = BITS_TO_LONGS(NR_CPUS - nr_cpumask_bits) * sizeof(long); - memset(ptr + cpumask_size() - tail, 0, tail); - } return *mask != NULL; } diff --git a/lib/devres.c b/lib/devres.c index 0f1dd2e9d2c1..fbe2aac522e6 100644 --- a/lib/devres.c +++ b/lib/devres.c @@ -72,6 +72,34 @@ void __iomem *devm_ioremap_nocache(struct device *dev, resource_size_t offset, EXPORT_SYMBOL(devm_ioremap_nocache); /** + * devm_ioremap_wc - Managed ioremap_wc() + * @dev: Generic device to remap IO address for + * @offset: BUS offset to map + * @size: Size of map + * + * Managed ioremap_wc(). Map is automatically unmapped on driver detach. + */ +void __iomem *devm_ioremap_wc(struct device *dev, resource_size_t offset, + resource_size_t size) +{ + void __iomem **ptr, *addr; + + ptr = devres_alloc(devm_ioremap_release, sizeof(*ptr), GFP_KERNEL); + if (!ptr) + return NULL; + + addr = ioremap_wc(offset, size); + if (addr) { + *ptr = addr; + devres_add(dev, ptr); + } else + devres_free(ptr); + + return addr; +} +EXPORT_SYMBOL(devm_ioremap_wc); + +/** * devm_iounmap - Managed iounmap() * @dev: Generic device to unmap for * @addr: Address to unmap diff --git a/lib/iommu-common.c b/lib/iommu-common.c new file mode 100644 index 000000000000..df30632f0bef --- /dev/null +++ b/lib/iommu-common.c @@ -0,0 +1,270 @@ +/* + * IOMMU mmap management and range allocation functions. + * Based almost entirely upon the powerpc iommu allocator. + */ + +#include <linux/export.h> +#include <linux/bitmap.h> +#include <linux/bug.h> +#include <linux/iommu-helper.h> +#include <linux/iommu-common.h> +#include <linux/dma-mapping.h> +#include <linux/hash.h> + +#ifndef DMA_ERROR_CODE +#define DMA_ERROR_CODE (~(dma_addr_t)0x0) +#endif + +static unsigned long iommu_large_alloc = 15; + +static DEFINE_PER_CPU(unsigned int, iommu_hash_common); + +static inline bool need_flush(struct iommu_map_table *iommu) +{ + return (iommu->lazy_flush != NULL && + (iommu->flags & IOMMU_NEED_FLUSH) != 0); +} + +static inline void set_flush(struct iommu_map_table *iommu) +{ + iommu->flags |= IOMMU_NEED_FLUSH; +} + +static inline void clear_flush(struct iommu_map_table *iommu) +{ + iommu->flags &= ~IOMMU_NEED_FLUSH; +} + +static void setup_iommu_pool_hash(void) +{ + unsigned int i; + static bool do_once; + + if (do_once) + return; + do_once = true; + for_each_possible_cpu(i) + per_cpu(iommu_hash_common, i) = hash_32(i, IOMMU_POOL_HASHBITS); +} + +/* + * Initialize iommu_pool entries for the iommu_map_table. `num_entries' + * is the number of table entries. If `large_pool' is set to true, + * the top 1/4 of the table will be set aside for pool allocations + * of more than iommu_large_alloc pages. + */ +void iommu_tbl_pool_init(struct iommu_map_table *iommu, + unsigned long num_entries, + u32 table_shift, + void (*lazy_flush)(struct iommu_map_table *), + bool large_pool, u32 npools, + bool skip_span_boundary_check) +{ + unsigned int start, i; + struct iommu_pool *p = &(iommu->large_pool); + + setup_iommu_pool_hash(); + if (npools == 0) + iommu->nr_pools = IOMMU_NR_POOLS; + else + iommu->nr_pools = npools; + BUG_ON(npools > IOMMU_NR_POOLS); + + iommu->table_shift = table_shift; + iommu->lazy_flush = lazy_flush; + start = 0; + if (skip_span_boundary_check) + iommu->flags |= IOMMU_NO_SPAN_BOUND; + if (large_pool) + iommu->flags |= IOMMU_HAS_LARGE_POOL; + + if (!large_pool) + iommu->poolsize = num_entries/iommu->nr_pools; + else + iommu->poolsize = (num_entries * 3 / 4)/iommu->nr_pools; + for (i = 0; i < iommu->nr_pools; i++) { + spin_lock_init(&(iommu->pools[i].lock)); + iommu->pools[i].start = start; + iommu->pools[i].hint = start; + start += iommu->poolsize; /* start for next pool */ + iommu->pools[i].end = start - 1; + } + if (!large_pool) + return; + /* initialize large_pool */ + spin_lock_init(&(p->lock)); + p->start = start; + p->hint = p->start; + p->end = num_entries; +} +EXPORT_SYMBOL(iommu_tbl_pool_init); + +unsigned long iommu_tbl_range_alloc(struct device *dev, + struct iommu_map_table *iommu, + unsigned long npages, + unsigned long *handle, + unsigned long mask, + unsigned int align_order) +{ + unsigned int pool_hash = __this_cpu_read(iommu_hash_common); + unsigned long n, end, start, limit, boundary_size; + struct iommu_pool *pool; + int pass = 0; + unsigned int pool_nr; + unsigned int npools = iommu->nr_pools; + unsigned long flags; + bool large_pool = ((iommu->flags & IOMMU_HAS_LARGE_POOL) != 0); + bool largealloc = (large_pool && npages > iommu_large_alloc); + unsigned long shift; + unsigned long align_mask = 0; + + if (align_order > 0) + align_mask = 0xffffffffffffffffl >> (64 - align_order); + + /* Sanity check */ + if (unlikely(npages == 0)) { + WARN_ON_ONCE(1); + return DMA_ERROR_CODE; + } + + if (largealloc) { + pool = &(iommu->large_pool); + pool_nr = 0; /* to keep compiler happy */ + } else { + /* pick out pool_nr */ + pool_nr = pool_hash & (npools - 1); + pool = &(iommu->pools[pool_nr]); + } + spin_lock_irqsave(&pool->lock, flags); + + again: + if (pass == 0 && handle && *handle && + (*handle >= pool->start) && (*handle < pool->end)) + start = *handle; + else + start = pool->hint; + + limit = pool->end; + + /* The case below can happen if we have a small segment appended + * to a large, or when the previous alloc was at the very end of + * the available space. If so, go back to the beginning. If a + * flush is needed, it will get done based on the return value + * from iommu_area_alloc() below. + */ + if (start >= limit) + start = pool->start; + shift = iommu->table_map_base >> iommu->table_shift; + if (limit + shift > mask) { + limit = mask - shift + 1; + /* If we're constrained on address range, first try + * at the masked hint to avoid O(n) search complexity, + * but on second pass, start at 0 in pool 0. + */ + if ((start & mask) >= limit || pass > 0) { + spin_unlock(&(pool->lock)); + pool = &(iommu->pools[0]); + spin_lock(&(pool->lock)); + start = pool->start; + } else { + start &= mask; + } + } + + if (dev) + boundary_size = ALIGN(dma_get_seg_boundary(dev) + 1, + 1 << iommu->table_shift); + else + boundary_size = ALIGN(1ULL << 32, 1 << iommu->table_shift); + + boundary_size = boundary_size >> iommu->table_shift; + /* + * if the skip_span_boundary_check had been set during init, we set + * things up so that iommu_is_span_boundary() merely checks if the + * (index + npages) < num_tsb_entries + */ + if ((iommu->flags & IOMMU_NO_SPAN_BOUND) != 0) { + shift = 0; + boundary_size = iommu->poolsize * iommu->nr_pools; + } + n = iommu_area_alloc(iommu->map, limit, start, npages, shift, + boundary_size, align_mask); + if (n == -1) { + if (likely(pass == 0)) { + /* First failure, rescan from the beginning. */ + pool->hint = pool->start; + set_flush(iommu); + pass++; + goto again; + } else if (!largealloc && pass <= iommu->nr_pools) { + spin_unlock(&(pool->lock)); + pool_nr = (pool_nr + 1) & (iommu->nr_pools - 1); + pool = &(iommu->pools[pool_nr]); + spin_lock(&(pool->lock)); + pool->hint = pool->start; + set_flush(iommu); + pass++; + goto again; + } else { + /* give up */ + n = DMA_ERROR_CODE; + goto bail; + } + } + if (n < pool->hint || need_flush(iommu)) { + clear_flush(iommu); + iommu->lazy_flush(iommu); + } + + end = n + npages; + pool->hint = end; + + /* Update handle for SG allocations */ + if (handle) + *handle = end; +bail: + spin_unlock_irqrestore(&(pool->lock), flags); + + return n; +} +EXPORT_SYMBOL(iommu_tbl_range_alloc); + +static struct iommu_pool *get_pool(struct iommu_map_table *tbl, + unsigned long entry) +{ + struct iommu_pool *p; + unsigned long largepool_start = tbl->large_pool.start; + bool large_pool = ((tbl->flags & IOMMU_HAS_LARGE_POOL) != 0); + + /* The large pool is the last pool at the top of the table */ + if (large_pool && entry >= largepool_start) { + p = &tbl->large_pool; + } else { + unsigned int pool_nr = entry / tbl->poolsize; + + BUG_ON(pool_nr >= tbl->nr_pools); + p = &tbl->pools[pool_nr]; + } + return p; +} + +/* Caller supplies the index of the entry into the iommu map table + * itself when the mapping from dma_addr to the entry is not the + * default addr->entry mapping below. + */ +void iommu_tbl_range_free(struct iommu_map_table *iommu, u64 dma_addr, + unsigned long npages, unsigned long entry) +{ + struct iommu_pool *pool; + unsigned long flags; + unsigned long shift = iommu->table_shift; + + if (entry == DMA_ERROR_CODE) /* use default addr->entry mapping */ + entry = (dma_addr - iommu->table_map_base) >> shift; + pool = get_pool(iommu, entry); + + spin_lock_irqsave(&(pool->lock), flags); + bitmap_clear(iommu->map, entry, npages); + spin_unlock_irqrestore(&(pool->lock), flags); +} +EXPORT_SYMBOL(iommu_tbl_range_free); diff --git a/lib/raid6/algos.c b/lib/raid6/algos.c index dbef2314901e..975c6e0434bd 100644 --- a/lib/raid6/algos.c +++ b/lib/raid6/algos.c @@ -131,11 +131,12 @@ static inline const struct raid6_recov_calls *raid6_choose_recov(void) static inline const struct raid6_calls *raid6_choose_gen( void *(*const dptrs)[(65536/PAGE_SIZE)+2], const int disks) { - unsigned long perf, bestperf, j0, j1; + unsigned long perf, bestgenperf, bestxorperf, j0, j1; + int start = (disks>>1)-1, stop = disks-3; /* work on the second half of the disks */ const struct raid6_calls *const *algo; const struct raid6_calls *best; - for (bestperf = 0, best = NULL, algo = raid6_algos; *algo; algo++) { + for (bestgenperf = 0, bestxorperf = 0, best = NULL, algo = raid6_algos; *algo; algo++) { if (!best || (*algo)->prefer >= best->prefer) { if ((*algo)->valid && !(*algo)->valid()) continue; @@ -153,19 +154,45 @@ static inline const struct raid6_calls *raid6_choose_gen( } preempt_enable(); - if (perf > bestperf) { - bestperf = perf; + if (perf > bestgenperf) { + bestgenperf = perf; best = *algo; } - pr_info("raid6: %-8s %5ld MB/s\n", (*algo)->name, + pr_info("raid6: %-8s gen() %5ld MB/s\n", (*algo)->name, (perf*HZ) >> (20-16+RAID6_TIME_JIFFIES_LG2)); + + if (!(*algo)->xor_syndrome) + continue; + + perf = 0; + + preempt_disable(); + j0 = jiffies; + while ((j1 = jiffies) == j0) + cpu_relax(); + while (time_before(jiffies, + j1 + (1<<RAID6_TIME_JIFFIES_LG2))) { + (*algo)->xor_syndrome(disks, start, stop, + PAGE_SIZE, *dptrs); + perf++; + } + preempt_enable(); + + if (best == *algo) + bestxorperf = perf; + + pr_info("raid6: %-8s xor() %5ld MB/s\n", (*algo)->name, + (perf*HZ) >> (20-16+RAID6_TIME_JIFFIES_LG2+1)); } } if (best) { - pr_info("raid6: using algorithm %s (%ld MB/s)\n", + pr_info("raid6: using algorithm %s gen() %ld MB/s\n", best->name, - (bestperf*HZ) >> (20-16+RAID6_TIME_JIFFIES_LG2)); + (bestgenperf*HZ) >> (20-16+RAID6_TIME_JIFFIES_LG2)); + if (best->xor_syndrome) + pr_info("raid6: .... xor() %ld MB/s, rmw enabled\n", + (bestxorperf*HZ) >> (20-16+RAID6_TIME_JIFFIES_LG2+1)); raid6_call = *best; } else pr_err("raid6: Yikes! No algorithm found!\n"); diff --git a/lib/raid6/altivec.uc b/lib/raid6/altivec.uc index 7cc12b532e95..bec27fce7501 100644 --- a/lib/raid6/altivec.uc +++ b/lib/raid6/altivec.uc @@ -119,6 +119,7 @@ int raid6_have_altivec(void) const struct raid6_calls raid6_altivec$# = { raid6_altivec$#_gen_syndrome, + NULL, /* XOR not yet implemented */ raid6_have_altivec, "altivecx$#", 0 diff --git a/lib/raid6/avx2.c b/lib/raid6/avx2.c index bc3b1dd436eb..76734004358d 100644 --- a/lib/raid6/avx2.c +++ b/lib/raid6/avx2.c @@ -89,6 +89,7 @@ static void raid6_avx21_gen_syndrome(int disks, size_t bytes, void **ptrs) const struct raid6_calls raid6_avx2x1 = { raid6_avx21_gen_syndrome, + NULL, /* XOR not yet implemented */ raid6_have_avx2, "avx2x1", 1 /* Has cache hints */ @@ -150,6 +151,7 @@ static void raid6_avx22_gen_syndrome(int disks, size_t bytes, void **ptrs) const struct raid6_calls raid6_avx2x2 = { raid6_avx22_gen_syndrome, + NULL, /* XOR not yet implemented */ raid6_have_avx2, "avx2x2", 1 /* Has cache hints */ @@ -242,6 +244,7 @@ static void raid6_avx24_gen_syndrome(int disks, size_t bytes, void **ptrs) const struct raid6_calls raid6_avx2x4 = { raid6_avx24_gen_syndrome, + NULL, /* XOR not yet implemented */ raid6_have_avx2, "avx2x4", 1 /* Has cache hints */ diff --git a/lib/raid6/int.uc b/lib/raid6/int.uc index 5b50f8dfc5d2..558aeac9342a 100644 --- a/lib/raid6/int.uc +++ b/lib/raid6/int.uc @@ -107,9 +107,48 @@ static void raid6_int$#_gen_syndrome(int disks, size_t bytes, void **ptrs) } } +static void raid6_int$#_xor_syndrome(int disks, int start, int stop, + size_t bytes, void **ptrs) +{ + u8 **dptr = (u8 **)ptrs; + u8 *p, *q; + int d, z, z0; + + unative_t wd$$, wq$$, wp$$, w1$$, w2$$; + + z0 = stop; /* P/Q right side optimization */ + p = dptr[disks-2]; /* XOR parity */ + q = dptr[disks-1]; /* RS syndrome */ + + for ( d = 0 ; d < bytes ; d += NSIZE*$# ) { + /* P/Q data pages */ + wq$$ = wp$$ = *(unative_t *)&dptr[z0][d+$$*NSIZE]; + for ( z = z0-1 ; z >= start ; z-- ) { + wd$$ = *(unative_t *)&dptr[z][d+$$*NSIZE]; + wp$$ ^= wd$$; + w2$$ = MASK(wq$$); + w1$$ = SHLBYTE(wq$$); + w2$$ &= NBYTES(0x1d); + w1$$ ^= w2$$; + wq$$ = w1$$ ^ wd$$; + } + /* P/Q left side optimization */ + for ( z = start-1 ; z >= 0 ; z-- ) { + w2$$ = MASK(wq$$); + w1$$ = SHLBYTE(wq$$); + w2$$ &= NBYTES(0x1d); + wq$$ = w1$$ ^ w2$$; + } + *(unative_t *)&p[d+NSIZE*$$] ^= wp$$; + *(unative_t *)&q[d+NSIZE*$$] ^= wq$$; + } + +} + const struct raid6_calls raid6_intx$# = { raid6_int$#_gen_syndrome, - NULL, /* always valid */ + raid6_int$#_xor_syndrome, + NULL, /* always valid */ "int" NSTRING "x$#", 0 }; diff --git a/lib/raid6/mmx.c b/lib/raid6/mmx.c index 590c71c9e200..b3b0e1fcd3af 100644 --- a/lib/raid6/mmx.c +++ b/lib/raid6/mmx.c @@ -76,6 +76,7 @@ static void raid6_mmx1_gen_syndrome(int disks, size_t bytes, void **ptrs) const struct raid6_calls raid6_mmxx1 = { raid6_mmx1_gen_syndrome, + NULL, /* XOR not yet implemented */ raid6_have_mmx, "mmxx1", 0 @@ -134,6 +135,7 @@ static void raid6_mmx2_gen_syndrome(int disks, size_t bytes, void **ptrs) const struct raid6_calls raid6_mmxx2 = { raid6_mmx2_gen_syndrome, + NULL, /* XOR not yet implemented */ raid6_have_mmx, "mmxx2", 0 diff --git a/lib/raid6/neon.c b/lib/raid6/neon.c index 36ad4705df1a..d9ad6ee284f4 100644 --- a/lib/raid6/neon.c +++ b/lib/raid6/neon.c @@ -42,6 +42,7 @@ } \ struct raid6_calls const raid6_neonx ## _n = { \ raid6_neon ## _n ## _gen_syndrome, \ + NULL, /* XOR not yet implemented */ \ raid6_have_neon, \ "neonx" #_n, \ 0 \ diff --git a/lib/raid6/sse1.c b/lib/raid6/sse1.c index f76297139445..9025b8ca9aa3 100644 --- a/lib/raid6/sse1.c +++ b/lib/raid6/sse1.c @@ -92,6 +92,7 @@ static void raid6_sse11_gen_syndrome(int disks, size_t bytes, void **ptrs) const struct raid6_calls raid6_sse1x1 = { raid6_sse11_gen_syndrome, + NULL, /* XOR not yet implemented */ raid6_have_sse1_or_mmxext, "sse1x1", 1 /* Has cache hints */ @@ -154,6 +155,7 @@ static void raid6_sse12_gen_syndrome(int disks, size_t bytes, void **ptrs) const struct raid6_calls raid6_sse1x2 = { raid6_sse12_gen_syndrome, + NULL, /* XOR not yet implemented */ raid6_have_sse1_or_mmxext, "sse1x2", 1 /* Has cache hints */ diff --git a/lib/raid6/sse2.c b/lib/raid6/sse2.c index 85b82c85f28e..1d2276b007ee 100644 --- a/lib/raid6/sse2.c +++ b/lib/raid6/sse2.c @@ -88,8 +88,58 @@ static void raid6_sse21_gen_syndrome(int disks, size_t bytes, void **ptrs) kernel_fpu_end(); } + +static void raid6_sse21_xor_syndrome(int disks, int start, int stop, + size_t bytes, void **ptrs) + { + u8 **dptr = (u8 **)ptrs; + u8 *p, *q; + int d, z, z0; + + z0 = stop; /* P/Q right side optimization */ + p = dptr[disks-2]; /* XOR parity */ + q = dptr[disks-1]; /* RS syndrome */ + + kernel_fpu_begin(); + + asm volatile("movdqa %0,%%xmm0" : : "m" (raid6_sse_constants.x1d[0])); + + for ( d = 0 ; d < bytes ; d += 16 ) { + asm volatile("movdqa %0,%%xmm4" :: "m" (dptr[z0][d])); + asm volatile("movdqa %0,%%xmm2" : : "m" (p[d])); + asm volatile("pxor %xmm4,%xmm2"); + /* P/Q data pages */ + for ( z = z0-1 ; z >= start ; z-- ) { + asm volatile("pxor %xmm5,%xmm5"); + asm volatile("pcmpgtb %xmm4,%xmm5"); + asm volatile("paddb %xmm4,%xmm4"); + asm volatile("pand %xmm0,%xmm5"); + asm volatile("pxor %xmm5,%xmm4"); + asm volatile("movdqa %0,%%xmm5" :: "m" (dptr[z][d])); + asm volatile("pxor %xmm5,%xmm2"); + asm volatile("pxor %xmm5,%xmm4"); + } + /* P/Q left side optimization */ + for ( z = start-1 ; z >= 0 ; z-- ) { + asm volatile("pxor %xmm5,%xmm5"); + asm volatile("pcmpgtb %xmm4,%xmm5"); + asm volatile("paddb %xmm4,%xmm4"); + asm volatile("pand %xmm0,%xmm5"); + asm volatile("pxor %xmm5,%xmm4"); + } + asm volatile("pxor %0,%%xmm4" : : "m" (q[d])); + /* Don't use movntdq for r/w memory area < cache line */ + asm volatile("movdqa %%xmm4,%0" : "=m" (q[d])); + asm volatile("movdqa %%xmm2,%0" : "=m" (p[d])); + } + + asm volatile("sfence" : : : "memory"); + kernel_fpu_end(); +} + const struct raid6_calls raid6_sse2x1 = { raid6_sse21_gen_syndrome, + raid6_sse21_xor_syndrome, raid6_have_sse2, "sse2x1", 1 /* Has cache hints */ @@ -150,8 +200,76 @@ static void raid6_sse22_gen_syndrome(int disks, size_t bytes, void **ptrs) kernel_fpu_end(); } + static void raid6_sse22_xor_syndrome(int disks, int start, int stop, + size_t bytes, void **ptrs) + { + u8 **dptr = (u8 **)ptrs; + u8 *p, *q; + int d, z, z0; + + z0 = stop; /* P/Q right side optimization */ + p = dptr[disks-2]; /* XOR parity */ + q = dptr[disks-1]; /* RS syndrome */ + + kernel_fpu_begin(); + + asm volatile("movdqa %0,%%xmm0" : : "m" (raid6_sse_constants.x1d[0])); + + for ( d = 0 ; d < bytes ; d += 32 ) { + asm volatile("movdqa %0,%%xmm4" :: "m" (dptr[z0][d])); + asm volatile("movdqa %0,%%xmm6" :: "m" (dptr[z0][d+16])); + asm volatile("movdqa %0,%%xmm2" : : "m" (p[d])); + asm volatile("movdqa %0,%%xmm3" : : "m" (p[d+16])); + asm volatile("pxor %xmm4,%xmm2"); + asm volatile("pxor %xmm6,%xmm3"); + /* P/Q data pages */ + for ( z = z0-1 ; z >= start ; z-- ) { + asm volatile("pxor %xmm5,%xmm5"); + asm volatile("pxor %xmm7,%xmm7"); + asm volatile("pcmpgtb %xmm4,%xmm5"); + asm volatile("pcmpgtb %xmm6,%xmm7"); + asm volatile("paddb %xmm4,%xmm4"); + asm volatile("paddb %xmm6,%xmm6"); + asm volatile("pand %xmm0,%xmm5"); + asm volatile("pand %xmm0,%xmm7"); + asm volatile("pxor %xmm5,%xmm4"); + asm volatile("pxor %xmm7,%xmm6"); + asm volatile("movdqa %0,%%xmm5" :: "m" (dptr[z][d])); + asm volatile("movdqa %0,%%xmm7" :: "m" (dptr[z][d+16])); + asm volatile("pxor %xmm5,%xmm2"); + asm volatile("pxor %xmm7,%xmm3"); + asm volatile("pxor %xmm5,%xmm4"); + asm volatile("pxor %xmm7,%xmm6"); + } + /* P/Q left side optimization */ + for ( z = start-1 ; z >= 0 ; z-- ) { + asm volatile("pxor %xmm5,%xmm5"); + asm volatile("pxor %xmm7,%xmm7"); + asm volatile("pcmpgtb %xmm4,%xmm5"); + asm volatile("pcmpgtb %xmm6,%xmm7"); + asm volatile("paddb %xmm4,%xmm4"); + asm volatile("paddb %xmm6,%xmm6"); + asm volatile("pand %xmm0,%xmm5"); + asm volatile("pand %xmm0,%xmm7"); + asm volatile("pxor %xmm5,%xmm4"); + asm volatile("pxor %xmm7,%xmm6"); + } + asm volatile("pxor %0,%%xmm4" : : "m" (q[d])); + asm volatile("pxor %0,%%xmm6" : : "m" (q[d+16])); + /* Don't use movntdq for r/w memory area < cache line */ + asm volatile("movdqa %%xmm4,%0" : "=m" (q[d])); + asm volatile("movdqa %%xmm6,%0" : "=m" (q[d+16])); + asm volatile("movdqa %%xmm2,%0" : "=m" (p[d])); + asm volatile("movdqa %%xmm3,%0" : "=m" (p[d+16])); + } + + asm volatile("sfence" : : : "memory"); + kernel_fpu_end(); + } + const struct raid6_calls raid6_sse2x2 = { raid6_sse22_gen_syndrome, + raid6_sse22_xor_syndrome, raid6_have_sse2, "sse2x2", 1 /* Has cache hints */ @@ -248,8 +366,117 @@ static void raid6_sse24_gen_syndrome(int disks, size_t bytes, void **ptrs) kernel_fpu_end(); } + static void raid6_sse24_xor_syndrome(int disks, int start, int stop, + size_t bytes, void **ptrs) + { + u8 **dptr = (u8 **)ptrs; + u8 *p, *q; + int d, z, z0; + + z0 = stop; /* P/Q right side optimization */ + p = dptr[disks-2]; /* XOR parity */ + q = dptr[disks-1]; /* RS syndrome */ + + kernel_fpu_begin(); + + asm volatile("movdqa %0,%%xmm0" :: "m" (raid6_sse_constants.x1d[0])); + + for ( d = 0 ; d < bytes ; d += 64 ) { + asm volatile("movdqa %0,%%xmm4" :: "m" (dptr[z0][d])); + asm volatile("movdqa %0,%%xmm6" :: "m" (dptr[z0][d+16])); + asm volatile("movdqa %0,%%xmm12" :: "m" (dptr[z0][d+32])); + asm volatile("movdqa %0,%%xmm14" :: "m" (dptr[z0][d+48])); + asm volatile("movdqa %0,%%xmm2" : : "m" (p[d])); + asm volatile("movdqa %0,%%xmm3" : : "m" (p[d+16])); + asm volatile("movdqa %0,%%xmm10" : : "m" (p[d+32])); + asm volatile("movdqa %0,%%xmm11" : : "m" (p[d+48])); + asm volatile("pxor %xmm4,%xmm2"); + asm volatile("pxor %xmm6,%xmm3"); + asm volatile("pxor %xmm12,%xmm10"); + asm volatile("pxor %xmm14,%xmm11"); + /* P/Q data pages */ + for ( z = z0-1 ; z >= start ; z-- ) { + asm volatile("prefetchnta %0" :: "m" (dptr[z][d])); + asm volatile("prefetchnta %0" :: "m" (dptr[z][d+32])); + asm volatile("pxor %xmm5,%xmm5"); + asm volatile("pxor %xmm7,%xmm7"); + asm volatile("pxor %xmm13,%xmm13"); + asm volatile("pxor %xmm15,%xmm15"); + asm volatile("pcmpgtb %xmm4,%xmm5"); + asm volatile("pcmpgtb %xmm6,%xmm7"); + asm volatile("pcmpgtb %xmm12,%xmm13"); + asm volatile("pcmpgtb %xmm14,%xmm15"); + asm volatile("paddb %xmm4,%xmm4"); + asm volatile("paddb %xmm6,%xmm6"); + asm volatile("paddb %xmm12,%xmm12"); + asm volatile("paddb %xmm14,%xmm14"); + asm volatile("pand %xmm0,%xmm5"); + asm volatile("pand %xmm0,%xmm7"); + asm volatile("pand %xmm0,%xmm13"); + asm volatile("pand %xmm0,%xmm15"); + asm volatile("pxor %xmm5,%xmm4"); + asm volatile("pxor %xmm7,%xmm6"); + asm volatile("pxor %xmm13,%xmm12"); + asm volatile("pxor %xmm15,%xmm14"); + asm volatile("movdqa %0,%%xmm5" :: "m" (dptr[z][d])); + asm volatile("movdqa %0,%%xmm7" :: "m" (dptr[z][d+16])); + asm volatile("movdqa %0,%%xmm13" :: "m" (dptr[z][d+32])); + asm volatile("movdqa %0,%%xmm15" :: "m" (dptr[z][d+48])); + asm volatile("pxor %xmm5,%xmm2"); + asm volatile("pxor %xmm7,%xmm3"); + asm volatile("pxor %xmm13,%xmm10"); + asm volatile("pxor %xmm15,%xmm11"); + asm volatile("pxor %xmm5,%xmm4"); + asm volatile("pxor %xmm7,%xmm6"); + asm volatile("pxor %xmm13,%xmm12"); + asm volatile("pxor %xmm15,%xmm14"); + } + asm volatile("prefetchnta %0" :: "m" (q[d])); + asm volatile("prefetchnta %0" :: "m" (q[d+32])); + /* P/Q left side optimization */ + for ( z = start-1 ; z >= 0 ; z-- ) { + asm volatile("pxor %xmm5,%xmm5"); + asm volatile("pxor %xmm7,%xmm7"); + asm volatile("pxor %xmm13,%xmm13"); + asm volatile("pxor %xmm15,%xmm15"); + asm volatile("pcmpgtb %xmm4,%xmm5"); + asm volatile("pcmpgtb %xmm6,%xmm7"); + asm volatile("pcmpgtb %xmm12,%xmm13"); + asm volatile("pcmpgtb %xmm14,%xmm15"); + asm volatile("paddb %xmm4,%xmm4"); + asm volatile("paddb %xmm6,%xmm6"); + asm volatile("paddb %xmm12,%xmm12"); + asm volatile("paddb %xmm14,%xmm14"); + asm volatile("pand %xmm0,%xmm5"); + asm volatile("pand %xmm0,%xmm7"); + asm volatile("pand %xmm0,%xmm13"); + asm volatile("pand %xmm0,%xmm15"); + asm volatile("pxor %xmm5,%xmm4"); + asm volatile("pxor %xmm7,%xmm6"); + asm volatile("pxor %xmm13,%xmm12"); + asm volatile("pxor %xmm15,%xmm14"); + } + asm volatile("movntdq %%xmm2,%0" : "=m" (p[d])); + asm volatile("movntdq %%xmm3,%0" : "=m" (p[d+16])); + asm volatile("movntdq %%xmm10,%0" : "=m" (p[d+32])); + asm volatile("movntdq %%xmm11,%0" : "=m" (p[d+48])); + asm volatile("pxor %0,%%xmm4" : : "m" (q[d])); + asm volatile("pxor %0,%%xmm6" : : "m" (q[d+16])); + asm volatile("pxor %0,%%xmm12" : : "m" (q[d+32])); + asm volatile("pxor %0,%%xmm14" : : "m" (q[d+48])); + asm volatile("movntdq %%xmm4,%0" : "=m" (q[d])); + asm volatile("movntdq %%xmm6,%0" : "=m" (q[d+16])); + asm volatile("movntdq %%xmm12,%0" : "=m" (q[d+32])); + asm volatile("movntdq %%xmm14,%0" : "=m" (q[d+48])); + } + asm volatile("sfence" : : : "memory"); + kernel_fpu_end(); + } + + const struct raid6_calls raid6_sse2x4 = { raid6_sse24_gen_syndrome, + raid6_sse24_xor_syndrome, raid6_have_sse2, "sse2x4", 1 /* Has cache hints */ diff --git a/lib/raid6/test/test.c b/lib/raid6/test/test.c index 5a485b7a7d3c..3bebbabdb510 100644 --- a/lib/raid6/test/test.c +++ b/lib/raid6/test/test.c @@ -28,11 +28,11 @@ char *dataptrs[NDISKS]; char data[NDISKS][PAGE_SIZE]; char recovi[PAGE_SIZE], recovj[PAGE_SIZE]; -static void makedata(void) +static void makedata(int start, int stop) { int i, j; - for (i = 0; i < NDISKS; i++) { + for (i = start; i <= stop; i++) { for (j = 0; j < PAGE_SIZE; j++) data[i][j] = rand(); @@ -91,34 +91,55 @@ int main(int argc, char *argv[]) { const struct raid6_calls *const *algo; const struct raid6_recov_calls *const *ra; - int i, j; + int i, j, p1, p2; int err = 0; - makedata(); + makedata(0, NDISKS-1); for (ra = raid6_recov_algos; *ra; ra++) { if ((*ra)->valid && !(*ra)->valid()) continue; + raid6_2data_recov = (*ra)->data2; raid6_datap_recov = (*ra)->datap; printf("using recovery %s\n", (*ra)->name); for (algo = raid6_algos; *algo; algo++) { - if (!(*algo)->valid || (*algo)->valid()) { - raid6_call = **algo; + if ((*algo)->valid && !(*algo)->valid()) + continue; + + raid6_call = **algo; + + /* Nuke syndromes */ + memset(data[NDISKS-2], 0xee, 2*PAGE_SIZE); + + /* Generate assumed good syndrome */ + raid6_call.gen_syndrome(NDISKS, PAGE_SIZE, + (void **)&dataptrs); + + for (i = 0; i < NDISKS-1; i++) + for (j = i+1; j < NDISKS; j++) + err += test_disks(i, j); + + if (!raid6_call.xor_syndrome) + continue; + + for (p1 = 0; p1 < NDISKS-2; p1++) + for (p2 = p1; p2 < NDISKS-2; p2++) { - /* Nuke syndromes */ - memset(data[NDISKS-2], 0xee, 2*PAGE_SIZE); + /* Simulate rmw run */ + raid6_call.xor_syndrome(NDISKS, p1, p2, PAGE_SIZE, + (void **)&dataptrs); + makedata(p1, p2); + raid6_call.xor_syndrome(NDISKS, p1, p2, PAGE_SIZE, + (void **)&dataptrs); - /* Generate assumed good syndrome */ - raid6_call.gen_syndrome(NDISKS, PAGE_SIZE, - (void **)&dataptrs); + for (i = 0; i < NDISKS-1; i++) + for (j = i+1; j < NDISKS; j++) + err += test_disks(i, j); + } - for (i = 0; i < NDISKS-1; i++) - for (j = i+1; j < NDISKS; j++) - err += test_disks(i, j); - } } printf("\n"); } diff --git a/lib/raid6/tilegx.uc b/lib/raid6/tilegx.uc index e7c29459cbcd..2dd291a11264 100644 --- a/lib/raid6/tilegx.uc +++ b/lib/raid6/tilegx.uc @@ -80,6 +80,7 @@ void raid6_tilegx$#_gen_syndrome(int disks, size_t bytes, void **ptrs) const struct raid6_calls raid6_tilegx$# = { raid6_tilegx$#_gen_syndrome, + NULL, /* XOR not yet implemented */ NULL, "tilegx$#", 0 diff --git a/lib/test-hexdump.c b/lib/test-hexdump.c index 9846ff7428b3..c227cc43ec0a 100644 --- a/lib/test-hexdump.c +++ b/lib/test-hexdump.c @@ -48,7 +48,7 @@ static void __init test_hexdump(size_t len, int rowsize, int groupsize, char test[32 * 3 + 2 + 32 + 1]; char real[32 * 3 + 2 + 32 + 1]; char *p; - const char **result; + const char * const *result; size_t l = len; int gs = groupsize, rs = rowsize; unsigned int i; |