summaryrefslogtreecommitdiffstats
path: root/lib
diff options
context:
space:
mode:
Diffstat (limited to 'lib')
-rw-r--r--lib/Kconfig4
-rw-r--r--lib/Makefile2
-rw-r--r--lib/cpumask.c28
-rw-r--r--lib/devres.c28
-rw-r--r--lib/iommu-common.c270
-rw-r--r--lib/raid6/algos.c41
-rw-r--r--lib/raid6/altivec.uc1
-rw-r--r--lib/raid6/avx2.c3
-rw-r--r--lib/raid6/int.uc41
-rw-r--r--lib/raid6/mmx.c2
-rw-r--r--lib/raid6/neon.c1
-rw-r--r--lib/raid6/sse1.c2
-rw-r--r--lib/raid6/sse2.c227
-rw-r--r--lib/raid6/test/test.c51
-rw-r--r--lib/raid6/tilegx.uc1
-rw-r--r--lib/test-hexdump.c2
16 files changed, 647 insertions, 57 deletions
diff --git a/lib/Kconfig b/lib/Kconfig
index f5440221d929..601965a948e8 100644
--- a/lib/Kconfig
+++ b/lib/Kconfig
@@ -396,10 +396,6 @@ config CPUMASK_OFFSTACK
them on the stack. This is a bit more expensive, but avoids
stack overflow.
-config DISABLE_OBSOLETE_CPUMASK_FUNCTIONS
- bool "Disable obsolete cpumask functions" if DEBUG_PER_CPU_MAPS
- depends on BROKEN
-
config CPU_RMAP
bool
depends on SMP
diff --git a/lib/Makefile b/lib/Makefile
index da6116b21555..6c37933336a0 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -106,7 +106,7 @@ obj-$(CONFIG_AUDIT_GENERIC) += audit.o
obj-$(CONFIG_AUDIT_COMPAT_GENERIC) += compat_audit.o
obj-$(CONFIG_SWIOTLB) += swiotlb.o
-obj-$(CONFIG_IOMMU_HELPER) += iommu-helper.o
+obj-$(CONFIG_IOMMU_HELPER) += iommu-helper.o iommu-common.o
obj-$(CONFIG_FAULT_INJECTION) += fault-inject.o
obj-$(CONFIG_NOTIFIER_ERROR_INJECTION) += notifier-error-inject.o
obj-$(CONFIG_CPU_NOTIFIER_ERROR_INJECT) += cpu-notifier-error-inject.o
diff --git a/lib/cpumask.c b/lib/cpumask.c
index 5ab1553fd076..830dd5dec40f 100644
--- a/lib/cpumask.c
+++ b/lib/cpumask.c
@@ -5,27 +5,6 @@
#include <linux/export.h>
#include <linux/bootmem.h>
-int __first_cpu(const cpumask_t *srcp)
-{
- return min_t(int, NR_CPUS, find_first_bit(srcp->bits, NR_CPUS));
-}
-EXPORT_SYMBOL(__first_cpu);
-
-int __next_cpu(int n, const cpumask_t *srcp)
-{
- return min_t(int, NR_CPUS, find_next_bit(srcp->bits, NR_CPUS, n+1));
-}
-EXPORT_SYMBOL(__next_cpu);
-
-#if NR_CPUS > 64
-int __next_cpu_nr(int n, const cpumask_t *srcp)
-{
- return min_t(int, nr_cpu_ids,
- find_next_bit(srcp->bits, nr_cpu_ids, n+1));
-}
-EXPORT_SYMBOL(__next_cpu_nr);
-#endif
-
/**
* cpumask_next_and - get the next cpu in *src1p & *src2p
* @n: the cpu prior to the place to search (ie. return will be > @n)
@@ -90,13 +69,6 @@ bool alloc_cpumask_var_node(cpumask_var_t *mask, gfp_t flags, int node)
dump_stack();
}
#endif
- /* FIXME: Bandaid to save us from old primitives which go to NR_CPUS. */
- if (*mask) {
- unsigned char *ptr = (unsigned char *)cpumask_bits(*mask);
- unsigned int tail;
- tail = BITS_TO_LONGS(NR_CPUS - nr_cpumask_bits) * sizeof(long);
- memset(ptr + cpumask_size() - tail, 0, tail);
- }
return *mask != NULL;
}
diff --git a/lib/devres.c b/lib/devres.c
index 0f1dd2e9d2c1..fbe2aac522e6 100644
--- a/lib/devres.c
+++ b/lib/devres.c
@@ -72,6 +72,34 @@ void __iomem *devm_ioremap_nocache(struct device *dev, resource_size_t offset,
EXPORT_SYMBOL(devm_ioremap_nocache);
/**
+ * devm_ioremap_wc - Managed ioremap_wc()
+ * @dev: Generic device to remap IO address for
+ * @offset: BUS offset to map
+ * @size: Size of map
+ *
+ * Managed ioremap_wc(). Map is automatically unmapped on driver detach.
+ */
+void __iomem *devm_ioremap_wc(struct device *dev, resource_size_t offset,
+ resource_size_t size)
+{
+ void __iomem **ptr, *addr;
+
+ ptr = devres_alloc(devm_ioremap_release, sizeof(*ptr), GFP_KERNEL);
+ if (!ptr)
+ return NULL;
+
+ addr = ioremap_wc(offset, size);
+ if (addr) {
+ *ptr = addr;
+ devres_add(dev, ptr);
+ } else
+ devres_free(ptr);
+
+ return addr;
+}
+EXPORT_SYMBOL(devm_ioremap_wc);
+
+/**
* devm_iounmap - Managed iounmap()
* @dev: Generic device to unmap for
* @addr: Address to unmap
diff --git a/lib/iommu-common.c b/lib/iommu-common.c
new file mode 100644
index 000000000000..df30632f0bef
--- /dev/null
+++ b/lib/iommu-common.c
@@ -0,0 +1,270 @@
+/*
+ * IOMMU mmap management and range allocation functions.
+ * Based almost entirely upon the powerpc iommu allocator.
+ */
+
+#include <linux/export.h>
+#include <linux/bitmap.h>
+#include <linux/bug.h>
+#include <linux/iommu-helper.h>
+#include <linux/iommu-common.h>
+#include <linux/dma-mapping.h>
+#include <linux/hash.h>
+
+#ifndef DMA_ERROR_CODE
+#define DMA_ERROR_CODE (~(dma_addr_t)0x0)
+#endif
+
+static unsigned long iommu_large_alloc = 15;
+
+static DEFINE_PER_CPU(unsigned int, iommu_hash_common);
+
+static inline bool need_flush(struct iommu_map_table *iommu)
+{
+ return (iommu->lazy_flush != NULL &&
+ (iommu->flags & IOMMU_NEED_FLUSH) != 0);
+}
+
+static inline void set_flush(struct iommu_map_table *iommu)
+{
+ iommu->flags |= IOMMU_NEED_FLUSH;
+}
+
+static inline void clear_flush(struct iommu_map_table *iommu)
+{
+ iommu->flags &= ~IOMMU_NEED_FLUSH;
+}
+
+static void setup_iommu_pool_hash(void)
+{
+ unsigned int i;
+ static bool do_once;
+
+ if (do_once)
+ return;
+ do_once = true;
+ for_each_possible_cpu(i)
+ per_cpu(iommu_hash_common, i) = hash_32(i, IOMMU_POOL_HASHBITS);
+}
+
+/*
+ * Initialize iommu_pool entries for the iommu_map_table. `num_entries'
+ * is the number of table entries. If `large_pool' is set to true,
+ * the top 1/4 of the table will be set aside for pool allocations
+ * of more than iommu_large_alloc pages.
+ */
+void iommu_tbl_pool_init(struct iommu_map_table *iommu,
+ unsigned long num_entries,
+ u32 table_shift,
+ void (*lazy_flush)(struct iommu_map_table *),
+ bool large_pool, u32 npools,
+ bool skip_span_boundary_check)
+{
+ unsigned int start, i;
+ struct iommu_pool *p = &(iommu->large_pool);
+
+ setup_iommu_pool_hash();
+ if (npools == 0)
+ iommu->nr_pools = IOMMU_NR_POOLS;
+ else
+ iommu->nr_pools = npools;
+ BUG_ON(npools > IOMMU_NR_POOLS);
+
+ iommu->table_shift = table_shift;
+ iommu->lazy_flush = lazy_flush;
+ start = 0;
+ if (skip_span_boundary_check)
+ iommu->flags |= IOMMU_NO_SPAN_BOUND;
+ if (large_pool)
+ iommu->flags |= IOMMU_HAS_LARGE_POOL;
+
+ if (!large_pool)
+ iommu->poolsize = num_entries/iommu->nr_pools;
+ else
+ iommu->poolsize = (num_entries * 3 / 4)/iommu->nr_pools;
+ for (i = 0; i < iommu->nr_pools; i++) {
+ spin_lock_init(&(iommu->pools[i].lock));
+ iommu->pools[i].start = start;
+ iommu->pools[i].hint = start;
+ start += iommu->poolsize; /* start for next pool */
+ iommu->pools[i].end = start - 1;
+ }
+ if (!large_pool)
+ return;
+ /* initialize large_pool */
+ spin_lock_init(&(p->lock));
+ p->start = start;
+ p->hint = p->start;
+ p->end = num_entries;
+}
+EXPORT_SYMBOL(iommu_tbl_pool_init);
+
+unsigned long iommu_tbl_range_alloc(struct device *dev,
+ struct iommu_map_table *iommu,
+ unsigned long npages,
+ unsigned long *handle,
+ unsigned long mask,
+ unsigned int align_order)
+{
+ unsigned int pool_hash = __this_cpu_read(iommu_hash_common);
+ unsigned long n, end, start, limit, boundary_size;
+ struct iommu_pool *pool;
+ int pass = 0;
+ unsigned int pool_nr;
+ unsigned int npools = iommu->nr_pools;
+ unsigned long flags;
+ bool large_pool = ((iommu->flags & IOMMU_HAS_LARGE_POOL) != 0);
+ bool largealloc = (large_pool && npages > iommu_large_alloc);
+ unsigned long shift;
+ unsigned long align_mask = 0;
+
+ if (align_order > 0)
+ align_mask = 0xffffffffffffffffl >> (64 - align_order);
+
+ /* Sanity check */
+ if (unlikely(npages == 0)) {
+ WARN_ON_ONCE(1);
+ return DMA_ERROR_CODE;
+ }
+
+ if (largealloc) {
+ pool = &(iommu->large_pool);
+ pool_nr = 0; /* to keep compiler happy */
+ } else {
+ /* pick out pool_nr */
+ pool_nr = pool_hash & (npools - 1);
+ pool = &(iommu->pools[pool_nr]);
+ }
+ spin_lock_irqsave(&pool->lock, flags);
+
+ again:
+ if (pass == 0 && handle && *handle &&
+ (*handle >= pool->start) && (*handle < pool->end))
+ start = *handle;
+ else
+ start = pool->hint;
+
+ limit = pool->end;
+
+ /* The case below can happen if we have a small segment appended
+ * to a large, or when the previous alloc was at the very end of
+ * the available space. If so, go back to the beginning. If a
+ * flush is needed, it will get done based on the return value
+ * from iommu_area_alloc() below.
+ */
+ if (start >= limit)
+ start = pool->start;
+ shift = iommu->table_map_base >> iommu->table_shift;
+ if (limit + shift > mask) {
+ limit = mask - shift + 1;
+ /* If we're constrained on address range, first try
+ * at the masked hint to avoid O(n) search complexity,
+ * but on second pass, start at 0 in pool 0.
+ */
+ if ((start & mask) >= limit || pass > 0) {
+ spin_unlock(&(pool->lock));
+ pool = &(iommu->pools[0]);
+ spin_lock(&(pool->lock));
+ start = pool->start;
+ } else {
+ start &= mask;
+ }
+ }
+
+ if (dev)
+ boundary_size = ALIGN(dma_get_seg_boundary(dev) + 1,
+ 1 << iommu->table_shift);
+ else
+ boundary_size = ALIGN(1ULL << 32, 1 << iommu->table_shift);
+
+ boundary_size = boundary_size >> iommu->table_shift;
+ /*
+ * if the skip_span_boundary_check had been set during init, we set
+ * things up so that iommu_is_span_boundary() merely checks if the
+ * (index + npages) < num_tsb_entries
+ */
+ if ((iommu->flags & IOMMU_NO_SPAN_BOUND) != 0) {
+ shift = 0;
+ boundary_size = iommu->poolsize * iommu->nr_pools;
+ }
+ n = iommu_area_alloc(iommu->map, limit, start, npages, shift,
+ boundary_size, align_mask);
+ if (n == -1) {
+ if (likely(pass == 0)) {
+ /* First failure, rescan from the beginning. */
+ pool->hint = pool->start;
+ set_flush(iommu);
+ pass++;
+ goto again;
+ } else if (!largealloc && pass <= iommu->nr_pools) {
+ spin_unlock(&(pool->lock));
+ pool_nr = (pool_nr + 1) & (iommu->nr_pools - 1);
+ pool = &(iommu->pools[pool_nr]);
+ spin_lock(&(pool->lock));
+ pool->hint = pool->start;
+ set_flush(iommu);
+ pass++;
+ goto again;
+ } else {
+ /* give up */
+ n = DMA_ERROR_CODE;
+ goto bail;
+ }
+ }
+ if (n < pool->hint || need_flush(iommu)) {
+ clear_flush(iommu);
+ iommu->lazy_flush(iommu);
+ }
+
+ end = n + npages;
+ pool->hint = end;
+
+ /* Update handle for SG allocations */
+ if (handle)
+ *handle = end;
+bail:
+ spin_unlock_irqrestore(&(pool->lock), flags);
+
+ return n;
+}
+EXPORT_SYMBOL(iommu_tbl_range_alloc);
+
+static struct iommu_pool *get_pool(struct iommu_map_table *tbl,
+ unsigned long entry)
+{
+ struct iommu_pool *p;
+ unsigned long largepool_start = tbl->large_pool.start;
+ bool large_pool = ((tbl->flags & IOMMU_HAS_LARGE_POOL) != 0);
+
+ /* The large pool is the last pool at the top of the table */
+ if (large_pool && entry >= largepool_start) {
+ p = &tbl->large_pool;
+ } else {
+ unsigned int pool_nr = entry / tbl->poolsize;
+
+ BUG_ON(pool_nr >= tbl->nr_pools);
+ p = &tbl->pools[pool_nr];
+ }
+ return p;
+}
+
+/* Caller supplies the index of the entry into the iommu map table
+ * itself when the mapping from dma_addr to the entry is not the
+ * default addr->entry mapping below.
+ */
+void iommu_tbl_range_free(struct iommu_map_table *iommu, u64 dma_addr,
+ unsigned long npages, unsigned long entry)
+{
+ struct iommu_pool *pool;
+ unsigned long flags;
+ unsigned long shift = iommu->table_shift;
+
+ if (entry == DMA_ERROR_CODE) /* use default addr->entry mapping */
+ entry = (dma_addr - iommu->table_map_base) >> shift;
+ pool = get_pool(iommu, entry);
+
+ spin_lock_irqsave(&(pool->lock), flags);
+ bitmap_clear(iommu->map, entry, npages);
+ spin_unlock_irqrestore(&(pool->lock), flags);
+}
+EXPORT_SYMBOL(iommu_tbl_range_free);
diff --git a/lib/raid6/algos.c b/lib/raid6/algos.c
index dbef2314901e..975c6e0434bd 100644
--- a/lib/raid6/algos.c
+++ b/lib/raid6/algos.c
@@ -131,11 +131,12 @@ static inline const struct raid6_recov_calls *raid6_choose_recov(void)
static inline const struct raid6_calls *raid6_choose_gen(
void *(*const dptrs)[(65536/PAGE_SIZE)+2], const int disks)
{
- unsigned long perf, bestperf, j0, j1;
+ unsigned long perf, bestgenperf, bestxorperf, j0, j1;
+ int start = (disks>>1)-1, stop = disks-3; /* work on the second half of the disks */
const struct raid6_calls *const *algo;
const struct raid6_calls *best;
- for (bestperf = 0, best = NULL, algo = raid6_algos; *algo; algo++) {
+ for (bestgenperf = 0, bestxorperf = 0, best = NULL, algo = raid6_algos; *algo; algo++) {
if (!best || (*algo)->prefer >= best->prefer) {
if ((*algo)->valid && !(*algo)->valid())
continue;
@@ -153,19 +154,45 @@ static inline const struct raid6_calls *raid6_choose_gen(
}
preempt_enable();
- if (perf > bestperf) {
- bestperf = perf;
+ if (perf > bestgenperf) {
+ bestgenperf = perf;
best = *algo;
}
- pr_info("raid6: %-8s %5ld MB/s\n", (*algo)->name,
+ pr_info("raid6: %-8s gen() %5ld MB/s\n", (*algo)->name,
(perf*HZ) >> (20-16+RAID6_TIME_JIFFIES_LG2));
+
+ if (!(*algo)->xor_syndrome)
+ continue;
+
+ perf = 0;
+
+ preempt_disable();
+ j0 = jiffies;
+ while ((j1 = jiffies) == j0)
+ cpu_relax();
+ while (time_before(jiffies,
+ j1 + (1<<RAID6_TIME_JIFFIES_LG2))) {
+ (*algo)->xor_syndrome(disks, start, stop,
+ PAGE_SIZE, *dptrs);
+ perf++;
+ }
+ preempt_enable();
+
+ if (best == *algo)
+ bestxorperf = perf;
+
+ pr_info("raid6: %-8s xor() %5ld MB/s\n", (*algo)->name,
+ (perf*HZ) >> (20-16+RAID6_TIME_JIFFIES_LG2+1));
}
}
if (best) {
- pr_info("raid6: using algorithm %s (%ld MB/s)\n",
+ pr_info("raid6: using algorithm %s gen() %ld MB/s\n",
best->name,
- (bestperf*HZ) >> (20-16+RAID6_TIME_JIFFIES_LG2));
+ (bestgenperf*HZ) >> (20-16+RAID6_TIME_JIFFIES_LG2));
+ if (best->xor_syndrome)
+ pr_info("raid6: .... xor() %ld MB/s, rmw enabled\n",
+ (bestxorperf*HZ) >> (20-16+RAID6_TIME_JIFFIES_LG2+1));
raid6_call = *best;
} else
pr_err("raid6: Yikes! No algorithm found!\n");
diff --git a/lib/raid6/altivec.uc b/lib/raid6/altivec.uc
index 7cc12b532e95..bec27fce7501 100644
--- a/lib/raid6/altivec.uc
+++ b/lib/raid6/altivec.uc
@@ -119,6 +119,7 @@ int raid6_have_altivec(void)
const struct raid6_calls raid6_altivec$# = {
raid6_altivec$#_gen_syndrome,
+ NULL, /* XOR not yet implemented */
raid6_have_altivec,
"altivecx$#",
0
diff --git a/lib/raid6/avx2.c b/lib/raid6/avx2.c
index bc3b1dd436eb..76734004358d 100644
--- a/lib/raid6/avx2.c
+++ b/lib/raid6/avx2.c
@@ -89,6 +89,7 @@ static void raid6_avx21_gen_syndrome(int disks, size_t bytes, void **ptrs)
const struct raid6_calls raid6_avx2x1 = {
raid6_avx21_gen_syndrome,
+ NULL, /* XOR not yet implemented */
raid6_have_avx2,
"avx2x1",
1 /* Has cache hints */
@@ -150,6 +151,7 @@ static void raid6_avx22_gen_syndrome(int disks, size_t bytes, void **ptrs)
const struct raid6_calls raid6_avx2x2 = {
raid6_avx22_gen_syndrome,
+ NULL, /* XOR not yet implemented */
raid6_have_avx2,
"avx2x2",
1 /* Has cache hints */
@@ -242,6 +244,7 @@ static void raid6_avx24_gen_syndrome(int disks, size_t bytes, void **ptrs)
const struct raid6_calls raid6_avx2x4 = {
raid6_avx24_gen_syndrome,
+ NULL, /* XOR not yet implemented */
raid6_have_avx2,
"avx2x4",
1 /* Has cache hints */
diff --git a/lib/raid6/int.uc b/lib/raid6/int.uc
index 5b50f8dfc5d2..558aeac9342a 100644
--- a/lib/raid6/int.uc
+++ b/lib/raid6/int.uc
@@ -107,9 +107,48 @@ static void raid6_int$#_gen_syndrome(int disks, size_t bytes, void **ptrs)
}
}
+static void raid6_int$#_xor_syndrome(int disks, int start, int stop,
+ size_t bytes, void **ptrs)
+{
+ u8 **dptr = (u8 **)ptrs;
+ u8 *p, *q;
+ int d, z, z0;
+
+ unative_t wd$$, wq$$, wp$$, w1$$, w2$$;
+
+ z0 = stop; /* P/Q right side optimization */
+ p = dptr[disks-2]; /* XOR parity */
+ q = dptr[disks-1]; /* RS syndrome */
+
+ for ( d = 0 ; d < bytes ; d += NSIZE*$# ) {
+ /* P/Q data pages */
+ wq$$ = wp$$ = *(unative_t *)&dptr[z0][d+$$*NSIZE];
+ for ( z = z0-1 ; z >= start ; z-- ) {
+ wd$$ = *(unative_t *)&dptr[z][d+$$*NSIZE];
+ wp$$ ^= wd$$;
+ w2$$ = MASK(wq$$);
+ w1$$ = SHLBYTE(wq$$);
+ w2$$ &= NBYTES(0x1d);
+ w1$$ ^= w2$$;
+ wq$$ = w1$$ ^ wd$$;
+ }
+ /* P/Q left side optimization */
+ for ( z = start-1 ; z >= 0 ; z-- ) {
+ w2$$ = MASK(wq$$);
+ w1$$ = SHLBYTE(wq$$);
+ w2$$ &= NBYTES(0x1d);
+ wq$$ = w1$$ ^ w2$$;
+ }
+ *(unative_t *)&p[d+NSIZE*$$] ^= wp$$;
+ *(unative_t *)&q[d+NSIZE*$$] ^= wq$$;
+ }
+
+}
+
const struct raid6_calls raid6_intx$# = {
raid6_int$#_gen_syndrome,
- NULL, /* always valid */
+ raid6_int$#_xor_syndrome,
+ NULL, /* always valid */
"int" NSTRING "x$#",
0
};
diff --git a/lib/raid6/mmx.c b/lib/raid6/mmx.c
index 590c71c9e200..b3b0e1fcd3af 100644
--- a/lib/raid6/mmx.c
+++ b/lib/raid6/mmx.c
@@ -76,6 +76,7 @@ static void raid6_mmx1_gen_syndrome(int disks, size_t bytes, void **ptrs)
const struct raid6_calls raid6_mmxx1 = {
raid6_mmx1_gen_syndrome,
+ NULL, /* XOR not yet implemented */
raid6_have_mmx,
"mmxx1",
0
@@ -134,6 +135,7 @@ static void raid6_mmx2_gen_syndrome(int disks, size_t bytes, void **ptrs)
const struct raid6_calls raid6_mmxx2 = {
raid6_mmx2_gen_syndrome,
+ NULL, /* XOR not yet implemented */
raid6_have_mmx,
"mmxx2",
0
diff --git a/lib/raid6/neon.c b/lib/raid6/neon.c
index 36ad4705df1a..d9ad6ee284f4 100644
--- a/lib/raid6/neon.c
+++ b/lib/raid6/neon.c
@@ -42,6 +42,7 @@
} \
struct raid6_calls const raid6_neonx ## _n = { \
raid6_neon ## _n ## _gen_syndrome, \
+ NULL, /* XOR not yet implemented */ \
raid6_have_neon, \
"neonx" #_n, \
0 \
diff --git a/lib/raid6/sse1.c b/lib/raid6/sse1.c
index f76297139445..9025b8ca9aa3 100644
--- a/lib/raid6/sse1.c
+++ b/lib/raid6/sse1.c
@@ -92,6 +92,7 @@ static void raid6_sse11_gen_syndrome(int disks, size_t bytes, void **ptrs)
const struct raid6_calls raid6_sse1x1 = {
raid6_sse11_gen_syndrome,
+ NULL, /* XOR not yet implemented */
raid6_have_sse1_or_mmxext,
"sse1x1",
1 /* Has cache hints */
@@ -154,6 +155,7 @@ static void raid6_sse12_gen_syndrome(int disks, size_t bytes, void **ptrs)
const struct raid6_calls raid6_sse1x2 = {
raid6_sse12_gen_syndrome,
+ NULL, /* XOR not yet implemented */
raid6_have_sse1_or_mmxext,
"sse1x2",
1 /* Has cache hints */
diff --git a/lib/raid6/sse2.c b/lib/raid6/sse2.c
index 85b82c85f28e..1d2276b007ee 100644
--- a/lib/raid6/sse2.c
+++ b/lib/raid6/sse2.c
@@ -88,8 +88,58 @@ static void raid6_sse21_gen_syndrome(int disks, size_t bytes, void **ptrs)
kernel_fpu_end();
}
+
+static void raid6_sse21_xor_syndrome(int disks, int start, int stop,
+ size_t bytes, void **ptrs)
+ {
+ u8 **dptr = (u8 **)ptrs;
+ u8 *p, *q;
+ int d, z, z0;
+
+ z0 = stop; /* P/Q right side optimization */
+ p = dptr[disks-2]; /* XOR parity */
+ q = dptr[disks-1]; /* RS syndrome */
+
+ kernel_fpu_begin();
+
+ asm volatile("movdqa %0,%%xmm0" : : "m" (raid6_sse_constants.x1d[0]));
+
+ for ( d = 0 ; d < bytes ; d += 16 ) {
+ asm volatile("movdqa %0,%%xmm4" :: "m" (dptr[z0][d]));
+ asm volatile("movdqa %0,%%xmm2" : : "m" (p[d]));
+ asm volatile("pxor %xmm4,%xmm2");
+ /* P/Q data pages */
+ for ( z = z0-1 ; z >= start ; z-- ) {
+ asm volatile("pxor %xmm5,%xmm5");
+ asm volatile("pcmpgtb %xmm4,%xmm5");
+ asm volatile("paddb %xmm4,%xmm4");
+ asm volatile("pand %xmm0,%xmm5");
+ asm volatile("pxor %xmm5,%xmm4");
+ asm volatile("movdqa %0,%%xmm5" :: "m" (dptr[z][d]));
+ asm volatile("pxor %xmm5,%xmm2");
+ asm volatile("pxor %xmm5,%xmm4");
+ }
+ /* P/Q left side optimization */
+ for ( z = start-1 ; z >= 0 ; z-- ) {
+ asm volatile("pxor %xmm5,%xmm5");
+ asm volatile("pcmpgtb %xmm4,%xmm5");
+ asm volatile("paddb %xmm4,%xmm4");
+ asm volatile("pand %xmm0,%xmm5");
+ asm volatile("pxor %xmm5,%xmm4");
+ }
+ asm volatile("pxor %0,%%xmm4" : : "m" (q[d]));
+ /* Don't use movntdq for r/w memory area < cache line */
+ asm volatile("movdqa %%xmm4,%0" : "=m" (q[d]));
+ asm volatile("movdqa %%xmm2,%0" : "=m" (p[d]));
+ }
+
+ asm volatile("sfence" : : : "memory");
+ kernel_fpu_end();
+}
+
const struct raid6_calls raid6_sse2x1 = {
raid6_sse21_gen_syndrome,
+ raid6_sse21_xor_syndrome,
raid6_have_sse2,
"sse2x1",
1 /* Has cache hints */
@@ -150,8 +200,76 @@ static void raid6_sse22_gen_syndrome(int disks, size_t bytes, void **ptrs)
kernel_fpu_end();
}
+ static void raid6_sse22_xor_syndrome(int disks, int start, int stop,
+ size_t bytes, void **ptrs)
+ {
+ u8 **dptr = (u8 **)ptrs;
+ u8 *p, *q;
+ int d, z, z0;
+
+ z0 = stop; /* P/Q right side optimization */
+ p = dptr[disks-2]; /* XOR parity */
+ q = dptr[disks-1]; /* RS syndrome */
+
+ kernel_fpu_begin();
+
+ asm volatile("movdqa %0,%%xmm0" : : "m" (raid6_sse_constants.x1d[0]));
+
+ for ( d = 0 ; d < bytes ; d += 32 ) {
+ asm volatile("movdqa %0,%%xmm4" :: "m" (dptr[z0][d]));
+ asm volatile("movdqa %0,%%xmm6" :: "m" (dptr[z0][d+16]));
+ asm volatile("movdqa %0,%%xmm2" : : "m" (p[d]));
+ asm volatile("movdqa %0,%%xmm3" : : "m" (p[d+16]));
+ asm volatile("pxor %xmm4,%xmm2");
+ asm volatile("pxor %xmm6,%xmm3");
+ /* P/Q data pages */
+ for ( z = z0-1 ; z >= start ; z-- ) {
+ asm volatile("pxor %xmm5,%xmm5");
+ asm volatile("pxor %xmm7,%xmm7");
+ asm volatile("pcmpgtb %xmm4,%xmm5");
+ asm volatile("pcmpgtb %xmm6,%xmm7");
+ asm volatile("paddb %xmm4,%xmm4");
+ asm volatile("paddb %xmm6,%xmm6");
+ asm volatile("pand %xmm0,%xmm5");
+ asm volatile("pand %xmm0,%xmm7");
+ asm volatile("pxor %xmm5,%xmm4");
+ asm volatile("pxor %xmm7,%xmm6");
+ asm volatile("movdqa %0,%%xmm5" :: "m" (dptr[z][d]));
+ asm volatile("movdqa %0,%%xmm7" :: "m" (dptr[z][d+16]));
+ asm volatile("pxor %xmm5,%xmm2");
+ asm volatile("pxor %xmm7,%xmm3");
+ asm volatile("pxor %xmm5,%xmm4");
+ asm volatile("pxor %xmm7,%xmm6");
+ }
+ /* P/Q left side optimization */
+ for ( z = start-1 ; z >= 0 ; z-- ) {
+ asm volatile("pxor %xmm5,%xmm5");
+ asm volatile("pxor %xmm7,%xmm7");
+ asm volatile("pcmpgtb %xmm4,%xmm5");
+ asm volatile("pcmpgtb %xmm6,%xmm7");
+ asm volatile("paddb %xmm4,%xmm4");
+ asm volatile("paddb %xmm6,%xmm6");
+ asm volatile("pand %xmm0,%xmm5");
+ asm volatile("pand %xmm0,%xmm7");
+ asm volatile("pxor %xmm5,%xmm4");
+ asm volatile("pxor %xmm7,%xmm6");
+ }
+ asm volatile("pxor %0,%%xmm4" : : "m" (q[d]));
+ asm volatile("pxor %0,%%xmm6" : : "m" (q[d+16]));
+ /* Don't use movntdq for r/w memory area < cache line */
+ asm volatile("movdqa %%xmm4,%0" : "=m" (q[d]));
+ asm volatile("movdqa %%xmm6,%0" : "=m" (q[d+16]));
+ asm volatile("movdqa %%xmm2,%0" : "=m" (p[d]));
+ asm volatile("movdqa %%xmm3,%0" : "=m" (p[d+16]));
+ }
+
+ asm volatile("sfence" : : : "memory");
+ kernel_fpu_end();
+ }
+
const struct raid6_calls raid6_sse2x2 = {
raid6_sse22_gen_syndrome,
+ raid6_sse22_xor_syndrome,
raid6_have_sse2,
"sse2x2",
1 /* Has cache hints */
@@ -248,8 +366,117 @@ static void raid6_sse24_gen_syndrome(int disks, size_t bytes, void **ptrs)
kernel_fpu_end();
}
+ static void raid6_sse24_xor_syndrome(int disks, int start, int stop,
+ size_t bytes, void **ptrs)
+ {
+ u8 **dptr = (u8 **)ptrs;
+ u8 *p, *q;
+ int d, z, z0;
+
+ z0 = stop; /* P/Q right side optimization */
+ p = dptr[disks-2]; /* XOR parity */
+ q = dptr[disks-1]; /* RS syndrome */
+
+ kernel_fpu_begin();
+
+ asm volatile("movdqa %0,%%xmm0" :: "m" (raid6_sse_constants.x1d[0]));
+
+ for ( d = 0 ; d < bytes ; d += 64 ) {
+ asm volatile("movdqa %0,%%xmm4" :: "m" (dptr[z0][d]));
+ asm volatile("movdqa %0,%%xmm6" :: "m" (dptr[z0][d+16]));
+ asm volatile("movdqa %0,%%xmm12" :: "m" (dptr[z0][d+32]));
+ asm volatile("movdqa %0,%%xmm14" :: "m" (dptr[z0][d+48]));
+ asm volatile("movdqa %0,%%xmm2" : : "m" (p[d]));
+ asm volatile("movdqa %0,%%xmm3" : : "m" (p[d+16]));
+ asm volatile("movdqa %0,%%xmm10" : : "m" (p[d+32]));
+ asm volatile("movdqa %0,%%xmm11" : : "m" (p[d+48]));
+ asm volatile("pxor %xmm4,%xmm2");
+ asm volatile("pxor %xmm6,%xmm3");
+ asm volatile("pxor %xmm12,%xmm10");
+ asm volatile("pxor %xmm14,%xmm11");
+ /* P/Q data pages */
+ for ( z = z0-1 ; z >= start ; z-- ) {
+ asm volatile("prefetchnta %0" :: "m" (dptr[z][d]));
+ asm volatile("prefetchnta %0" :: "m" (dptr[z][d+32]));
+ asm volatile("pxor %xmm5,%xmm5");
+ asm volatile("pxor %xmm7,%xmm7");
+ asm volatile("pxor %xmm13,%xmm13");
+ asm volatile("pxor %xmm15,%xmm15");
+ asm volatile("pcmpgtb %xmm4,%xmm5");
+ asm volatile("pcmpgtb %xmm6,%xmm7");
+ asm volatile("pcmpgtb %xmm12,%xmm13");
+ asm volatile("pcmpgtb %xmm14,%xmm15");
+ asm volatile("paddb %xmm4,%xmm4");
+ asm volatile("paddb %xmm6,%xmm6");
+ asm volatile("paddb %xmm12,%xmm12");
+ asm volatile("paddb %xmm14,%xmm14");
+ asm volatile("pand %xmm0,%xmm5");
+ asm volatile("pand %xmm0,%xmm7");
+ asm volatile("pand %xmm0,%xmm13");
+ asm volatile("pand %xmm0,%xmm15");
+ asm volatile("pxor %xmm5,%xmm4");
+ asm volatile("pxor %xmm7,%xmm6");
+ asm volatile("pxor %xmm13,%xmm12");
+ asm volatile("pxor %xmm15,%xmm14");
+ asm volatile("movdqa %0,%%xmm5" :: "m" (dptr[z][d]));
+ asm volatile("movdqa %0,%%xmm7" :: "m" (dptr[z][d+16]));
+ asm volatile("movdqa %0,%%xmm13" :: "m" (dptr[z][d+32]));
+ asm volatile("movdqa %0,%%xmm15" :: "m" (dptr[z][d+48]));
+ asm volatile("pxor %xmm5,%xmm2");
+ asm volatile("pxor %xmm7,%xmm3");
+ asm volatile("pxor %xmm13,%xmm10");
+ asm volatile("pxor %xmm15,%xmm11");
+ asm volatile("pxor %xmm5,%xmm4");
+ asm volatile("pxor %xmm7,%xmm6");
+ asm volatile("pxor %xmm13,%xmm12");
+ asm volatile("pxor %xmm15,%xmm14");
+ }
+ asm volatile("prefetchnta %0" :: "m" (q[d]));
+ asm volatile("prefetchnta %0" :: "m" (q[d+32]));
+ /* P/Q left side optimization */
+ for ( z = start-1 ; z >= 0 ; z-- ) {
+ asm volatile("pxor %xmm5,%xmm5");
+ asm volatile("pxor %xmm7,%xmm7");
+ asm volatile("pxor %xmm13,%xmm13");
+ asm volatile("pxor %xmm15,%xmm15");
+ asm volatile("pcmpgtb %xmm4,%xmm5");
+ asm volatile("pcmpgtb %xmm6,%xmm7");
+ asm volatile("pcmpgtb %xmm12,%xmm13");
+ asm volatile("pcmpgtb %xmm14,%xmm15");
+ asm volatile("paddb %xmm4,%xmm4");
+ asm volatile("paddb %xmm6,%xmm6");
+ asm volatile("paddb %xmm12,%xmm12");
+ asm volatile("paddb %xmm14,%xmm14");
+ asm volatile("pand %xmm0,%xmm5");
+ asm volatile("pand %xmm0,%xmm7");
+ asm volatile("pand %xmm0,%xmm13");
+ asm volatile("pand %xmm0,%xmm15");
+ asm volatile("pxor %xmm5,%xmm4");
+ asm volatile("pxor %xmm7,%xmm6");
+ asm volatile("pxor %xmm13,%xmm12");
+ asm volatile("pxor %xmm15,%xmm14");
+ }
+ asm volatile("movntdq %%xmm2,%0" : "=m" (p[d]));
+ asm volatile("movntdq %%xmm3,%0" : "=m" (p[d+16]));
+ asm volatile("movntdq %%xmm10,%0" : "=m" (p[d+32]));
+ asm volatile("movntdq %%xmm11,%0" : "=m" (p[d+48]));
+ asm volatile("pxor %0,%%xmm4" : : "m" (q[d]));
+ asm volatile("pxor %0,%%xmm6" : : "m" (q[d+16]));
+ asm volatile("pxor %0,%%xmm12" : : "m" (q[d+32]));
+ asm volatile("pxor %0,%%xmm14" : : "m" (q[d+48]));
+ asm volatile("movntdq %%xmm4,%0" : "=m" (q[d]));
+ asm volatile("movntdq %%xmm6,%0" : "=m" (q[d+16]));
+ asm volatile("movntdq %%xmm12,%0" : "=m" (q[d+32]));
+ asm volatile("movntdq %%xmm14,%0" : "=m" (q[d+48]));
+ }
+ asm volatile("sfence" : : : "memory");
+ kernel_fpu_end();
+ }
+
+
const struct raid6_calls raid6_sse2x4 = {
raid6_sse24_gen_syndrome,
+ raid6_sse24_xor_syndrome,
raid6_have_sse2,
"sse2x4",
1 /* Has cache hints */
diff --git a/lib/raid6/test/test.c b/lib/raid6/test/test.c
index 5a485b7a7d3c..3bebbabdb510 100644
--- a/lib/raid6/test/test.c
+++ b/lib/raid6/test/test.c
@@ -28,11 +28,11 @@ char *dataptrs[NDISKS];
char data[NDISKS][PAGE_SIZE];
char recovi[PAGE_SIZE], recovj[PAGE_SIZE];
-static void makedata(void)
+static void makedata(int start, int stop)
{
int i, j;
- for (i = 0; i < NDISKS; i++) {
+ for (i = start; i <= stop; i++) {
for (j = 0; j < PAGE_SIZE; j++)
data[i][j] = rand();
@@ -91,34 +91,55 @@ int main(int argc, char *argv[])
{
const struct raid6_calls *const *algo;
const struct raid6_recov_calls *const *ra;
- int i, j;
+ int i, j, p1, p2;
int err = 0;
- makedata();
+ makedata(0, NDISKS-1);
for (ra = raid6_recov_algos; *ra; ra++) {
if ((*ra)->valid && !(*ra)->valid())
continue;
+
raid6_2data_recov = (*ra)->data2;
raid6_datap_recov = (*ra)->datap;
printf("using recovery %s\n", (*ra)->name);
for (algo = raid6_algos; *algo; algo++) {
- if (!(*algo)->valid || (*algo)->valid()) {
- raid6_call = **algo;
+ if ((*algo)->valid && !(*algo)->valid())
+ continue;
+
+ raid6_call = **algo;
+
+ /* Nuke syndromes */
+ memset(data[NDISKS-2], 0xee, 2*PAGE_SIZE);
+
+ /* Generate assumed good syndrome */
+ raid6_call.gen_syndrome(NDISKS, PAGE_SIZE,
+ (void **)&dataptrs);
+
+ for (i = 0; i < NDISKS-1; i++)
+ for (j = i+1; j < NDISKS; j++)
+ err += test_disks(i, j);
+
+ if (!raid6_call.xor_syndrome)
+ continue;
+
+ for (p1 = 0; p1 < NDISKS-2; p1++)
+ for (p2 = p1; p2 < NDISKS-2; p2++) {
- /* Nuke syndromes */
- memset(data[NDISKS-2], 0xee, 2*PAGE_SIZE);
+ /* Simulate rmw run */
+ raid6_call.xor_syndrome(NDISKS, p1, p2, PAGE_SIZE,
+ (void **)&dataptrs);
+ makedata(p1, p2);
+ raid6_call.xor_syndrome(NDISKS, p1, p2, PAGE_SIZE,
+ (void **)&dataptrs);
- /* Generate assumed good syndrome */
- raid6_call.gen_syndrome(NDISKS, PAGE_SIZE,
- (void **)&dataptrs);
+ for (i = 0; i < NDISKS-1; i++)
+ for (j = i+1; j < NDISKS; j++)
+ err += test_disks(i, j);
+ }
- for (i = 0; i < NDISKS-1; i++)
- for (j = i+1; j < NDISKS; j++)
- err += test_disks(i, j);
- }
}
printf("\n");
}
diff --git a/lib/raid6/tilegx.uc b/lib/raid6/tilegx.uc
index e7c29459cbcd..2dd291a11264 100644
--- a/lib/raid6/tilegx.uc
+++ b/lib/raid6/tilegx.uc
@@ -80,6 +80,7 @@ void raid6_tilegx$#_gen_syndrome(int disks, size_t bytes, void **ptrs)
const struct raid6_calls raid6_tilegx$# = {
raid6_tilegx$#_gen_syndrome,
+ NULL, /* XOR not yet implemented */
NULL,
"tilegx$#",
0
diff --git a/lib/test-hexdump.c b/lib/test-hexdump.c
index 9846ff7428b3..c227cc43ec0a 100644
--- a/lib/test-hexdump.c
+++ b/lib/test-hexdump.c
@@ -48,7 +48,7 @@ static void __init test_hexdump(size_t len, int rowsize, int groupsize,
char test[32 * 3 + 2 + 32 + 1];
char real[32 * 3 + 2 + 32 + 1];
char *p;
- const char **result;
+ const char * const *result;
size_t l = len;
int gs = groupsize, rs = rowsize;
unsigned int i;
OpenPOWER on IntegriCloud