summaryrefslogtreecommitdiffstats
path: root/tools/perf/bench
diff options
context:
space:
mode:
Diffstat (limited to 'tools/perf/bench')
-rw-r--r--tools/perf/bench/futex-hash.c6
-rw-r--r--tools/perf/bench/futex-lock-pi.c6
-rw-r--r--tools/perf/bench/mem-functions.c77
-rw-r--r--tools/perf/bench/numa.c7
4 files changed, 42 insertions, 54 deletions
diff --git a/tools/perf/bench/futex-hash.c b/tools/perf/bench/futex-hash.c
index 8024cd5febd2..da04b8c5568a 100644
--- a/tools/perf/bench/futex-hash.c
+++ b/tools/perf/bench/futex-hash.c
@@ -63,8 +63,9 @@ static const char * const bench_futex_hash_usage[] = {
static void *workerfn(void *arg)
{
int ret;
- unsigned int i;
struct worker *w = (struct worker *) arg;
+ unsigned int i;
+ unsigned long ops = w->ops; /* avoid cacheline bouncing */
pthread_mutex_lock(&thread_lock);
threads_starting--;
@@ -74,7 +75,7 @@ static void *workerfn(void *arg)
pthread_mutex_unlock(&thread_lock);
do {
- for (i = 0; i < nfutexes; i++, w->ops++) {
+ for (i = 0; i < nfutexes; i++, ops++) {
/*
* We want the futex calls to fail in order to stress
* the hashing of uaddr and not measure other steps,
@@ -88,6 +89,7 @@ static void *workerfn(void *arg)
}
} while (!done);
+ w->ops = ops;
return NULL;
}
diff --git a/tools/perf/bench/futex-lock-pi.c b/tools/perf/bench/futex-lock-pi.c
index 936d89d30483..91877777ec6e 100644
--- a/tools/perf/bench/futex-lock-pi.c
+++ b/tools/perf/bench/futex-lock-pi.c
@@ -48,7 +48,7 @@ static const struct option options[] = {
};
static const char * const bench_futex_lock_pi_usage[] = {
- "perf bench futex requeue <options>",
+ "perf bench futex lock-pi <options>",
NULL
};
@@ -75,6 +75,7 @@ static void toggle_done(int sig __maybe_unused,
static void *workerfn(void *arg)
{
struct worker *w = (struct worker *) arg;
+ unsigned long ops = w->ops;
pthread_mutex_lock(&thread_lock);
threads_starting--;
@@ -103,9 +104,10 @@ static void *workerfn(void *arg)
if (ret && !silent)
warn("thread %d: Could not unlock pi-lock for %p (%d)",
w->tid, w->futex, ret);
- w->ops++; /* account for thread's share of work */
+ ops++; /* account for thread's share of work */
} while (!done);
+ w->ops = ops;
return NULL;
}
diff --git a/tools/perf/bench/mem-functions.c b/tools/perf/bench/mem-functions.c
index c684910e5a48..52504a83b5a1 100644
--- a/tools/perf/bench/mem-functions.c
+++ b/tools/perf/bench/mem-functions.c
@@ -106,9 +106,10 @@ static double timeval2double(struct timeval *ts)
struct bench_mem_info {
const struct function *functions;
- u64 (*do_cycles)(const struct function *r, size_t size);
- double (*do_gettimeofday)(const struct function *r, size_t size);
+ u64 (*do_cycles)(const struct function *r, size_t size, void *src, void *dst);
+ double (*do_gettimeofday)(const struct function *r, size_t size, void *src, void *dst);
const char *const *usage;
+ bool alloc_src;
};
static void __bench_mem_function(struct bench_mem_info *info, int r_idx, size_t size, double size_total)
@@ -116,16 +117,26 @@ static void __bench_mem_function(struct bench_mem_info *info, int r_idx, size_t
const struct function *r = &info->functions[r_idx];
double result_bps = 0.0;
u64 result_cycles = 0;
+ void *src = NULL, *dst = zalloc(size);
printf("# function '%s' (%s)\n", r->name, r->desc);
+ if (dst == NULL)
+ goto out_alloc_failed;
+
+ if (info->alloc_src) {
+ src = zalloc(size);
+ if (src == NULL)
+ goto out_alloc_failed;
+ }
+
if (bench_format == BENCH_FORMAT_DEFAULT)
printf("# Copying %s bytes ...\n\n", size_str);
if (use_cycles) {
- result_cycles = info->do_cycles(r, size);
+ result_cycles = info->do_cycles(r, size, src, dst);
} else {
- result_bps = info->do_gettimeofday(r, size);
+ result_bps = info->do_gettimeofday(r, size, src, dst);
}
switch (bench_format) {
@@ -149,6 +160,14 @@ static void __bench_mem_function(struct bench_mem_info *info, int r_idx, size_t
BUG_ON(1);
break;
}
+
+out_free:
+ free(src);
+ free(dst);
+ return;
+out_alloc_failed:
+ printf("# Memory allocation failed - maybe size (%s) is too large?\n", size_str);
+ goto out_free;
}
static int bench_mem_common(int argc, const char **argv, struct bench_mem_info *info)
@@ -201,28 +220,14 @@ static int bench_mem_common(int argc, const char **argv, struct bench_mem_info *
return 0;
}
-static void memcpy_alloc_mem(void **dst, void **src, size_t size)
-{
- *dst = zalloc(size);
- if (!*dst)
- die("memory allocation failed - maybe size is too large?\n");
-
- *src = zalloc(size);
- if (!*src)
- die("memory allocation failed - maybe size is too large?\n");
-
- /* Make sure to always prefault zero pages even if MMAP_THRESH is crossed: */
- memset(*src, 0, size);
-}
-
-static u64 do_memcpy_cycles(const struct function *r, size_t size)
+static u64 do_memcpy_cycles(const struct function *r, size_t size, void *src, void *dst)
{
u64 cycle_start = 0ULL, cycle_end = 0ULL;
- void *src = NULL, *dst = NULL;
memcpy_t fn = r->fn.memcpy;
int i;
- memcpy_alloc_mem(&dst, &src, size);
+ /* Make sure to always prefault zero pages even if MMAP_THRESH is crossed: */
+ memset(src, 0, size);
/*
* We prefault the freshly allocated memory range here,
@@ -235,20 +240,15 @@ static u64 do_memcpy_cycles(const struct function *r, size_t size)
fn(dst, src, size);
cycle_end = get_cycles();
- free(src);
- free(dst);
return cycle_end - cycle_start;
}
-static double do_memcpy_gettimeofday(const struct function *r, size_t size)
+static double do_memcpy_gettimeofday(const struct function *r, size_t size, void *src, void *dst)
{
struct timeval tv_start, tv_end, tv_diff;
memcpy_t fn = r->fn.memcpy;
- void *src = NULL, *dst = NULL;
int i;
- memcpy_alloc_mem(&dst, &src, size);
-
/*
* We prefault the freshly allocated memory range here,
* to not measure page fault overhead:
@@ -262,9 +262,6 @@ static double do_memcpy_gettimeofday(const struct function *r, size_t size)
timersub(&tv_end, &tv_start, &tv_diff);
- free(src);
- free(dst);
-
return (double)(((double)size * nr_loops) / timeval2double(&tv_diff));
}
@@ -294,27 +291,18 @@ int bench_mem_memcpy(int argc, const char **argv, const char *prefix __maybe_unu
.do_cycles = do_memcpy_cycles,
.do_gettimeofday = do_memcpy_gettimeofday,
.usage = bench_mem_memcpy_usage,
+ .alloc_src = true,
};
return bench_mem_common(argc, argv, &info);
}
-static void memset_alloc_mem(void **dst, size_t size)
-{
- *dst = zalloc(size);
- if (!*dst)
- die("memory allocation failed - maybe size is too large?\n");
-}
-
-static u64 do_memset_cycles(const struct function *r, size_t size)
+static u64 do_memset_cycles(const struct function *r, size_t size, void *src __maybe_unused, void *dst)
{
u64 cycle_start = 0ULL, cycle_end = 0ULL;
memset_t fn = r->fn.memset;
- void *dst = NULL;
int i;
- memset_alloc_mem(&dst, size);
-
/*
* We prefault the freshly allocated memory range here,
* to not measure page fault overhead:
@@ -326,19 +314,15 @@ static u64 do_memset_cycles(const struct function *r, size_t size)
fn(dst, i, size);
cycle_end = get_cycles();
- free(dst);
return cycle_end - cycle_start;
}
-static double do_memset_gettimeofday(const struct function *r, size_t size)
+static double do_memset_gettimeofday(const struct function *r, size_t size, void *src __maybe_unused, void *dst)
{
struct timeval tv_start, tv_end, tv_diff;
memset_t fn = r->fn.memset;
- void *dst = NULL;
int i;
- memset_alloc_mem(&dst, size);
-
/*
* We prefault the freshly allocated memory range here,
* to not measure page fault overhead:
@@ -352,7 +336,6 @@ static double do_memset_gettimeofday(const struct function *r, size_t size)
timersub(&tv_end, &tv_start, &tv_diff);
- free(dst);
return (double)(((double)size * nr_loops) / timeval2double(&tv_diff));
}
diff --git a/tools/perf/bench/numa.c b/tools/perf/bench/numa.c
index 8efe904e486b..3083fc36282b 100644
--- a/tools/perf/bench/numa.c
+++ b/tools/perf/bench/numa.c
@@ -43,6 +43,7 @@
/*
* Debug printf:
*/
+#undef dprintf
#define dprintf(x...) do { if (g && g->p.show_details >= 1) printf(x); } while (0)
struct thread_data {
@@ -1573,13 +1574,13 @@ static int __bench_numa(const char *name)
"GB/sec,", "total-speed", "GB/sec total speed");
if (g->p.show_details >= 2) {
- char tname[32];
+ char tname[14 + 2 * 10 + 1];
struct thread_data *td;
for (p = 0; p < g->p.nr_proc; p++) {
for (t = 0; t < g->p.nr_threads; t++) {
- memset(tname, 0, 32);
+ memset(tname, 0, sizeof(tname));
td = g->threads + p*g->p.nr_threads + t;
- snprintf(tname, 32, "process%d:thread%d", p, t);
+ snprintf(tname, sizeof(tname), "process%d:thread%d", p, t);
print_res(tname, td->speed_gbs,
"GB/sec", "thread-speed", "GB/sec/thread speed");
print_res(tname, td->system_time_ns / NSEC_PER_SEC,
OpenPOWER on IntegriCloud