summaryrefslogtreecommitdiffstats
path: root/tools/perf/bench/mem-memcpy.c
blob: 52e646e3e87378d949e3ebc8da2aea88a8b42dd1 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
/*
 * mem-memcpy.c
 *
 * memcpy: Simple memory copy in various ways
 *
 * Written by Hitoshi Mitake <mitake@dcl.info.waseda.ac.jp>
 */
#include <ctype.h>

#include "../perf.h"
#include "../util/util.h"
#include "../util/parse-options.h"
#include "../util/header.h"
#include "bench.h"

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/time.h>
#include <errno.h>

/*
 * Binary "kilo" step. bench_mem_memcpy() compares the measured bytes/sec
 * against K, K * K and K * K * K to pick the B/KB/MB/GB unit it prints.
 */
#define K 1024

/*
 * Settings filled in by the option table below; the initializers are the
 * values used when the corresponding flag is not given.
 */
/* -l/--length: size of the copy as text; converted with perf_atoll(). */
static const char	*length_str	= "1MB";
/* -r/--routine: name looked up in routines[]. */
static const char	*routine	= "default";
/* -c/--clock: non-zero selects the perf event counter over gettimeofday(). */
static int		use_clock	= 0;
/* Descriptor set by init_clock() and read by get_clock(). */
static int		clock_fd;

/*
 * Command-line options handed to parse_options() by bench_mem_memcpy().
 * Each entry stores its value into one of the static settings above.
 */
static const struct option options[] = {
	OPT_STRING('l', "length", &length_str, "1MB",
		    "Specify length of memory to copy. "
		    "available unit: B, MB, GB (upper and lower)"),
	OPT_STRING('r', "routine", &routine, "default",
		    "Specify routine to copy"),
	OPT_BOOLEAN('c', "clock", &use_clock,
		    "Use CPU clock for measuring"),
	OPT_END()
};

/* One selectable copy implementation. */
struct routine {
	/* Key compared against the -r/--routine argument. */
	const char *name;
	/* Human-readable description shown when listing available routines. */
	const char *desc;
	/* Copy function with the same signature as memcpy(). */
	void * (*fn)(void *dst, const void *src, size_t len);
};

/*
 * Table of available copy routines. The all-NULL entry is the terminator:
 * bench_mem_memcpy() walks the table until it finds a NULL name.
 */
struct routine routines[] = {
	{ "default",
	  "Default memcpy() provided by glibc",
	  memcpy },
	{ NULL,
	  NULL,
	  NULL   }
};

/* NULL-terminated usage strings passed to parse_options(). */
static const char * const bench_mem_memcpy_usage[] = {
	"perf bench mem memcpy <options>",
	NULL
};

/*
 * Event description passed to sys_perf_event_open() by init_clock():
 * the hardware CPU-cycles counter.
 */
static struct perf_event_attr clock_attr = {
	.type		= PERF_TYPE_HARDWARE,
	.config		= PERF_COUNT_HW_CPU_CYCLES
};

/*
 * Open the counter described by clock_attr, passing getpid() as the pid
 * argument, and keep the resulting descriptor in clock_fd.
 *
 * A failure with errno == ENOSYS is reported through die(); any other
 * failure trips BUG_ON().
 */
static void init_clock(void)
{
	clock_fd = sys_perf_event_open(&clock_attr, getpid(), -1, -1, 0);
	if (clock_fd >= 0)
		return;

	/* Only failures get here; tell "no kernel support" from the rest. */
	if (errno == ENOSYS) {
		die("No CONFIG_PERF_EVENTS=y kernel support configured?\n");
	} else {
		BUG_ON(clock_fd < 0);
	}
}

/*
 * Read the current counter value from clock_fd.
 *
 * Returns the 64-bit value read; a short or failed read trips BUG_ON().
 */
static u64 get_clock(void)
{
	u64 cycles;
	int ret = read(clock_fd, &cycles, sizeof(cycles));

	BUG_ON(ret != sizeof(u64));
	return cycles;
}

/*
 * Convert a struct timeval into seconds expressed as a double:
 * tv_sec plus tv_usec scaled by one millionth.
 */
static double timeval2double(struct timeval *ts)
{
	double whole = (double)ts->tv_sec;
	double micro = (double)ts->tv_usec;

	return whole + micro / (double)1000000;
}

/*
 * bench_mem_memcpy - entry point of "perf bench mem memcpy".
 *
 * Parses the options, allocates two zeroed buffers of the requested
 * length, runs the selected routine once from src to dst and prints
 * either the throughput (gettimeofday() based) or the clocks per byte
 * (when -c/--clock is given), in the layout selected by bench_format.
 *
 * @argc, @argv: command-line arguments handed to parse_options().
 * @prefix: unused.
 *
 * Returns 0 on success and 1 on an invalid length or an unknown routine.
 * Allocation failure and an unknown bench_format end in die().
 */
int bench_mem_memcpy(int argc, const char **argv,
		     const char *prefix __used)
{
	int i;
	void *dst, *src;
	s64 parsed;
	size_t length;
	double bps = 0.0;
	struct timeval tv_start, tv_end, tv_diff;
	u64 clock_start, clock_end, clock_diff;

	clock_start = clock_end = clock_diff = 0ULL;
	argc = parse_options(argc, argv, options,
			     bench_mem_memcpy_usage, 0);

	tv_diff.tv_sec = 0;
	tv_diff.tv_usec = 0;

	/*
	 * Check the sign on the wide value and make sure it survives the
	 * narrowing to size_t. With a 32-bit size_t a negative or oversized
	 * result would otherwise look like a valid positive length.
	 */
	parsed = perf_atoll((char *)length_str);
	length = (size_t)parsed;
	if (parsed <= 0 || (s64)length != parsed) {
		fprintf(stderr, "Invalid length:%s\n", length_str);
		return 1;
	}

	/* Look the requested routine up; the table ends with a NULL name. */
	for (i = 0; routines[i].name; i++) {
		if (!strcmp(routines[i].name, routine))
			break;
	}
	if (!routines[i].name) {
		printf("Unknown routine:%s\n", routine);
		printf("Available routines...\n");
		for (i = 0; routines[i].name; i++) {
			printf("\t%s ... %s\n",
			       routines[i].name, routines[i].desc);
		}
		return 1;
	}

	dst = zalloc(length);
	if (!dst)
		die("memory allocation failed - maybe length is too large?\n");

	src = zalloc(length);
	if (!src)
		die("memory allocation failed - maybe length is too large?\n");

	if (bench_format == BENCH_FORMAT_DEFAULT) {
		printf("# Copying %s Bytes from %p to %p ...\n\n",
		       length_str, src, dst);
	}

	/* Take the start sample as close to the copy as possible. */
	if (use_clock) {
		init_clock();
		clock_start = get_clock();
	} else {
		BUG_ON(gettimeofday(&tv_start, NULL));
	}

	routines[i].fn(dst, src, length);

	if (use_clock) {
		clock_end = get_clock();
		clock_diff = clock_end - clock_start;
	} else {
		BUG_ON(gettimeofday(&tv_end, NULL));
		timersub(&tv_end, &tv_start, &tv_diff);
		bps = (double)((double)length / timeval2double(&tv_diff));
	}

	switch (bench_format) {
	case BENCH_FORMAT_DEFAULT:
		if (use_clock) {
			printf(" %14lf Clock/Byte\n",
			       (double)clock_diff / (double)length);
		} else {
			/* Scale to the largest unit that keeps the value >= 1. */
			if (bps < K)
				printf(" %14lf B/Sec\n", bps);
			else if (bps < K * K)
				printf(" %14lf KB/Sec\n", bps / 1024);
			else if (bps < K * K * K)
				printf(" %14lf MB/Sec\n", bps / 1024 / 1024);
			else {
				printf(" %14lf GB/Sec\n",
				       bps / 1024 / 1024 / 1024);
			}
		}
		break;
	case BENCH_FORMAT_SIMPLE:
		if (use_clock) {
			printf("%14lf\n",
			       (double)clock_diff / (double)length);
		} else
			printf("%lf\n", bps);
		break;
	default:
		/* reaching this means there's some disaster: */
		die("unknown format: %d\n", bench_format);
		break;
	}

	/*
	 * Release the copy buffers.
	 * NOTE(review): assumes zalloc() memory is free()-able (calloc based).
	 */
	free(src);
	free(dst);

	return 0;
}
OpenPOWER on IntegriCloud