summaryrefslogtreecommitdiffstats
path: root/tools/io_uring
diff options
context:
space:
mode:
authorJens Axboe <axboe@kernel.dk>2019-05-22 08:59:12 -0600
committerJens Axboe <axboe@kernel.dk>2019-05-23 10:25:26 -0600
commit004d564f908790efe815a6510a542ac1227ef2a2 (patch)
tree5d0e93aec7fe9a7f892ed794f09e99a34048a45c /tools/io_uring
parent486f069253c3c738dec62daeb16f7232b2cca065 (diff)
downloadblackbird-op-linux-004d564f908790efe815a6510a542ac1227ef2a2.tar.gz
blackbird-op-linux-004d564f908790efe815a6510a542ac1227ef2a2.zip
tools/io_uring: sync with liburing
Various fixes and changes have been applied to liburing since we copied some select bits to the kernel testing/examples part, sync up with liburing to get those changes. Most notable is the change that split the CQE reading into the peek and seen event, instead of being just a single function. Also fixes an unsigned wrap issue in io_uring_submit(), leak of 'fd' in setup if we fail, and various other little issues. Signed-off-by: Jens Axboe <axboe@kernel.dk>
Diffstat (limited to 'tools/io_uring')
-rw-r--r--tools/io_uring/io_uring-cp.c21
-rw-r--r--tools/io_uring/liburing.h64
-rw-r--r--tools/io_uring/queue.c36
-rw-r--r--tools/io_uring/setup.c10
-rw-r--r--tools/io_uring/syscall.c48
5 files changed, 118 insertions, 61 deletions
diff --git a/tools/io_uring/io_uring-cp.c b/tools/io_uring/io_uring-cp.c
index 633f65bb43a7..81461813ec62 100644
--- a/tools/io_uring/io_uring-cp.c
+++ b/tools/io_uring/io_uring-cp.c
@@ -13,6 +13,7 @@
#include <assert.h>
#include <errno.h>
#include <inttypes.h>
+#include <sys/types.h>
#include <sys/stat.h>
#include <sys/ioctl.h>
@@ -85,11 +86,16 @@ static int queue_read(struct io_uring *ring, off_t size, off_t offset)
struct io_uring_sqe *sqe;
struct io_data *data;
+ data = malloc(size + sizeof(*data));
+ if (!data)
+ return 1;
+
sqe = io_uring_get_sqe(ring);
- if (!sqe)
+ if (!sqe) {
+ free(data);
return 1;
+ }
- data = malloc(size + sizeof(*data));
data->read = 1;
data->offset = data->first_offset = offset;
@@ -166,22 +172,23 @@ static int copy_file(struct io_uring *ring, off_t insize)
struct io_data *data;
if (!got_comp) {
- ret = io_uring_wait_completion(ring, &cqe);
+ ret = io_uring_wait_cqe(ring, &cqe);
got_comp = 1;
} else
- ret = io_uring_get_completion(ring, &cqe);
+ ret = io_uring_peek_cqe(ring, &cqe);
if (ret < 0) {
- fprintf(stderr, "io_uring_get_completion: %s\n",
+ fprintf(stderr, "io_uring_peek_cqe: %s\n",
strerror(-ret));
return 1;
}
if (!cqe)
break;
- data = (struct io_data *) (uintptr_t) cqe->user_data;
+ data = io_uring_cqe_get_data(cqe);
if (cqe->res < 0) {
if (cqe->res == -EAGAIN) {
queue_prepped(ring, data);
+ io_uring_cqe_seen(ring, cqe);
continue;
}
fprintf(stderr, "cqe failed: %s\n",
@@ -193,6 +200,7 @@ static int copy_file(struct io_uring *ring, off_t insize)
data->iov.iov_len -= cqe->res;
data->offset += cqe->res;
queue_prepped(ring, data);
+ io_uring_cqe_seen(ring, cqe);
continue;
}
@@ -209,6 +217,7 @@ static int copy_file(struct io_uring *ring, off_t insize)
free(data);
writes--;
}
+ io_uring_cqe_seen(ring, cqe);
}
}
diff --git a/tools/io_uring/liburing.h b/tools/io_uring/liburing.h
index cab0f50257ba..5f305c86b892 100644
--- a/tools/io_uring/liburing.h
+++ b/tools/io_uring/liburing.h
@@ -1,10 +1,16 @@
#ifndef LIB_URING_H
#define LIB_URING_H
+#ifdef __cplusplus
+extern "C" {
+#endif
+
#include <sys/uio.h>
#include <signal.h>
#include <string.h>
#include "../../include/uapi/linux/io_uring.h"
+#include <inttypes.h>
+#include "barrier.h"
/*
* Library interface to io_uring
@@ -46,7 +52,7 @@ struct io_uring {
* System calls
*/
extern int io_uring_setup(unsigned entries, struct io_uring_params *p);
-extern int io_uring_enter(unsigned fd, unsigned to_submit,
+extern int io_uring_enter(int fd, unsigned to_submit,
unsigned min_complete, unsigned flags, sigset_t *sig);
extern int io_uring_register(int fd, unsigned int opcode, void *arg,
unsigned int nr_args);
@@ -59,14 +65,33 @@ extern int io_uring_queue_init(unsigned entries, struct io_uring *ring,
extern int io_uring_queue_mmap(int fd, struct io_uring_params *p,
struct io_uring *ring);
extern void io_uring_queue_exit(struct io_uring *ring);
-extern int io_uring_get_completion(struct io_uring *ring,
+extern int io_uring_peek_cqe(struct io_uring *ring,
struct io_uring_cqe **cqe_ptr);
-extern int io_uring_wait_completion(struct io_uring *ring,
+extern int io_uring_wait_cqe(struct io_uring *ring,
struct io_uring_cqe **cqe_ptr);
extern int io_uring_submit(struct io_uring *ring);
extern struct io_uring_sqe *io_uring_get_sqe(struct io_uring *ring);
/*
+ * Must be called after io_uring_{peek,wait}_cqe() after the cqe has
+ * been processed by the application.
+ */
+static inline void io_uring_cqe_seen(struct io_uring *ring,
+ struct io_uring_cqe *cqe)
+{
+ if (cqe) {
+ struct io_uring_cq *cq = &ring->cq;
+
+ (*cq->khead)++;
+ /*
+ * Ensure that the kernel sees our new head, the kernel has
+ * the matching read barrier.
+ */
+ write_barrier();
+ }
+}
+
+/*
* Command prep helpers
*/
static inline void io_uring_sqe_set_data(struct io_uring_sqe *sqe, void *data)
@@ -74,8 +99,14 @@ static inline void io_uring_sqe_set_data(struct io_uring_sqe *sqe, void *data)
sqe->user_data = (unsigned long) data;
}
+static inline void *io_uring_cqe_get_data(struct io_uring_cqe *cqe)
+{
+ return (void *) (uintptr_t) cqe->user_data;
+}
+
static inline void io_uring_prep_rw(int op, struct io_uring_sqe *sqe, int fd,
- void *addr, unsigned len, off_t offset)
+ const void *addr, unsigned len,
+ off_t offset)
{
memset(sqe, 0, sizeof(*sqe));
sqe->opcode = op;
@@ -86,8 +117,8 @@ static inline void io_uring_prep_rw(int op, struct io_uring_sqe *sqe, int fd,
}
static inline void io_uring_prep_readv(struct io_uring_sqe *sqe, int fd,
- struct iovec *iovecs, unsigned nr_vecs,
- off_t offset)
+ const struct iovec *iovecs,
+ unsigned nr_vecs, off_t offset)
{
io_uring_prep_rw(IORING_OP_READV, sqe, fd, iovecs, nr_vecs, offset);
}
@@ -100,14 +131,14 @@ static inline void io_uring_prep_read_fixed(struct io_uring_sqe *sqe, int fd,
}
static inline void io_uring_prep_writev(struct io_uring_sqe *sqe, int fd,
- struct iovec *iovecs, unsigned nr_vecs,
- off_t offset)
+ const struct iovec *iovecs,
+ unsigned nr_vecs, off_t offset)
{
io_uring_prep_rw(IORING_OP_WRITEV, sqe, fd, iovecs, nr_vecs, offset);
}
static inline void io_uring_prep_write_fixed(struct io_uring_sqe *sqe, int fd,
- void *buf, unsigned nbytes,
+ const void *buf, unsigned nbytes,
off_t offset)
{
io_uring_prep_rw(IORING_OP_WRITE_FIXED, sqe, fd, buf, nbytes, offset);
@@ -131,13 +162,22 @@ static inline void io_uring_prep_poll_remove(struct io_uring_sqe *sqe,
}
static inline void io_uring_prep_fsync(struct io_uring_sqe *sqe, int fd,
- int datasync)
+ unsigned fsync_flags)
{
memset(sqe, 0, sizeof(*sqe));
sqe->opcode = IORING_OP_FSYNC;
sqe->fd = fd;
- if (datasync)
- sqe->fsync_flags = IORING_FSYNC_DATASYNC;
+ sqe->fsync_flags = fsync_flags;
+}
+
+static inline void io_uring_prep_nop(struct io_uring_sqe *sqe)
+{
+ memset(sqe, 0, sizeof(*sqe));
+ sqe->opcode = IORING_OP_NOP;
+}
+
+#ifdef __cplusplus
}
+#endif
#endif
diff --git a/tools/io_uring/queue.c b/tools/io_uring/queue.c
index 88505e873ad9..321819c132c7 100644
--- a/tools/io_uring/queue.c
+++ b/tools/io_uring/queue.c
@@ -8,8 +8,8 @@
#include "liburing.h"
#include "barrier.h"
-static int __io_uring_get_completion(struct io_uring *ring,
- struct io_uring_cqe **cqe_ptr, int wait)
+static int __io_uring_get_cqe(struct io_uring *ring,
+ struct io_uring_cqe **cqe_ptr, int wait)
{
struct io_uring_cq *cq = &ring->cq;
const unsigned mask = *cq->kring_mask;
@@ -39,34 +39,25 @@ static int __io_uring_get_completion(struct io_uring *ring,
return -errno;
} while (1);
- if (*cqe_ptr) {
- *cq->khead = head + 1;
- /*
- * Ensure that the kernel sees our new head, the kernel has
- * the matching read barrier.
- */
- write_barrier();
- }
-
return 0;
}
/*
- * Return an IO completion, if one is readily available
+ * Return an IO completion, if one is readily available. Returns 0 with
+ * cqe_ptr filled in on success, -errno on failure.
*/
-int io_uring_get_completion(struct io_uring *ring,
- struct io_uring_cqe **cqe_ptr)
+int io_uring_peek_cqe(struct io_uring *ring, struct io_uring_cqe **cqe_ptr)
{
- return __io_uring_get_completion(ring, cqe_ptr, 0);
+ return __io_uring_get_cqe(ring, cqe_ptr, 0);
}
/*
- * Return an IO completion, waiting for it if necessary
+ * Return an IO completion, waiting for it if necessary. Returns 0 with
+ * cqe_ptr filled in on success, -errno on failure.
*/
-int io_uring_wait_completion(struct io_uring *ring,
- struct io_uring_cqe **cqe_ptr)
+int io_uring_wait_cqe(struct io_uring *ring, struct io_uring_cqe **cqe_ptr)
{
- return __io_uring_get_completion(ring, cqe_ptr, 1);
+ return __io_uring_get_cqe(ring, cqe_ptr, 1);
}
/*
@@ -78,7 +69,7 @@ int io_uring_submit(struct io_uring *ring)
{
struct io_uring_sq *sq = &ring->sq;
const unsigned mask = *sq->kring_mask;
- unsigned ktail, ktail_next, submitted;
+ unsigned ktail, ktail_next, submitted, to_submit;
int ret;
/*
@@ -100,7 +91,8 @@ int io_uring_submit(struct io_uring *ring)
*/
submitted = 0;
ktail = ktail_next = *sq->ktail;
- while (sq->sqe_head < sq->sqe_tail) {
+ to_submit = sq->sqe_tail - sq->sqe_head;
+ while (to_submit--) {
ktail_next++;
read_barrier();
@@ -136,7 +128,7 @@ submit:
if (ret < 0)
return -errno;
- return 0;
+ return ret;
}
/*
diff --git a/tools/io_uring/setup.c b/tools/io_uring/setup.c
index 4da19a77132c..0b50fcd78520 100644
--- a/tools/io_uring/setup.c
+++ b/tools/io_uring/setup.c
@@ -27,7 +27,7 @@ static int io_uring_mmap(int fd, struct io_uring_params *p,
sq->kdropped = ptr + p->sq_off.dropped;
sq->array = ptr + p->sq_off.array;
- size = p->sq_entries * sizeof(struct io_uring_sqe),
+ size = p->sq_entries * sizeof(struct io_uring_sqe);
sq->sqes = mmap(0, size, PROT_READ | PROT_WRITE,
MAP_SHARED | MAP_POPULATE, fd,
IORING_OFF_SQES);
@@ -79,7 +79,7 @@ int io_uring_queue_mmap(int fd, struct io_uring_params *p, struct io_uring *ring
int io_uring_queue_init(unsigned entries, struct io_uring *ring, unsigned flags)
{
struct io_uring_params p;
- int fd;
+ int fd, ret;
memset(&p, 0, sizeof(p));
p.flags = flags;
@@ -88,7 +88,11 @@ int io_uring_queue_init(unsigned entries, struct io_uring *ring, unsigned flags)
if (fd < 0)
return fd;
- return io_uring_queue_mmap(fd, &p, ring);
+ ret = io_uring_queue_mmap(fd, &p, ring);
+ if (ret)
+ close(fd);
+
+ return ret;
}
void io_uring_queue_exit(struct io_uring *ring)
diff --git a/tools/io_uring/syscall.c b/tools/io_uring/syscall.c
index 6b835e5c6a5b..b22e0aa54e9d 100644
--- a/tools/io_uring/syscall.c
+++ b/tools/io_uring/syscall.c
@@ -7,34 +7,46 @@
#include <signal.h>
#include "liburing.h"
-#if defined(__x86_64) || defined(__i386__)
-#ifndef __NR_sys_io_uring_setup
-#define __NR_sys_io_uring_setup 425
-#endif
-#ifndef __NR_sys_io_uring_enter
-#define __NR_sys_io_uring_enter 426
-#endif
-#ifndef __NR_sys_io_uring_register
-#define __NR_sys_io_uring_register 427
-#endif
-#else
-#error "Arch not supported yet"
+#ifdef __alpha__
+/*
+ * alpha is the only exception, all other architectures
+ * have common numbers for new system calls.
+ */
+# ifndef __NR_io_uring_setup
+# define __NR_io_uring_setup 535
+# endif
+# ifndef __NR_io_uring_enter
+# define __NR_io_uring_enter 536
+# endif
+# ifndef __NR_io_uring_register
+# define __NR_io_uring_register 537
+# endif
+#else /* !__alpha__ */
+# ifndef __NR_io_uring_setup
+# define __NR_io_uring_setup 425
+# endif
+# ifndef __NR_io_uring_enter
+# define __NR_io_uring_enter 426
+# endif
+# ifndef __NR_io_uring_register
+# define __NR_io_uring_register 427
+# endif
#endif
int io_uring_register(int fd, unsigned int opcode, void *arg,
unsigned int nr_args)
{
- return syscall(__NR_sys_io_uring_register, fd, opcode, arg, nr_args);
+ return syscall(__NR_io_uring_register, fd, opcode, arg, nr_args);
}
-int io_uring_setup(unsigned entries, struct io_uring_params *p)
+int io_uring_setup(unsigned int entries, struct io_uring_params *p)
{
- return syscall(__NR_sys_io_uring_setup, entries, p);
+ return syscall(__NR_io_uring_setup, entries, p);
}
-int io_uring_enter(unsigned fd, unsigned to_submit, unsigned min_complete,
- unsigned flags, sigset_t *sig)
+int io_uring_enter(int fd, unsigned int to_submit, unsigned int min_complete,
+ unsigned int flags, sigset_t *sig)
{
- return syscall(__NR_sys_io_uring_enter, fd, to_submit, min_complete,
+ return syscall(__NR_io_uring_enter, fd, to_submit, min_complete,
flags, sig, _NSIG / 8);
}
OpenPOWER on IntegriCloud