diff options
author | Martin Jansa <martin.jansa@gmail.com> | 2023-10-19 13:11:11 +0200 |
---|---|---|
committer | Martin Jansa <martin.jansa@gmail.com> | 2023-10-19 13:45:53 +0200 |
commit | 38c813cffdf4931d16044bfe662880d63cbdcfa3 (patch) | |
tree | 906173f8b4f91129cd02572e556482e156ac090a | |
parent | 4f84537670020a8d902248479efa9f062089c0d3 (diff) | |
download | openembedded-core-contrib-jansa/io_uring.tar.gz |
io-uring-writev: add simple test for writing a file with io_uring (jansa/io_uring)
* pseudo doesn't support io_uring yet, as became apparent after nodejs was
upgraded to nodejs-native >= 20.3.0 with libuv >= 1.45.0, which has:
https://github.com/libuv/libuv/pull/3952
* files created in do_install with nodejs-native aren't tracked by pseudo
and will result in host-user-contamination QA issue or
"KeyError: 'getpwuid(): uid not found" as documented in:
https://github.com/shr-project/com.webos.app.minimal/commit/bd238047c8ce3cd085041d276613396b863213cf
* this is a much simpler test for io_uring, without the need to build the whole
nodejs-native; it's based on:
https://unixism.net/2020/04/io-uring-by-example-part-1-introduction/
just using writev instead of readv
* if it works fine, the file "test" will be tracked in the pseudo database
from the moment it is created in ${D}, like:
core2-64-oe-linux/io-uring-writev/1.0 $ sqlite3 pseudo/files.db "select * from files"
1|/OE/build/oe-core/tmp-glibc/work/core2-64-oe-linux/io-uring-writev/1.0/image|66305|48357743|0|0|16877|0|0
2|/OE/build/oe-core/tmp-glibc/work/core2-64-oe-linux/io-uring-writev/1.0/image/test|66305|48316709|0|0|33188|0|0
and the file is tracked in this case, because I haven't figured out how to call
writev() without first opening an fd for the output file, and that openat() call
gets intercepted by pseudo:
io-uring-writev/1.0 $ strace -v ./io-uring-writev test2 2>&1 | grep openat
openat(AT_FDCWD, "/etc/ld.so.cache", O_RDONLY|O_CLOEXEC) = 3
openat(AT_FDCWD, "/usr/lib64/libc.so.6", O_RDONLY|O_CLOEXEC) = 3
openat(AT_FDCWD, "test2", O_WRONLY|O_CREAT, 0666) = 4
while with libuv there was no openat() for the output files in strace
Signed-off-by: Martin Jansa <martin.jansa@gmail.com>
-rw-r--r-- | meta-selftest/recipes-test/io-uring/io-uring-writev.bb | 18 | ||||
-rw-r--r-- | meta-selftest/recipes-test/io-uring/io-uring-writev/io-uring-writev.c | 389 |
2 files changed, 407 insertions, 0 deletions
diff --git a/meta-selftest/recipes-test/io-uring/io-uring-writev.bb b/meta-selftest/recipes-test/io-uring/io-uring-writev.bb new file mode 100644 index 0000000000..8b2ca6005b --- /dev/null +++ b/meta-selftest/recipes-test/io-uring/io-uring-writev.bb @@ -0,0 +1,18 @@ +DESCRIPTION = "Simple io_uring test" +SECTION = "examples" +LICENSE = "MIT" +LIC_FILES_CHKSUM = "file://${COMMON_LICENSE_DIR}/MIT;md5=0835ade698e0bcf8506ecda2f7b4f302" + +SRC_URI = "file://io-uring-writev.c" + +S = "${WORKDIR}" + +do_compile() { + ${BUILD_CC} io-uring-writev.c -o io-uring-writev +} + +do_install() { + ${S}/io-uring-writev ${D}/test +} + +FILES:${PN} = "test" diff --git a/meta-selftest/recipes-test/io-uring/io-uring-writev/io-uring-writev.c b/meta-selftest/recipes-test/io-uring/io-uring-writev/io-uring-writev.c new file mode 100644 index 0000000000..a5e4253b7a --- /dev/null +++ b/meta-selftest/recipes-test/io-uring/io-uring-writev/io-uring-writev.c @@ -0,0 +1,389 @@ +/* Taken from + * https://unixism.net/2020/04/io-uring-by-example-part-1-introduction/ + * with small modification to write into files instead of reading them + * to test io_uring support in pseudo (https://git.yoctoproject.org/pseudo/) + * once implemented there + * */ + +#include <stdio.h> +#include <stdlib.h> +#include <sys/stat.h> +#include <sys/ioctl.h> +#include <sys/syscall.h> +#include <sys/mman.h> +#include <sys/uio.h> +#include <linux/fs.h> +#include <fcntl.h> +#include <unistd.h> +#include <string.h> + +/* If your compilation fails because the header file below is missing, + * your kernel is probably too old to support io_uring. 
/* A missing <linux/io_uring.h> usually means the kernel headers predate io_uring. */
#include <linux/io_uring.h>

#define QUEUE_DEPTH 1
#define BLOCK_SZ    1024

/*
 * Ordering fences for the rings shared with the kernel.
 *
 * read_barrier()  goes AFTER loading a ring index written by the kernel and
 *                 BEFORE reading the entries that index publishes.
 * write_barrier() goes AFTER filling/consuming entries and BEFORE storing the
 *                 ring index that publishes them to the kernel.
 *
 * These are the GCC/Clang __atomic fences rather than bare compiler barriers,
 * so the ordering also holds on build hosts that are not x86.
 */
#define read_barrier()  __atomic_thread_fence(__ATOMIC_ACQUIRE)
#define write_barrier() __atomic_thread_fence(__ATOMIC_RELEASE)

/* Pointers into the mmap()ed submission ring, resolved by app_setup_uring(). */
struct app_io_sq_ring {
    unsigned *head;
    unsigned *tail;
    unsigned *ring_mask;
    unsigned *ring_entries;
    unsigned *flags;
    unsigned *array;            /* indirection array: ring slot -> SQE index */
};

/* Pointers into the mmap()ed completion ring, resolved by app_setup_uring(). */
struct app_io_cq_ring {
    unsigned *head;
    unsigned *tail;
    unsigned *ring_mask;
    unsigned *ring_entries;
    struct io_uring_cqe *cqes;
};

/* Everything needed to talk to one io_uring instance. */
struct submitter {
    int ring_fd;                        /* fd returned by io_uring_setup() */
    struct app_io_sq_ring sq_ring;
    struct io_uring_sqe *sqes;          /* mmap()ed array of SQEs */
    struct app_io_cq_ring cq_ring;
};

/*
 * Per-request bookkeeping. A pointer to it travels through the kernel in
 * sqe->user_data and comes back in cqe->user_data.
 */
struct file_info {
    off_t file_sz;              /* total number of bytes described by iovecs */
    struct iovec iovecs[];      /* Referred by readv/writev */
};

/*
 * The io_uring system calls are invoked through syscall() directly, so the
 * test does not depend on liburing or on C library wrappers.
 */

/* Thin wrapper around the io_uring_setup system call. */
int io_uring_setup(unsigned entries, struct io_uring_params *p)
{
    return (int) syscall(__NR_io_uring_setup, entries, p);
}

/* Thin wrapper around the io_uring_enter system call (no signal mask). */
int io_uring_enter(int ring_fd, unsigned int to_submit,
                   unsigned int min_complete, unsigned int flags)
{
    return (int) syscall(__NR_io_uring_enter, ring_fd, to_submit, min_complete,
                         flags, NULL, 0);
}

/*
 * Returns the size in bytes of the file behind the open descriptor fd.
 * Regular files report st_size, block devices are queried with BLKGETSIZE64.
 * Returns -1 if fstat()/ioctl() fails (after perror()) or for any other file
 * type.
 */
off_t get_file_size(int fd)
{
    struct stat st;

    if (fstat(fd, &st) < 0) {
        perror("fstat");
        return -1;
    }

    if (S_ISREG(st.st_mode)) {
        return st.st_size;
    }

    if (S_ISBLK(st.st_mode)) {
        unsigned long long bytes;

        if (ioctl(fd, BLKGETSIZE64, &bytes) != 0) {
            perror("ioctl");
            return -1;
        }
        return (off_t) bytes;
    }

    return -1;
}

/* Address of the ring field located offset bytes into the mapping at base. */
static void *ring_at(void *base, unsigned offset)
{
    return (char *) base + offset;
}

/*
 * Creates an io_uring instance with QUEUE_DEPTH entries and maps its rings.
 *
 * On success s->ring_fd, s->sqes and the pointers in s->sq_ring / s->cq_ring
 * are filled in and 0 is returned. On failure the reason is reported with
 * perror(), everything acquired so far is released again, s->ring_fd is set
 * to -1 and 1 is returned.
 */
int app_setup_uring(struct submitter *s)
{
    struct app_io_sq_ring *sring = &s->sq_ring;
    struct app_io_cq_ring *cring = &s->cq_ring;
    struct io_uring_params p;
    void *sq_ptr = NULL;
    void *cq_ptr = NULL;
    void *sqes;
    size_t sring_sz, cring_sz, sqes_sz;
    int single_mmap;

    /* io_uring_setup() expects the params zeroed; no flags are requested. */
    memset(&p, 0, sizeof(p));
    s->ring_fd = io_uring_setup(QUEUE_DEPTH, &p);
    if (s->ring_fd < 0) {
        perror("io_uring_setup");
        return 1;
    }

    /*
     * The kernel reports where each ring field lives inside the mappings.
     * The submission ring ends with its indirection array, the completion
     * ring ends with the CQE array.
     */
    sring_sz = p.sq_off.array + p.sq_entries * sizeof(unsigned);
    cring_sz = p.cq_off.cqes + p.cq_entries * sizeof(struct io_uring_cqe);
    sqes_sz = p.sq_entries * sizeof(struct io_uring_sqe);

    /*
     * When IORING_FEAT_SINGLE_MMAP is advertised, both rings live in one
     * mapping that must be large enough for the bigger of the two; checking
     * the feature bit avoids having to check kernel versions.
     */
    single_mmap = (p.features & IORING_FEAT_SINGLE_MMAP) != 0;
    if (single_mmap) {
        if (cring_sz > sring_sz) {
            sring_sz = cring_sz;
        }
        cring_sz = sring_sz;
    }

    sq_ptr = mmap(NULL, sring_sz, PROT_READ | PROT_WRITE,
                  MAP_SHARED | MAP_POPULATE,
                  s->ring_fd, IORING_OFF_SQ_RING);
    if (sq_ptr == MAP_FAILED) {
        perror("mmap");
        goto err_close;
    }

    if (single_mmap) {
        cq_ptr = sq_ptr;
    } else {
        /* Without the feature bit the completion ring is mapped separately. */
        cq_ptr = mmap(NULL, cring_sz, PROT_READ | PROT_WRITE,
                      MAP_SHARED | MAP_POPULATE,
                      s->ring_fd, IORING_OFF_CQ_RING);
        if (cq_ptr == MAP_FAILED) {
            perror("mmap");
            goto err_unmap_sq;
        }
    }

    /* The submission queue entries array is always a mapping of its own. */
    sqes = mmap(NULL, sqes_sz, PROT_READ | PROT_WRITE,
                MAP_SHARED | MAP_POPULATE,
                s->ring_fd, IORING_OFF_SQES);
    if (sqes == MAP_FAILED) {
        perror("mmap");
        goto err_unmap_cq;
    }
    s->sqes = sqes;

    /* Resolve the ring fields once so later code can just dereference them. */
    sring->head = ring_at(sq_ptr, p.sq_off.head);
    sring->tail = ring_at(sq_ptr, p.sq_off.tail);
    sring->ring_mask = ring_at(sq_ptr, p.sq_off.ring_mask);
    sring->ring_entries = ring_at(sq_ptr, p.sq_off.ring_entries);
    sring->flags = ring_at(sq_ptr, p.sq_off.flags);
    sring->array = ring_at(sq_ptr, p.sq_off.array);

    cring->head = ring_at(cq_ptr, p.cq_off.head);
    cring->tail = ring_at(cq_ptr, p.cq_off.tail);
    cring->ring_mask = ring_at(cq_ptr, p.cq_off.ring_mask);
    cring->ring_entries = ring_at(cq_ptr, p.cq_off.ring_entries);
    cring->cqes = ring_at(cq_ptr, p.cq_off.cqes);

    return 0;

err_unmap_cq:
    if (cq_ptr != sq_ptr) {
        munmap(cq_ptr, cring_sz);
    }
err_unmap_sq:
    munmap(sq_ptr, sring_sz);
err_close:
    close(s->ring_fd);
    s->ring_fd = -1;
    return 1;
}

/*
 * Writes len characters from buf to stdout through stdio's buffered output.
 * A len of zero or less writes nothing.
 */
void output_to_console(char *buf, int len)
{
    for (int i = 0; i < len; i++) {
        fputc(buf[i], stdout);
    }
}

/*
 * Consumes every completion currently in the completion queue.
 *
 * For each completion the result is checked against the size recorded in the
 * request's file_info: a negative result or a short write is reported on
 * stderr. When echo is non-zero the request's buffers are also printed to
 * stdout. The file_info allocated by submit_to_sq() is freed here.
 *
 * Returns the number of completions consumed, or -1 if any of them failed.
 */
static int drain_cq(struct submitter *s, int echo)
{
    struct app_io_cq_ring *cring = &s->cq_ring;
    unsigned head = *cring->head;
    int reaped = 0;
    int failed = 0;

    for (;;) {
        unsigned tail = *cring->tail;
        struct io_uring_cqe *cqe;
        struct file_info *fi;

        /* The CQE may only be read after the tail that publishes it. */
        read_barrier();

        /* This is a ring buffer: head == tail means it is empty. */
        if (head == tail) {
            break;
        }

        cqe = &cring->cqes[head & *cring->ring_mask];
        fi = (struct file_info *)(unsigned long) cqe->user_data;

        if (cqe->res < 0) {
            /* The kernel reports failures as a negated errno value. */
            fprintf(stderr, "Error: %s\n", strerror(-cqe->res));
            failed = 1;
        } else if ((off_t) cqe->res != fi->file_sz) {
            fprintf(stderr, "Error: short write (%d of %lld bytes)\n",
                    cqe->res, (long long) fi->file_sz);
            failed = 1;
        }

        if (echo) {
            off_t blocks = (fi->file_sz + BLOCK_SZ - 1) / BLOCK_SZ;

            for (off_t i = 0; i < blocks; i++) {
                output_to_console(fi->iovecs[i].iov_base,
                                  (int) fi->iovecs[i].iov_len);
            }
        }

        free(fi);
        reaped++;
        head++;
    }

    /* Finish with the entries before handing their slots back to the kernel. */
    write_barrier();
    *cring->head = head;

    return failed ? -1 : reaped;
}

/*
 * Read from completion queue.
 * Consumes all pending completion events, reports failed requests on stderr,
 * prints the buffers of each request to the console and releases the
 * request's file_info.
 */
void read_from_cq(struct submitter *s)
{
    (void) drain_cq(s, 1);
}

/*
 * Submit to submission queue.
 * Opens (creating it if needed) file_path, queues a single IORING_OP_WRITEV
 * request that writes a fixed message at offset 0, waits for the request to
 * complete and verifies its result.
 *
 * Returns 0 when the whole message was written, 1 on any failure (the reason
 * is reported on stderr). The file descriptor is closed on every path.
 */
int submit_to_sq(char *file_path, struct submitter *s)
{
    static char bark[] = "Hello IO!";
    struct app_io_sq_ring *sring = &s->sq_ring;
    struct io_uring_sqe *sqe;
    struct file_info *fi;
    size_t remaining = strlen(bark);
    size_t offset = 0;
    unsigned blocks, tail, index;
    int file_fd, reaped;
    int rc = 1;

    file_fd = open(file_path, O_WRONLY|O_CREAT, 0666);
    if (file_fd < 0) {
        perror("open");
        return 1;
    }

    /* One iovec per BLOCK_SZ-sized chunk of the message, rounded up. */
    blocks = (unsigned) ((remaining + BLOCK_SZ - 1) / BLOCK_SZ);

    fi = malloc(sizeof(*fi) + sizeof(struct iovec) * blocks);
    if (!fi) {
        fprintf(stderr, "Unable to allocate memory\n");
        goto out_close;
    }
    fi->file_sz = (off_t) remaining;

    /*
     * Every iovec points straight into the message, so no copies are made.
     * The array is handed to the kernel as the writev() vector.
     */
    for (unsigned i = 0; i < blocks; i++) {
        size_t chunk = remaining < BLOCK_SZ ? remaining : BLOCK_SZ;

        fi->iovecs[i].iov_base = bark + offset;
        fi->iovecs[i].iov_len = chunk;
        offset += chunk;
        remaining -= chunk;
    }

    /* Claim the SQE slot at the current tail of the submission ring. */
    tail = *sring->tail;
    index = tail & *sring->ring_mask;
    sqe = &s->sqes[index];

    /* Slots are reused, so clear whatever an earlier request left behind. */
    memset(sqe, 0, sizeof(*sqe));
    sqe->opcode = IORING_OP_WRITEV;
    sqe->fd = file_fd;
    sqe->addr = (unsigned long) fi->iovecs;
    sqe->len = blocks;
    sqe->off = 0;
    sqe->user_data = (unsigned long) fi;
    sring->array[index] = index;

    /* The SQE must be completely written before the new tail publishes it. */
    write_barrier();
    *sring->tail = tail + 1;

    /*
     * Tell the kernel one request was submitted. IORING_ENTER_GETEVENTS makes
     * the call wait until min_complete (the 3rd argument) requests completed.
     */
    if (io_uring_enter(s->ring_fd, 1, 1, IORING_ENTER_GETEVENTS) < 0) {
        perror("io_uring_enter");
        /*
         * fi is deliberately not freed here: it is unknown whether the kernel
         * took the request, and the caller gives up after this failure.
         */
        goto out_close;
    }

    /* Check the result of the write; drain_cq() also releases fi. */
    reaped = drain_cq(s, 0);
    if (reaped == 0) {
        fprintf(stderr, "Error: no completion received\n");
    } else if (reaped > 0) {
        rc = 0;
    }

out_close:
    if (close(file_fd) < 0) {
        perror("close");
        rc = 1;
    }
    return rc;
}

/*
 * Usage: io-uring-writev <filename>...
 * Writes the test message into every file named on the command line using
 * io_uring. Exits with 0 only if every file was written completely.
 */
int main(int argc, char *argv[])
{
    struct submitter *s;
    int rc = 0;

    if (argc < 2) {
        fprintf(stderr, "Usage: %s <filename>, barks to <filename>\n", argv[0]);
        return 1;
    }

    s = calloc(1, sizeof(*s));
    if (!s) {
        perror("malloc");
        return 1;
    }

    if (app_setup_uring(s)) {
        fprintf(stderr, "Unable to setup uring!\n");
        free(s);
        return 1;
    }

    /* submit_to_sq() waits for and checks its own completion. */
    for (int i = 1; i < argc; i++) {
        if (submit_to_sq(argv[i], s)) {
            fprintf(stderr, "Error writting file\n");
            rc = 1;
            break;
        }
    }

    close(s->ring_fd);
    free(s);
    return rc;
}