posix-aio-compat.c 7.86 KB
/*
 * QEMU posix-aio emulation
 *
 * Copyright IBM, Corp. 2008
 *
 * Authors:
 *  Anthony Liguori   <aliguori@us.ibm.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2.  See
 * the COPYING file in the top-level directory.
 *
 */

#include <sys/ioctl.h>
#include <pthread.h>
#include <unistd.h>
#include <errno.h>
#include <time.h>
#include <string.h>
#include <stdlib.h>
#include <stdio.h>
#include "osdep.h"
#include "qemu-common.h"

#include "posix-aio-compat.h"

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t cond = PTHREAD_COND_INITIALIZER;
static pthread_t thread_id;
static pthread_attr_t attr;
static int max_threads = 64;
static int cur_threads = 0;
static int idle_threads = 0;
static TAILQ_HEAD(, qemu_paiocb) request_list;

static void die2(int err, const char *what)
{
    fprintf(stderr, "%s failed: %s\n", what, strerror(err));
    abort();
}

static void die(const char *what)
{
    die2(errno, what);
}

static void mutex_lock(pthread_mutex_t *mutex)
{
    int ret = pthread_mutex_lock(mutex);
    if (ret) die2(ret, "pthread_mutex_lock");
}

static void mutex_unlock(pthread_mutex_t *mutex)
{
    int ret = pthread_mutex_unlock(mutex);
    if (ret) die2(ret, "pthread_mutex_unlock");
}

static int cond_timedwait(pthread_cond_t *cond, pthread_mutex_t *mutex,
                           struct timespec *ts)
{
    int ret = pthread_cond_timedwait(cond, mutex, ts);
    if (ret && ret != ETIMEDOUT) die2(ret, "pthread_cond_timedwait");
    return ret;
}

static void cond_signal(pthread_cond_t *cond)
{
    int ret = pthread_cond_signal(cond);
    if (ret) die2(ret, "pthread_cond_signal");
}

static void thread_create(pthread_t *thread, pthread_attr_t *attr,
                          void *(*start_routine)(void*), void *arg)
{
    int ret = pthread_create(thread, attr, start_routine, arg);
    if (ret) die2(ret, "pthread_create");
}

static size_t handle_aiocb_ioctl(struct qemu_paiocb *aiocb)
{
	int ret;

	ret = ioctl(aiocb->aio_fildes, aiocb->aio_ioctl_cmd, aiocb->aio_ioctl_buf);
	if (ret == -1)
		return -errno;
	return ret;
}

/*
 * Check if we need to copy the data in the aiocb into a new
 * properly aligned buffer.
 */
static int aiocb_needs_copy(struct qemu_paiocb *aiocb)
{
    if (aiocb->aio_flags & QEMU_AIO_SECTOR_ALIGNED) {
        int i;

        for (i = 0; i < aiocb->aio_niov; i++)
            if ((uintptr_t) aiocb->aio_iov[i].iov_base % 512)
                return 1;
    }

    return 0;
}

static size_t handle_aiocb_rw_linear(struct qemu_paiocb *aiocb, char *buf)
{
    size_t offset = 0;
    size_t len;

    while (offset < aiocb->aio_nbytes) {
         if (aiocb->aio_type == QEMU_PAIO_WRITE)
             len = pwrite(aiocb->aio_fildes,
                          (const char *)buf + offset,
                          aiocb->aio_nbytes - offset,
                          aiocb->aio_offset + offset);
         else
             len = pread(aiocb->aio_fildes,
                         buf + offset,
                         aiocb->aio_nbytes - offset,
                         aiocb->aio_offset + offset);

         if (len == -1 && errno == EINTR)
             continue;
         else if (len == -1) {
             offset = -errno;
             break;
         } else if (len == 0)
             break;

         offset += len;
    }

    return offset;
}

static size_t handle_aiocb_rw(struct qemu_paiocb *aiocb)
{
    size_t nbytes;
    char *buf;

    if (!aiocb_needs_copy(aiocb) && aiocb->aio_niov == 1) {
        /*
         * If there is just a single buffer, and it is properly aligned
         * we can just use plain pread/pwrite without any problems.
         */
        return handle_aiocb_rw_linear(aiocb, aiocb->aio_iov->iov_base);
    }

    /*
     * Ok, we have to do it the hard way, copy all segments into
     * a single aligned buffer.
     */
    buf = qemu_memalign(512, aiocb->aio_nbytes);
    if (aiocb->aio_type == QEMU_PAIO_WRITE) {
        char *p = buf;
        int i;

        for (i = 0; i < aiocb->aio_niov; ++i) {
            memcpy(p, aiocb->aio_iov[i].iov_base, aiocb->aio_iov[i].iov_len);
            p += aiocb->aio_iov[i].iov_len;
        }
    }

    nbytes = handle_aiocb_rw_linear(aiocb, buf);
    if (aiocb->aio_type != QEMU_PAIO_WRITE) {
        char *p = buf;
        size_t count = aiocb->aio_nbytes, copy;
        int i;

        for (i = 0; i < aiocb->aio_niov && count; ++i) {
            copy = count;
            if (copy > aiocb->aio_iov[i].iov_len)
                copy = aiocb->aio_iov[i].iov_len;
            memcpy(aiocb->aio_iov[i].iov_base, p, copy);
            p     += copy;
            count -= copy;
        }
    }
    qemu_vfree(buf);

    return nbytes;
}

static void *aio_thread(void *unused)
{
    pid_t pid;
    sigset_t set;

    pid = getpid();

    /* block all signals */
    if (sigfillset(&set)) die("sigfillset");
    if (sigprocmask(SIG_BLOCK, &set, NULL)) die("sigprocmask");

    while (1) {
        struct qemu_paiocb *aiocb;
        size_t ret = 0;
        qemu_timeval tv;
        struct timespec ts;

        qemu_gettimeofday(&tv);
        ts.tv_sec = tv.tv_sec + 10;
        ts.tv_nsec = 0;

        mutex_lock(&lock);

        while (TAILQ_EMPTY(&request_list) &&
               !(ret == ETIMEDOUT)) {
            ret = cond_timedwait(&cond, &lock, &ts);
        }

        if (TAILQ_EMPTY(&request_list))
            break;

        aiocb = TAILQ_FIRST(&request_list);
        TAILQ_REMOVE(&request_list, aiocb, node);
        aiocb->active = 1;
        idle_threads--;
        mutex_unlock(&lock);

        switch (aiocb->aio_type) {
        case QEMU_PAIO_READ:
        case QEMU_PAIO_WRITE:
		ret = handle_aiocb_rw(aiocb);
		break;
        case QEMU_PAIO_IOCTL:
		ret = handle_aiocb_ioctl(aiocb);
		break;
	default:
		fprintf(stderr, "invalid aio request (0x%x)\n", aiocb->aio_type);
		ret = -EINVAL;
		break;
	}

        mutex_lock(&lock);
        aiocb->ret = ret;
        idle_threads++;
        mutex_unlock(&lock);

        if (kill(pid, aiocb->ev_signo)) die("kill failed");
    }

    idle_threads--;
    cur_threads--;
    mutex_unlock(&lock);

    return NULL;
}

static void spawn_thread(void)
{
    cur_threads++;
    idle_threads++;
    thread_create(&thread_id, &attr, aio_thread, NULL);
}

int qemu_paio_init(struct qemu_paioinit *aioinit)
{
    int ret;

    ret = pthread_attr_init(&attr);
    if (ret) die2(ret, "pthread_attr_init");

    ret = pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED);
    if (ret) die2(ret, "pthread_attr_setdetachstate");

    TAILQ_INIT(&request_list);

    return 0;
}

static int qemu_paio_submit(struct qemu_paiocb *aiocb, int type)
{
    aiocb->aio_type = type;
    aiocb->ret = -EINPROGRESS;
    aiocb->active = 0;
    mutex_lock(&lock);
    if (idle_threads == 0 && cur_threads < max_threads)
        spawn_thread();
    TAILQ_INSERT_TAIL(&request_list, aiocb, node);
    mutex_unlock(&lock);
    cond_signal(&cond);

    return 0;
}

int qemu_paio_read(struct qemu_paiocb *aiocb)
{
    return qemu_paio_submit(aiocb, QEMU_PAIO_READ);
}

int qemu_paio_write(struct qemu_paiocb *aiocb)
{
    return qemu_paio_submit(aiocb, QEMU_PAIO_WRITE);
}

int qemu_paio_ioctl(struct qemu_paiocb *aiocb)
{
    return qemu_paio_submit(aiocb, QEMU_PAIO_IOCTL);
}

ssize_t qemu_paio_return(struct qemu_paiocb *aiocb)
{
    ssize_t ret;

    mutex_lock(&lock);
    ret = aiocb->ret;
    mutex_unlock(&lock);

    return ret;
}

int qemu_paio_error(struct qemu_paiocb *aiocb)
{
    ssize_t ret = qemu_paio_return(aiocb);

    if (ret < 0)
        ret = -ret;
    else
        ret = 0;

    return ret;
}

int qemu_paio_cancel(int fd, struct qemu_paiocb *aiocb)
{
    int ret;

    mutex_lock(&lock);
    if (!aiocb->active) {
        TAILQ_REMOVE(&request_list, aiocb, node);
        aiocb->ret = -ECANCELED;
        ret = QEMU_PAIO_CANCELED;
    } else if (aiocb->ret == -EINPROGRESS)
        ret = QEMU_PAIO_NOTCANCELED;
    else
        ret = QEMU_PAIO_ALLDONE;
    mutex_unlock(&lock);

    return ret;
}