Libevent Source Analysis --- IOCP

There is plenty of material about the IOCP model online, so it is not analyzed in detail here; the following article covers it in depth:
完成端口(CompletionPort)详解 - 手把手教你玩转网络编程系列之三

event_base has an iocp member. During event_base initialization, event_base_start_iocp is called to enable IOCP (when the EVENT_BASE_FLAG_STARTUP_IOCP flag is set in the config), and event_base_start_iocp in turn calls event_iocp_port_launch to initialize the IOCP:

#ifdef WIN32
    if (cfg && (cfg->flags & EVENT_BASE_FLAG_STARTUP_IOCP))
        event_base_start_iocp(base, cfg->n_cpus_hint);
#endif

int
event_base_start_iocp(struct event_base *base, int n_cpus)
{
#ifdef WIN32
    if (base->iocp)
        return 0;
    base->iocp = event_iocp_port_launch(n_cpus);
    if (!base->iocp) {
        event_warnx("%s: Couldn't launch IOCP", __func__);
        return -1;
    }
    return 0;
#else
    return -1;
#endif
}
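
For context, here is a minimal sketch (not part of the quoted source) of how an application would opt in to IOCP at startup; it assumes a libevent 2.0 build on Windows with thread support, and the helper name is hypothetical:

#include <event2/event.h>
#include <event2/thread.h>

/* Hypothetical helper: create an event_base with IOCP enabled. */
static struct event_base *
make_iocp_base(void)
{
    struct event_config *cfg;
    struct event_base *base;

    evthread_use_windows_threads(); /* the IOCP thread pool needs locking */
    if (!(cfg = event_config_new()))
        return NULL;
    event_config_set_flag(cfg, EVENT_BASE_FLAG_STARTUP_IOCP);
    event_config_set_num_cpus_hint(cfg, 4); /* 4 CPUs -> 8 worker threads */
    base = event_base_new_with_config(cfg);
    event_config_free(cfg);
    return base;
}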

The iocp member points to an event_iocp_port structure:

struct event_iocp_port {
    /** The port itself */
    HANDLE port;
    /* A lock to cover internal structures. */
    CRITICAL_SECTION lock;
    /** Number of threads ever open on the port. */
    short n_threads;
    /** True iff we're shutting down all the threads on this port */
    short shutdown;
    /** How often the threads on this port check for shutdown and other
     * conditions */
    long ms;
    /* The threads that are waiting for events. */
    HANDLE *threads;
    /** Number of threads currently open on this port. */
    short n_live_threads;
    /** A semaphore to signal when we are done shutting down. */
    HANDLE *shutdownSemaphore;
};

Here port is the IOCP port handle; shutdown, lock, and shutdownSemaphore are used when shutting the IOCP down. ms is the wait timeout passed to GetQueuedCompletionStatus, threads holds the worker thread handles, n_threads is the total number of threads, and n_live_threads is the number of threads that have not yet exited.
event_iocp_port is initialized in event_iocp_port_launch:

struct event_iocp_port *
event_iocp_port_launch(int n_cpus)
{
    struct event_iocp_port *port;
    int i;

    if (!extension_fns_initialized)
        init_extension_functions(&the_extension_fns);

    if (!(port = mm_calloc(1, sizeof(struct event_iocp_port))))
        return NULL;

    if (n_cpus <= 0)
        n_cpus = N_CPUS_DEFAULT;
    port->n_threads = n_cpus * 2;
    port->threads = mm_calloc(port->n_threads, sizeof(HANDLE));
    if (!port->threads)
        goto err;

    port->port = CreateIoCompletionPort(INVALID_HANDLE_VALUE, NULL, 0,
            n_cpus);
    port->ms = -1;
    if (!port->port)
        goto err;

    port->shutdownSemaphore = CreateSemaphore(NULL, 0, 1, NULL);
    if (!port->shutdownSemaphore)
        goto err;

    for (i=0; i<port->n_threads; ++i) {
        ev_uintptr_t th = _beginthread(loop, 0, port);
        if (th == (ev_uintptr_t)-1)
            goto err;
        port->threads[i] = (HANDLE)th;
        ++port->n_live_threads;
    }

    InitializeCriticalSectionAndSpinCount(&port->lock, 1000);

    return port;
err:
    if (port->port)
        CloseHandle(port->port);
    if (port->threads)
        mm_free(port->threads);
    if (port->shutdownSemaphore)
        CloseHandle(port->shutdownSemaphore);
    mm_free(port);
    return NULL;
}

event_iocp_port_launch first loads the IOCP extension-function table, then creates the completion port and the semaphore used for shutdown. IOCP also needs a thread pool, whose size is set to twice the number of CPUs.
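
As an aside, the extension functions (AcceptEx, ConnectEx, and friends) have to be looked up at runtime through WSAIoctl; the sketch below shows the standard lookup pattern that init_extension_functions relies on (error handling here is illustrative):

/* Illustrative sketch: fetch a Winsock extension function pointer. */
static void *
get_extension_function(SOCKET s, const GUID *which_fn)
{
    void *ptr = NULL;
    DWORD bytes = 0;
    if (WSAIoctl(s, SIO_GET_EXTENSION_FUNCTION_POINTER,
        (GUID *)which_fn, sizeof(*which_fn),
        &ptr, sizeof(ptr), &bytes, NULL, NULL))
        ptr = NULL; /* lookup failed */
    return ptr;
}

Every I/O operation posted to the port carries an event_overlapped, initialized with event_overlapped_init; when a completion is dequeued, handle_entry recovers that structure and invokes its callback: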

void
event_overlapped_init(struct event_overlapped *o, iocp_callback cb)
{
    memset(o, 0, sizeof(struct event_overlapped));
    o->cb = cb;
}

static void
handle_entry(OVERLAPPED *o, ULONG_PTR completion_key, DWORD nBytes, int ok)
{
    struct event_overlapped *eo =
        EVUTIL_UPCAST(o, struct event_overlapped, overlapped);
    eo->cb(eo, completion_key, nBytes, ok);
}

static void
loop(void *_port)
{
    struct event_iocp_port *port = _port;
    long ms = port->ms;
    HANDLE p = port->port;

    if (ms <= 0)
        ms = INFINITE;

    while (1) {
        OVERLAPPED *overlapped=NULL;
        ULONG_PTR key=0;
        DWORD bytes=0;
        int ok = GetQueuedCompletionStatus(p, &bytes, &key,
            &overlapped, ms);
        EnterCriticalSection(&port->lock);
        if (port->shutdown) {
            if (--port->n_live_threads == 0)
                ReleaseSemaphore(port->shutdownSemaphore, 1,
                        NULL);
            LeaveCriticalSection(&port->lock);
            return;
        }
        LeaveCriticalSection(&port->lock);

        if (key != NOTIFICATION_KEY && overlapped)
            handle_entry(overlapped, key, bytes, ok);
        else if (!overlapped)
            break;
    }
    event_warnx("GetQueuedCompletionStatus exited with no event.");
    EnterCriticalSection(&port->lock);
    if (--port->n_live_threads == 0)
        ReleaseSemaphore(port->shutdownSemaphore, 1, NULL);
    LeaveCriticalSection(&port->lock);
}

loop is the function each thread in the pool runs; it waits for IOCP completions by calling GetQueuedCompletionStatus. ms was set to -1 during initialization, so the wait is infinite. Under normal circumstances key holds the completion key the socket was associated with; if it comes back as (ULONG_PTR)-1, i.e. NOTIFICATION_KEY, then event_iocp_notify_all has been called, which uses PostQueuedCompletionStatus to make every GetQueuedCompletionStatus call return immediately. That is the cue to stop the worker threads and shut the IOCP down. When loop dequeues a completed event, it calls handle_entry to dispatch the corresponding callback.
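
For completeness, the shutdown wake-up looks roughly like this (a sketch following event_iocp.c; notification_o is assumed here to be a static event_overlapped used only as a wake-up token):

#define NOTIFICATION_KEY ((ULONG_PTR)-1)

/* Sketch: wake every worker so it can observe port->shutdown. */
static int
event_iocp_notify_all(struct event_iocp_port *port)
{
    int i, r, ok = 1;
    for (i = 0; i < port->n_threads; ++i) {
        r = PostQueuedCompletionStatus(port->port, 0,
            NOTIFICATION_KEY, &notification_o.overlapped);
        if (!r)
            ok = 0;
    }
    return ok ? 0 : -1;
}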
event_overlapped is defined as follows:

struct event_overlapped {
    OVERLAPPED overlapped;
    iocp_callback cb;
};

The first member, overlapped, is the OVERLAPPED passed in when a read or write is posted. GetQueuedCompletionStatus hands back that OVERLAPPED pointer, and EVUTIL_UPCAST recovers the enclosing event_overlapped from it, so the right callback can be invoked.
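
EVUTIL_UPCAST is libevent's version of the classic container-of idiom. A self-contained sketch of the same pattern (the macro name here is illustrative; the real macro lives in libevent's util-internal.h):

#include <stddef.h>

/* Recover a pointer to the enclosing struct from a member pointer. */
#define UPCAST(ptr, type, field) \
    ((type *)(((char *)(ptr)) - offsetof(type, field)))

/* e.g., given OVERLAPPED *o from GetQueuedCompletionStatus:
 * struct event_overlapped *eo =
 *     UPCAST(o, struct event_overlapped, overlapped); */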

Having seen how the IOCP machinery works, let's look at how bufferevent_async uses it. To use bufferevent_async, first call bufferevent_async_new:

struct bufferevent *
bufferevent_async_new(struct event_base *base,
    evutil_socket_t fd, int options)
{
    struct bufferevent_async *bev_a;
    struct bufferevent *bev;
    struct event_iocp_port *iocp;

    options |= BEV_OPT_THREADSAFE;

    if (!(iocp = event_base_get_iocp(base)))
        return NULL;

    if (fd >= 0 && event_iocp_port_associate(iocp, fd, 1)<0) {
        int err = GetLastError();
        /* We may have already associated this fd with a port.
         * Let's hope it's this port, and that the error code
         * for doing this never changes. */
        if (err != ERROR_INVALID_PARAMETER)
            return NULL;
    }

    if (!(bev_a = mm_calloc(1, sizeof(struct bufferevent_async))))
        return NULL;

    bev = &bev_a->bev.bev;
    if (!(bev->input = evbuffer_overlapped_new(fd))) {
        mm_free(bev_a);
        return NULL;
    }
    if (!(bev->output = evbuffer_overlapped_new(fd))) {
        evbuffer_free(bev->input);
        mm_free(bev_a);
        return NULL;
    }

    if (bufferevent_init_common(&bev_a->bev, base, &bufferevent_ops_async,
        options)<0)
        goto err;

    evbuffer_add_cb(bev->input, be_async_inbuf_callback, bev);
    evbuffer_add_cb(bev->output, be_async_outbuf_callback, bev);

    event_overlapped_init(&bev_a->connect_overlapped, connect_complete);
    event_overlapped_init(&bev_a->read_overlapped, read_complete);
    event_overlapped_init(&bev_a->write_overlapped, write_complete);

    bev_a->ok = fd >= 0;
    if (bev_a->ok)
        _bufferevent_init_generic_timeout_cbs(bev);

    return bev;
err:
    bufferevent_free(&bev_a->bev.bev);
    return NULL;
}

The fd argument may be a valid socket or -1 (in which case the socket is supplied later through be_async_ctrl). event_iocp_port_associate associates the socket with the IOCP port. evbuffer_overlapped_new creates an evbuffer_overlapped structure and returns its embedded evbuffer:

struct evbuffer_overlapped {
    struct evbuffer buffer;
    /** The socket that we're doing overlapped IO on. */
    evutil_socket_t fd;

    /** pending I/O type */
    unsigned read_in_progress : 1;
    unsigned write_in_progress : 1;

    /** The first pinned chain in the buffer. */
    struct evbuffer_chain *first_pinned;

    /** How many chains are pinned; how many of the fields in buffers
     * are we using. */
    int n_buffers;
    WSABUF buffers[MAX_WSABUFS];
};

struct evbuffer *
evbuffer_overlapped_new(evutil_socket_t fd)
{
    struct evbuffer_overlapped *evo;

    evo = mm_calloc(1, sizeof(struct evbuffer_overlapped));
    if (!evo)
        return NULL;

    TAILQ_INIT(&evo->buffer.callbacks);
    evo->buffer.refcnt = 1;
    evo->buffer.last_with_datap = &evo->buffer.first;

    evo->buffer.is_overlapped = 1;
    evo->fd = fd;

    return &evo->buffer;
}
An evbuffer_overlapped can be recovered from its evbuffer via upcast_evbuffer. libevent uses this technique heavily: it exposes simple, generic structures to developers and hides the real implementation behind internal casts. Within evbuffer_overlapped, read_in_progress and write_in_progress record whether a read or write has been posted, and buffers is the WSABUF array used as the data buffers when posting reads and writes.
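
Note that applications normally never call bufferevent_async_new directly; on a base with IOCP enabled, bufferevent_socket_new dispatches to it internally, so portable code stays unchanged. A hypothetical usage sketch (read_cb, write_cb, event_cb, and sin are application-defined placeholders):

/* Hypothetical snippet: create an IOCP-backed bufferevent transparently. */
struct bufferevent *bev;

bev = bufferevent_socket_new(base, -1, BEV_OPT_CLOSE_ON_FREE);
bufferevent_setcb(bev, read_cb, write_cb, event_cb, NULL);
bufferevent_socket_connect(bev, (struct sockaddr *)&sin, sizeof(sin));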
Back in bufferevent_async_new: be_async_inbuf_callback and be_async_outbuf_callback are installed as the callbacks of the input and output evbuffers, and connect_complete, read_complete, and write_complete become the callbacks of connect_overlapped, read_overlapped, and write_overlapped respectively. Let's look at these callbacks:

static void
be_async_outbuf_callback(struct evbuffer *buf,
    const struct evbuffer_cb_info *cbinfo,
    void *arg)
{
    struct bufferevent *bev = arg;
    struct bufferevent_async *bev_async = upcast(bev);

    /* If we added data to the outbuf and were not writing before,
     * we may want to write now. */

    _bufferevent_incref_and_lock(bev);

    if (cbinfo->n_added)
        bev_async_consider_writing(bev_async);

    _bufferevent_decref_and_unlock(bev);
}

be_async_outbuf_callback is the output buffer's callback; it runs whenever data is added to output (and, if a watermark is set, once the threshold is reached). It calls bev_async_consider_writing to post a write operation:

static void
bev_async_consider_writing(struct bufferevent_async *beva)
{
    size_t at_most;
    int limit;
    struct bufferevent *bev = &beva->bev.bev;

    /* Don't write if there's a write in progress, or we do not
     * want to write, or when there's nothing left to write. */
    if (beva->write_in_progress || beva->bev.connecting)
        return;
    if (!beva->ok || !(bev->enabled&EV_WRITE) ||
        !evbuffer_get_length(bev->output)) {
        bev_async_del_write(beva);
        return;
    }

    at_most = evbuffer_get_length(bev->output);

    /* This is safe so long as bufferevent_get_write_max never returns
     * more than INT_MAX.  That's true for now. XXXX */
    limit = (int)_bufferevent_get_write_max(&beva->bev);
    if (at_most >= (size_t)limit && limit >= 0)
        at_most = limit;

    if (beva->bev.write_suspended) {
        bev_async_del_write(beva);
        return;
    }

    /*  XXXX doesn't respect low-water mark very well. */
    bufferevent_incref(bev);
    if (evbuffer_launch_write(bev->output, at_most,
        &beva->write_overlapped)) {
        bufferevent_decref(bev);
        beva->ok = 0;
        _bufferevent_run_eventcb(bev, BEV_EVENT_ERROR);
    } else {
        beva->write_in_progress = at_most;
        _bufferevent_decrement_write_buckets(&beva->bev, at_most);
        bev_async_add_write(beva);
    }
}
bev_async_consider_writing checks whether a write is already in flight; if not, it picks a reasonable at_most value and calls evbuffer_launch_write:

int
evbuffer_launch_write(struct evbuffer *buf, ev_ssize_t at_most,
    struct event_overlapped *ol)
{
    struct evbuffer_overlapped *buf_o = upcast_evbuffer(buf);
    int r = -1;
    int i;
    struct evbuffer_chain *chain;
    DWORD bytesSent;

    if (!buf) {
        /* No buffer, or it isn't overlapped */
        return -1;
    }

    EVBUFFER_LOCK(buf);
    EVUTIL_ASSERT(!buf_o->read_in_progress);
    if (buf->freeze_start || buf_o->write_in_progress)
        goto done;
    if (!buf->total_len) {
        /* Nothing to write */
        r = 0;
        goto done;
    } else if (at_most < 0 || (size_t)at_most > buf->total_len) {
        at_most = buf->total_len;
    }
    evbuffer_freeze(buf, 1);

    buf_o->first_pinned = NULL;
    buf_o->n_buffers = 0;
    memset(buf_o->buffers, 0, sizeof(buf_o->buffers));

    chain = buf_o->first_pinned = buf->first;

    for (i=0; i < MAX_WSABUFS && chain; ++i, chain=chain->next) {
        WSABUF *b = &buf_o->buffers[i];
        b->buf = (char*)( chain->buffer + chain->misalign );
        _evbuffer_chain_pin(chain, EVBUFFER_MEM_PINNED_W);

        if ((size_t)at_most > chain->off) {
            /* XXXX Cast is safe for now, since win32 has no
               mmaped chains.  But later, we need to have this
               add more WSAbufs if chain->off is greater than
               ULONG_MAX */
            b->len = (unsigned long)chain->off;
            at_most -= chain->off;
        } else {
            b->len = (unsigned long)at_most;
            ++i;
            break;
        }
    }

    buf_o->n_buffers = i;
    _evbuffer_incref(buf);
    if (WSASend(buf_o->fd, buf_o->buffers, i, &bytesSent, 0,
        &ol->overlapped, NULL)) {
        int error = WSAGetLastError();
        if (error != WSA_IO_PENDING) {
            /* An actual error. */
            pin_release(buf_o, EVBUFFER_MEM_PINNED_W);
            evbuffer_unfreeze(buf, 1);
            evbuffer_free(buf); /* decref */
            goto done;
        }
    }

    buf_o->write_in_progress = 1;
    r = 0;

done:
    EVBUFFER_UNLOCK(buf);
    return r;
}

evbuffer_launch_write maps the data held in the evbuffer onto WSABUF structures and then sends it with WSASend. The ol argument of this function is the bufferevent_async's write_overlapped, and what WSASend actually receives is the overlapped member inside write_overlapped. Recall handle_entry:

static void
handle_entry(OVERLAPPED *o, ULONG_PTR completion_key, DWORD nBytes, int ok)
{
    struct event_overlapped *eo =
        EVUTIL_UPCAST(o, struct event_overlapped, overlapped);
    eo->cb(eo, completion_key, nBytes, ok);
}
So when handle_entry processes this completion, it finds the write_overlapped structure, and through it the write_complete callback:

static void
write_complete(struct event_overlapped *eo, ev_uintptr_t key,
    ev_ssize_t nbytes, int ok)
{
    struct bufferevent_async *bev_a = upcast_write(eo);
    struct bufferevent *bev = &bev_a->bev.bev;
    short what = BEV_EVENT_WRITING;
    ev_ssize_t amount_unwritten;

    BEV_LOCK(bev);
    EVUTIL_ASSERT(bev_a->write_in_progress);

    amount_unwritten = bev_a->write_in_progress - nbytes;
    evbuffer_commit_write(bev->output, nbytes);
    bev_a->write_in_progress = 0;

    if (amount_unwritten)
        _bufferevent_decrement_write_buckets(&bev_a->bev,
                                             -amount_unwritten);

    if (!ok)
        bev_async_set_wsa_error(bev, eo);

    if (bev_a->ok) {
        if (ok && nbytes) {
            BEV_RESET_GENERIC_WRITE_TIMEOUT(bev);
            if (evbuffer_get_length(bev->output) <=
                bev->wm_write.low)
                _bufferevent_run_writecb(bev);
            bev_async_consider_writing(bev_a);
        } else if (!ok) {
            what |= BEV_EVENT_ERROR;
            bev_a->ok = 0;
            _bufferevent_run_eventcb(bev, what);
        } else if (!nbytes) {
            what |= BEV_EVENT_EOF;
            bev_a->ok = 0;
            _bufferevent_run_eventcb(bev, what);
        }
    }

    _bufferevent_decref_and_unlock(bev);
}
write_complete first calls evbuffer_commit_write to finalize the sent data in the evbuffer, then decides whether to run the bufferevent's write callback or its event callback; if the evbuffer still holds data, another write is posted.
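
For reference, evbuffer_commit_write roughly does the following (a simplified sketch following buffer_iocp.c, with assertions trimmed):

void
evbuffer_commit_write(struct evbuffer *evbuf, ev_ssize_t nbytes)
{
    struct evbuffer_overlapped *buf = upcast_evbuffer(evbuf);

    EVBUFFER_LOCK(evbuf);
    evbuffer_unfreeze(evbuf, 1);             /* undo the freeze from launch */
    evbuffer_drain(evbuf, nbytes);           /* drop the bytes WSASend consumed */
    pin_release(buf, EVBUFFER_MEM_PINNED_W); /* unpin the sent chains */
    buf->write_in_progress = 0;
    EVBUFFER_UNLOCK(evbuf);
}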

The read and connect paths closely mirror the write path, so they are not analyzed here. Note that in IOCP mode a bufferevent's read and write events are no longer managed by the event_base; they go through the IOCP directly. When a read, write, or connect operation is posted, event_base_add_virtual is called, and when the operation completes, event_base_del_virtual is called. To the event_base these events are virtual, because the IOCP is the one managing them, but the event_base still counts them when tallying outstanding events, as the helper functions below show:

static void
bev_async_del_write(struct bufferevent_async *beva)
{
    struct bufferevent *bev = &beva->bev.bev;

    if (beva->write_added) {
        beva->write_added = 0;
        event_base_del_virtual(bev->ev_base);
    }
}

static void
bev_async_del_read(struct bufferevent_async *beva)
{
    struct bufferevent *bev = &beva->bev.bev;

    if (beva->read_added) {
        beva->read_added = 0;
        event_base_del_virtual(bev->ev_base);
    }
}

static void
bev_async_add_write(struct bufferevent_async *beva)
{
    struct bufferevent *bev = &beva->bev.bev;

    if (!beva->write_added) {
        beva->write_added = 1;
        event_base_add_virtual(bev->ev_base);
    }
}

static void
bev_async_add_read(struct bufferevent_async *beva)
{
    struct bufferevent *bev = &beva->bev.bev;

    if (!beva->read_added) {
        beva->read_added = 1;
        event_base_add_virtual(bev->ev_base);
    }
}

This concludes the libevent source analysis. libevent's HTTP and DNS components are not analyzed here; they are applications built on top of the code already covered, and that material may be added later if time permits.
