[Openvpn-devel,v2,4/7] wintun: ring buffers based I/O

Message ID 1573148729-27339-5-git-send-email-lstipakov@gmail.com
State Superseded
Headers show
Series Wintun support | expand

Commit Message

Lev Stipakov Nov. 7, 2019, 6:45 a.m. UTC
From: Lev Stipakov <lev@openvpn.net>

Implemented according to Wintun documentation
and reference client code.

Wintun uses ring buffers to communicate between
kernel driver and user process. Client allocates
send and receive ring buffers, creates events
and passes it to kernel driver under LocalSystem
privileges.

When data is available for read, wintun modifies
"tail" pointer of send ring and signals via event.
User process reads data from "head" to "tail" and
updates "head" pointer.

When user process is ready to write, it writes
to receive ring, updates "tail" pointer and signals
to kernel via event.

In openvpn code we add send ring's event to event loop.
Before performing io wait, we compare "head" and "tail"
pointers of send ring and if they're different, we skip
io wait and perform read.

This also adds ring buffers support to tcp and udp
server code.

Signed-off-by: Lev Stipakov <lev@openvpn.net>
---
 src/openvpn/forward.c |  42 +++++++++++++++---
 src/openvpn/forward.h |  47 +++++++++++++++++++-
 src/openvpn/mtcp.c    |  28 +++++++++++-
 src/openvpn/mudp.c    |  14 ++++++
 src/openvpn/options.c |   4 +-
 src/openvpn/syshead.h |   1 +
 src/openvpn/tun.c     |  45 +++++++++++++++++++
 src/openvpn/tun.h     | 121 +++++++++++++++++++++++++++++++++++++++++++++++++-
 src/openvpn/win32.c   | 120 +++++++++++++++++++++++++++++++++++++++++++++++++
 src/openvpn/win32.h   |  47 ++++++++++++++++++++
 10 files changed, 458 insertions(+), 11 deletions(-)

Comments

Simon Rozman Nov. 9, 2019, 12:21 a.m. UTC | #1
Hi,
 
> -----Original Message-----
> From: Lev Stipakov [mailto:lstipakov@gmail.com]
> Sent: Thursday, November 7, 2019 6:45 PM
> To: openvpn-devel@lists.sourceforge.net
> Cc: Lev Stipakov <lev@openvpn.net>
> Subject: [Openvpn-devel] [PATCH v2 4/7] wintun: ring buffers based I/O
> 
> diff --git a/src/openvpn/forward.c b/src/openvpn/forward.c index
> 8451706..0be8b6d 100644
> --- a/src/openvpn/forward.c
> +++ b/src/openvpn/forward.c
> @@ -1256,12 +1256,30 @@ read_incoming_tun(struct context *c)
>      perf_push(PERF_READ_IN_TUN);
> 
>      c->c2.buf = c->c2.buffers->read_tun_buf;
> +
>  #ifdef _WIN32
> -    read_tun_buffered(c->c1.tuntap, &c->c2.buf);
> +    if (c->c1.tuntap->wintun)
> +    {
> +        read_wintun(c->c1.tuntap, &c->c2.buf);
> +        if (c->c2.buf.len == -1)
> +        {
> +            register_signal(c, SIGHUP, "tun-abort");
> +            c->persist.restart_sleep_seconds = 1;
> +            msg(M_INFO, "Wintun read error, restarting");
> +            perf_pop();
> +            return;
> +        }
> +    }
> +    else
> +    {
> +        read_tun_buffered(c->c1.tuntap, &c->c2.buf);
>  #else
> -    ASSERT(buf_init(&c->c2.buf, FRAME_HEADROOM(&c->c2.frame)));
> -    ASSERT(buf_safe(&c->c2.buf, MAX_RW_SIZE_TUN(&c->c2.frame)));
> -    c->c2.buf.len = read_tun(c->c1.tuntap, BPTR(&c->c2.buf),
> MAX_RW_SIZE_TUN(&c->c2.frame));
> +        ASSERT(buf_init(&c->c2.buf, FRAME_HEADROOM(&c->c2.frame)));
> +        ASSERT(buf_safe(&c->c2.buf, MAX_RW_SIZE_TUN(&c->c2.frame)));
> +        c->c2.buf.len = read_tun(c->c1.tuntap, BPTR(&c->c2.buf),
> +MAX_RW_SIZE_TUN(&c->c2.frame)); #endif #ifdef _WIN32
> +    }
>  #endif

This #ifdef dance is a bit convoluted. I suggest simplification.

>  #ifdef PACKET_TRUNCATION_CHECK
> @@ -2103,7 +2121,21 @@ io_wait_dowork(struct context *c, const unsigned
> int flags)
>       * Configure event wait based on socket, tuntap flags.
>       */
>      socket_set(c->c2.link_socket, c->c2.event_set, socket, (void
*)&socket_shift,
> NULL);
> -    tun_set(c->c1.tuntap, c->c2.event_set, tuntap, (void *)&tun_shift,
NULL);
> +
> +#ifdef _WIN32
> +    if (c->c1.tuntap && c->c1.tuntap->wintun)
> +    {
> +        /* add ring buffer event */
> +        struct rw_handle rw = {.read = c->c1.tuntap->send_tail_moved };
> +        event_ctl(c->c2.event_set, &rw, EVENT_READ, (void *)&tun_shift);
> +    }
> +    else
> +    {
> +#endif
> +        tun_set(c->c1.tuntap, c->c2.event_set, tuntap, (void
> +*)&tun_shift, NULL); #ifdef _WIN32
> +    }
> +#endif

This #ifdef dance is a bit convoluted. I suggest simplification.

> +static inline int
> +write_wintun(struct tuntap *tt, struct buffer *buf) {
> +    struct tun_ring *ring = tt->receive_ring;
> +    ULONG head = ring->head;
> +    ULONG tail = ring->tail;
> +    ULONG aligned_packet_size;
> +    ULONG buf_space;
> +    struct TUN_PACKET *packet;
> +
> +    if ((head > WINTUN_RING_CAPACITY) || (tail >=
> WINTUN_RING_CAPACITY))

Should be `...head >= WINTUN_RING_CAPACITY...`

Regards,
Simon
Steffan Karger Nov. 9, 2019, 4:23 a.m. UTC | #2
Hi,

Some first-round review comments. I still need to fully grasp the event
mechanism intricacies for a real in-depth review.

As a general remark: could you try to stick to the 80 char line length
limit?

On 07-11-2019 18:45, Lev Stipakov wrote:
> From: Lev Stipakov <lev@openvpn.net>
> 
> Implemented according to Wintun documentation
> and reference client code.
> 
> Wintun uses ring buffers to communicate between
> kernel driver and user process. Client allocates
> send and receive ring buffers, creates events
> and passes it to kernel driver under LocalSystem
> privileges.
> 
> When data is available for read, wintun modifies
> "tail" pointer of send ring and signals via event.
> User process reads data from "head" to "tail" and
> updates "head" pointer.
> 
> When user process is ready to write, it writes
> to receive ring, updates "tail" pointer and signals
> to kernel via event.
> 
> In openvpn code we add send ring's event to event loop.
> Before performing io wait, we compare "head" and "tail"
> pointers of send ring and if they're different, we skip
> io wait and perform read.
> 
> This also adds ring buffers support to tcp and udp
> server code.
> 
> Signed-off-by: Lev Stipakov <lev@openvpn.net>
> ---
>  src/openvpn/forward.c |  42 +++++++++++++++---
>  src/openvpn/forward.h |  47 +++++++++++++++++++-
>  src/openvpn/mtcp.c    |  28 +++++++++++-
>  src/openvpn/mudp.c    |  14 ++++++
>  src/openvpn/options.c |   4 +-
>  src/openvpn/syshead.h |   1 +
>  src/openvpn/tun.c     |  45 +++++++++++++++++++
>  src/openvpn/tun.h     | 121 +++++++++++++++++++++++++++++++++++++++++++++++++-
>  src/openvpn/win32.c   | 120 +++++++++++++++++++++++++++++++++++++++++++++++++
>  src/openvpn/win32.h   |  47 ++++++++++++++++++++
>  10 files changed, 458 insertions(+), 11 deletions(-)
> 
> diff --git a/src/openvpn/forward.c b/src/openvpn/forward.c
> index 8451706..0be8b6d 100644
> --- a/src/openvpn/forward.c
> +++ b/src/openvpn/forward.c
> @@ -1256,12 +1256,30 @@ read_incoming_tun(struct context *c)
>      perf_push(PERF_READ_IN_TUN);
>  
>      c->c2.buf = c->c2.buffers->read_tun_buf;
> +
>  #ifdef _WIN32
> -    read_tun_buffered(c->c1.tuntap, &c->c2.buf);
> +    if (c->c1.tuntap->wintun)
> +    {
> +        read_wintun(c->c1.tuntap, &c->c2.buf);
> +        if (c->c2.buf.len == -1)
> +        {
> +            register_signal(c, SIGHUP, "tun-abort");
> +            c->persist.restart_sleep_seconds = 1;
> +            msg(M_INFO, "Wintun read error, restarting");
> +            perf_pop();
> +            return;
> +        }
> +    }
> +    else
> +    {
> +        read_tun_buffered(c->c1.tuntap, &c->c2.buf);
>  #else
> -    ASSERT(buf_init(&c->c2.buf, FRAME_HEADROOM(&c->c2.frame)));
> -    ASSERT(buf_safe(&c->c2.buf, MAX_RW_SIZE_TUN(&c->c2.frame)));
> -    c->c2.buf.len = read_tun(c->c1.tuntap, BPTR(&c->c2.buf), MAX_RW_SIZE_TUN(&c->c2.frame));
> +        ASSERT(buf_init(&c->c2.buf, FRAME_HEADROOM(&c->c2.frame)));
> +        ASSERT(buf_safe(&c->c2.buf, MAX_RW_SIZE_TUN(&c->c2.frame)));
> +        c->c2.buf.len = read_tun(c->c1.tuntap, BPTR(&c->c2.buf), MAX_RW_SIZE_TUN(&c->c2.frame));
> +#endif
> +#ifdef _WIN32
> +    }
>  #endif

As Simon mentioned too, this code is getting more and more hard to read.
Can we maybe do this in some cleaner way? Maybe add some helper function(s)?

>  
>  #ifdef PACKET_TRUNCATION_CHECK
> @@ -2103,7 +2121,21 @@ io_wait_dowork(struct context *c, const unsigned int flags)
>       * Configure event wait based on socket, tuntap flags.
>       */
>      socket_set(c->c2.link_socket, c->c2.event_set, socket, (void *)&socket_shift, NULL);
> -    tun_set(c->c1.tuntap, c->c2.event_set, tuntap, (void *)&tun_shift, NULL);
> +
> +#ifdef _WIN32
> +    if (c->c1.tuntap && c->c1.tuntap->wintun)
> +    {
> +        /* add ring buffer event */
> +        struct rw_handle rw = {.read = c->c1.tuntap->send_tail_moved };
> +        event_ctl(c->c2.event_set, &rw, EVENT_READ, (void *)&tun_shift);
> +    }
> +    else
> +    {
> +#endif
> +        tun_set(c->c1.tuntap, c->c2.event_set, tuntap, (void *)&tun_shift, NULL);
> +#ifdef _WIN32
> +    }
> +#endif

As above :)

>  
>  #ifdef ENABLE_MANAGEMENT
>      if (management)
> diff --git a/src/openvpn/forward.h b/src/openvpn/forward.h
> index 48202c0..6096fa8 100644
> --- a/src/openvpn/forward.h
> +++ b/src/openvpn/forward.h
> @@ -375,6 +375,19 @@ p2p_iow_flags(const struct context *c)
>      {
>          flags |= IOW_TO_TUN;
>      }
> +#ifdef _WIN32
> +    {
> +        struct tuntap *tt = c->c1.tuntap;
> +        if (tt && tt->wintun)
> +        {
> +            if (tt->send_ring->head == tt->send_ring->tail)
> +            {
> +                /* nothing to read from tun -> remove tun read flag set by IOW_READ */
> +                flags &= ~IOW_READ_TUN;
> +            }
> +        }
> +    }
> +#endif

Looks like this should be a helper function (with a good name that helps
understand what this does).

>      return flags;
>  }
>  
> @@ -403,8 +416,38 @@ io_wait(struct context *c, const unsigned int flags)
>      }
>      else
>      {
> -        /* slow path */
> -        io_wait_dowork(c, flags);
> +#ifdef _WIN32
> +        bool skip_iowait = flags & IOW_TO_TUN;
> +        if (flags & IOW_READ_TUN)
> +        {
> +            /*
> +             * don't read from tun if we have pending write to link,
> +             * since every tun read overwrites to_link buffer filled
> +             * by previous tun read
> +             */
> +            skip_iowait = !(flags & IOW_TO_LINK);
> +        }
> +        if (c->c1.tuntap && c->c1.tuntap->wintun && skip_iowait)
> +        {
> +            unsigned int ret = 0;
> +            if (flags & IOW_TO_TUN)
> +            {
> +                ret |= TUN_WRITE;
> +            }
> +            if (flags & IOW_READ_TUN)
> +            {
> +                ret |= TUN_READ;
> +            }
> +            c->c2.event_set_status = ret;

This chunk is the same as the "fast path" check above. Looks like you
should re-arrange this code so you don't have to duplicate it.

> +        }
> +        else
> +        {
> +#endif
> +            /* slow path */
> +            io_wait_dowork(c, flags);
> +#ifdef _WIN32
> +        }
> +#endif
>      }
>  }
>  
> diff --git a/src/openvpn/mtcp.c b/src/openvpn/mtcp.c
> index abe2059..9ac51c3 100644
> --- a/src/openvpn/mtcp.c
> +++ b/src/openvpn/mtcp.c
> @@ -270,7 +270,33 @@ multi_tcp_wait(const struct context *c,
>  {
>      int status;
>      socket_set_listen_persistent(c->c2.link_socket, mtcp->es, MTCP_SOCKET);
> -    tun_set(c->c1.tuntap, mtcp->es, EVENT_READ, MTCP_TUN, &mtcp->tun_rwflags);
> +
> +#ifdef _WIN32
> +    if (c->c1.tuntap && c->c1.tuntap->wintun)
> +    {
> +        if (c->c1.tuntap->send_ring->head != c->c1.tuntap->send_ring->tail)

Looking this deep into the tuntap struct from another module looks like
a layering violation. Consider adding a helper function like

   bool tuntap_wintun_data_available(const struct tuntap *tt)

or so.

> +        {
> +            /* there is data in wintun ring buffer, read it immediately */
> +            mtcp->esr[0].arg = MTCP_TUN;
> +            mtcp->esr[0].rwflags = EVENT_READ;
> +            mtcp->n_esr = 1;
> +            return 1;

This feels ugly, but I can't find a better approach either. Meh.

> +        }
> +        else
> +        {
> +            /* add ring buffer event */
> +            struct rw_handle rw = { .read = c->c1.tuntap->send_tail_moved };
> +            event_ctl(mtcp->es, &rw, EVENT_READ, MTCP_TUN);
> +        }

Can this not be done by tun_set() below? At first glance, if either
tun_event_handle() returns tt->send_tail_moved for wintun (instaed of
tt->rw_handle), of if tt->rw_handle would be set to send_tail_moved for
wintun. The latter might even allow you to get rid of
tt->send_tail_removed completely.

> +    }
> +    else
> +    {
> +#endif
> +        tun_set(c->c1.tuntap, mtcp->es, EVENT_READ, MTCP_TUN, &mtcp->tun_rwflags);
> +#ifdef _WIN32
> +    }
> +#endif
> +
>  #ifdef ENABLE_MANAGEMENT
>      if (management)
>      {
> diff --git a/src/openvpn/mudp.c b/src/openvpn/mudp.c
> index b7f061a..7715063 100644
> --- a/src/openvpn/mudp.c
> +++ b/src/openvpn/mudp.c
> @@ -279,6 +279,20 @@ p2mp_iow_flags(const struct multi_context *m)
>          flags |= IOW_READ;
>      }
>  
> +#ifdef _WIN32
> +    {
> +        struct tuntap* tt = m->top.c1.tuntap;
> +        if (tt && tt->wintun)
> +        {
> +            if (tt->send_ring->head == tt->send_ring->tail)

This should use the same helper function as suggested above.

> +            {
> +                /* nothing to read from tun -> remove tun read flag set by IOW_READ */
> +                flags &= ~IOW_READ_TUN;
> +            }
> +        }
> +    }
> +#endif
> +
>      return flags;
>  }
>  
> diff --git a/src/openvpn/options.c b/src/openvpn/options.c
> index 5c5033e..1891981 100644
> --- a/src/openvpn/options.c
> +++ b/src/openvpn/options.c
> @@ -2998,10 +2998,10 @@ options_postprocess_mutate_invariant(struct options *options)
>          options->ifconfig_noexec = false;
>      }
>  
> -    /* for wintun kernel doesn't send DHCP requests, so use ipapi to set IP address and netmask */
> +    /* for wintun kernel doesn't send DHCP requests, so use netsh to set IP address and netmask */
>      if (options->wintun)
>      {
> -        options->tuntap_options.ip_win32_type = IPW32_SET_IPAPI;
> +        options->tuntap_options.ip_win32_type = IPW32_SET_NETSH;
>      }
>  
>      remap_redirect_gateway_flags(options);
> diff --git a/src/openvpn/syshead.h b/src/openvpn/syshead.h
> index 899aa59..e9accb5 100644
> --- a/src/openvpn/syshead.h
> +++ b/src/openvpn/syshead.h
> @@ -39,6 +39,7 @@
>  #ifdef _WIN32
>  #include <windows.h>
>  #include <winsock2.h>
> +#include <tlhelp32.h>
>  #define sleep(x) Sleep((x)*1000)
>  #define random rand
>  #define srandom srand
> diff --git a/src/openvpn/tun.c b/src/openvpn/tun.c
> index 37bf065..b909b89 100644
> --- a/src/openvpn/tun.c
> +++ b/src/openvpn/tun.c
> @@ -798,6 +798,18 @@ init_tun_post(struct tuntap *tt,
>      tt->rw_handle.read = tt->reads.overlapped.hEvent;
>      tt->rw_handle.write = tt->writes.overlapped.hEvent;
>      tt->adapter_index = TUN_ADAPTER_INDEX_INVALID;
> +
> +    tt->send_ring = malloc(sizeof(struct tun_ring));
> +    tt->receive_ring = malloc(sizeof(struct tun_ring));
> +    if ((tt->send_ring == NULL) || (tt->receive_ring == NULL))
> +    {
> +        msg(M_FATAL, "Cannot allocate memory for receive ring");
> +    }
> +    ZeroMemory(tt->send_ring, sizeof(struct tun_ring));
> +    ZeroMemory(tt->receive_ring, sizeof(struct tun_ring));
> +
> +    tt->send_tail_moved = CreateEvent(NULL, FALSE, FALSE, NULL);
> +    tt->receive_tail_moved = CreateEvent(NULL, FALSE, FALSE, NULL);
>  #endif
>  }
>  
> @@ -6207,6 +6219,30 @@ open_tun(const char *dev, const char *dev_type, const char *dev_node, struct tun
>              tt->ipapi_context_defined = true;
>          }
>      }
> +
> +    if (tt->wintun)
> +    {
> +        if (tt->options.msg_channel)
> +        {
> +            /* TODO */
> +        }
> +        else
> +        {
> +            if (!impersonate_as_system())
> +            {
> +                msg(M_FATAL, "ERROR:  Failed to impersonate as SYSTEM, make sure process is running under privileged account");
> +            }
> +            if (!register_ring_buffers(tt->hand, tt->send_ring, tt->receive_ring, tt->send_tail_moved, tt->receive_tail_moved))
> +            {
> +                msg(M_FATAL, "ERROR:  Failed to register ring buffers: %lu", GetLastError());
> +            }
> +            if (!RevertToSelf())
> +            {
> +                msg(M_FATAL, "ERROR:  RevertToSelf error: %lu", GetLastError());
> +            }
> +        }
> +    }
> +
>      /*netcmd_semaphore_release ();*/
>      gc_free(&gc);
>  }
> @@ -6345,6 +6381,15 @@ close_tun(struct tuntap *tt, openvpn_net_ctx_t *ctx)
>          free(tt->actual_name);
>      }
>  
> +    CloseHandle(tt->receive_tail_moved);
> +    CloseHandle(tt->send_tail_moved);
> +
> +    free(tt->receive_ring);
> +    free(tt->send_ring);
> +
> +    tt->receive_ring = NULL;
> +    tt->send_ring = NULL;
> +
>      clear_tuntap(tt);
>      free(tt);
>      gc_free(&gc);
> diff --git a/src/openvpn/tun.h b/src/openvpn/tun.h
> index 19cab7e..cbdcce4 100644
> --- a/src/openvpn/tun.h
> +++ b/src/openvpn/tun.h
> @@ -183,6 +183,11 @@ struct tuntap
>  
>      bool wintun; /* true if wintun is used instead of tap-windows6 */
>      int standby_iter;
> +
> +    struct tun_ring *send_ring;
> +    struct tun_ring *receive_ring;
> +    HANDLE send_tail_moved;
> +    HANDLE receive_tail_moved;

These are wintun-specific, right? Should we maybe clarify this by
prefixing them with wintun_ ?

>  #else  /* ifdef _WIN32 */
>      int fd; /* file descriptor for TUN/TAP dev */
>  #endif
> @@ -479,10 +484,124 @@ read_tun_buffered(struct tuntap *tt, struct buffer *buf)
>      return tun_finalize(tt->hand, &tt->reads, buf);
>  }
>  
> +static inline ULONG
> +wintun_ring_packet_align(ULONG size)
> +{
> +    return (size + (WINTUN_PACKET_ALIGN - 1)) & ~(WINTUN_PACKET_ALIGN - 1);
> +}
> +
> +static inline ULONG
> +wintun_ring_wrap(ULONG value)
> +{
> +    return value & (WINTUN_RING_CAPACITY - 1);
> +}
> +
> +static inline void
> +read_wintun(struct tuntap *tt, struct buffer* buf)
> +{
> +    struct tun_ring *ring = tt->send_ring;
> +    ULONG head = ring->head;
> +    ULONG tail = ring->tail;
> +    ULONG content_len;
> +    struct TUN_PACKET *packet;
> +    ULONG aligned_packet_size;
> +
> +    *buf = tt->reads.buf_init;
> +    buf->len = 0;
> +
> +    if ((head >= WINTUN_RING_CAPACITY) || (tail >= WINTUN_RING_CAPACITY))
> +    {
> +        msg(M_INFO, "Wintun: ring capacity exceeded");
> +        buf->len = -1;
> +        return;
> +    }
> +
> +    if (head == tail)
> +    {
> +        /* nothing to read */
> +        return;
> +    }
> +
> +    content_len = wintun_ring_wrap(tail - head);
> +    if (content_len < sizeof(struct TUN_PACKET_HEADER))
> +    {
> +        msg(M_INFO, "Wintun: incomplete packet header in send ring");
> +        buf->len = -1;
> +        return;
> +    }
> +
> +    packet = (struct TUN_PACKET *) &ring->data[head];
> +    if (packet->size > WINTUN_MAX_PACKET_SIZE)
> +    {
> +        msg(M_INFO, "Wintun: packet too big in send ring");
> +        buf->len = -1;
> +        return;
> +    }
> +
> +    aligned_packet_size = wintun_ring_packet_align(sizeof(struct TUN_PACKET_HEADER) + packet->size);
> +    if (aligned_packet_size > content_len)
> +    {
> +        msg(M_INFO, "Wintun: incomplete packet in send ring");
> +        buf->len = -1;
> +        return;
> +    }
> +
> +    buf_write(buf, packet->data, packet->size);
> +
> +    head = wintun_ring_wrap(head + aligned_packet_size);
> +    ring->head = head;
> +}
> +
> +static inline int
> +write_wintun(struct tuntap *tt, struct buffer *buf)
> +{
> +    struct tun_ring *ring = tt->receive_ring;
> +    ULONG head = ring->head;
> +    ULONG tail = ring->tail;
> +    ULONG aligned_packet_size;
> +    ULONG buf_space;
> +    struct TUN_PACKET *packet;
> +
> +    if ((head > WINTUN_RING_CAPACITY) || (tail >= WINTUN_RING_CAPACITY))
> +    {
> +        msg(M_INFO, "Wintun: head/tail value is over capacity");
> +        return -1;
> +    }
> +
> +    aligned_packet_size = wintun_ring_packet_align(sizeof(struct TUN_PACKET_HEADER) + BLEN(buf));
> +    buf_space = wintun_ring_wrap(head - tail - WINTUN_PACKET_ALIGN);
> +    if (aligned_packet_size > buf_space)
> +    {
> +        msg(M_INFO, "Wintun: ring is full");
> +        return 0;
> +    }
> +
> +    /* copy packet size and data into ring */
> +    packet = (struct TUN_PACKET* )&ring->data[tail];
> +    packet->size = BLEN(buf);
> +    memcpy(packet->data, BPTR(buf), BLEN(buf));
> +
> +    /* move ring tail */
> +    ring->tail = wintun_ring_wrap(tail + aligned_packet_size);
> +    if (ring->alertable != 0)
> +    {
> +        SetEvent(tt->receive_tail_moved);
> +    }
> +
> +    return BLEN(buf);
> +}
> +
>  static inline int
>  write_tun_buffered(struct tuntap *tt, struct buffer *buf)
>  {
> -    return tun_write_win32(tt, buf);
> +    if (tt->wintun)
> +    {
> +        return write_wintun(tt, buf);
> +    }
> +    else
> +    {
> +        return tun_write_win32(tt, buf);
> +    }
>  }
>  
>  #else  /* ifdef _WIN32 */
> diff --git a/src/openvpn/win32.c b/src/openvpn/win32.c
> index eb4c030..e9e0258 100644
> --- a/src/openvpn/win32.c
> +++ b/src/openvpn/win32.c
> @@ -1493,4 +1493,124 @@ send_msg_iservice(HANDLE pipe, const void *data, size_t size,
>      return ret;
>  }
>  
> +bool
> +impersonate_as_system()
> +{
> +    HANDLE thread_token, process_snapshot, winlogon_process, winlogon_token, duplicated_token;
> +    PROCESSENTRY32 entry;
> +    BOOL ret;
> +    DWORD pid = 0;
> +    TOKEN_PRIVILEGES privileges;
> +
> +    CLEAR(entry);
> +    CLEAR(privileges);
> +
> +    entry.dwSize = sizeof(PROCESSENTRY32);
> +
> +    privileges.PrivilegeCount = 1;
> +    privileges.Privileges->Attributes = SE_PRIVILEGE_ENABLED;
> +
> +    if (!LookupPrivilegeValue(NULL, SE_DEBUG_NAME, &privileges.Privileges[0].Luid))
> +    {
> +        return false;
> +    }
> +
> +    if (!ImpersonateSelf(SecurityImpersonation))
> +    {
> +        return false;
> +    }
> +
> +    if (!OpenThreadToken(GetCurrentThread(), TOKEN_ADJUST_PRIVILEGES, FALSE, &thread_token))
> +    {
> +        RevertToSelf();
> +        return false;
> +    }
> +    if (!AdjustTokenPrivileges(thread_token, FALSE, &privileges, sizeof(privileges), NULL, NULL))
> +    {
> +        CloseHandle(thread_token);
> +        RevertToSelf();
> +        return false;
> +    }
> +    CloseHandle(thread_token);
> +
> +    process_snapshot = CreateToolhelp32Snapshot(TH32CS_SNAPPROCESS, 0);
> +    if (process_snapshot == INVALID_HANDLE_VALUE)
> +    {
> +        RevertToSelf();
> +        return false;
> +    }
> +    for (ret = Process32First(process_snapshot, &entry); ret; ret = Process32Next(process_snapshot, &entry))
> +    {
> +        if (!_stricmp(entry.szExeFile, "winlogon.exe"))
> +        {
> +            pid = entry.th32ProcessID;
> +            break;
> +        }
> +    }
> +    CloseHandle(process_snapshot);
> +    if (!pid)
> +    {
> +        RevertToSelf();
> +        return false;
> +    }
> +
> +    winlogon_process = OpenProcess(PROCESS_QUERY_INFORMATION, FALSE, pid);
> +    if (!winlogon_process)
> +    {
> +        RevertToSelf();
> +        return false;
> +    }
> +
> +    if (!OpenProcessToken(winlogon_process, TOKEN_IMPERSONATE | TOKEN_DUPLICATE, &winlogon_token))
> +    {
> +        CloseHandle(winlogon_process);
> +        RevertToSelf();
> +        return false;
> +    }
> +    CloseHandle(winlogon_process);
> +
> +    if (!DuplicateToken(winlogon_token, SecurityImpersonation, &duplicated_token))
> +    {
> +        CloseHandle(winlogon_token);
> +        RevertToSelf();
> +        return false;
> +    }
> +    CloseHandle(winlogon_token);
> +
> +    if (!SetThreadToken(NULL, duplicated_token))
> +    {
> +        CloseHandle(duplicated_token);
> +        RevertToSelf();
> +        return false;
> +    }
> +    CloseHandle(duplicated_token);
> +
> +    return true;
> +}
> +
> +bool
> +register_ring_buffers(HANDLE device,
> +                      struct tun_ring* send_ring,
> +                      struct tun_ring* receive_ring,
> +                      HANDLE send_tail_moved,
> +                      HANDLE receive_tail_moved)
> +{
> +    struct tun_register_rings rr;
> +    BOOL res;
> +
> +    ZeroMemory(&rr, sizeof(rr));
> +
> +    rr.send.ring = send_ring;
> +    rr.send.ring_size = sizeof(send_ring->data);
> +    rr.send.tail_moved = send_tail_moved;
> +
> +    rr.receive.ring = receive_ring;
> +    rr.receive.ring_size = sizeof(receive_ring->data);
> +    rr.receive.tail_moved = receive_tail_moved;
> +
> +    res = DeviceIoControl(device, TUN_IOCTL_REGISTER_RINGS, &rr, sizeof(rr), NULL, 0, NULL, NULL);
> +
> +    return res == TRUE;
> +}
> +
>  #endif /* ifdef _WIN32 */
> diff --git a/src/openvpn/win32.h b/src/openvpn/win32.h
> index 4814bbc..007c7d7 100644
> --- a/src/openvpn/win32.h
> +++ b/src/openvpn/win32.h
> @@ -25,6 +25,8 @@
>  #ifndef OPENVPN_WIN32_H
>  #define OPENVPN_WIN32_H
>  
> +#include <winioctl.h>
> +
>  #include "mtu.h"
>  #include "openvpn-msg.h"
>  #include "argv.h"
> @@ -323,5 +325,50 @@ bool send_msg_iservice(HANDLE pipe, const void *data, size_t size,
>  int
>  openvpn_execve(const struct argv *a, const struct env_set *es, const unsigned int flags);
>  
> +#define WINTUN_RING_CAPACITY 0x800000

Can you explain why this is the right size? Document either in a
comment, or in the commit message.

> +#define WINTUN_RING_TRAILING_BYTES 0x10000
> +#define WINTUN_RING_FRAMING_SIZE 12
> +#define WINTUN_MAX_PACKET_SIZE 0xffff
> +#define WINTUN_PACKET_ALIGN 4
> +
> +struct tun_ring
> +{
> +    volatile ULONG head;
> +    volatile ULONG tail;
> +    volatile LONG alertable;
> +    UCHAR data[WINTUN_RING_CAPACITY + WINTUN_RING_TRAILING_BYTES + WINTUN_RING_FRAMING_SIZE];
> +};
> +
> +struct tun_register_rings
> +{
> +    struct
> +    {
> +        ULONG ring_size;
> +        struct tun_ring *ring;
> +        HANDLE tail_moved;
> +    } send, receive;
> +};
> +
> +struct TUN_PACKET_HEADER
> +{
> +    uint32_t size;
> +};
> +
> +struct TUN_PACKET
> +{
> +    uint32_t size;
> +    UCHAR data[WINTUN_MAX_PACKET_SIZE];
> +};
> +
> +#define TUN_IOCTL_REGISTER_RINGS CTL_CODE(51820U, 0x970U, METHOD_BUFFERED, FILE_READ_DATA | FILE_WRITE_DATA)
> +
> +bool impersonate_as_system();
> +
> +bool register_ring_buffers(HANDLE device,
> +                           struct tun_ring *send_ring,
> +                           struct tun_ring *receive_ring,
> +                           HANDLE send_tail_moved,
> +                           HANDLE receive_tail_moved);
> +
>  #endif /* ifndef OPENVPN_WIN32_H */
>  #endif /* ifdef _WIN32 */
> 

-Steffan
Lev Stipakov Nov. 10, 2019, 1:48 a.m. UTC | #3
Hi,

As a general remark: could you try to stick to the 80 char line length
> limit?
>

Ok.


> > +#ifdef _WIN32
> > +    }
> >  #endif
>
> As Simon mentioned too, this code is getting more and more hard to read.
> Can we maybe do this in some cleaner way? Maybe add some helper
> function(s)?
>

Already fixed in v3.

> +#ifdef _WIN32
> > +    }
> > +#endif
>
> As above :)
>

Already fixed in v3.


> > +            if (tt->send_ring->head == tt->send_ring->tail)
> > +            {
> > +                /* nothing to read from tun -> remove tun read flag set
> by IOW_READ */
> > +                flags &= ~IOW_READ_TUN;
> > +            }
> > +        }
> > +    }
> > +#endif
>
> Looks like this should be a helper function (with a good name that helps
> understand what this does).
>


Implemented helper function tuntap_ring_empty().

> +            c->c2.event_set_status = ret;
>
> This chunk is the same as the "fast path" check above. Looks like you
> should re-arrange this code so you don't have to duplicate it.
>

Not quite (already discussed verbally).

> +#ifdef _WIN32
> > +    if (c->c1.tuntap && c->c1.tuntap->wintun)
> > +    {
> > +        if (c->c1.tuntap->send_ring->head !=
> c->c1.tuntap->send_ring->tail)
>
> Looking this deep into the tuntap struct from another module looks like
> a layering violation. Consider adding a helper function like
>

Fixed with the helper from above (tuntap_ring_empty).

> +        {
> > +            /* add ring buffer event */
> > +            struct rw_handle rw = { .read =
> c->c1.tuntap->send_tail_moved };
> > +            event_ctl(mtcp->es, &rw, EVENT_READ, MTCP_TUN);
> > +        }
>
> Can this not be done by tun_set() below? At first glance, if either
> tun_event_handle() returns tt->send_tail_moved for wintun (instaed of
> tt->rw_handle), of if tt->rw_handle would be set to send_tail_moved for
> wintun. The latter might even allow you to get rid of
> tt->send_tail_removed completely.
>

Thanks for the idea.

Indeed, I did some refactoring and reused rw_handle.read/write
instead of send/receive_tail_moved. Also got rid of wintun-specific
event_ctl calls - now it is handled in tun_set().


> > +            if (tt->send_ring->head == tt->send_ring->tail)
>
> This should use the same helper function as suggested above.
>

Done.

> +    struct tun_ring *send_ring;
> > +    struct tun_ring *receive_ring;
> > +    HANDLE send_tail_moved;
> > +    HANDLE receive_tail_moved;
>
> These are wintun-specific, right? Should we maybe clarify this by
> prefixing them with wintun_ ?
>

  *_tail_moved are gone, added "wintun_" prefix to *_ring.


> > +#define WINTUN_RING_CAPACITY 0x800000
>
> Can you explain why this is the right size? Document either in a
> comment, or in the commit message.
>

Those are from WG client, mentioned it in a comment.

-Steffan


V4 is coming!

Patch

diff --git a/src/openvpn/forward.c b/src/openvpn/forward.c
index 8451706..0be8b6d 100644
--- a/src/openvpn/forward.c
+++ b/src/openvpn/forward.c
@@ -1256,12 +1256,30 @@  read_incoming_tun(struct context *c)
     perf_push(PERF_READ_IN_TUN);
 
     c->c2.buf = c->c2.buffers->read_tun_buf;
+
 #ifdef _WIN32
-    read_tun_buffered(c->c1.tuntap, &c->c2.buf);
+    if (c->c1.tuntap->wintun)
+    {
+        read_wintun(c->c1.tuntap, &c->c2.buf);
+        if (c->c2.buf.len == -1)
+        {
+            register_signal(c, SIGHUP, "tun-abort");
+            c->persist.restart_sleep_seconds = 1;
+            msg(M_INFO, "Wintun read error, restarting");
+            perf_pop();
+            return;
+        }
+    }
+    else
+    {
+        read_tun_buffered(c->c1.tuntap, &c->c2.buf);
 #else
-    ASSERT(buf_init(&c->c2.buf, FRAME_HEADROOM(&c->c2.frame)));
-    ASSERT(buf_safe(&c->c2.buf, MAX_RW_SIZE_TUN(&c->c2.frame)));
-    c->c2.buf.len = read_tun(c->c1.tuntap, BPTR(&c->c2.buf), MAX_RW_SIZE_TUN(&c->c2.frame));
+        ASSERT(buf_init(&c->c2.buf, FRAME_HEADROOM(&c->c2.frame)));
+        ASSERT(buf_safe(&c->c2.buf, MAX_RW_SIZE_TUN(&c->c2.frame)));
+        c->c2.buf.len = read_tun(c->c1.tuntap, BPTR(&c->c2.buf), MAX_RW_SIZE_TUN(&c->c2.frame));
+#endif
+#ifdef _WIN32
+    }
 #endif
 
 #ifdef PACKET_TRUNCATION_CHECK
@@ -2103,7 +2121,21 @@  io_wait_dowork(struct context *c, const unsigned int flags)
      * Configure event wait based on socket, tuntap flags.
      */
     socket_set(c->c2.link_socket, c->c2.event_set, socket, (void *)&socket_shift, NULL);
-    tun_set(c->c1.tuntap, c->c2.event_set, tuntap, (void *)&tun_shift, NULL);
+
+#ifdef _WIN32
+    if (c->c1.tuntap && c->c1.tuntap->wintun)
+    {
+        /* add ring buffer event */
+        struct rw_handle rw = {.read = c->c1.tuntap->send_tail_moved };
+        event_ctl(c->c2.event_set, &rw, EVENT_READ, (void *)&tun_shift);
+    }
+    else
+    {
+#endif
+        tun_set(c->c1.tuntap, c->c2.event_set, tuntap, (void *)&tun_shift, NULL);
+#ifdef _WIN32
+    }
+#endif
 
 #ifdef ENABLE_MANAGEMENT
     if (management)
diff --git a/src/openvpn/forward.h b/src/openvpn/forward.h
index 48202c0..6096fa8 100644
--- a/src/openvpn/forward.h
+++ b/src/openvpn/forward.h
@@ -375,6 +375,19 @@  p2p_iow_flags(const struct context *c)
     {
         flags |= IOW_TO_TUN;
     }
+#ifdef _WIN32
+    {
+        struct tuntap *tt = c->c1.tuntap;
+        if (tt && tt->wintun)
+        {
+            if (tt->send_ring->head == tt->send_ring->tail)
+            {
+                /* nothing to read from tun -> remove tun read flag set by IOW_READ */
+                flags &= ~IOW_READ_TUN;
+            }
+        }
+    }
+#endif
     return flags;
 }
 
@@ -403,8 +416,38 @@  io_wait(struct context *c, const unsigned int flags)
     }
     else
     {
-        /* slow path */
-        io_wait_dowork(c, flags);
+#ifdef _WIN32
+        bool skip_iowait = flags & IOW_TO_TUN;
+        if (flags & IOW_READ_TUN)
+        {
+            /*
+             * don't read from tun if we have pending write to link,
+             * since every tun read overwrites to_link buffer filled
+             * by previous tun read
+             */
+            skip_iowait = !(flags & IOW_TO_LINK);
+        }
+        if (c->c1.tuntap && c->c1.tuntap->wintun && skip_iowait)
+        {
+            unsigned int ret = 0;
+            if (flags & IOW_TO_TUN)
+            {
+                ret |= TUN_WRITE;
+            }
+            if (flags & IOW_READ_TUN)
+            {
+                ret |= TUN_READ;
+            }
+            c->c2.event_set_status = ret;
+        }
+        else
+        {
+#endif
+            /* slow path */
+            io_wait_dowork(c, flags);
+#ifdef _WIN32
+        }
+#endif
     }
 }
 
diff --git a/src/openvpn/mtcp.c b/src/openvpn/mtcp.c
index abe2059..9ac51c3 100644
--- a/src/openvpn/mtcp.c
+++ b/src/openvpn/mtcp.c
@@ -270,7 +270,33 @@  multi_tcp_wait(const struct context *c,
 {
     int status;
     socket_set_listen_persistent(c->c2.link_socket, mtcp->es, MTCP_SOCKET);
-    tun_set(c->c1.tuntap, mtcp->es, EVENT_READ, MTCP_TUN, &mtcp->tun_rwflags);
+
+#ifdef _WIN32
+    if (c->c1.tuntap && c->c1.tuntap->wintun)
+    {
+        if (c->c1.tuntap->send_ring->head != c->c1.tuntap->send_ring->tail)
+        {
+            /* there is data in wintun ring buffer, read it immediately */
+            mtcp->esr[0].arg = MTCP_TUN;
+            mtcp->esr[0].rwflags = EVENT_READ;
+            mtcp->n_esr = 1;
+            return 1;
+        }
+        else
+        {
+            /* add ring buffer event */
+            struct rw_handle rw = { .read = c->c1.tuntap->send_tail_moved };
+            event_ctl(mtcp->es, &rw, EVENT_READ, MTCP_TUN);
+        }
+    }
+    else
+    {
+#endif
+        tun_set(c->c1.tuntap, mtcp->es, EVENT_READ, MTCP_TUN, &mtcp->tun_rwflags);
+#ifdef _WIN32
+    }
+#endif
+
 #ifdef ENABLE_MANAGEMENT
     if (management)
     {
diff --git a/src/openvpn/mudp.c b/src/openvpn/mudp.c
index b7f061a..7715063 100644
--- a/src/openvpn/mudp.c
+++ b/src/openvpn/mudp.c
@@ -279,6 +279,20 @@  p2mp_iow_flags(const struct multi_context *m)
         flags |= IOW_READ;
     }
 
+#ifdef _WIN32
+    {
+        struct tuntap* tt = m->top.c1.tuntap;
+        if (tt && tt->wintun)
+        {
+            if (tt->send_ring->head == tt->send_ring->tail)
+            {
+                /* nothing to read from tun -> remove tun read flag set by IOW_READ */
+                flags &= ~IOW_READ_TUN;
+            }
+        }
+    }
+#endif
+
     return flags;
 }
 
diff --git a/src/openvpn/options.c b/src/openvpn/options.c
index 5c5033e..1891981 100644
--- a/src/openvpn/options.c
+++ b/src/openvpn/options.c
@@ -2998,10 +2998,10 @@  options_postprocess_mutate_invariant(struct options *options)
         options->ifconfig_noexec = false;
     }
 
-    /* for wintun kernel doesn't send DHCP requests, so use ipapi to set IP address and netmask */
+    /* for wintun kernel doesn't send DHCP requests, so use netsh to set IP address and netmask */
     if (options->wintun)
     {
-        options->tuntap_options.ip_win32_type = IPW32_SET_IPAPI;
+        options->tuntap_options.ip_win32_type = IPW32_SET_NETSH;
     }
 
     remap_redirect_gateway_flags(options);
diff --git a/src/openvpn/syshead.h b/src/openvpn/syshead.h
index 899aa59..e9accb5 100644
--- a/src/openvpn/syshead.h
+++ b/src/openvpn/syshead.h
@@ -39,6 +39,7 @@ 
 #ifdef _WIN32
 #include <windows.h>
 #include <winsock2.h>
+#include <tlhelp32.h>
 #define sleep(x) Sleep((x)*1000)
 #define random rand
 #define srandom srand
diff --git a/src/openvpn/tun.c b/src/openvpn/tun.c
index 37bf065..b909b89 100644
--- a/src/openvpn/tun.c
+++ b/src/openvpn/tun.c
@@ -798,6 +798,18 @@  init_tun_post(struct tuntap *tt,
     tt->rw_handle.read = tt->reads.overlapped.hEvent;
     tt->rw_handle.write = tt->writes.overlapped.hEvent;
     tt->adapter_index = TUN_ADAPTER_INDEX_INVALID;
+
+    tt->send_ring = malloc(sizeof(struct tun_ring));
+    tt->receive_ring = malloc(sizeof(struct tun_ring));
+    if ((tt->send_ring == NULL) || (tt->receive_ring == NULL))
+    {
+        msg(M_FATAL, "Cannot allocate memory for receive ring");
+    }
+    ZeroMemory(tt->send_ring, sizeof(struct tun_ring));
+    ZeroMemory(tt->receive_ring, sizeof(struct tun_ring));
+
+    tt->send_tail_moved = CreateEvent(NULL, FALSE, FALSE, NULL);
+    tt->receive_tail_moved = CreateEvent(NULL, FALSE, FALSE, NULL);
 #endif
 }
 
@@ -6207,6 +6219,30 @@  open_tun(const char *dev, const char *dev_type, const char *dev_node, struct tun
             tt->ipapi_context_defined = true;
         }
     }
+
+    if (tt->wintun)
+    {
+        if (tt->options.msg_channel)
+        {
+            /* TODO */
+        }
+        else
+        {
+            if (!impersonate_as_system())
+            {
+                msg(M_FATAL, "ERROR:  Failed to impersonate as SYSTEM, make sure process is running under privileged account");
+            }
+            if (!register_ring_buffers(tt->hand, tt->send_ring, tt->receive_ring, tt->send_tail_moved, tt->receive_tail_moved))
+            {
+                msg(M_FATAL, "ERROR:  Failed to register ring buffers: %lu", GetLastError());
+            }
+            if (!RevertToSelf())
+            {
+                msg(M_FATAL, "ERROR:  RevertToSelf error: %lu", GetLastError());
+            }
+        }
+    }
+
     /*netcmd_semaphore_release ();*/
     gc_free(&gc);
 }
@@ -6345,6 +6381,15 @@  close_tun(struct tuntap *tt, openvpn_net_ctx_t *ctx)
         free(tt->actual_name);
     }
 
+    CloseHandle(tt->receive_tail_moved);
+    CloseHandle(tt->send_tail_moved);
+
+    free(tt->receive_ring);
+    free(tt->send_ring);
+
+    tt->receive_ring = NULL;
+    tt->send_ring = NULL;
+
     clear_tuntap(tt);
     free(tt);
     gc_free(&gc);
diff --git a/src/openvpn/tun.h b/src/openvpn/tun.h
index 19cab7e..cbdcce4 100644
--- a/src/openvpn/tun.h
+++ b/src/openvpn/tun.h
@@ -183,6 +183,11 @@  struct tuntap
 
     bool wintun; /* true if wintun is used instead of tap-windows6 */
     int standby_iter;
+
+    struct tun_ring *send_ring;
+    struct tun_ring *receive_ring;
+    HANDLE send_tail_moved;
+    HANDLE receive_tail_moved;
 #else  /* ifdef _WIN32 */
     int fd; /* file descriptor for TUN/TAP dev */
 #endif
@@ -479,10 +484,124 @@  read_tun_buffered(struct tuntap *tt, struct buffer *buf)
     return tun_finalize(tt->hand, &tt->reads, buf);
 }
 
+static inline ULONG
+wintun_ring_packet_align(ULONG size)
+{
+    return (size + (WINTUN_PACKET_ALIGN - 1)) & ~(WINTUN_PACKET_ALIGN - 1);
+}
+
+static inline ULONG
+wintun_ring_wrap(ULONG value)
+{
+    return value & (WINTUN_RING_CAPACITY - 1);
+}
+
+static inline void
+read_wintun(struct tuntap *tt, struct buffer* buf)
+{
+    struct tun_ring *ring = tt->send_ring;
+    ULONG head = ring->head;
+    ULONG tail = ring->tail;
+    ULONG content_len;
+    struct TUN_PACKET *packet;
+    ULONG aligned_packet_size;
+
+    *buf = tt->reads.buf_init;
+    buf->len = 0;
+
+    if ((head >= WINTUN_RING_CAPACITY) || (tail >= WINTUN_RING_CAPACITY))
+    {
+        msg(M_INFO, "Wintun: ring capacity exceeded");
+        buf->len = -1;
+        return;
+    }
+
+    if (head == tail)
+    {
+        /* nothing to read */
+        return;
+    }
+
+    content_len = wintun_ring_wrap(tail - head);
+    if (content_len < sizeof(struct TUN_PACKET_HEADER))
+    {
+        msg(M_INFO, "Wintun: incomplete packet header in send ring");
+        buf->len = -1;
+        return;
+    }
+
+    packet = (struct TUN_PACKET *) &ring->data[head];
+    if (packet->size > WINTUN_MAX_PACKET_SIZE)
+    {
+        msg(M_INFO, "Wintun: packet too big in send ring");
+        buf->len = -1;
+        return;
+    }
+
+    aligned_packet_size = wintun_ring_packet_align(sizeof(struct TUN_PACKET_HEADER) + packet->size);
+    if (aligned_packet_size > content_len)
+    {
+        msg(M_INFO, "Wintun: incomplete packet in send ring");
+        buf->len = -1;
+        return;
+    }
+
+    buf_write(buf, packet->data, packet->size);
+
+    head = wintun_ring_wrap(head + aligned_packet_size);
+    ring->head = head;
+}
+
+static inline int
+write_wintun(struct tuntap *tt, struct buffer *buf)
+{
+    struct tun_ring *ring = tt->receive_ring;
+    ULONG head = ring->head;
+    ULONG tail = ring->tail;
+    ULONG aligned_packet_size;
+    ULONG buf_space;
+    struct TUN_PACKET *packet;
+
+    if ((head > WINTUN_RING_CAPACITY) || (tail >= WINTUN_RING_CAPACITY))
+    {
+        msg(M_INFO, "Wintun: head/tail value is over capacity");
+        return -1;
+    }
+
+    aligned_packet_size = wintun_ring_packet_align(sizeof(struct TUN_PACKET_HEADER) + BLEN(buf));
+    buf_space = wintun_ring_wrap(head - tail - WINTUN_PACKET_ALIGN);
+    if (aligned_packet_size > buf_space)
+    {
+        msg(M_INFO, "Wintun: ring is full");
+        return 0;
+    }
+
+    /* copy packet size and data into ring */
+    packet = (struct TUN_PACKET* )&ring->data[tail];
+    packet->size = BLEN(buf);
+    memcpy(packet->data, BPTR(buf), BLEN(buf));
+
+    /* move ring tail */
+    ring->tail = wintun_ring_wrap(tail + aligned_packet_size);
+    if (ring->alertable != 0)
+    {
+        SetEvent(tt->receive_tail_moved);
+    }
+
+    return BLEN(buf);
+}
+
 static inline int
 write_tun_buffered(struct tuntap *tt, struct buffer *buf)
 {
-    return tun_write_win32(tt, buf);
+    if (tt->wintun)
+    {
+        return write_wintun(tt, buf);
+    }
+    else
+    {
+        return tun_write_win32(tt, buf);
+    }
 }
 
 #else  /* ifdef _WIN32 */
diff --git a/src/openvpn/win32.c b/src/openvpn/win32.c
index eb4c030..e9e0258 100644
--- a/src/openvpn/win32.c
+++ b/src/openvpn/win32.c
@@ -1493,4 +1493,124 @@  send_msg_iservice(HANDLE pipe, const void *data, size_t size,
     return ret;
 }
 
+bool
+impersonate_as_system()
+{
+    HANDLE thread_token, process_snapshot, winlogon_process, winlogon_token, duplicated_token;
+    PROCESSENTRY32 entry;
+    BOOL ret;
+    DWORD pid = 0;
+    TOKEN_PRIVILEGES privileges;
+
+    CLEAR(entry);
+    CLEAR(privileges);
+
+    entry.dwSize = sizeof(PROCESSENTRY32);
+
+    privileges.PrivilegeCount = 1;
+    privileges.Privileges->Attributes = SE_PRIVILEGE_ENABLED;
+
+    if (!LookupPrivilegeValue(NULL, SE_DEBUG_NAME, &privileges.Privileges[0].Luid))
+    {
+        return false;
+    }
+
+    if (!ImpersonateSelf(SecurityImpersonation))
+    {
+        return false;
+    }
+
+    if (!OpenThreadToken(GetCurrentThread(), TOKEN_ADJUST_PRIVILEGES, FALSE, &thread_token))
+    {
+        RevertToSelf();
+        return false;
+    }
+    if (!AdjustTokenPrivileges(thread_token, FALSE, &privileges, sizeof(privileges), NULL, NULL))
+    {
+        CloseHandle(thread_token);
+        RevertToSelf();
+        return false;
+    }
+    CloseHandle(thread_token);
+
+    process_snapshot = CreateToolhelp32Snapshot(TH32CS_SNAPPROCESS, 0);
+    if (process_snapshot == INVALID_HANDLE_VALUE)
+    {
+        RevertToSelf();
+        return false;
+    }
+    for (ret = Process32First(process_snapshot, &entry); ret; ret = Process32Next(process_snapshot, &entry))
+    {
+        if (!_stricmp(entry.szExeFile, "winlogon.exe"))
+        {
+            pid = entry.th32ProcessID;
+            break;
+        }
+    }
+    CloseHandle(process_snapshot);
+    if (!pid)
+    {
+        RevertToSelf();
+        return false;
+    }
+
+    winlogon_process = OpenProcess(PROCESS_QUERY_INFORMATION, FALSE, pid);
+    if (!winlogon_process)
+    {
+        RevertToSelf();
+        return false;
+    }
+
+    if (!OpenProcessToken(winlogon_process, TOKEN_IMPERSONATE | TOKEN_DUPLICATE, &winlogon_token))
+    {
+        CloseHandle(winlogon_process);
+        RevertToSelf();
+        return false;
+    }
+    CloseHandle(winlogon_process);
+
+    if (!DuplicateToken(winlogon_token, SecurityImpersonation, &duplicated_token))
+    {
+        CloseHandle(winlogon_token);
+        RevertToSelf();
+        return false;
+    }
+    CloseHandle(winlogon_token);
+
+    if (!SetThreadToken(NULL, duplicated_token))
+    {
+        CloseHandle(duplicated_token);
+        RevertToSelf();
+        return false;
+    }
+    CloseHandle(duplicated_token);
+
+    return true;
+}
+
+bool
+register_ring_buffers(HANDLE device,
+                      struct tun_ring* send_ring,
+                      struct tun_ring* receive_ring,
+                      HANDLE send_tail_moved,
+                      HANDLE receive_tail_moved)
+{
+    struct tun_register_rings rr;
+    BOOL res;
+
+    ZeroMemory(&rr, sizeof(rr));
+
+    rr.send.ring = send_ring;
+    rr.send.ring_size = sizeof(send_ring->data);
+    rr.send.tail_moved = send_tail_moved;
+
+    rr.receive.ring = receive_ring;
+    rr.receive.ring_size = sizeof(receive_ring->data);
+    rr.receive.tail_moved = receive_tail_moved;
+
+    res = DeviceIoControl(device, TUN_IOCTL_REGISTER_RINGS, &rr, sizeof(rr), NULL, 0, NULL, NULL);
+
+    return res == TRUE;
+}
+
 #endif /* ifdef _WIN32 */
diff --git a/src/openvpn/win32.h b/src/openvpn/win32.h
index 4814bbc..007c7d7 100644
--- a/src/openvpn/win32.h
+++ b/src/openvpn/win32.h
@@ -25,6 +25,8 @@ 
 #ifndef OPENVPN_WIN32_H
 #define OPENVPN_WIN32_H
 
+#include <winioctl.h>
+
 #include "mtu.h"
 #include "openvpn-msg.h"
 #include "argv.h"
@@ -323,5 +325,50 @@  bool send_msg_iservice(HANDLE pipe, const void *data, size_t size,
 int
 openvpn_execve(const struct argv *a, const struct env_set *es, const unsigned int flags);
 
+#define WINTUN_RING_CAPACITY 0x800000
+#define WINTUN_RING_TRAILING_BYTES 0x10000
+#define WINTUN_RING_FRAMING_SIZE 12
+#define WINTUN_MAX_PACKET_SIZE 0xffff
+#define WINTUN_PACKET_ALIGN 4
+
+struct tun_ring
+{
+    volatile ULONG head;
+    volatile ULONG tail;
+    volatile LONG alertable;
+    UCHAR data[WINTUN_RING_CAPACITY + WINTUN_RING_TRAILING_BYTES + WINTUN_RING_FRAMING_SIZE];
+};
+
+struct tun_register_rings
+{
+    struct
+    {
+        ULONG ring_size;
+        struct tun_ring *ring;
+        HANDLE tail_moved;
+    } send, receive;
+};
+
+struct TUN_PACKET_HEADER
+{
+    uint32_t size;
+};
+
+struct TUN_PACKET
+{
+    uint32_t size;
+    UCHAR data[WINTUN_MAX_PACKET_SIZE];
+};
+
+#define TUN_IOCTL_REGISTER_RINGS CTL_CODE(51820U, 0x970U, METHOD_BUFFERED, FILE_READ_DATA | FILE_WRITE_DATA)
+
+bool impersonate_as_system();
+
+bool register_ring_buffers(HANDLE device,
+                           struct tun_ring *send_ring,
+                           struct tun_ring *receive_ring,
+                           HANDLE send_tail_moved,
+                           HANDLE receive_tail_moved);
+
 #endif /* ifndef OPENVPN_WIN32_H */
 #endif /* ifdef _WIN32 */