[Openvpn-devel,1/2] ovpn-dco: introduce FreeBSD data-channel offload support

Message ID 20220808143424.65924-3-kprovost@netgate.com
State Changes Requested
Headers show
Series [Openvpn-devel,1/2] ovpn-dco: introduce FreeBSD data-channel offload support | expand

Commit Message

Kristof Provost via Openvpn-devel Aug. 8, 2022, 4:34 a.m. UTC
From: Kristof Provost <kp@FreeBSD.org>

Implement data-channel offload for FreeBSD. The implementation and flow
is very similar to that of the Linux DCO support.

Signed-off-by: Kristof Provost <kprovost@netgate.com>
---
 configure.ac                   |   5 +
 src/openvpn/Makefile.am        |   1 +
 src/openvpn/dco_freebsd.c      | 636 +++++++++++++++++++++++++++++++++
 src/openvpn/dco_freebsd.h      |  59 +++
 src/openvpn/dco_internal.h     |   1 +
 src/openvpn/forward.c          |   8 +-
 src/openvpn/mtcp.c             |   4 +-
 src/openvpn/mudp.c             |   2 +-
 src/openvpn/multi.c            |   4 +-
 src/openvpn/options.c          |   8 +-
 src/openvpn/options.h          |   4 +-
 src/openvpn/ovpn_dco_freebsd.h |  64 ++++
 src/openvpn/tun.c              |  38 +-
 src/openvpn/tun.h              |   6 +
 14 files changed, 808 insertions(+), 32 deletions(-)
 create mode 100644 src/openvpn/dco_freebsd.c
 create mode 100644 src/openvpn/dco_freebsd.h
 create mode 100644 src/openvpn/ovpn_dco_freebsd.h

Comments

Gert Doering Aug. 10, 2022, 6:39 a.m. UTC | #1
(Re-sending, as the first one had a .png attached which exceeded what
sourceforge is willing to forward)

Hi,

as promised, here's test results and code review.

Test results:

 - running openvpn over TCP gives me a kernel panic - this is not so
   nice... (see attached .png from the vmware console) - userland seems
   to assume "kernel can do TCP", kernel panics on "if !udp, panic()"
   (so intentional panic, not corruption panic).

   This is on freebsd git FreeBSD 14.0-CURRENT #1 main-n257130-c0665d5c824

 - running openvpn over UDP has issues with fragmentation - almost all
   t_client tests that *do* use DCO fail the "big ping" test

	run IPv4 ping tests (want_ok), 64 byte packets...
	run IPv4 ping tests (want_ok), 1440 byte packets...
	run IPv4 ping tests (want_ok), 3000 byte packets...

	FAIL: IPv4 ping test (3000 bytes) failed, but should succeed.
	run IPv6 ping tests (want_ok), 64 byte packets...
	run IPv6 ping tests (want_ok), 1440 byte packets...
	run IPv6 ping tests (want_ok), 3000 byte packets...

	FAIL: IPv6 ping test (3000 bytes) failed, but should succeed.

   this is "with no special mtu related options to OpenVPN", so the
   tun interface is MTU 1500, and the pings are created by

        fping -b 3000 -C 20 -p 250 -q -C 5 -p 400 10.194.5.1 10.194.0.1
        fping6 -b 3000 -C 20 -p 250 -q -C 5 -p 400 fd00:abcd:194:5::1 fd00:abcd:194:0::1

   (testing the host "on the p2p link" and "something behind a --route
    pushed by the server")

   I have not investigated exactly what happens at which point yet, but
   with Linux DCO, we had a similar effect where a fragmented *inside*
   packet ("ICMP") carried over a flag in the sk_buff that resulted in
   the kernel refusing to fragment the resulting *outside* packet.

   (Inside MTU 1500 -> fping -s 3000 -> 3 "inside" fragments, 
    1500+1500+68, adding openvpn overhead -> packet > 1500 -> 
    external fragmentation needed for the first two)

   Inside tcpdump (tun0):

17:28:37.342050 IP (tos 0x0, ttl 64, id 50758, offset 0, flags [+], proto ICMP (1), length 1500)
    10.194.5.222 > 10.194.5.1: ICMP echo request, id 2433, seq 0, length 1480
17:28:37.342142 IP (tos 0x0, ttl 64, id 50758, offset 1480, flags [+], proto ICMP (1), length 1500)
    10.194.5.222 > 10.194.5.1: ip-proto-1
17:28:37.342270 IP (tos 0x0, ttl 64, id 50758, offset 2960, flags [none], proto ICMP (1), length 68)
    10.194.5.222 > 10.194.5.1: ip-proto-1

   Outside tcpdump (em0):

17:28:37.342103 IP (tos 0x0, ttl 64, id 50759, offset 0, flags [+], proto UDP (17), length 1500)
    194.97.140.5.15738 > 199.102.77.82.51197: UDP, bad length 1528 > 1472
17:28:37.342123 IP (tos 0x0, ttl 64, id 50759, offset 1480, flags [none], proto UDP (17), length 76)
    194.97.140.5 > 199.102.77.82: ip-proto-17
17:28:37.342185 IP (tos 0x0, ttl 64, id 50760, offset 0, flags [+], proto UDP (17), length 1500)
    194.97.140.5.15738 > 199.102.77.82.51197: UDP, bad length 1528 > 1472
17:28:37.342228 IP (tos 0x0, ttl 64, id 50760, offset 1480, flags [none], proto UDP (17), length 76)
    194.97.140.5 > 199.102.77.82: ip-proto-17
17:28:37.342300 IP (tos 0x0, ttl 64, id 50761, offset 0, flags [none], proto UDP (17), length 132)
    194.97.140.5.15738 > 199.102.77.82.51197: UDP, length 104


   ... this *does* look correct, but there is never a reply from the other
   end, so something is not right.  Either inside or outside.

   This happens with IPv4 or IPv6 UDP outside, and IPv4 or IPv6 inside.


- tcpdump'ing on the DCO interface gave me complains from the kernel
  about locking on ctrl-c'ing

    Aug 10 17:28:41 fbsd14 kernel: lock order bpf global lock -> iflib ctx lock attempted at:
    Aug 10 17:28:41 fbsd14 kernel: #0 0xffffffff80c5c3dd at witness_checkorder+0xbfd
    Aug 10 17:28:41 fbsd14 kernel: #1 0xffffffff80bf5303 at _sx_xlock+0x63
    Aug 10 17:28:41 fbsd14 kernel: #2 0xffffffff80d3874f at iflib_if_ioctl+0x2df
    Aug 10 17:28:41 fbsd14 kernel: #3 0xffffffff80d19b5e at if_setflag+0xde
    Aug 10 17:28:41 fbsd14 kernel: #4 0xffffffff80d19a2a at ifpromisc+0x2a
    Aug 10 17:28:41 fbsd14 kernel: #5 0xffffffff80d0e72b at bpf_detachd_locked+0x27b
    Aug 10 17:28:41 fbsd14 kernel: #6 0xffffffff80d111f7 at bpf_dtor+0x87
    Aug 10 17:28:41 fbsd14 kernel: #7 0xffffffff80a7818b at devfs_destroy_cdevpriv+0xab
    Aug 10 17:28:41 fbsd14 kernel: #8 0xffffffff80a7bda4 at devfs_close_f+0x64
    Aug 10 17:28:41 fbsd14 kernel: #9 0xffffffff80b876eb at _fdrop+0x1b
    Aug 10 17:28:41 fbsd14 kernel: #10 0xffffffff80b8af3b at closef+0x1db
    Aug 10 17:28:41 fbsd14 kernel: #11 0xffffffff80b8ec97 at closefp_impl+0x77
    Aug 10 17:28:41 fbsd14 kernel: #12 0xffffffff810c733e at amd64_syscall+0x12e
    Aug 10 17:28:41 fbsd14 kernel: #13 0xffffffff8109ae0b at fast_syscall_common+0xf8

   ... so while this is outside "openvpn source code patches", it's 
   still something that smells like it needs to be addressed.


Now, coding style ;-) - as promised, I went through the code for things
that need to be done in a certain way in OpenVPN land, due to agreed
convention... inline (things I do not comment could go in "as is").


On Mon, Aug 08, 2022 at 04:34:23PM +0200, Kristof Provost via Openvpn-devel wrote:
> diff --git a/src/openvpn/dco_freebsd.c b/src/openvpn/dco_freebsd.c
> new file mode 100644
> index 00000000..a8a53fe3
> --- /dev/null
> +++ b/src/openvpn/dco_freebsd.c
> @@ -0,0 +1,636 @@
[..]
> +static nvlist_t *
> +sockaddr_to_nvlist(const struct sockaddr *sa)
> +{
[..]
> +        default:
> +            abort();

please use "ASSERT(0);" here.  It will do the same thing, but if ever
hit, it will print a __FILE__, __LINE__ message to the log, so it's
easier for us to see *where* it triggered a "this must never happen"
condition.


> +int
> +dco_new_peer(dco_context_t *dco, unsigned int peerid, int sd,
> +             struct sockaddr *localaddr, struct sockaddr *remoteaddr,
> +             struct in_addr *remote_in4, struct in6_addr *remote_in6)
> +{
> +    struct ifdrv drv;
> +    nvlist_t *nvl;
> +    int ret;
> +
> +    nvl = nvlist_create(0);

We use C99 these days, so this could be written as 

> +    nvlist_t *nvl = nvlist_create(0);

but this is a matter of personal preference.  Both are fine, just wanted
to point out that the option exists.

> +
> +    nvlist_add_number(nvl, "fd", sd);
> +    nvlist_add_number(nvl, "peerid", peerid);
> +
> +    bzero(&drv, sizeof(drv));
> +    snprintf(drv.ifd_name, IFNAMSIZ, "%s", dco->ifname);
> +    drv.ifd_cmd = OVPN_NEW_PEER;
> +    drv.ifd_data = nvlist_pack(nvl, &drv.ifd_len);
> +
> +    ret = ioctl(dco->fd, SIOCSDRVSPEC, &drv);
> +    free(drv.ifd_data);

What happens should nvlist_pack() fail here?  Manpage says it returns
NULL, will the ioctl() handle this gracefully, or will something crash?

(If the ioctl() returns something like EINVAL in this case, perfectly fine)

> +    nvlist_destroy(nvl);
> +    if (ret)
> +    {
> +        msg(D_DCO, "Failed to create new peer %d", errno);
> +        return ret;
> +    }
> +
> +    return 0;
> +}

Most functions in OpenVPN that do use a "ret" variable would look
more like this:

> +    if (ret)
> +    {
> +        msg(D_DCO, "Failed to create new peer %d", errno);
> +    }
> +
> +    return ret;
> +}

we do have all sorts of variants (*sigh*), but since you also used both 
versions, let's go for this one.

> +        msg(D_DCO, "Failed to create new peer %d", errno);

this can be written as

> +        msg(D_DCO|M_ERRNO, "Failed to create new peer");

M_ERRNO will automatically append the strerror(errno) stuff.

D_DCO is "--verb 3", so that message won't be visible in normal operation
- if we consider this an error, then it should be

> +        msg(M_WARN|M_ERRNO, "Failed to create new peer");

instead (or if it's "fatal error, give up" -> M_ERR).


Now, I'm not sure if accessing errno after the free() call is guaranteed
to be save - so maybe move the msg() call up?


> +bool
> +ovpn_dco_init(int mode, dco_context_t *dco)
> +{
> +    if (open_fd(dco) < 0)
> +    {
> +        msg(D_DCO, "Failed to open socket");

Same here - if you want this to be heard, D_DCO should be M_WARN|M_ERRNO.

If this is just informational, because the caller will make sufficient
noise, D_DCO|M_ERRNO is good (but you might want to point out that this 
is the "DCO socket" that couldn't be opened).

> +static int
> +create_interface(struct tuntap *tt, const char *dev)
> +{
> +    int ret;
> +    struct ifreq ifr;
> +
> +    bzero(&ifr, sizeof(ifr));

There is a convenience macro here, CLEAR(ifr)  (which does exactly
this - #define CLEAR(x) memset(&(x), 0, sizeof(x)).

> +    /* Create ovpnx first, then rename it. */
> +    snprintf(ifr.ifr_name, IFNAMSIZ, "ovpn");
> +    ret = ioctl(tt->dco.fd, SIOCIFCREATE2, &ifr);
> +    if (ret)
> +    {
> +        msg(D_DCO, "Failed to create interface %s: %d", ifr.ifr_name, errno);
> +        return ret;
> +    }

-> M_ERRNO

> +    /* Rename */
> +    if (!strcmp(dev, "tun"))
> +    {
> +        ifr.ifr_data = "ovpn";
> +    }
> +    else
> +    {
> +        ifr.ifr_data = (char *)dev;
> +    }

I'm not sure I understand this code part.  When would dev be "tun"
here, triggering a rename to "ovpn"?

The call chain leading to this is (now)

 - tun.c:open_tun_dco_generic()
   - open_tun_dco()
     - create_interface(tt,dev)

and tun_dco_generic() should never pass in a bare "tun" name (because
in that case, it would iterate "tun0, tun1, tun2").

Is this a safeguard against "bad things will happen in kernel land if
we use the unqualified name of another driver"?

> +    ret = ioctl(tt->dco.fd, SIOCSIFNAME, &ifr);
> +    if (ret)
> +    {
> +        /* Delete the created interface again. */
> +        (void)ioctl(tt->dco.fd, SIOCIFDESTROY, &ifr);
> +        msg(D_DCO, "Failed to create interface %s: %d", ifr.ifr_data, errno);

-> M_ERRNO

(there's more, but no need to point them out individually :-) )

> +static int
> +remove_interface(struct tuntap *tt)
> +{
> +    int ret;
> +    struct ifreq ifr;
> +
> +    bzero(&ifr, sizeof(ifr));
> +    snprintf(ifr.ifr_name, IFNAMSIZ, "%s", tt->dco.ifname);
> +
> +    ret = ioctl(tt->dco.fd, SIOCIFDESTROY, &ifr);
> +    if (ret)
> +    {
> +        msg(D_DCO, "Failed to remove interface %s: %d", ifr.ifr_name, errno);
> +        return ret;
> +    }
> +
> +    tt->dco.ifname[0] = 0;
> +
> +    return 0;
> +}

Maybe always clear the ifname, and do "return ret;" here too?

> +static int
> +start_tun(dco_context_t *dco)
> +{
> +    struct ifdrv drv;
> +    int ret;
> +
> +    bzero(&drv, sizeof(drv));

CLEAR(drv);


> +int
> +dco_do_read(dco_context_t *dco)
> +{
> +    struct ifdrv drv;
> +    uint8_t buf[4096];
> +    nvlist_t *nvl;
> +    const uint8_t *pkt;
> +    size_t pktlen;
> +    int ret;
> +
> +    /* Flush any pending data from the pipe. */
> +    (void)read(dco->pipefd[1], buf, sizeof(buf));
> +
> +    bzero(&drv, sizeof(drv));
> +    snprintf(drv.ifd_name, IFNAMSIZ, "%s", dco->ifname);
> +    drv.ifd_cmd = OVPN_GET_PKT;
> +    drv.ifd_data = buf;
> +    drv.ifd_len = sizeof(buf);
> +
> +    ret = ioctl(dco->fd, SIOCGDRVSPEC, &drv);
> +    if (ret)
> +    {
> +        msg(D_DCO, "Failed to read control packet %d", errno);
> +        return errno;

Documentation (dco.h) says "0 on success or a negative error code otherwise",
so this needs to be "return -errno;" to behave the same as dco_linux

> +    }
> +
> +    nvl = nvlist_unpack(buf, drv.ifd_len, 0);
> +    if (!nvl)
> +    {
> +        msg(D_DCO, "Failed to unpack nvlist");
> +        return EINVAL;

-EINVAL


> +int
> +dco_do_write(dco_context_t *dco, int peer_id, struct buffer *buf)
> +{
> +    struct ifdrv drv;
> +    nvlist_t *nvl;
> +    int ret;
> +
> +    nvl = nvlist_create(0);
> +
> +    nvlist_add_binary(nvl, "packet", BSTR(buf), BLEN(buf));
> +    nvlist_add_number(nvl, "peerid", peer_id);
> +
> +    bzero(&drv, sizeof(drv));
> +    snprintf(drv.ifd_name, IFNAMSIZ, "%s", dco->ifname);
> +    drv.ifd_cmd = OVPN_SEND_PKT;
> +    drv.ifd_data = nvlist_pack(nvl, &drv.ifd_len);
> +
> +    ret = ioctl(dco->fd, SIOCSDRVSPEC, &drv);
> +    free(drv.ifd_data);
> +    nvlist_destroy(nvl);
> +    if (ret)
> +    {
> +        msg(D_DCO, "Failed to send control packet %d", errno);
> +        return ret;
> +    }
> +
> +    return BLEN(buf);
> +}

The return code of dco_do_write() is not clearly defined in dco.h
(Antonio, looking at you...!) but the linux version seems to do
"negative is error, positive is buffer length written", so this should
be fine.  Might need a change to "return -errno".

> +bool
> +dco_available(int msglevel)
> +{
> +    struct if_clonereq ifcr;
> +    char *buf = NULL;
> +    int fd;
> +    int ret;
> +    bool available = false;
> +
> +    /* Attempt to load the module. Ignore errors, because it might already be
> +     * loaded, or built into the kernel. */
> +    (void)kldload("if_ovpn");
> +
> +    fd = socket(AF_INET, SOCK_DGRAM | SOCK_CLOEXEC, 0);
> +    if (fd < 0)
> +    {
> +        return false;
> +    }
> +
> +    bzero(&ifcr, sizeof(ifcr));

CLEAR(ifcr)

> +    ret = ioctl(fd, SIOCIFGCLONERS, &ifcr);
> +    if (ret != 0)
> +    {
> +        goto out;
> +    }
> +
> +    buf = malloc(ifcr.ifcr_total * IFNAMSIZ);
> +
> +    ifcr.ifcr_count = ifcr.ifcr_total;
> +    ifcr.ifcr_buffer = buf;
> +    ret = ioctl(fd, SIOCIFGCLONERS, &ifcr);
> +    if (ret != 0)
> +    {
> +        goto out;
> +    }
> +
> +    for (int i = 0; i < ifcr.ifcr_total; i++)
> +    {
> +        if (strcmp(buf + (i * IFNAMSIZ), "openvpn") == 0)

This looks fairly magic to me - am I reading this right that the first
call returns a number of "somethings", then we malloc(something * IFNAMSIZ),
and the second call returns a list of strings that describe "something",
and if there is one of them named "openvpn", we know that DCO is 
available?

Why is it called "openvpn", not "ovpn" (if_ovpn, man ovpn)?

(A few lines of comment on what SIOCIFGCLONERS does - or which manpage
to look at - would be appreciated here :-) )

> +const char *
> +dco_get_supported_ciphers()
> +{
> +    return "none:AES-256-GCM:AES-128-GCM:CHACHA20-POLY1305";
> +}

Is "none" indeed supported?  I find that useful to test, but Arne and
Antonio refuse(d) to support it in linux DCO :-)

> diff --git a/src/openvpn/forward.c b/src/openvpn/forward.c
> index 55c939c4..14ad24fa 100644
> --- a/src/openvpn/forward.c
> +++ b/src/openvpn/forward.c

... these are all very straightforward "LINUX" -> "LINUX || FREEBSD" :-)

> diff --git a/src/openvpn/tun.c b/src/openvpn/tun.c
> index 54f7d72c..e84d5a27 100644
> --- a/src/openvpn/tun.c
> +++ b/src/openvpn/tun.c
[..]
> @@ -2917,20 +2917,24 @@ void
>  open_tun(const char *dev, const char *dev_type, const char *dev_node, struct tuntap *tt,
>           openvpn_net_ctx_t *ctx)
>  {
> -    open_tun_generic(dev, dev_type, dev_node, tt);
> -
> -    if (tt->fd >= 0 && tt->type == DEV_TYPE_TUN)
> -    {
> -        int i = IFF_POINTOPOINT | IFF_MULTICAST;
> +    if (tun_dco_enabled(tt)) {
> +        open_tun_dco_generic(dev, dev_type, tt, ctx);
> +    } else {
> +        open_tun_generic(dev, dev_type, dev_node, tt);

This one will upset the whitespace dragon and it will eat all of
the patch :-)

OpenVPN bracket convention requires this to be:

+    if (tun_dco_enabled(tt))
+    {
+        open_tun_dco_generic(dev, dev_type, tt, ctx);
+    }
+    else
+    {
+        open_tun_generic(dev, dev_type, dev_node, tt);

(I run uncrustify on merge, so this would get fixed - but it would
be good to have it properly formatted right away)

>  
> -        if (ioctl(tt->fd, TUNSIFMODE, &i) < 0)
> +        if (tt->fd >= 0 && tt->type == DEV_TYPE_TUN)
>          {
> -            msg(M_WARN | M_ERRNO, "ioctl(TUNSIFMODE)");
> -        }
> -        i = 1;
> -        if (ioctl(tt->fd, TUNSIFHEAD, &i) < 0)
> -        {

... this looks like a massive diff, but actually it's just an indenting
change that git diff does not present nicely (checked with "git show -w").


That's it for today :-)

gert
Kristof Provost via Openvpn-devel Aug. 10, 2022, 8 a.m. UTC | #2
Thanks!

On 10 Aug 2022, at 18:32, Gert Doering wrote:
> Test results:
>
>  - running openvpn over TCP gives me a kernel panic - this is not so
>    nice... (see attached .png from the vmware console) - userland seems
>    to assume "kernel can do TCP", kernel panics on "if !udp, panic()"
>    (so intentional panic, not corruption panic).
>
>    This is on freebsd git FreeBSD 14.0-CURRENT #1 main-n257130-c0665d5c824
>
I think I see what the problem is. I’m writing up a test case to reproduce it and will commit a fix soon. Hopefully tomorrow.

>  - running openvpn over UDP has issues with fragmentation - almost all
>    t_client tests that *do* use DCO fail the "big ping" test
>
I’m going to have to do some digging here.

> - tcpdump'ing on the DCO interface gave me complains from the kernel
>   about locking on ctrl-c'ing
>
>     Aug 10 17:28:41 fbsd14 kernel: lock order bpf global lock -> iflib ctx lock attempted at:
>     Aug 10 17:28:41 fbsd14 kernel: #0 0xffffffff80c5c3dd at witness_checkorder+0xbfd
>     Aug 10 17:28:41 fbsd14 kernel: #1 0xffffffff80bf5303 at _sx_xlock+0x63
>     Aug 10 17:28:41 fbsd14 kernel: #2 0xffffffff80d3874f at iflib_if_ioctl+0x2df
>     Aug 10 17:28:41 fbsd14 kernel: #3 0xffffffff80d19b5e at if_setflag+0xde
>     Aug 10 17:28:41 fbsd14 kernel: #4 0xffffffff80d19a2a at ifpromisc+0x2a
>     Aug 10 17:28:41 fbsd14 kernel: #5 0xffffffff80d0e72b at bpf_detachd_locked+0x27b
>     Aug 10 17:28:41 fbsd14 kernel: #6 0xffffffff80d111f7 at bpf_dtor+0x87
>     Aug 10 17:28:41 fbsd14 kernel: #7 0xffffffff80a7818b at devfs_destroy_cdevpriv+0xab
>     Aug 10 17:28:41 fbsd14 kernel: #8 0xffffffff80a7bda4 at devfs_close_f+0x64
>     Aug 10 17:28:41 fbsd14 kernel: #9 0xffffffff80b876eb at _fdrop+0x1b
>     Aug 10 17:28:41 fbsd14 kernel: #10 0xffffffff80b8af3b at closef+0x1db
>     Aug 10 17:28:41 fbsd14 kernel: #11 0xffffffff80b8ec97 at closefp_impl+0x77
>     Aug 10 17:28:41 fbsd14 kernel: #12 0xffffffff810c733e at amd64_syscall+0x12e
>     Aug 10 17:28:41 fbsd14 kernel: #13 0xffffffff8109ae0b at fast_syscall_common+0xf8
>
>    ... so while this is outside "openvpn source code patches", it's
>    still something that smells like it needs to be addressed.
>
That appears to be unrelated to DCO. It’s a problem with iflib, and I think I’ve seen it before.
Out of scope for DCO, at least.

> Now, coding style ;-) - as promised, I went through the code for things
> that need to be done in a certain way in OpenVPN land, due to agreed
> convention... inline (things I do not comment could go in "as is").
>
I’ll fix those once I’ve dealt with the panic and fragmentation issues.

Thanks for the review.

Kristof
Kristof Provost via Openvpn-devel Aug. 11, 2022, 5:25 a.m. UTC | #3
On 10 Aug 2022, at 18:32, Gert Doering wrote:
> as promised, here's test results and code review.
>
> Test results:
>
>  - running openvpn over TCP gives me a kernel panic - this is not so
>    nice... (see attached .png from the vmware console) - userland seems
>    to assume "kernel can do TCP", kernel panics on "if !udp, panic()"
>    (so intentional panic, not corruption panic).
>
>    This is on freebsd git FreeBSD 14.0-CURRENT #1 main-n257130-c0665d5c824
>
I’ve pushed a fix for this panic in fd6b3bede5a5c210f327e5c9bd3e415ee905048b.
I simply didn’t think that user space might give us a non-UDP socket, so checking for that and rejecting the peer in that case fixes the panic. Thanks for finding that.

>  - running openvpn over UDP has issues with fragmentation - almost all
>    t_client tests that *do* use DCO fail the "big ping" test
>
>     run IPv4 ping tests (want_ok), 64 byte packets...
>     run IPv4 ping tests (want_ok), 1440 byte packets...
>     run IPv4 ping tests (want_ok), 3000 byte packets...
>
>     FAIL: IPv4 ping test (3000 bytes) failed, but should succeed.
>     run IPv6 ping tests (want_ok), 64 byte packets...
>     run IPv6 ping tests (want_ok), 1440 byte packets...
>     run IPv6 ping tests (want_ok), 3000 byte packets...
>
>     FAIL: IPv6 ping test (3000 bytes) failed, but should succeed.
>
>    this is "with no special mtu related options to OpenVPN", so the
>    tun interface is MTU 1500, and the pings are created by
>
>         fping -b 3000 -C 20 -p 250 -q -C 5 -p 400 10.194.5.1 10.194.0.1
>         fping6 -b 3000 -C 20 -p 250 -q -C 5 -p 400 fd00:abcd:194:5::1 fd00:abcd:194:0::1
>
>    (testing the host "on the p2p link" and "something behind a --route
>     pushed by the server")
>
I’m failing to reproduce this in my own test setup. With a simple ‘ping -c 2000’ on a 1500 MTU tunnel I see two layers of fragmentation (the ICMP packet gets fragmented before it goes through the tunnel, and then the outer UDP packet gets fragmented as well), as I’d expect. Traffic flows in both directions. (i.e. the ping succeeds).

Is there any documentation on how t_client.sh works? It’s not at all clear to me what goes in t_client.in (e.g. in OPENVPN_BASE_P2P).

>    I have not investigated exactly what happens at which point yet, but
>    with Linux DCO, we had a similar effect where a fragmented *inside*
>    packet ("ICMP") carried over a flag in the sk_buff that resulted in
>    the kernel refusing to fragment the resulting *outside* packet.
>
>    (Inside MTU 1500 -> fping -s 3000 -> 3 "inside" fragments,
>     1500+1500+68, adding openvpn overhead -> packet > 1500 ->
>     external fragmentation needed for the first two)
>
>    Inside tcpdump (tun0):
>
> 17:28:37.342050 IP (tos 0x0, ttl 64, id 50758, offset 0, flags [+], proto ICMP (1), length 1500)
>     10.194.5.222 > 10.194.5.1: ICMP echo request, id 2433, seq 0, length 1480
> 17:28:37.342142 IP (tos 0x0, ttl 64, id 50758, offset 1480, flags [+], proto ICMP (1), length 1500)
>     10.194.5.222 > 10.194.5.1: ip-proto-1
> 17:28:37.342270 IP (tos 0x0, ttl 64, id 50758, offset 2960, flags [none], proto ICMP (1), length 68)
>     10.194.5.222 > 10.194.5.1: ip-proto-1
>
>    Outside tcpdump (em0):
>
> 17:28:37.342103 IP (tos 0x0, ttl 64, id 50759, offset 0, flags [+], proto UDP (17), length 1500)
>     194.97.140.5.15738 > 199.102.77.82.51197: UDP, bad length 1528 > 1472
> 17:28:37.342123 IP (tos 0x0, ttl 64, id 50759, offset 1480, flags [none], proto UDP (17), length 76)
>     194.97.140.5 > 199.102.77.82: ip-proto-17
> 17:28:37.342185 IP (tos 0x0, ttl 64, id 50760, offset 0, flags [+], proto UDP (17), length 1500)
>     194.97.140.5.15738 > 199.102.77.82.51197: UDP, bad length 1528 > 1472
> 17:28:37.342228 IP (tos 0x0, ttl 64, id 50760, offset 1480, flags [none], proto UDP (17), length 76)
>     194.97.140.5 > 199.102.77.82: ip-proto-17
> 17:28:37.342300 IP (tos 0x0, ttl 64, id 50761, offset 0, flags [none], proto UDP (17), length 132)
>     194.97.140.5.15738 > 199.102.77.82.51197: UDP, length 104
>
>
>    ... this *does* look correct, but there is never a reply from the other
>    end, so something is not right.  Either inside or outside.
>
Is your server in this scenario a FreeBSD DCO machine, or Linux DCO or something else?
In my setup the server is non-DCO FreeBSD.

Best regards,
Kristof
Arne Schwabe Aug. 11, 2022, 7 a.m. UTC | #4
Am 11.08.22 um 17:25 schrieb Kristof Provost via Openvpn-devel:
> On 10 Aug 2022, at 18:32, Gert Doering wrote:
>> as promised, here's test results and code review.
>>
>> Test results:
>>
>>   - running openvpn over TCP gives me a kernel panic - this is not so
>>     nice... (see attached .png from the vmware console) - userland seems
>>     to assume "kernel can do TCP", kernel panics on "if !udp, panic()"
>>     (so intentional panic, not corruption panic).
>>
>>     This is on freebsd git FreeBSD 14.0-CURRENT #1 main-n257130-c0665d5c824
>>
> I’ve pushed a fix for this panic in fd6b3bede5a5c210f327e5c9bd3e415ee905048b.
> I simply didn’t think that user space might give us a non-UDP socket, so checking for that and rejecting the peer in that case fixes the panic. Thanks for finding that.

You should probably also modify the check for dco incompatible in 
OpenVPN so that using TCP disables DCO on FreeBSD.

Arne
Gert Doering Aug. 11, 2022, 8:06 a.m. UTC | #5
Hi,

On Thu, Aug 11, 2022 at 07:00:25PM +0200, Arne Schwabe wrote:
> > I???ve pushed a fix for this panic in fd6b3bede5a5c210f327e5c9bd3e415ee905048b.
> > I simply didn???t think that user space might give us a non-UDP socket, so checking for that and rejecting the peer in that case fixes the panic. Thanks for finding that.
> 
> You should probably also modify the check for dco incompatible in 
> OpenVPN so that using TCP disables DCO on FreeBSD.

Indeed.  One of the next patches will bring "per platform options check",
which would be a good place for the #ifdef TARGET_FREEBSD bits.

I'll see that I can pick part of that patch and apply it tomorrow
(the patch as posted refers to symbols that are brought in by the
"main" win-dco patch, but half of it should apply & make sense as is)

gert
Gert Doering Aug. 11, 2022, 11:11 a.m. UTC | #6
Hi,

On Thu, Aug 11, 2022 at 05:25:05PM +0200, Kristof Provost via Openvpn-devel wrote:
> On 10 Aug 2022, at 18:32, Gert Doering wrote:
> > as promised, here's test results and code review.
> >
> > Test results:
> >
> >  - running openvpn over TCP gives me a kernel panic - this is not so
> >    nice... (see attached .png from the vmware console) - userland seems
> >    to assume "kernel can do TCP", kernel panics on "if !udp, panic()"
> >    (so intentional panic, not corruption panic).
> >
> >    This is on freebsd git FreeBSD 14.0-CURRENT #1 main-n257130-c0665d5c824
>
> I???ve pushed a fix for this panic in fd6b3bede5a5c210f327e5c9bd3e415ee905048b.
> I simply didn???t think that user space might give us a non-UDP
> socket, so checking for that and rejecting the peer in that case
> fixes the panic. Thanks for finding that.

Always happy to crash other people's software :-)

That said, this seems to imply that FreeBSD DCO is UDP-only today - do
you have plans for TCP as well, or is this something "someone else needs
to contribute"?

For the time being we should add an #ifdef TARGET_FREEBSD check to
dco.c:dco_check_option_conflict_platform() and disable DCO for
--proto tcp.

I thought the function would only get introduced in patch 20/25 of
the big DCO series, but that patch is a bit out of sync - the function
is already there, with Linux specific checks.


> >  - running openvpn over UDP has issues with fragmentation - almost all
> >    t_client tests that *do* use DCO fail the "big ping" test
> >
> >     FAIL: IPv4 ping test (3000 bytes) failed, but should succeed.
> >     FAIL: IPv6 ping test (3000 bytes) failed, but should succeed.
> >
> >    this is "with no special mtu related options to OpenVPN", so the
> >    tun interface is MTU 1500, and the pings are created by
> >
> >         fping -b 3000 -C 20 -p 250 -q -C 5 -p 400 10.194.5.1 10.194.0.1
> >         fping6 -b 3000 -C 20 -p 250 -q -C 5 -p 400 fd00:abcd:194:5::1 fd00:abcd:194:0::1
> >
> >    (testing the host "on the p2p link" and "something behind a --route
> >     pushed by the server")
>
> I???m failing to reproduce this in my own test setup. With a
> simple ???ping -c 2000??? on a 1500 MTU tunnel I see two layers of
> fragmentation (the ICMP packet gets fragmented before it goes through
> the tunnel, and then the outer UDP packet gets fragmented as well),
> as I???d expect. Traffic flows in both directions. (i.e. the ping
> succeeds).

OK.  This is generally good, so I need to find out who is eating my
packets.  In my case, I have 3 fragments, which might or might not
cause a difference - and there's stuff in between (FreeBSD 14 client
via FreeBSD 12 pf(4) firewall to FreeBSD 11 [or so] server).

> Is there any documentation on how t_client.sh works? It???s not at all 
> clear to me what goes in t_client.in (e.g. in OPENVPN_BASE_P2P).

Basically, t_client is a very basic vehicle to fire up certain
OpenVPN configurations, verify that the expect IPv4 and IPv6 addresses
show up on "a new interface", and that pings to certain destinations
work, with varying packet sizes.  It's a bit of a sledge hammer, but
it's good at finding stuff.

My configs have something like this:

CA_CERT="$KEYBASE/ca.crt"
CLIENT_KEY="$KEYBASE/cron2-gentoo-i386.key"
CLIENT_CERT="$KEYBASE/cron2-gentoo-i386.crt"

REMOTE=conn-test-server.openvpn.org

# this is the basic "TLS client" stuff with ca/cert/key
# remove the comp-lzo for DCO tests
OPENVPN_BASE_P2MP="--client --ca $CA_CERT \
        --cert $CLIENT_CERT --key $CLIENT_KEY \
        --remote-cert-tls server --comp-lzo --verb 3 $OPENVPN_EXTRA_CF"

#
# Test 2: UDP / p2mp tun
#   specify IPv4+IPv6 addresses expected from server and ping targets
#
RUN_TITLE_2="udp / p2pm / top net30"
OPENVPN_CONF_2="$OPENVPN_BASE_P2MP --dev tun --proto udp4 --remote $REMOTE --port 51194"
EXPECT_IFCONFIG4_2="10.194.2.54"
EXPECT_IFCONFIG6_2="fd00:abcd:194:2::100c"
PING4_HOSTS_2="10.194.2.1 10.194.0.1"
PING6_HOSTS_2="fd00:abcd:194:2::1 fd00:abcd:194:0::1"

... so this would connect to a specific UDP instance on the test server,
expect to be assigned these addresses (= this needs adjustment after
the first run, when you know what addresses will be assigned, and it
assumes --ip-pool-persist on the server), and then pings things.

I usually ping "the server IP on that tun" and "a loopback IP on the
server, which is only reachable by means of 'push route...'", so I
can see if push/pull and route installation works.


If you're interested, I can unicast you the full file I use for
my DCO client tests, with different ciphers, some instances with
compression (= does it properly fall back?), some with http/socks
proxy, etc., plus a set of client+ca certificates to run against
our test server.


[..]
> >    ... this *does* look correct, but there is never a reply from the other
> >    end, so something is not right.  Either inside or outside.
>
> Is your server in this scenario a FreeBSD DCO machine, or Linux DCO or something else?
> In my setup the server is non-DCO FreeBSD.

FreeBSD 14 DCO -> FreeBSD 13.0 pf(4) firewall -> Internet -> FreeBSD 11.4

If I --disable-dco, the fragmented ping works, so "generally" the
path handles fragments.  Something is different with DCO, so I'll do
more tcpdumping tomorrow and see if I can find out.

gert
Kristof Provost via Openvpn-devel Aug. 12, 2022, 3:40 a.m. UTC | #7
Remarks inline. Mostly ACK.

I’ll post an updated version soon. (I’ve also added a check for UDP in dco_check_option_conflict_ce().

On 10 Aug 2022, at 18:32, Gert Doering wrote:
> On Mon, Aug 08, 2022 at 04:34:23PM +0200, Kristof Provost via Openvpn-devel wrote:
>> diff --git a/src/openvpn/dco_freebsd.c b/src/openvpn/dco_freebsd.c
>> new file mode 100644
>> index 00000000..a8a53fe3
>> --- /dev/null
>> +++ b/src/openvpn/dco_freebsd.c
>> @@ -0,0 +1,636 @@
> [..]
>> +static nvlist_t *
>> +sockaddr_to_nvlist(const struct sockaddr *sa)
>> +{
> [..]
>> +        default:
>> +            abort();
>
> please use "ASSERT(0);" here.  It will do the same thing, but if ever
> hit, it will print a __FILE__, __LINE__ message to the log, so it's
> easier for us to see *where* it triggered a "this must never happen"
> condition.
>
Fixed.

>> +int
>> +dco_new_peer(dco_context_t *dco, unsigned int peerid, int sd,
>> +             struct sockaddr *localaddr, struct sockaddr *remoteaddr,
>> +             struct in_addr *remote_in4, struct in6_addr *remote_in6)
>> +{
>> +    struct ifdrv drv;
>> +    nvlist_t *nvl;
>> +    int ret;
>> +
>> +    nvl = nvlist_create(0);
>
> We use C99 these days, so this could be written as
>
>> +    nvlist_t *nvl = nvlist_create(0);
>
> but this is a matter of personal preference.  Both are fine, just wanted
> to point out that the option exists.
>
That’s FreeBSD’s style(9) showing here. If it’s acceptable to OpenVPN I’m just going to keep it as-is.

>> +
>> +    nvlist_add_number(nvl, "fd", sd);
>> +    nvlist_add_number(nvl, "peerid", peerid);
>> +
>> +    bzero(&drv, sizeof(drv));
>> +    snprintf(drv.ifd_name, IFNAMSIZ, "%s", dco->ifname);
>> +    drv.ifd_cmd = OVPN_NEW_PEER;
>> +    drv.ifd_data = nvlist_pack(nvl, &drv.ifd_len);
>> +
>> +    ret = ioctl(dco->fd, SIOCSDRVSPEC, &drv);
>> +    free(drv.ifd_data);
>
> What happens should nvlist_pack() fail here?  Manpage says it returns
> NULL, will the ioctl() handle this gracefully, or will something crash?
>
That’d really only happen if a memory allocation fails, and given modern OS’s overcommit behaviour that’s also not going to happen. (If you manage to run the system out of memory malloc() will still give you virtual memory, but the OOM killer will come to visit when you get around to actually using it.)

> (If the ioctl() returns something like EINVAL in this case, perfectly fine)
>
The copyin() in the kernel will fail, and that’d return an error, without crashing things.
So, yes, that’s what’d happen.

>> +    nvlist_destroy(nvl);
>> +    if (ret)
>> +    {
>> +        msg(D_DCO, "Failed to create new peer %d", errno);
>> +        return ret;
>> +    }
>> +
>> +    return 0;
>> +}
>
> Most functions in OpenVPN that do use a "ret" variable would look
> more like this:
>
>> +    if (ret)
>> +    {
>> +        msg(D_DCO, "Failed to create new peer %d", errno);
>> +    }
>> +
>> +    return ret;
>> +}
>
> we do have all sorts of variants (*sigh*), but since you also used both
> versions, let's go for this one.
>
Fixed.

>> +        msg(D_DCO, "Failed to create new peer %d", errno);
>
> this can be written as
>
>> +        msg(D_DCO|M_ERRNO, "Failed to create new peer");
>
> M_ERRNO will automatically append the strerror(errno) stuff.
>
Oh neat. I’ve made that change.

> D_DCO is "--verb 3", so that message won't be visible in normal operation
> - if we consider this an error, then it should be
>
>> +        msg(M_WARN|M_ERRNO, "Failed to create new peer");
>
> instead (or if it's "fatal error, give up" -> M_ERR).
>
These are generally fatal errors, yes.
There are follow-up error messages from the calling code that do get logged, but they’re usually less clear about what the actual problem is. I’ll see about changing D_DCO -> M_ERR or M_WARN.

> Now, I'm not sure if accessing errno after the free() call is guaranteed
> to be save - so maybe move the msg() call up?
>
I’m pretty sure free() doesn’t mess with errno (as in, I’ve looked at the jemalloc code), but it’s better to not have to check.
Happily, thanks to your earlier suggestion it’s much easier to just move the free after the log, so I’ll do that.

>> +bool
>> +ovpn_dco_init(int mode, dco_context_t *dco)
>> +{
>> +    if (open_fd(dco) < 0)
>> +    {
>> +        msg(D_DCO, "Failed to open socket");
>
> Same here - if you want this to be heard, D_DCO should be M_WARN|M_ERRNO.
>
> If this is just informational, because the caller will make sufficient
> noise, D_DCO|M_ERRNO is good (but you might want to point out that this
> is the "DCO socket" that couldn't be opened).
>
Yeah, Iv’e gone through and made them all a bit louder. It’s all failure conditions, so being overly verbose there isn’t really an issue.

>> +static int
>> +create_interface(struct tuntap *tt, const char *dev)
>> +{
>> +    int ret;
>> +    struct ifreq ifr;
>> +
>> +    bzero(&ifr, sizeof(ifr));
>
> There is a convenience macro here, CLEAR(ifr)  (which does exactly
> this - #define CLEAR(x) memset(&(x), 0, sizeof(x)).
>
Neat. Changed.

>> +    /* Create ovpnx first, then rename it. */
>> +    snprintf(ifr.ifr_name, IFNAMSIZ, "ovpn");
>> +    ret = ioctl(tt->dco.fd, SIOCIFCREATE2, &ifr);
>> +    if (ret)
>> +    {
>> +        msg(D_DCO, "Failed to create interface %s: %d", ifr.ifr_name, errno);
>> +        return ret;
>> +    }
>
> -> M_ERRNO
Done.
>
>> +    /* Rename */
>> +    if (!strcmp(dev, "tun"))
>> +    {
>> +        ifr.ifr_data = "ovpn";
>> +    }
>> +    else
>> +    {
>> +        ifr.ifr_data = (char *)dev;
>> +    }
>
> I'm not sure I understand this code part.  When would dev be "tun"
> here, triggering a rename to "ovpn"?
>
> The call chain leading to this is (now)
>
>  - tun.c:open_tun_dco_generic()
>    - open_tun_dco()
>      - create_interface(tt,dev)
>
> and tun_dco_generic() should never pass in a bare "tun" name (because
> in that case, it would iterate "tun0, tun1, tun2").
>
> Is this a safeguard against "bad things will happen in kernel land if
> we use the unqualified name of another driver"?
>
No, it’s that OpenVPN lets you pick the interface name, and FreeBSD figures out which driver it needs to call based on the interface name.
So as long as users decide they want the interface to be ovpnX it’ll be fine, but if they want ‘vpn0’ or even something silly like ‘vlan1’ the kernel isn’t going to create an if_ovpn interface.

So we create opvnX first, and then rename it to whatever the user wants it to be.

>> +    ret = ioctl(tt->dco.fd, SIOCSIFNAME, &ifr);
>> +    if (ret)
>> +    {
>> +        /* Delete the created interface again. */
>> +        (void)ioctl(tt->dco.fd, SIOCIFDESTROY, &ifr);
>> +        msg(D_DCO, "Failed to create interface %s: %d", ifr.ifr_data, errno);
>
> -> M_ERRNO
>
> (there's more, but no need to point them out individually :-) )
>
Fixed.

>> +static int
>> +remove_interface(struct tuntap *tt)
>> +{
>> +    int ret;
>> +    struct ifreq ifr;
>> +
>> +    bzero(&ifr, sizeof(ifr));
>> +    snprintf(ifr.ifr_name, IFNAMSIZ, "%s", tt->dco.ifname);
>> +
>> +    ret = ioctl(tt->dco.fd, SIOCIFDESTROY, &ifr);
>> +    if (ret)
>> +    {
>> +        msg(D_DCO, "Failed to remove interface %s: %d", ifr.ifr_name, errno);
>> +        return ret;
>> +    }
>> +
>> +    tt->dco.ifname[0] = 0;
>> +
>> +    return 0;
>> +}
>
> Maybe always clear the ifname, and do "return ret;" here too?
>
Good idea.

>> +static int
>> +start_tun(dco_context_t *dco)
>> +{
>> +    struct ifdrv drv;
>> +    int ret;
>> +
>> +    bzero(&drv, sizeof(drv));
>
> CLEAR(drv);
>
Fixed.

>> +int
>> +dco_do_read(dco_context_t *dco)
>> +{
>> +    struct ifdrv drv;
>> +    uint8_t buf[4096];
>> +    nvlist_t *nvl;
>> +    const uint8_t *pkt;
>> +    size_t pktlen;
>> +    int ret;
>> +
>> +    /* Flush any pending data from the pipe. */
>> +    (void)read(dco->pipefd[1], buf, sizeof(buf));
>> +
>> +    bzero(&drv, sizeof(drv));
>> +    snprintf(drv.ifd_name, IFNAMSIZ, "%s", dco->ifname);
>> +    drv.ifd_cmd = OVPN_GET_PKT;
>> +    drv.ifd_data = buf;
>> +    drv.ifd_len = sizeof(buf);
>> +
>> +    ret = ioctl(dco->fd, SIOCGDRVSPEC, &drv);
>> +    if (ret)
>> +    {
>> +        msg(D_DCO, "Failed to read control packet %d", errno);
>> +        return errno;
>
> Documentation (dco.h) says "0 on success or a negative error code otherwise",
> so this needs to be "return -errno;" to behave the same as dco_linux
>
Fixed.
>> +    }
>> +
>> +    nvl = nvlist_unpack(buf, drv.ifd_len, 0);
>> +    if (!nvl)
>> +    {
>> +        msg(D_DCO, "Failed to unpack nvlist");
>> +        return EINVAL;
>
> -EINVAL
>
Fixed.

>> +int
>> +dco_do_write(dco_context_t *dco, int peer_id, struct buffer *buf)
>> +{
>> +    struct ifdrv drv;
>> +    nvlist_t *nvl;
>> +    int ret;
>> +
>> +    nvl = nvlist_create(0);
>> +
>> +    nvlist_add_binary(nvl, "packet", BSTR(buf), BLEN(buf));
>> +    nvlist_add_number(nvl, "peerid", peer_id);
>> +
>> +    bzero(&drv, sizeof(drv));
>> +    snprintf(drv.ifd_name, IFNAMSIZ, "%s", dco->ifname);
>> +    drv.ifd_cmd = OVPN_SEND_PKT;
>> +    drv.ifd_data = nvlist_pack(nvl, &drv.ifd_len);
>> +
>> +    ret = ioctl(dco->fd, SIOCSDRVSPEC, &drv);
>> +    free(drv.ifd_data);
>> +    nvlist_destroy(nvl);
>> +    if (ret)
>> +    {
>> +        msg(D_DCO, "Failed to send control packet %d", errno);
>> +        return ret;
>> +    }
>> +
>> +    return BLEN(buf);
>> +}
>
> The return code of dco_do_write() is not clearly defined in dco.h
> (Antonio, looking at you...!) but the linux version seems to do
> "negative is error, positive is buffer length written", so this should
> be fine.  Might need a change to "return -errno".
>
Makes sense. Done.

>> +bool
>> +dco_available(int msglevel)
>> +{
>> +    struct if_clonereq ifcr;
>> +    char *buf = NULL;
>> +    int fd;
>> +    int ret;
>> +    bool available = false;
>> +
>> +    /* Attempt to load the module. Ignore errors, because it might already be
>> +     * loaded, or built into the kernel. */
>> +    (void)kldload("if_ovpn");
>> +
>> +    fd = socket(AF_INET, SOCK_DGRAM | SOCK_CLOEXEC, 0);
>> +    if (fd < 0)
>> +    {
>> +        return false;
>> +    }
>> +
>> +    bzero(&ifcr, sizeof(ifcr));
>
> CLEAR(ifcr)
>
Fixed.

>> +    ret = ioctl(fd, SIOCIFGCLONERS, &ifcr);
>> +    if (ret != 0)
>> +    {
>> +        goto out;
>> +    }
>> +
>> +    buf = malloc(ifcr.ifcr_total * IFNAMSIZ);
>> +
>> +    ifcr.ifcr_count = ifcr.ifcr_total;
>> +    ifcr.ifcr_buffer = buf;
>> +    ret = ioctl(fd, SIOCIFGCLONERS, &ifcr);
>> +    if (ret != 0)
>> +    {
>> +        goto out;
>> +    }
>> +
>> +    for (int i = 0; i < ifcr.ifcr_total; i++)
>> +    {
>> +        if (strcmp(buf + (i * IFNAMSIZ), "openvpn") == 0)
>
> This looks fairly magic to me - am I reading this right that the first
> call returns a number of "somethings", then we malloc(something * IFNAMSIZ),
> and the second call returns a list of strings that describe "something",
> and if there is one of them named "openvpn", we know that DCO is
> available?
>
Correct. The first call is to figure out how much space we need, we then allocate that and get the list.

> Why is it called "openvpn", not "ovpn" (if_ovpn, man ovpn)?
>
That’s what the driver chose to do. It’s a bit cheap of me to blame the driver author, but he’s an ass anyway ;)
More seriously, that’s mostly because that also ensures that the interfaces are automatically in the ‘openvpn’ netif group, which is something that makes pfsense’s life easier.

> (A few lines of comment on what SIOCIFGCLONERS does - or which manpage
> to look at - would be appreciated here :-) )
>
It’s a BSD-ism. Some types of interfaces (essentially any interface that’s not actual hardware) is created through ‘cloning’. The relevant code is in sys/net/if_clone.c in the kernel.

You can list them with “ifconfig -C”.

We use it as a way to figure out if DCO is supported or not. We could potentially also enumerate the loaded kernel modules and check for if_ovpn.ko, but that’d break if someone built that into the kernel.

>> +const char *
>> +dco_get_supported_ciphers()
>> +{
>> +    return "none:AES-256-GCM:AES-128-GCM:CHACHA20-POLY1305";
>> +}
>
> Is "none" indeed supported?  I find that useful to test, but Arne and
> Antonio refuse(d) to support it in linux DCO :-)
>
It is, yes, although I have to admit that it’s certainly the least tested mode. (Because if you really want to tunnel without crypto you can just use if_gif, without needing the OpenVPN userspace code.)

>> diff --git a/src/openvpn/forward.c b/src/openvpn/forward.c
>> index 55c939c4..14ad24fa 100644
>> --- a/src/openvpn/forward.c
>> +++ b/src/openvpn/forward.c
>
> ... these are all very straightforward "LINUX" -> "LINUX || FREEBSD" :-)
>
If at some point someone adds OpenBSD or NetBSD or Illumos or .. it’s going to be worth thinking about a ‘PLATFORM_HAS_DCO’, or ‘UNIX_Y_HAS_DCO’ or similar define for these.

>> diff --git a/src/openvpn/tun.c b/src/openvpn/tun.c
>> index 54f7d72c..e84d5a27 100644
>> --- a/src/openvpn/tun.c
>> +++ b/src/openvpn/tun.c
> [..]
>> @@ -2917,20 +2917,24 @@ void
>>  open_tun(const char *dev, const char *dev_type, const char *dev_node, struct tuntap *tt,
>>           openvpn_net_ctx_t *ctx)
>>  {
>> -    open_tun_generic(dev, dev_type, dev_node, tt);
>> -
>> -    if (tt->fd >= 0 && tt->type == DEV_TYPE_TUN)
>> -    {
>> -        int i = IFF_POINTOPOINT | IFF_MULTICAST;
>> +    if (tun_dco_enabled(tt)) {
>> +        open_tun_dco_generic(dev, dev_type, tt, ctx);
>> +    } else {
>> +        open_tun_generic(dev, dev_type, dev_node, tt);
>
> This one will upset the whitespace dragon and it will eat all of
> the patch :-)
>
> OpenVPN bracket convention requires this to be:
>
> +    if (tun_dco_enabled(tt))
> +    {
> +        open_tun_dco_generic(dev, dev_type, tt, ctx);
> +    }
> +    else
> +    {
> +        open_tun_generic(dev, dev_type, dev_node, tt);
>
> (I run uncrustify on merge, so this would get fixed - but it would
> be good to have it properly formatted right away)
>
Whoops. I thought I’d been good about following local style, but I seem to have missed one. Fixed.

Best regards,
Kristof
Gert Doering Aug. 12, 2022, 10:10 p.m. UTC | #8
Hi,

On Thu, Aug 11, 2022 at 05:25:05PM +0200, Kristof Provost via Openvpn-devel wrote:
> >  - running openvpn over TCP gives me a kernel panic - this is not so
> >    nice... (see attached .png from the vmware console) - userland seems
> >    to assume "kernel can do TCP", kernel panics on "if !udp, panic()"
> >    (so intentional panic, not corruption panic).
> >
> >    This is on freebsd git FreeBSD 14.0-CURRENT #1 main-n257130-c0665d5c824
> >
> I???ve pushed a fix for this panic in fd6b3bede5a5c210f327e5c9bd3e415ee905048b.
> I simply didn???t think that user space might give us a non-UDP
> socket, so checking for that and rejecting the peer in that case
> fixes the panic. Thanks for finding that.

JFTR, I have tested "main-n257320-3a3af6b2a16" with the old DCO userland
patch, and it no longer crashes.  Of course the TCP tests failed, because
userland only sees "mmmh, it fails!" but has no idea it should fall back
to non-DCO  (with the new userland patches, this works).

In case you plan to include kernel TCP support, it would be good to
have this "soonish" - like, before FreeBSD 14 and OpenVPN 2.6.0 release,
because otherwise this will be a bit painful to synchronize.

gert
Kristof Provost via Openvpn-devel Aug. 12, 2022, 10:24 p.m. UTC | #9
On 13 Aug 2022, at 10:10, Gert Doering wrote:
> On Thu, Aug 11, 2022 at 05:25:05PM +0200, Kristof Provost via Openvpn-devel wrote:
>>>  - running openvpn over TCP gives me a kernel panic - this is not so
>>>    nice... (see attached .png from the vmware console) - userland seems
>>>    to assume "kernel can do TCP", kernel panics on "if !udp, panic()"
>>>    (so intentional panic, not corruption panic).
>>>
>>>    This is on freebsd git FreeBSD 14.0-CURRENT #1 main-n257130-c0665d5c824
>>>
>> I???ve pushed a fix for this panic in fd6b3bede5a5c210f327e5c9bd3e415ee905048b.
>> I simply didn???t think that user space might give us a non-UDP
>> socket, so checking for that and rejecting the peer in that case
>> fixes the panic. Thanks for finding that.
>
> JFTR, I have tested "main-n257320-3a3af6b2a16" with the old DCO userland
> patch, and it no longer crashes.  Of course the TCP tests failed, because
> userland only sees "mmmh, it fails!" but has no idea it should fall back
> to non-DCO  (with the new userland patches, this works).
>
Thanks!

> In case you plan to include kernel TCP support, it would be good to
> have this "soonish" - like, before FreeBSD 14 and OpenVPN 2.6.0 release,
> because otherwise this will be a bit painful to synchronize.
>
There’s not plan to add TCP support at the moment.

Best regards,
Kristof
Kristof Provost via Openvpn-devel Aug. 12, 2022, 11:45 p.m. UTC | #10
On 11 Aug 2022, at 23:11, Gert Doering wrote:
> If you're interested, I can unicast you the full file I use for
> my DCO client tests, with different ciphers, some instances with
> compression (= does it properly fall back?), some with http/socks
> proxy, etc., plus a set of client+ca certificates to run against
> our test server.
>
That’d be useful, yes. I’ve not yet been able to get the tests to run the way they’re supposed to.

Best regards,
Kristof
Gert Doering Sept. 12, 2022, 2:09 a.m. UTC | #11
Hi,

(copying back the list)

On Mon, Aug 15, 2022 at 03:42:38PM +0200, Kristof Provost wrote:
> Thanks. That works, and I also see the failure with fragmented packets.
> I still have no idea why though. Things look correct on the sending 
> side.
> 
> I did spend a little time finding the exact size where things break, and 
> that???s kind of interesting.
> `sudo ping -c 1 -s 1460 10.194.2.1` succeeds. One byte larger fails.
> 
> That???s very odd, because at 1460 payload bytes we???re already 
> fragmenting the OpenVPN UDP packet (and are still getting a reply), so 
> it looks like the fragmentation in the DCO code does actually work. 
> I???ve also tested larger packets locally, both to a FreeBSD and a Linux 
> server, and those setups also work.

Indeed, I can verify this - ping -s 1460 works, with outside fragmentation

13:49:51.927371 IP 194.97.140.5.11281 > 199.102.77.82.51194: UDP, bad length 1512 > 1472
13:49:51.927392 IP 194.97.140.5 > 199.102.77.82: ip-proto-17
13:49:52.051249 IP 199.102.77.82.51194 > 194.97.140.5.11281: UDP, bad length 1512 > 1472
13:49:52.051272 IP 199.102.77.82 > 194.97.140.5: ip-proto-17

... while ping -s 1461 does not, and as you say, no inside fragmentation

13:50:27.586117 IP 10.194.2.250 > 10.194.0.1: ICMP echo request, id 49007, seq 0, length 1469
13:50:28.617012 IP 10.194.2.250 > 10.194.0.1: ICMP echo request, id 49007, seq 1, length 1469

13:50:27.586383 IP 194.97.140.5.11281 > 199.102.77.82.51194: UDP, bad length 1528 > 1472
13:50:27.586410 IP 194.97.140.5 > 199.102.77.82: ip-proto-17
13:50:28.617296 IP 194.97.140.5.11281 > 199.102.77.82.51194: UDP, bad length 1528 > 1472
13:50:28.617316 IP 194.97.140.5 > 199.102.77.82: ip-proto-17


it *does* bump the outside packet length up by +16 bytes ("bad length 1512" -> 
"1528").  Smells cipher algorithm padding or so - but why 16?  And why pad
at all (AES-256-GCM used, so I think we should not pad)?

Side note: with --disable-dco, I see

13:54:01.612513 IP 194.97.140.5.15009 > 199.102.77.82.51194: UDP, bad length 1513 > 1472
13:54:01.612531 IP 194.97.140.5 > 199.102.77.82: ip-proto-17
13:54:01.736280 IP 199.102.77.82.51194 > 194.97.140.5.15009: UDP, bad length 1513 > 1472
13:54:01.736305 IP 199.102.77.82 > 194.97.140.5: ip-proto-17

so "1513".


Staring at the (outside) packets quite hard with wireshark seems to
find nothing wrong, except that the DCO packets are larger

DCO (ping -s 1461 inside):

    [2 IPv4 Fragments (1536 bytes): #1(1480), #2(56)]
        [Frame: 1, payload: 0-1479 (1480 bytes)]
        [Frame: 2, payload: 1480-1535 (56 bytes)]
        [Fragment count: 2]
        [Reassembled IPv4 length: 1536]
        [Reassembled IPv4 data: 3a2ec7fa060000004800000100000007f05d4cba392aa46a1fc360c7f6f2f9501627a9fd?]

User Datagram Protocol, Src Port: 14894, Dst Port: 51194
[..]
    Length: 1536
    UDP payload (1528 bytes)


--disable-dco

    [2 IPv4 Fragments (1521 bytes): #17(1480), #18(41)]
        [Frame: 17, payload: 0-1479 (1480 bytes)]
        [Frame: 18, payload: 1480-1520 (41 bytes)]
        [Fragment count: 2]
        [Reassembled IPv4 length: 1521]
        [Reassembled IPv4 data: c7fa3aa105f181a54800000000000013d5180c49e3b09a9cd7c61c07e1e5392d3e45f517?]
User Datagram Protocol, Src Port: 51194, Dst Port: 15009
[..]
    Length: 1521
    UDP payload (1513 bytes)
Data (1513 bytes)


There's a FreeBSD pf(4) in between the client and the server, and
that firewall finds the packets acceptable - so I think the outside
packets are good.


But THERE is an indication in the log:

Sep 12 08:01:59 phillip tun-udp-p2mp[52730]: freebsd-14-amd64/194.97.140.5:14894 tun packet too large on write (tried=1504,max=1500)

this is the "I have received a packet from the network, decrypted it, and
tried to send it onwards towards the tun interface, and it was too large".

So my guess is that "something" gets confused on the sending side DCO, and
corrupts the payload size (-> so '-s 1461' becomes '-s 1476' = 16 byte
increase instead of +1, resulting in 'ip packet size 1504' on the receiving
end).  The t_client server is running a slightly older master, which 
enforces the "mtu is 1500, so no bigger packets", newer openvpn are 
are somewhat more permissive on incoming baby giants.


So, observation suggests "it's happening inside the DCO module".  I'll
go instrument my kernel with printf()'s now... and will report if I find
anything useful.

gert
Kristof Provost Sept. 12, 2022, 2:43 a.m. UTC | #12
On 12 Sep 2022, at 14:09, Gert Doering wrote:
> it *does* bump the outside packet length up by +16 bytes ("bad length 1512" ->
> "1528").  Smells cipher algorithm padding or so - but why 16?  And why pad
> at all (AES-256-GCM used, so I think we should not pad)?
>
I would still expect padding. AES will operate on 16 byte blocks, so no matter the block chaining mode we’re going to have that multiple-of-16-bytes thing.

> Sep 12 08:01:59 phillip tun-udp-p2mp[52730]: freebsd-14-amd64/194.97.140.5:14894 tun packet too large on write (tried=1504,max=1500)
>
> this is the "I have received a packet from the network, decrypted it, and
> tried to send it onwards towards the tun interface, and it was too large".
>
> So my guess is that "something" gets confused on the sending side DCO, and
> corrupts the payload size (-> so '-s 1461' becomes '-s 1476' = 16 byte
> increase instead of +1, resulting in 'ip packet size 1504' on the receiving
> end).  The t_client server is running a slightly older master, which
> enforces the "mtu is 1500, so no bigger packets", newer openvpn are
> are somewhat more permissive on incoming baby giants.
>
That’s very interesting information. You may be closing in on the cause.
What version do you run on the t_client server? Perhaps that will help me to reproduce it.

> So, observation suggests "it's happening inside the DCO module".  I'll
> go instrument my kernel with printf()'s now... and will report if I find
> anything useful.
>
Thanks!

I’m on my way to Vienna for EuroBSDCon now, so I will be distracted until early next week, but when I’m back I should be able to dig into this as well.

Best regards,
Kristof
Gert Doering Sept. 12, 2022, 2:45 a.m. UTC | #13
Hi,

On Mon, Sep 12, 2022 at 02:09:52PM +0200, Gert Doering wrote:
> So, observation suggests "it's happening inside the DCO module".  I'll
> go instrument my kernel with printf()'s now... and will report if I find
> anything useful.

ok... so at the beginning of ovpn_transmit_to_peer(), I have

ping -s 1460
Sep 12 14:36:34 fbsd14 kernel: GERT: ovpn_transmit_to_peer, tunnel_len=1488

ping -s 1461
Sep 12 14:36:43 fbsd14 kernel: GERT: ovpn_transmit_to_peer, tunnel_len=1489

-> check!

... and then there is code that deals with rounding up...?!

--------------- snip --------------
printf( "GERT: ovpn_transmit_to_peer, real_len=%d\n", (int) real_len );

        ovpn_hdr_len = sizeof(struct ovpn_wire_header);
        if (key->encrypt->cipher == OVPN_CIPHER_ALG_NONE)
                ovpn_hdr_len -= 16; /* No auth tag. */
        else {
                /* Round up the len to a multiple of our block size. */
                len = roundup2(real_len, AES_BLOCK_LEN);

                /* Now extend the mbuf. */
                m_append(m, len - real_len, EMPTY_BUFFER);
        }

printf( "GERT: ovpn_transmit_to_peer, len=%d\n", (int) len );
--------------- snip --------------

and after this block:

Sep 12 14:40:40 fbsd14 kernel: GERT: ovpn_transmit_to_peer, tunnel_len=1489
Sep 12 14:40:40 fbsd14 kernel: GERT: ovpn_transmit_to_peer, real_len=1489
Sep 12 14:40:40 fbsd14 kernel: GERT: ovpn_transmit_to_peer, len=1504

Wohoo, 1504!  +16!


Now, I have no idea about crypto, *and* I have no idea about OpenVPN
wire format (Arne is the resident expert on this), but Arne tells me
"there is no padding needed"

14:00 <@cron2__> is there padding with AES-GCM?
14:04 <@plaisthos> cron2__: no. AES-GCM is basically CTR and a stream cipher

... so, not sure what that code does.

If I just kill it :-)

                /* Round up the len to a multiple of our block size. */
                // len = roundup2(real_len, AES_BLOCK_LEN);

I can ping just fine...

gert@fbsd14:/usr/obj $ SU ping -s 1461 10.194.0.1
PING 10.194.0.1 (10.194.0.1): 1461 data bytes
1469 bytes from 10.194.0.1: icmp_seq=0 ttl=64 time=124.774 ms
1469 bytes from 10.194.0.1: icmp_seq=1 ttl=64 time=124.930 ms

and with double fragmentation too...

gert@fbsd14:/usr/obj $ SU ping -s 3000 10.194.0.1
PING 10.194.0.1 (10.194.0.1): 3000 data bytes
3008 bytes from 10.194.0.1: icmp_seq=0 ttl=64 time=126.363 ms
3008 bytes from 10.194.0.1: icmp_seq=1 ttl=64 time=126.642 ms
3008 bytes from 10.194.0.1: icmp_seq=2 ttl=64 time=126.200 ms


Now, I'm not submitting a patch for that, because usually there is
a good reason for rounding up and doing blocks and all that - so, I
found the offending lines, but do not feel qualified for a correct
fix.

gert
Gert Doering Sept. 12, 2022, 2:50 a.m. UTC | #14
Hi,

On Mon, Sep 12, 2022 at 02:43:09PM +0200, Kristof Provost via Openvpn-devel wrote:
> That???s very interesting information. You may be closing in on the cause.
> What version do you run on the t_client server? Perhaps that will help me to reproduce it.

OpenVPN 2.6_git [git:master/26e40c48b89478cb

but I don't think that's actually needed, just looking at the difference
in packet size increase between DCO and non-DCO was already a good 
indication :-)

So, see other mail, it's padding/rounding up, and for whatever reason,
this increases the encapsulated packet size for DCO - which normally gets
ignored on the receiving end, it seems, but for this particular packet
size range, the result is just too big.

[..]
> I???m on my way to Vienna for EuroBSDCon now, so I will be distracted until early next week, but when I???m back I should be able to dig into this as well.

Enjoy!  I've been to EuroBSDcon in Belgrade a few years back, and it was
definitely one of the nicest conferences I've been to - good talks, and
very nice and friendly (and interesting!) people everywhere.  This year,
I need to get other stuff done...

gert
Gert Doering Sept. 12, 2022, 2:55 a.m. UTC | #15
Hi,

On Mon, Sep 12, 2022 at 02:43:09PM +0200, Kristof Provost via Openvpn-devel wrote:
> > it *does* bump the outside packet length up by +16 bytes ("bad length 1512" ->
> > "1528").  Smells cipher algorithm padding or so - but why 16?  And why pad
> > at all (AES-256-GCM used, so I think we should not pad)?
> >
> I would still expect padding. AES will operate on 16 byte blocks, so no matter the block chaining mode we???re going to have that multiple-of-16-bytes thing.

Asking the expert again :-)

14:51 <@plaisthos> aes-ctr generates a multiple of 16 bytes as stream
14:52 <@plaisthos> then you xor the message to that and just use the len of the 
                   message

so yes, 16 byte block, but the message length does not increase.

gert
Kristof Provost Sept. 12, 2022, 3:41 a.m. UTC | #16
On 12 Sep 2022, at 14:45, Gert Doering wrote:
> Hi,
>
> On Mon, Sep 12, 2022 at 02:09:52PM +0200, Gert Doering wrote:
>> So, observation suggests "it's happening inside the DCO module".  I'll
>> go instrument my kernel with printf()'s now... and will report if I find
>> anything useful.
>
> ok... so at the beginning of ovpn_transmit_to_peer(), I have
>
> ping -s 1460
> Sep 12 14:36:34 fbsd14 kernel: GERT: ovpn_transmit_to_peer, tunnel_len=1488
>
> ping -s 1461
> Sep 12 14:36:43 fbsd14 kernel: GERT: ovpn_transmit_to_peer, tunnel_len=1489
>
> -> check!
>
> ... and then there is code that deals with rounding up...?!
>
> --------------- snip --------------
> printf( "GERT: ovpn_transmit_to_peer, real_len=%d\n", (int) real_len );
>
>         ovpn_hdr_len = sizeof(struct ovpn_wire_header);
>         if (key->encrypt->cipher == OVPN_CIPHER_ALG_NONE)
>                 ovpn_hdr_len -= 16; /* No auth tag. */
>         else {
>                 /* Round up the len to a multiple of our block size. */
>                 len = roundup2(real_len, AES_BLOCK_LEN);
>
>                 /* Now extend the mbuf. */
>                 m_append(m, len - real_len, EMPTY_BUFFER);
>         }
>
> printf( "GERT: ovpn_transmit_to_peer, len=%d\n", (int) len );
> --------------- snip --------------
>
> and after this block:
>
> Sep 12 14:40:40 fbsd14 kernel: GERT: ovpn_transmit_to_peer, tunnel_len=1489
> Sep 12 14:40:40 fbsd14 kernel: GERT: ovpn_transmit_to_peer, real_len=1489
> Sep 12 14:40:40 fbsd14 kernel: GERT: ovpn_transmit_to_peer, len=1504
>
> Wohoo, 1504!  +16!
>
>
> Now, I have no idea about crypto, *and* I have no idea about OpenVPN
> wire format (Arne is the resident expert on this), but Arne tells me
> "there is no padding needed"
>
> 14:00 <@cron2__> is there padding with AES-GCM?
> 14:04 <@plaisthos> cron2__: no. AES-GCM is basically CTR and a stream cipher
>
> ... so, not sure what that code does.
>
> If I just kill it :-)
>
>                 /* Round up the len to a multiple of our block size. */
>                 // len = roundup2(real_len, AES_BLOCK_LEN);
>
> I can ping just fine...
>
> gert@fbsd14:/usr/obj $ SU ping -s 1461 10.194.0.1
> PING 10.194.0.1 (10.194.0.1): 1461 data bytes
> 1469 bytes from 10.194.0.1: icmp_seq=0 ttl=64 time=124.774 ms
> 1469 bytes from 10.194.0.1: icmp_seq=1 ttl=64 time=124.930 ms
>
> and with double fragmentation too...
>
> gert@fbsd14:/usr/obj $ SU ping -s 3000 10.194.0.1
> PING 10.194.0.1 (10.194.0.1): 3000 data bytes
> 3008 bytes from 10.194.0.1: icmp_seq=0 ttl=64 time=126.363 ms
> 3008 bytes from 10.194.0.1: icmp_seq=1 ttl=64 time=126.642 ms
> 3008 bytes from 10.194.0.1: icmp_seq=2 ttl=64 time=126.200 ms
>
>
> Now, I'm not submitting a patch for that, because usually there is
> a good reason for rounding up and doing blocks and all that - so, I
> found the offending lines, but do not feel qualified for a correct
> fix.
>
The offending code is almost certainly wrong.
I know the guy who wrote them and … he means well ;)

I think I was confused about what was needed in packet size. I’ll try to test your patch in the next couple of days.

Kristof

Patch

diff --git a/configure.ac b/configure.ac
index 353da08c..ed1332da 100644
--- a/configure.ac
+++ b/configure.ac
@@ -787,6 +787,11 @@  dnl
 			AC_DEFINE(ENABLE_DCO, 1, [Enable shared data channel offload])
 			AC_MSG_NOTICE([Enabled ovpn-dco support for Linux])
 			;;
+		*-*-freebsd*)
+			LIBS="${LIBS} -lnv"
+			AC_DEFINE(ENABLE_DCO, 1, [Enable data channel offload for FreeBSD])
+			AC_MSG_NOTICE([Enabled ovpn-dco support for FreeBSD])
+			;;
 		*)
 			AC_MSG_NOTICE([Ignoring --enable-dco on non Linux platform])
 			;;
diff --git a/src/openvpn/Makefile.am b/src/openvpn/Makefile.am
index aaa1dbce..2a139b23 100644
--- a/src/openvpn/Makefile.am
+++ b/src/openvpn/Makefile.am
@@ -54,6 +54,7 @@  openvpn_SOURCES = \
 	crypto_openssl.c crypto_openssl.h \
 	crypto_mbedtls.c crypto_mbedtls.h \
 	dco.c dco.h dco_internal.h \
+	dco_freebsd.c dco_freebsd.h \
 	dco_linux.c dco_linux.h \
 	dhcp.c dhcp.h \
 	dns.c dns.h \
diff --git a/src/openvpn/dco_freebsd.c b/src/openvpn/dco_freebsd.c
new file mode 100644
index 00000000..a8a53fe3
--- /dev/null
+++ b/src/openvpn/dco_freebsd.c
@@ -0,0 +1,636 @@ 
+/*
+ *  Interface to FreeBSD dco networking code
+ *
+ *  Copyright (C) 2022 Rubicon Communications, LLC (Netgate). All Rights Reserved.
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License version 2
+ *  as published by the Free Software Foundation.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program (see the file COPYING included with this
+ *  distribution); if not, write to the Free Software Foundation, Inc.,
+ *  59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#elif defined(_MSC_VER)
+#include "config-msvc.h"
+#endif
+
+#if defined(ENABLE_DCO) && defined(TARGET_FREEBSD)
+
+#include "syshead.h"
+
+#include <sys/param.h>
+#include <sys/linker.h>
+#include <sys/nv.h>
+#include <netinet/in.h>
+
+#include "dco_freebsd.h"
+#include "dco.h"
+#include "tun.h"
+#include "crypto.h"
+#include "ssl_common.h"
+
+static nvlist_t *
+sockaddr_to_nvlist(const struct sockaddr *sa)
+{
+    nvlist_t *nvl = nvlist_create(0);
+
+    nvlist_add_number(nvl, "af", sa->sa_family);
+
+    switch (sa->sa_family)
+    {
+        case AF_INET:
+        {
+            const struct sockaddr_in *in = (const struct sockaddr_in *)sa;
+            nvlist_add_binary(nvl, "address", &in->sin_addr, sizeof(in->sin_addr));
+            nvlist_add_number(nvl, "port", in->sin_port);
+            break;
+        }
+
+        case AF_INET6:
+        {
+            const struct sockaddr_in6 *in6 = (const struct sockaddr_in6 *)sa;
+            nvlist_add_binary(nvl, "address", &in6->sin6_addr, sizeof(in6->sin6_addr));
+            nvlist_add_number(nvl, "port", in6->sin6_port);
+            break;
+        }
+
+        default:
+            abort();
+    }
+
+    return (nvl);
+}
+
+int
+dco_new_peer(dco_context_t *dco, unsigned int peerid, int sd,
+             struct sockaddr *localaddr, struct sockaddr *remoteaddr,
+             struct in_addr *remote_in4, struct in6_addr *remote_in6)
+{
+    struct ifdrv drv;
+    nvlist_t *nvl;
+    int ret;
+
+    nvl = nvlist_create(0);
+
+    msg(D_DCO_DEBUG, "%s: peer-id %d, fd %d", __func__, peerid, sd);
+
+    if (localaddr)
+    {
+        nvlist_add_nvlist(nvl, "local", sockaddr_to_nvlist(localaddr));
+    }
+
+    if (remoteaddr)
+    {
+        nvlist_add_nvlist(nvl, "remote", sockaddr_to_nvlist(remoteaddr));
+    }
+
+    if (remote_in4)
+    {
+        nvlist_add_binary(nvl, "vpn_ipv4", &remote_in4->s_addr,
+                          sizeof(remote_in4->s_addr));
+    }
+
+    if (remote_in6)
+    {
+        nvlist_add_binary(nvl, "vpn_ipv6", remote_in6, sizeof(*remote_in6));
+    }
+
+    nvlist_add_number(nvl, "fd", sd);
+    nvlist_add_number(nvl, "peerid", peerid);
+
+    bzero(&drv, sizeof(drv));
+    snprintf(drv.ifd_name, IFNAMSIZ, "%s", dco->ifname);
+    drv.ifd_cmd = OVPN_NEW_PEER;
+    drv.ifd_data = nvlist_pack(nvl, &drv.ifd_len);
+
+    ret = ioctl(dco->fd, SIOCSDRVSPEC, &drv);
+    free(drv.ifd_data);
+    nvlist_destroy(nvl);
+    if (ret)
+    {
+        msg(D_DCO, "Failed to create new peer %d", errno);
+        return ret;
+    }
+
+    return 0;
+}
+
+static int
+open_fd(dco_context_t *dco)
+{
+    int ret;
+
+    ret = pipe2(dco->pipefd, O_CLOEXEC | O_NONBLOCK);
+    if (ret != 0)
+    {
+        return -1;
+    }
+
+    dco->fd = socket(AF_INET, SOCK_DGRAM | SOCK_CLOEXEC, 0);
+    if (dco->fd != -1)
+    {
+        dco->open = true;
+    }
+    dco->dco_packet_in = alloc_buf(PAGE_SIZE);
+
+    return dco->fd;
+}
+
+static void
+close_fd(dco_context_t *dco)
+{
+    close(dco->pipefd[0]);
+    close(dco->pipefd[1]);
+    close(dco->fd);
+}
+
+bool
+ovpn_dco_init(int mode, dco_context_t *dco)
+{
+    if (open_fd(dco) < 0)
+    {
+        msg(D_DCO, "Failed to open socket");
+        return false;
+    }
+    return true;
+}
+
+static int
+create_interface(struct tuntap *tt, const char *dev)
+{
+    int ret;
+    struct ifreq ifr;
+
+    bzero(&ifr, sizeof(ifr));
+
+    /* Create ovpnx first, then rename it. */
+    snprintf(ifr.ifr_name, IFNAMSIZ, "ovpn");
+    ret = ioctl(tt->dco.fd, SIOCIFCREATE2, &ifr);
+    if (ret)
+    {
+        msg(D_DCO, "Failed to create interface %s: %d", ifr.ifr_name, errno);
+        return ret;
+    }
+
+    /* Rename */
+    if (!strcmp(dev, "tun"))
+    {
+        ifr.ifr_data = "ovpn";
+    }
+    else
+    {
+        ifr.ifr_data = (char *)dev;
+    }
+    ret = ioctl(tt->dco.fd, SIOCSIFNAME, &ifr);
+    if (ret)
+    {
+        /* Delete the created interface again. */
+        (void)ioctl(tt->dco.fd, SIOCIFDESTROY, &ifr);
+        msg(D_DCO, "Failed to create interface %s: %d", ifr.ifr_data, errno);
+        return ret;
+    }
+
+    snprintf(tt->dco.ifname, IFNAMSIZ, "%s", ifr.ifr_data);
+    tt->actual_name = string_alloc(tt->dco.ifname, NULL);
+
+    return 0;
+}
+
+static int
+remove_interface(struct tuntap *tt)
+{
+    int ret;
+    struct ifreq ifr;
+
+    bzero(&ifr, sizeof(ifr));
+    snprintf(ifr.ifr_name, IFNAMSIZ, "%s", tt->dco.ifname);
+
+    ret = ioctl(tt->dco.fd, SIOCIFDESTROY, &ifr);
+    if (ret)
+    {
+        msg(D_DCO, "Failed to remove interface %s: %d", ifr.ifr_name, errno);
+        return ret;
+    }
+
+    tt->dco.ifname[0] = 0;
+
+    return 0;
+}
+
+int
+open_tun_dco(struct tuntap *tt, openvpn_net_ctx_t *ctx, const char *dev)
+{
+    int ret;
+
+    ret = create_interface(tt, dev);
+
+    if (ret < 0)
+    {
+        msg(D_DCO, "Failed to create interface");
+    }
+
+    return ret;
+}
+
+void
+close_tun_dco(struct tuntap *tt, openvpn_net_ctx_t *ctx)
+{
+    remove_interface(tt);
+    close_fd(&tt->dco);
+}
+
+int
+dco_swap_keys(dco_context_t *dco, unsigned int peerid)
+{
+    struct ifdrv drv;
+    nvlist_t *nvl;
+    int ret;
+
+    msg(D_DCO_DEBUG, "%s: peer-id %d", __func__, peerid);
+
+    nvl = nvlist_create(0);
+    nvlist_add_number(nvl, "peerid", peerid);
+
+    bzero(&drv, sizeof(drv));
+    snprintf(drv.ifd_name, IFNAMSIZ, "%s", dco->ifname);
+    drv.ifd_cmd = OVPN_SWAP_KEYS;
+    drv.ifd_data = nvlist_pack(nvl, &drv.ifd_len);
+
+    ret = ioctl(dco->fd, SIOCSDRVSPEC, &drv);
+    free(drv.ifd_data);
+    nvlist_destroy(nvl);
+    if (ret)
+    {
+        msg(D_DCO, "Failed to swap keys %d", errno);
+        return ret;
+    }
+
+    return 0;
+}
+
+int
+dco_del_peer(dco_context_t *dco, unsigned int peerid)
+{
+    struct ifdrv drv;
+    nvlist_t *nvl;
+    int ret;
+
+    nvl = nvlist_create(0);
+    nvlist_add_number(nvl, "peerid", peerid);
+
+    bzero(&drv, sizeof(drv));
+    snprintf(drv.ifd_name, IFNAMSIZ, "%s", dco->ifname);
+    drv.ifd_cmd = OVPN_DEL_PEER;
+    drv.ifd_data = nvlist_pack(nvl, &drv.ifd_len);
+
+    ret = ioctl(dco->fd, SIOCSDRVSPEC, &drv);
+    free(drv.ifd_data);
+    nvlist_destroy(nvl);
+    if (ret)
+    {
+        msg(D_DCO, "Failed to delete peer %d", errno);
+        return ret;
+    }
+
+    return 0;
+}
+
+int
+dco_del_key(dco_context_t *dco, unsigned int peerid,
+            dco_key_slot_t slot)
+{
+    struct ifdrv drv;
+    nvlist_t *nvl;
+    int ret;
+
+    msg(D_DCO, "%s: peer-id %d, slot %d", __func__, peerid, slot);
+
+    nvl = nvlist_create(0);
+    nvlist_add_number(nvl, "slot", slot);
+    nvlist_add_number(nvl, "peerid", peerid);
+
+    bzero(&drv, sizeof(drv));
+    snprintf(drv.ifd_name, IFNAMSIZ, "%s", dco->ifname);
+    drv.ifd_cmd = OVPN_DEL_KEY;
+    drv.ifd_data = nvlist_pack(nvl, &drv.ifd_len);
+
+    ret = ioctl(dco->fd, SIOCSDRVSPEC, &drv);
+    free(drv.ifd_data);
+    nvlist_destroy(nvl);
+    if (ret)
+    {
+        msg(D_DCO, "Failed to delete key %d", errno);
+        return ret;
+    }
+
+    return 0;
+}
+
+static nvlist_t *
+key_to_nvlist(const uint8_t *key, const uint8_t *implicit_iv, const char *ciphername)
+{
+    nvlist_t *nvl;
+    size_t key_len;
+
+    nvl = nvlist_create(0);
+
+    nvlist_add_string(nvl, "cipher", ciphername);
+
+    if (strcmp(ciphername, "none") != 0)
+    {
+        key_len = cipher_kt_key_size(ciphername);
+
+        nvlist_add_binary(nvl, "key", key, key_len);
+        nvlist_add_binary(nvl, "iv", implicit_iv, 8);
+    }
+
+    return (nvl);
+}
+
+static int
+start_tun(dco_context_t *dco)
+{
+    struct ifdrv drv;
+    int ret;
+
+    bzero(&drv, sizeof(drv));
+    snprintf(drv.ifd_name, IFNAMSIZ, "%s", dco->ifname);
+    drv.ifd_cmd = OVPN_START_VPN;
+
+    ret = ioctl(dco->fd, SIOCSDRVSPEC, &drv);
+    if (ret)
+    {
+        msg(D_DCO, "Failed to start vpn %d", errno);
+        return ret;
+    }
+
+    return 0;
+}
+
+int
+dco_new_key(dco_context_t *dco, unsigned int peerid, int keyid,
+            dco_key_slot_t slot,
+            const uint8_t *encrypt_key, const uint8_t *encrypt_iv,
+            const uint8_t *decrypt_key, const uint8_t *decrypt_iv,
+            const char *ciphername)
+{
+    struct ifdrv drv;
+    nvlist_t *nvl;
+    int ret;
+
+    msg(D_DCO_DEBUG, "%s: slot %d, key-id %d, peer-id %d, cipher %s",
+        __func__, slot, keyid, peerid, ciphername);
+
+    nvl = nvlist_create(0);
+
+    nvlist_add_number(nvl, "slot", slot);
+    nvlist_add_number(nvl, "keyid", keyid);
+    nvlist_add_number(nvl, "peerid", peerid);
+
+    nvlist_add_nvlist(nvl, "encrypt",
+                      key_to_nvlist(encrypt_key, encrypt_iv, ciphername));
+    nvlist_add_nvlist(nvl, "decrypt",
+                      key_to_nvlist(decrypt_key, decrypt_iv, ciphername));
+
+    bzero(&drv, sizeof(drv));
+    snprintf(drv.ifd_name, IFNAMSIZ, "%s", dco->ifname);
+    drv.ifd_cmd = OVPN_NEW_KEY;
+    drv.ifd_data = nvlist_pack(nvl, &drv.ifd_len);
+
+    ret = ioctl(dco->fd, SIOCSDRVSPEC, &drv);
+    free(drv.ifd_data);
+    nvlist_destroy(nvl);
+    if (ret)
+    {
+        msg(D_DCO, "Failed to set key %d", errno);
+        return ret;
+    }
+
+    return start_tun(dco);
+}
+
+int
+dco_set_peer(dco_context_t *dco, unsigned int peerid,
+             int keepalive_interval, int keepalive_timeout,
+             int mss)
+{
+    struct ifdrv drv;
+    nvlist_t *nvl;
+    int ret;
+
+    nvl = nvlist_create(0);
+    nvlist_add_number(nvl, "peerid", peerid);
+    nvlist_add_number(nvl, "interval", keepalive_interval);
+    nvlist_add_number(nvl, "timeout", keepalive_timeout);
+
+    bzero(&drv, sizeof(drv));
+    snprintf(drv.ifd_name, IFNAMSIZ, "%s", dco->ifname);
+    drv.ifd_cmd = OVPN_SET_PEER;
+    drv.ifd_data = nvlist_pack(nvl, &drv.ifd_len);
+
+    ret = ioctl(dco->fd, SIOCSDRVSPEC, &drv);
+    free(drv.ifd_data);
+    nvlist_destroy(nvl);
+    if (ret)
+    {
+        msg(D_DCO, "Failed to set keepalive %d", errno);
+        return ret;
+    }
+
+    return 0;
+}
+
+int
+dco_do_read(dco_context_t *dco)
+{
+    struct ifdrv drv;
+    uint8_t buf[4096];
+    nvlist_t *nvl;
+    const uint8_t *pkt;
+    size_t pktlen;
+    int ret;
+
+    /* Flush any pending data from the pipe. */
+    (void)read(dco->pipefd[1], buf, sizeof(buf));
+
+    bzero(&drv, sizeof(drv));
+    snprintf(drv.ifd_name, IFNAMSIZ, "%s", dco->ifname);
+    drv.ifd_cmd = OVPN_GET_PKT;
+    drv.ifd_data = buf;
+    drv.ifd_len = sizeof(buf);
+
+    ret = ioctl(dco->fd, SIOCGDRVSPEC, &drv);
+    if (ret)
+    {
+        msg(D_DCO, "Failed to read control packet %d", errno);
+        return errno;
+    }
+
+    nvl = nvlist_unpack(buf, drv.ifd_len, 0);
+    if (!nvl)
+    {
+        msg(D_DCO, "Failed to unpack nvlist");
+        return EINVAL;
+    }
+
+    dco->dco_message_peer_id = nvlist_get_number(nvl, "peerid");
+
+    if (nvlist_exists_binary(nvl, "packet"))
+    {
+        pkt = nvlist_get_binary(nvl, "packet", &pktlen);
+        memcpy(BPTR(&dco->dco_packet_in), pkt, pktlen);
+        dco->dco_packet_in.len = pktlen;
+        dco->dco_message_type = OVPN_CMD_PACKET;
+    }
+    else
+    {
+        dco->dco_del_peer_reason = OVPN_DEL_PEER_REASON_EXPIRED;
+        dco->dco_message_type = OVPN_CMD_DEL_PEER;
+    }
+
+    nvlist_destroy(nvl);
+
+    return 0;
+}
+
+int
+dco_do_write(dco_context_t *dco, int peer_id, struct buffer *buf)
+{
+    struct ifdrv drv;
+    nvlist_t *nvl;
+    int ret;
+
+    nvl = nvlist_create(0);
+
+    nvlist_add_binary(nvl, "packet", BSTR(buf), BLEN(buf));
+    nvlist_add_number(nvl, "peerid", peer_id);
+
+    bzero(&drv, sizeof(drv));
+    snprintf(drv.ifd_name, IFNAMSIZ, "%s", dco->ifname);
+    drv.ifd_cmd = OVPN_SEND_PKT;
+    drv.ifd_data = nvlist_pack(nvl, &drv.ifd_len);
+
+    ret = ioctl(dco->fd, SIOCSDRVSPEC, &drv);
+    free(drv.ifd_data);
+    nvlist_destroy(nvl);
+    if (ret)
+    {
+        msg(D_DCO, "Failed to send control packet %d", errno);
+        return ret;
+    }
+
+    return BLEN(buf);
+}
+
+bool
+dco_available(int msglevel)
+{
+    struct if_clonereq ifcr;
+    char *buf = NULL;
+    int fd;
+    int ret;
+    bool available = false;
+
+    /* Attempt to load the module. Ignore errors, because it might already be
+     * loaded, or built into the kernel. */
+    (void)kldload("if_ovpn");
+
+    fd = socket(AF_INET, SOCK_DGRAM | SOCK_CLOEXEC, 0);
+    if (fd < 0)
+    {
+        return false;
+    }
+
+    bzero(&ifcr, sizeof(ifcr));
+
+    ret = ioctl(fd, SIOCIFGCLONERS, &ifcr);
+    if (ret != 0)
+    {
+        goto out;
+    }
+
+    buf = malloc(ifcr.ifcr_total * IFNAMSIZ);
+
+    ifcr.ifcr_count = ifcr.ifcr_total;
+    ifcr.ifcr_buffer = buf;
+    ret = ioctl(fd, SIOCIFGCLONERS, &ifcr);
+    if (ret != 0)
+    {
+        goto out;
+    }
+
+    for (int i = 0; i < ifcr.ifcr_total; i++)
+    {
+        if (strcmp(buf + (i * IFNAMSIZ), "openvpn") == 0)
+        {
+            available = true;
+            goto out;
+        }
+    }
+
+out:
+    free(buf);
+    close(fd);
+
+    return available;
+}
+
+void
+dco_event_set(dco_context_t *dco, struct event_set *es, void *arg)
+{
+    struct ifdrv drv;
+    nvlist_t *nvl;
+    uint8_t buf[128];
+    int ret;
+
+    if (!dco || !dco->open)
+    {
+        return;
+    }
+
+    bzero(&drv, sizeof(drv));
+    snprintf(drv.ifd_name, IFNAMSIZ, "%s", dco->ifname);
+    drv.ifd_cmd = OVPN_POLL_PKT;
+    drv.ifd_len = sizeof(buf);
+    drv.ifd_data = buf;
+
+    ret = ioctl(dco->fd, SIOCGDRVSPEC, &drv);
+    if (ret)
+    {
+        msg(D_DCO, "Failed to poll for packets %d", errno);
+        return;
+    }
+
+    nvl = nvlist_unpack(buf, drv.ifd_len, 0);
+    if (!nvl)
+    {
+        msg(D_DCO, "Failed to unpack nvlist");
+        return;
+    }
+
+    if (nvlist_get_number(nvl, "pending") > 0)
+    {
+        (void)write(dco->pipefd[0], " ", 1);
+        event_ctl(es, dco->pipefd[1], EVENT_READ, arg);
+    }
+
+    nvlist_destroy(nvl);
+}
+
+const char *
+dco_get_supported_ciphers()
+{
+    return "none:AES-256-GCM:AES-128-GCM:CHACHA20-POLY1305";
+}
+
+#endif /* defined(ENABLE_DCO) && defined(TARGET_FREEBSD) */
diff --git a/src/openvpn/dco_freebsd.h b/src/openvpn/dco_freebsd.h
new file mode 100644
index 00000000..3594f229
--- /dev/null
+++ b/src/openvpn/dco_freebsd.h
@@ -0,0 +1,59 @@ 
+/*
+ *  Interface to FreeBSD dco networking code
+ *
+ *  Copyright (C) 2022 Rubicon Communications, LLC (Netgate). All Rights Reserved.
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License version 2
+ *  as published by the Free Software Foundation.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program (see the file COPYING included with this
+ *  distribution); if not, write to the Free Software Foundation, Inc.,
+ *  59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+#ifndef DCO_FREEBSD_H
+#define DCO_FREEBSD_H
+
+#if defined(ENABLE_DCO) && defined(TARGET_FREEBSD)
+
+#include <buffer.h>
+#include "event.h"
+
+#include "ovpn_dco_freebsd.h"
+
+typedef enum ovpn_key_slot dco_key_slot_t;
+typedef enum ovpn_key_cipher dco_cipher_t;
+
+enum ovpn_message_type_t {
+    OVPN_CMD_DEL_PEER,
+    OVPN_CMD_PACKET,
+};
+
+enum ovpn_del_reason_t {
+    OVPN_DEL_PEER_REASON_EXPIRED,
+    OVPN_DEL_PEER_REASON_TRANSPORT_ERROR,
+    OVPN_DEL_PEER_REASON_USERSPACE,
+};
+
+typedef struct dco_context {
+    bool open;
+    int fd;
+    int pipefd[2];
+
+    char ifname[IFNAMSIZ];
+
+    struct buffer dco_packet_in;
+
+    int dco_message_type;
+    int dco_message_peer_id;
+    int dco_del_peer_reason;
+} dco_context_t;
+
+#endif /* defined(ENABLE_DCO) && defined(TARGET_FREEBSD) */
+#endif /* ifndef DCO_FREEBSD_H */
diff --git a/src/openvpn/dco_internal.h b/src/openvpn/dco_internal.h
index 3ceb26d6..728e3092 100644
--- a/src/openvpn/dco_internal.h
+++ b/src/openvpn/dco_internal.h
@@ -27,6 +27,7 @@ 
 
 #if defined(ENABLE_DCO)
 
+#include "dco_freebsd.h"
 #include "dco_linux.h"
 
 /**
diff --git a/src/openvpn/forward.c b/src/openvpn/forward.c
index 55c939c4..14ad24fa 100644
--- a/src/openvpn/forward.c
+++ b/src/openvpn/forward.c
@@ -1113,7 +1113,7 @@  process_incoming_link(struct context *c)
 static void
 process_incoming_dco(struct context *c)
 {
-#if defined(ENABLE_DCO) && defined(TARGET_LINUX)
+#if defined(ENABLE_DCO) && (defined(TARGET_LINUX) || defined(TARGET_FREEBSD))
     struct link_socket_info *lsi = get_link_socket_info(c);
     dco_context_t *dco = &c->c1.tuntap->dco;
 
@@ -1140,7 +1140,7 @@  process_incoming_dco(struct context *c)
 
     c->c2.buf = orig_buff;
     buf_init(&dco->dco_packet_in, 0);
-#endif /* if defined(ENABLE_DCO) && defined(TARGET_LINUX) */
+#endif /* if defined(ENABLE_DCO) && (defined(TARGET_LINUX) || defined(TARGET_FREEBSD)) */
 }
 
 /*
@@ -1946,7 +1946,7 @@  io_wait_dowork(struct context *c, const unsigned int flags)
 #ifdef ENABLE_ASYNC_PUSH
     static int file_shift = FILE_SHIFT;
 #endif
-#ifdef TARGET_LINUX
+#if defined(TARGET_LINUX) || defined(TARGET_FREEBSD)
     static int dco_shift = DCO_SHIFT;    /* Event from DCO linux kernel module */
 #endif
 
@@ -2056,7 +2056,7 @@  io_wait_dowork(struct context *c, const unsigned int flags)
      */
     socket_set(c->c2.link_socket, c->c2.event_set, socket, (void *)&socket_shift, NULL);
     tun_set(c->c1.tuntap, c->c2.event_set, tuntap, (void *)&tun_shift, NULL);
-#if defined(TARGET_LINUX)
+#if defined(TARGET_LINUX) || defined(TARGET_FREEBSD)
     if (socket & EVENT_READ && c->c2.did_open_tun)
     {
         dco_event_set(&c->c1.tuntap->dco, c->c2.event_set, (void *)&dco_shift);
diff --git a/src/openvpn/mtcp.c b/src/openvpn/mtcp.c
index eb88a56a..1abb903f 100644
--- a/src/openvpn/mtcp.c
+++ b/src/openvpn/mtcp.c
@@ -283,7 +283,7 @@  multi_tcp_wait(const struct context *c,
     }
 #endif
     tun_set(c->c1.tuntap, mtcp->es, EVENT_READ, MTCP_TUN, persistent);
-#if defined(TARGET_LINUX)
+#if defined(TARGET_LINUX) || defined(TARGET_FREEBSD)
     dco_event_set(&c->c1.tuntap->dco, mtcp->es, MTCP_DCO);
 #endif
 
@@ -763,7 +763,7 @@  multi_tcp_process_io(struct multi_context *m)
                     multi_tcp_action(m, mi, TA_INITIAL, false);
                 }
             }
-#if defined(ENABLE_DCO) && defined(TARGET_LINUX)
+#if defined(ENABLE_DCO) && (defined(TARGET_LINUX) || defined(TARGET_FREEBSD))
             /* incoming data on DCO? */
             else if (e->arg == MTCP_DCO)
             {
diff --git a/src/openvpn/mudp.c b/src/openvpn/mudp.c
index ddb1efc9..4ab18b72 100644
--- a/src/openvpn/mudp.c
+++ b/src/openvpn/mudp.c
@@ -381,7 +381,7 @@  multi_process_io_udp(struct multi_context *m)
         multi_process_file_closed(m, mpp_flags);
     }
 #endif
-#if defined(ENABLE_DCO) && defined(TARGET_LINUX)
+#if defined(ENABLE_DCO) && (defined(TARGET_LINUX) || defined(TARGET_FREEBSD))
     else if (status & DCO_READ)
     {
         if (!IS_SIG(&m->top))
diff --git a/src/openvpn/multi.c b/src/openvpn/multi.c
index dcf4438d..53ee3e1a 100644
--- a/src/openvpn/multi.c
+++ b/src/openvpn/multi.c
@@ -3154,7 +3154,7 @@  multi_close_instance_on_signal(struct multi_context *m, struct multi_instance *m
     multi_close_instance(m, mi, false);
 }
 
-#if (defined(ENABLE_DCO) && defined(TARGET_LINUX)) || defined(ENABLE_MANAGEMENT)
+#if (defined(ENABLE_DCO) && (defined(TARGET_LINUX) || defined(TARGET_FREEBSD))) || defined(ENABLE_MANAGEMENT)
 static void
 multi_signal_instance(struct multi_context *m, struct multi_instance *mi, const int sig)
 {
@@ -3163,7 +3163,7 @@  multi_signal_instance(struct multi_context *m, struct multi_instance *mi, const
 }
 #endif
 
-#if defined(ENABLE_DCO) && defined(TARGET_LINUX)
+#if defined(ENABLE_DCO) && (defined(TARGET_LINUX) || defined(TARGET_FREEBSD))
 static void
 process_incoming_dco_packet(struct multi_context *m, struct multi_instance *mi,
                             dco_context_t *dco)
diff --git a/src/openvpn/options.c b/src/openvpn/options.c
index 0ce3158b..14cb4cc4 100644
--- a/src/openvpn/options.c
+++ b/src/openvpn/options.c
@@ -183,7 +183,7 @@  static const char usage_message[] =
     "                  does not begin with \"tun\" or \"tap\".\n"
     "--dev-node node : Explicitly set the device node rather than using\n"
     "                  /dev/net/tun, /dev/tun, /dev/tap, etc.\n"
-#if defined(ENABLE_DCO) && defined(TARGET_LINUX)
+#if defined(ENABLE_DCO) && (defined(TARGET_LINUX) || defined(TARGET_FREEBSD))
     "--disable-dco   : Do not attempt using Data Channel Offload.\n"
 #endif
     "--lladdr hw     : Set the link layer address of the tap device.\n"
@@ -1794,7 +1794,7 @@  show_settings(const struct options *o)
     SHOW_STR(dev);
     SHOW_STR(dev_type);
     SHOW_STR(dev_node);
-#if defined(ENABLE_DCO) && defined(TARGET_LINUX)
+#if defined(ENABLE_DCO) && (defined(TARGET_LINUX) || defined(TARGET_FREEBSD))
     SHOW_BOOL(tuntap_options.disable_dco);
 #endif
     SHOW_STR(lladdr);
@@ -3670,7 +3670,7 @@  options_postprocess_mutate(struct options *o, struct env_set *es)
     }
 
     /* check if any option should force disabling DCO */
-#if defined(TARGET_LINUX)
+#if defined(TARGET_LINUX) || defined(TARGET_FREEBSD)
     o->tuntap_options.disable_dco = !dco_check_option_conflict(D_DCO, o);
 #endif
 
@@ -5872,7 +5872,7 @@  add_option(struct options *options,
 #endif
     else if (streq(p[0], "disable-dco"))
     {
-#if defined(TARGET_LINUX)
+#if defined(TARGET_LINUX) || defined(TARGET_FREEBSD)
         options->tuntap_options.disable_dco = true;
 #endif
     }
diff --git a/src/openvpn/options.h b/src/openvpn/options.h
index ec3c44b1..212f4b05 100644
--- a/src/openvpn/options.h
+++ b/src/openvpn/options.h
@@ -876,7 +876,7 @@  void options_string_import(struct options *options,
 
 bool key_is_external(const struct options *options);
 
-#if defined(ENABLE_DCO) && defined(TARGET_LINUX)
+#if defined(ENABLE_DCO) && (defined(TARGET_LINUX) || defined(TARGET_FREEBSD))
 
 /**
  * Returns whether the current configuration has dco enabled.
@@ -887,7 +887,7 @@  dco_enabled(const struct options *o)
     return !o->tuntap_options.disable_dco;
 }
 
-#else /* if defined(ENABLE_DCO) && defined(TARGET_LINUX) */
+#else /* if defined(ENABLE_DCO) && (defined(TARGET_LINUX) || defined(TARGET_FREEBSD))*/
 
 static inline bool
 dco_enabled(const struct options *o)
diff --git a/src/openvpn/ovpn_dco_freebsd.h b/src/openvpn/ovpn_dco_freebsd.h
new file mode 100644
index 00000000..abebbb78
--- /dev/null
+++ b/src/openvpn/ovpn_dco_freebsd.h
@@ -0,0 +1,64 @@ 
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
+ *
+ * Copyright (c) 2021 Rubicon Communications, LLC (Netgate)
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#ifndef _NET_IF_OVPN_H_
+#define _NET_IF_OVPN_H_
+
+#include <sys/types.h>
+#include <netinet/in.h>
+
+/* Maximum size of an ioctl request. */
+#define OVPN_MAX_REQUEST_SIZE	4096
+
+enum ovpn_notif_type {
+	OVPN_NOTIF_DEL_PEER,
+};
+
+enum ovpn_key_slot {
+	OVPN_KEY_SLOT_PRIMARY	= 0,
+	OVPN_KEY_SLOT_SECONDARY	= 1
+};
+
+enum ovpn_key_cipher {
+	OVPN_CIPHER_ALG_NONE			= 0,
+	OVPN_CIPHER_ALG_AES_GCM			= 1,
+	OVPN_CIPHER_ALG_CHACHA20_POLY1305	= 2
+};
+
+#define OVPN_NEW_PEER		_IO  ('D', 1)
+#define OVPN_DEL_PEER		_IO  ('D', 2)
+#define OVPN_GET_STATS		_IO  ('D', 3)
+#define OVPN_NEW_KEY		_IO  ('D', 4)
+#define OVPN_SWAP_KEYS		_IO  ('D', 5)
+#define OVPN_DEL_KEY		_IO  ('D', 6)
+#define OVPN_SET_PEER		_IO  ('D', 7)
+#define OVPN_START_VPN		_IO  ('D', 8)
+#define OVPN_SEND_PKT		_IO  ('D', 9)
+#define OVPN_POLL_PKT		_IO  ('D', 10)
+#define OVPN_GET_PKT		_IO  ('D', 11)
+
+#endif
diff --git a/src/openvpn/tun.c b/src/openvpn/tun.c
index 54f7d72c..e84d5a27 100644
--- a/src/openvpn/tun.c
+++ b/src/openvpn/tun.c
@@ -1724,7 +1724,7 @@  tun_name_is_fixed(const char *dev)
     return has_digit(dev);
 }
 
-#if defined(TARGET_LINUX)
+#if defined(TARGET_LINUX) || defined(TARGET_FREEBSD)
 static bool
 tun_dco_enabled(struct tuntap *tt)
 {
@@ -1838,9 +1838,9 @@  open_tun_generic(const char *dev, const char *dev_type, const char *dev_node,
         tt->actual_name = string_alloc(dynamic_opened ? dynamic_name : dev, NULL);
     }
 }
-#endif /* !_WIN32 && !TARGET_LINUX */
+#endif /* !_WIN32 && !TARGET_LINUX && !TARGET_FREEBSD*/
 
-#if defined(TARGET_LINUX)
+#if defined(TARGET_LINUX) || defined(TARGET_FREEBSD)
 static void
 open_tun_dco_generic(const char *dev, const char *dev_type,
                      struct tuntap *tt, openvpn_net_ctx_t *ctx)
@@ -1913,7 +1913,7 @@  open_tun_dco_generic(const char *dev, const char *dev_type,
         tt->actual_name = string_alloc(dev, NULL);
     }
 }
-#endif /* TARGET_LINUX */
+#endif /* TARGET_LINUX || TARGET_FREEBSD*/
 
 #if !defined(_WIN32)
 static void
@@ -2296,7 +2296,7 @@  close_tun(struct tuntap *tt, openvpn_net_ctx_t *ctx)
         net_ctx_reset(ctx);
     }
 
-#ifdef TARGET_LINUX
+#if defined(TARGET_LINUX) || defined(TARGET_FREEBSD)
     if (tun_dco_enabled(tt))
     {
         close_tun_dco(tt, ctx);
@@ -2917,20 +2917,24 @@  void
 open_tun(const char *dev, const char *dev_type, const char *dev_node, struct tuntap *tt,
          openvpn_net_ctx_t *ctx)
 {
-    open_tun_generic(dev, dev_type, dev_node, tt);
-
-    if (tt->fd >= 0 && tt->type == DEV_TYPE_TUN)
-    {
-        int i = IFF_POINTOPOINT | IFF_MULTICAST;
+    if (tun_dco_enabled(tt)) {
+        open_tun_dco_generic(dev, dev_type, tt, ctx);
+    } else {
+        open_tun_generic(dev, dev_type, dev_node, tt);
 
-        if (ioctl(tt->fd, TUNSIFMODE, &i) < 0)
+        if (tt->fd >= 0 && tt->type == DEV_TYPE_TUN)
         {
-            msg(M_WARN | M_ERRNO, "ioctl(TUNSIFMODE)");
-        }
-        i = 1;
-        if (ioctl(tt->fd, TUNSIFHEAD, &i) < 0)
-        {
-            msg(M_WARN | M_ERRNO, "ioctl(TUNSIFHEAD)");
+            int i = IFF_POINTOPOINT | IFF_MULTICAST;
+
+            if (ioctl(tt->fd, TUNSIFMODE, &i) < 0)
+            {
+                msg(M_WARN | M_ERRNO, "ioctl(TUNSIFMODE)");
+            }
+            i = 1;
+            if (ioctl(tt->fd, TUNSIFHEAD, &i) < 0)
+            {
+                msg(M_WARN | M_ERRNO, "ioctl(TUNSIFHEAD)");
+            }
         }
     }
 }
diff --git a/src/openvpn/tun.h b/src/openvpn/tun.h
index 8ec8f51f..ea4946e9 100644
--- a/src/openvpn/tun.h
+++ b/src/openvpn/tun.h
@@ -142,6 +142,12 @@  struct tuntap_options {
     bool disable_dco;
 };
 
+#elif defined(TARGET_FREEBSD)
+
+struct tuntap_options {
+    bool disable_dco;
+};
+
 #else  /* if defined(_WIN32) || defined(TARGET_ANDROID) */
 
 struct tuntap_options {