[Openvpn-devel,v2] Retain CAP_NET_ADMIN when dropping privileges

Message ID 20220330205536.139-1-timo@rothenpieler.org
State Changes Requested
Headers show
Series [Openvpn-devel,v2] Retain CAP_NET_ADMIN when dropping privileges | expand

Commit Message

Timo Rothenpieler March 30, 2022, 9:55 a.m. UTC
---
Using libcap-ng now


 configure.ac                              | 19 +++++
 distro/systemd/openvpn-client@.service.in |  2 +-
 distro/systemd/openvpn-server@.service.in |  2 +-
 src/openvpn/init.c                        | 25 ++++++-
 src/openvpn/platform.c                    | 91 +++++++++++++++++++++++
 src/openvpn/platform.h                    |  5 ++
 6 files changed, 140 insertions(+), 4 deletions(-)

Comments

Jan Just Keijser March 30, 2022, 7:53 p.m. UTC | #1
Hi,

On 30/03/22 22:55, Timo Rothenpieler wrote:
> ---
> Using libcap-ng now
sorry to butt in late, but I've got a nasty feeling about this... the 
whole purpose of using
   --user
is, according to the man page
        --user user
               Change the user ID of the OpenVPN process to user after  
initialization,  dropping  privileges  in  the
               process.  This  option is useful to protect the system in 
the event that some hostile party was able to
               gain control of an OpenVPN session. Though OpenVPN's 
security features make this unlikely, it  is  pro‐
               vided as a second line of defense.

               By  setting  user  to  nobody or somebody similarly 
unprivileged, the hostile party would be limited in
               what damage they could cause. Of course once you take 
away privileges, you cannot  return  them  to an
               OpenVPN  session.  This  means, for example, that if you 
want to reset an OpenVPN daemon with a SIGUSR1
               signal (for example in response to a DHCP reset), you 
should make use of one or more of  the  --persist
               options  to  ensure  that OpenVPN doesn't need to execute 
any privileged operations in order to restart
               (such as re-reading key files or running ifconfig on the 
TUN device).

yet with this patch, the openvpn process remains capable of

        CAP_NET_ADMIN
               Perform various network-related operations:
               * interface configuration;
               * administration of IP firewall, masquerading, and
                 accounting;
               * modify routing tables;
               * bind to any address for transparent proxying;
               * set type-of-service (TOS);
               * clear driver statistics;
               * set promiscuous mode;
               * enabling multicasting;
               * use setsockopt(2) to set the following socket options:
                 SO_DEBUG, SO_MARK, SO_PRIORITY (for a priority outside
                 the range 0 to 6), SO_RCVBUFFORCE, and SO_SNDBUFFORCE.

so this "second line of defense" it getting *VERY* leaky in my opinion 
(and warrants a manpage update, at the very least).

The proper solution would be to have openvpn fork on itself, keep a 
"barebones" process running as root, but with the actual control and 
data channels running in the forked process using truly minimal privileges.

JM2CW,

JJK


>
>   configure.ac                              | 19 +++++
>   distro/systemd/openvpn-client@.service.in |  2 +-
>   distro/systemd/openvpn-server@.service.in |  2 +-
>   src/openvpn/init.c                        | 25 ++++++-
>   src/openvpn/platform.c                    | 91 +++++++++++++++++++++++
>   src/openvpn/platform.h                    |  5 ++
>   6 files changed, 140 insertions(+), 4 deletions(-)
>
> diff --git a/configure.ac b/configure.ac
> index 7199483a..168360d4 100644
> --- a/configure.ac
> +++ b/configure.ac
> @@ -794,6 +794,25 @@ dnl
>   	esac
>   fi
>   
> +dnl
> +dnl Depend on libcap-ng on Linux
> +dnl
> +case "$host" in
> +	*-*-linux*)
> +		PKG_CHECK_MODULES([LIBCAPNG],
> +				  [libcap-ng],
> +				  [have_libcapng="yes"],
> +				  [AC_MSG_ERROR([libcap-ng package not found. Is the development package and pkg-config installed?])]
> +		)
> +		AC_CHECK_HEADER([sys/prctl.h],,[AC_MSG_ERROR([sys/prctl.h not found!])])
> +
> +		CFLAGS="${CFLAGS} ${LIBCAPNG_CFALGS}"
> +		LIBS="${LIBS} ${LIBCAPNG_LIBS}"
> +		AC_DEFINE(HAVE_LIBCAPNG, 1, [Enable libcap-ng support])
> +	;;
> +esac
> +
> +
>   if test "${with_crypto_library}" = "openssl"; then
>   	AC_ARG_VAR([OPENSSL_CFLAGS], [C compiler flags for OpenSSL])
>   	AC_ARG_VAR([OPENSSL_LIBS], [linker flags for OpenSSL])
> diff --git a/distro/systemd/openvpn-client@.service.in b/distro/systemd/openvpn-client@.service.in
> index cbcef653..159fb4dc 100644
> --- a/distro/systemd/openvpn-client@.service.in
> +++ b/distro/systemd/openvpn-client@.service.in
> @@ -11,7 +11,7 @@ Type=notify
>   PrivateTmp=true
>   WorkingDirectory=/etc/openvpn/client
>   ExecStart=@sbindir@/openvpn --suppress-timestamps --nobind --config %i.conf
> -CapabilityBoundingSet=CAP_IPC_LOCK CAP_NET_ADMIN CAP_NET_RAW CAP_SETGID CAP_SETUID CAP_SYS_CHROOT CAP_DAC_OVERRIDE
> +CapabilityBoundingSet=CAP_IPC_LOCK CAP_NET_ADMIN CAP_NET_RAW CAP_SETGID CAP_SETUID CAP_SETPCAP CAP_SYS_CHROOT CAP_DAC_OVERRIDE
>   LimitNPROC=10
>   DeviceAllow=/dev/null rw
>   DeviceAllow=/dev/net/tun rw
> diff --git a/distro/systemd/openvpn-server@.service.in b/distro/systemd/openvpn-server@.service.in
> index d1cc72cb..6e8e7d94 100644
> --- a/distro/systemd/openvpn-server@.service.in
> +++ b/distro/systemd/openvpn-server@.service.in
> @@ -11,7 +11,7 @@ Type=notify
>   PrivateTmp=true
>   WorkingDirectory=/etc/openvpn/server
>   ExecStart=@sbindir@/openvpn --status %t/openvpn-server/status-%i.log --status-version 2 --suppress-timestamps --config %i.conf
> -CapabilityBoundingSet=CAP_IPC_LOCK CAP_NET_ADMIN CAP_NET_BIND_SERVICE CAP_NET_RAW CAP_SETGID CAP_SETUID CAP_SYS_CHROOT CAP_DAC_OVERRIDE CAP_AUDIT_WRITE
> +CapabilityBoundingSet=CAP_IPC_LOCK CAP_NET_ADMIN CAP_NET_BIND_SERVICE CAP_NET_RAW CAP_SETGID CAP_SETUID CAP_SETPCAP CAP_SYS_CHROOT CAP_DAC_OVERRIDE CAP_AUDIT_WRITE
>   LimitNPROC=10
>   DeviceAllow=/dev/null rw
>   DeviceAllow=/dev/net/tun rw
> diff --git a/src/openvpn/init.c b/src/openvpn/init.c
> index 8818ba6f..705eb92e 100644
> --- a/src/openvpn/init.c
> +++ b/src/openvpn/init.c
> @@ -1138,6 +1138,25 @@ possibly_become_daemon(const struct options *options)
>       return ret;
>   }
>   
> +/*
> + * Determine if we need to retain process capabilities. DCO and SITNL need it.
> + * Enforce it for DCO, but only try and soft-fail for SITNL to keep backwards compat.
> + */
> +static int
> +get_need_keep_caps(struct context *c)
> +{
> +    if (dco_enabled(&c->options))
> +    {
> +        return 1;
> +    }
> +
> +#ifdef ENABLE_SITNL
> +    return -1;
> +#else
> +    return 0;
> +#endif
> +}
> +
>   /*
>    * Actually do UID/GID downgrade, chroot and SELinux context switching, if requested.
>    */
> @@ -1167,8 +1186,10 @@ do_uid_gid_chroot(struct context *c, bool no_delay)
>           {
>               if (no_delay)
>               {
> -                platform_group_set(&c0->platform_state_group);
> -                platform_user_set(&c0->platform_state_user);
> +                int keep_caps = get_need_keep_caps(c);
> +                platform_user_group_set(&c0->platform_state_user,
> +                                        &c0->platform_state_group,
> +                                        keep_caps);
>               }
>               else if (c->first_time)
>               {
> diff --git a/src/openvpn/platform.c b/src/openvpn/platform.c
> index 450f28ba..4fce5a83 100644
> --- a/src/openvpn/platform.c
> +++ b/src/openvpn/platform.c
> @@ -43,6 +43,11 @@
>   #include <direct.h>
>   #endif
>   
> +#ifdef HAVE_LIBCAPNG
> +#include <cap-ng.h>
> +#include <sys/prctl.h>
> +#endif
> +
>   /* Redefine the top level directory of the filesystem
>    * to restrict access to files for security */
>   void
> @@ -155,6 +160,92 @@ platform_group_set(const struct platform_state_group *state)
>   #endif
>   }
>   
> +void platform_user_group_set(const struct platform_state_user *user_state,
> +                             const struct platform_state_group *group_state,
> +                             int keep_caps)
> +{
> +    unsigned int err_flags = (keep_caps > 0) ? M_FATAL : M_NONFATAL;
> +#ifdef HAVE_LIBCAPNG
> +    int new_gid = -1, new_uid = -1;
> +    int res;
> +
> +    if (keep_caps == 0)
> +    {
> +        goto fallback;
> +    }
> +
> +    /*
> +     * new_uid/new_gid defaults to -1, which will not make
> +     * libcap-ng change the UID/GID unless configured
> +     */
> +    if (group_state->groupname && group_state->gr)
> +    {
> +        new_gid = group_state->gr->gr_gid;
> +    }
> +    if (user_state->username && user_state->pw)
> +    {
> +        new_uid = user_state->pw->pw_uid;
> +    }
> +
> +    /* Prepare capabilities before dropping UID/GID */
> +    capng_clear(CAPNG_SELECT_BOTH);
> +    res = capng_update(CAPNG_ADD, CAPNG_EFFECTIVE | CAPNG_PERMITTED, CAP_NET_ADMIN);
> +    if (res < 0)
> +    {
> +        msg(err_flags, "capng_update(CAP_NET_ADMIN) failed: %d", res);
> +        goto fallback;
> +    }
> +
> +    /* Change to new UID/GID.
> +     * capng_change_id() internally calls capng_apply() to apply prepared capabilities.
> +     */
> +    res = capng_change_id(new_uid, new_gid, CAPNG_DROP_SUPP_GRP | CAPNG_CLEAR_BOUNDING);
> +    if (res == -4 || res == -6)
> +    {
> +        msg(M_ERR, "capng_change_id('%s','%s') failed: %d",
> +            user_state->username, group_state->groupname, res);
> +    }
> +    else if (res < 0)
> +    {
> +        if (res == -3)
> +        {
> +            msg(M_NONFATAL, "Following error likely due to missing capability CAP_SETPCAP.");
> +        }
> +        msg(err_flags | M_ERRNO, "capng_change_id('%s','%s') failed retaining capabilities: %d",
> +            user_state->username, group_state->groupname, res);
> +        goto fallback;
> +    }
> +
> +    if (new_uid >= 0)
> +    {
> +         msg(M_INFO, "UID set to %s", user_state->username);
> +    }
> +    if (new_gid >= 0)
> +    {
> +         msg(M_INFO, "GID set to %s", group_state->groupname);
> +    }
> +
> +    msg(M_INFO, "Capabilities retained: CAP_NET_ADMIN");
> +
> +    return;
> +fallback:
> +    /* capng_change_id() can leave this flag clobbered on failure */
> +    if (prctl(PR_GET_KEEPCAPS) && prctl(PR_SET_KEEPCAPS, 0) < 0)
> +    {
> +        msg(M_ERR, "Clearing KEEPCAPS flag failed");
> +    }
> +#endif  /* HAVE_LIBCAPNG */
> +    if (keep_caps)
> +    {
> +        msg(err_flags, "Unable to retain capabilities");
> +    }
> +
> +    platform_group_set(group_state);
> +    platform_user_set(user_state);
> +}
> +
> +
> +
>   /* Change process priority */
>   void
>   platform_nice(int niceval)
> diff --git a/src/openvpn/platform.h b/src/openvpn/platform.h
> index a3eec298..b163f093 100644
> --- a/src/openvpn/platform.h
> +++ b/src/openvpn/platform.h
> @@ -85,6 +85,11 @@ bool platform_group_get(const char *groupname, struct platform_state_group *stat
>   
>   void platform_group_set(const struct platform_state_group *state);
>   
> +void platform_user_group_set(const struct platform_state_user *user_state,
> +                             const struct platform_state_group *group_state,
> +                             int keep_caps);
> +
> +
>   /*
>    * Extract UID or GID
>    */
David Sommerseth March 30, 2022, 11:06 p.m. UTC | #2
On 31/03/2022 08:53, Jan Just Keijser wrote:
> Hi,
> 
> On 30/03/22 22:55, Timo Rothenpieler wrote:
>> ---
>> Using libcap-ng now
> sorry to butt in late, but I've got a nasty feeling about this... the 
> whole purpose of using
>    --user
> is, according to the man page
>         --user user
>                Change the user ID of the OpenVPN process to user after 
> initialization,  dropping  privileges  in  the
>                process.  This  option is useful to protect the system in 
> the event that some hostile party was able to
>                gain control of an OpenVPN session. Though OpenVPN's 
> security features make this unlikely, it  is  pro‐
>                vided as a second line of defense.
> 
>                By  setting  user  to  nobody or somebody similarly 
> unprivileged, the hostile party would be limited in
>                what damage they could cause. Of course once you take 
> away privileges, you cannot  return  them  to an
>                OpenVPN  session.  This  means, for example, that if you 
> want to reset an OpenVPN daemon with a SIGUSR1
>                signal (for example in response to a DHCP reset), you 
> should make use of one or more of  the  --persist
>                options  to  ensure  that OpenVPN doesn't need to execute 
> any privileged operations in order to restart
>                (such as re-reading key files or running ifconfig on the 
> TUN device).
> 
> yet with this patch, the openvpn process remains capable of
> 
>         CAP_NET_ADMIN
>                Perform various network-related operations:
>                * interface configuration;
>                * administration of IP firewall, masquerading, and
>                  accounting;
>                * modify routing tables;
>                * bind to any address for transparent proxying;
>                * set type-of-service (TOS);
>                * clear driver statistics;
>                * set promiscuous mode;
>                * enabling multicasting;
>                * use setsockopt(2) to set the following socket options:
>                  SO_DEBUG, SO_MARK, SO_PRIORITY (for a priority outside
>                  the range 0 to 6), SO_RCVBUFFORCE, and SO_SNDBUFFORCE.
> 
> so this "second line of defense" it getting *VERY* leaky in my opinion 
> (and warrants a manpage update, at the very least).
> 
> The proper solution would be to have openvpn fork on itself, keep a 
> "barebones" process running as root, but with the actual control and 
> data channels running in the forked process using truly minimal privileges.

Hi,

You have some valid points about locking down OpenVPN.  However, without 
this change and having OpenVPN run completely without privileges it will 
not work well in certain situations with ovpn-dco.  Because it will not 
have the capabilities needed to interact with the kernel module. 
CAP_NET_ADMIN does give this possibility.

In regards to man-page update, I agree.  And we should do that as a 
additional patch.

And when it comes to this patch, currently it only use the capabilities 
feature when DCO is available on the system and not disabled in the 
configuration.  This in options.c which calls 
dco_check_option_conflict(), which is implemented in dco.c.

There is however another related challenge in OpenVPN 2.x, which became 
even clearer than be fore with the sitnl implementation we switched over 
to on Linux by default with v2.5.  When using --user/--group without 
--persist-tun, a reconnect would tear down the interface but could not 
recover again and the connection dies.  Using --persist-tun, it could 
work a bit better *unless* it needs to change the IP address of the tun 
interface.  I'm not sure how well, OpenVPN 2.x works if new routes are 
being pushed (OpenVPN 3 supports that as well).  This challenge is also 
resolved by granting the process CAP_NET_ADMIN capabilities.

For now, my opinion is that it is currently acceptable to have 
CAP_NET_ADMIN available when running with ovpn-dco; to have a smooth 
user experience.  OpenVPN is after all a network related process.



As a way forward after this, the aspect of how much to trust, 
capabilities and privileges you put into a single process needs to be 
better defined.  OpenVPN 2.x has a monolithic design, and the 
architecture of privilege separation is lacking at best.

We have not done any attempts improving this, as this will not be a 
trivial refactoring.  With ovpn-dco, the data-channel handling is 
already handled outside of the master OpenVPN process; so here there is 
some improvements indirectly.  Now the control-channel handling + 
network configuration (with CAP_NET_ADMIN) runs in the same process scope.

Arne and I discussed a while back to look into if the Network 
Configuration service (openvpn3-service-netcfg) from OpenVPN 3 Linux can 
be implemented in a way where it can be used by OpenVPN 2.x as well. 
Theoretically, this is possible but not trivial.

In OpenVPN 3 Linux, privilege separation is part of the design.  The 
client process (openvpn3-serivce-client) runs completely unprivileged, 
but gets its tun/ovpn-dco interface created by openvpn3-service-netcfg 
and the client passes VPN IP address, routes and DNS settings to this 
netcfg service which applies these changes.  The netcfg service runs as 
openvpn:openvpn with CAP_NET_ADMIN (and a few optional other ones, 
depending on the system setup) and can only be approached by 
openvpn3-service-client processes.  The rest of the OpenVPN 3 Linux 
stack runs completely unprivileged.  On systems with SELinux enabled, 
both openvpn3-service-client and openvpn3-service-netcfg runs confined 
in their own separate SELinux contexts, with a strict policy what each 
of them can do.

I am willing to work on making the netcfg service even less "OpenVPN 3 
centric", and it has a potential to grow towards a generic VPN API on 
Linux.  The current D-Bus interface it uses is highly inspired by the 
Android VPN API.  But this won't happen in a short time and not in time 
for the OpenVPN 2.6 release.  This is probably something which is more 
realistic for OpenVPN 2.8.  But this needs to be discussed more 
thoroughly (next hackathon?).
Arne Schwabe March 30, 2022, 11:17 p.m. UTC | #3
> I am willing to work on making the netcfg service even less "OpenVPN 3 
> centric", and it has a potential to grow towards a generic VPN API on 
> Linux.  The current D-Bus interface it uses is highly inspired by the 
> Android VPN API.  But this won't happen in a short time and not in time 
> for the OpenVPN 2.6 release.  This is probably something which is more 
> realistic for OpenVPN 2.8.  But this needs to be discussed more 
> thoroughly (next hackathon?).

The current interface and interface design is ill suited for a server 
and I would even go so far as saying that extending the interface to 
support the full server functionality will go against the goal of the 
interface design.

A limited client mode might work but even that is something were you 
loose features that OpenVPN support in client mode. On platforms like 
ANdroid/iOS were you need to use an API like that you can hand wave that 
away as limitation of the platform but on Linux that is harder.

Also it is a very linux specific solution that will not work on macOS or 
Windows. So we might consider approaches as well.

Arne
Gert Doering March 31, 2022, 12:02 a.m. UTC | #4
Hi,

On Thu, Mar 31, 2022 at 12:06:06PM +0200, David Sommerseth wrote:
> There is however another related challenge in OpenVPN 2.x, which became 
> even clearer than be fore with the sitnl implementation we switched over 
> to on Linux by default with v2.5.  When using --user/--group without 
> --persist-tun, a reconnect would tear down the interface but could not 
> recover again and the connection dies.  Using --persist-tun, it could 
> work a bit better *unless* it needs to change the IP address of the tun 
> interface.  I'm not sure how well, OpenVPN 2.x works if new routes are 
> being pushed (OpenVPN 3 supports that as well).  This challenge is also 
> resolved by granting the process CAP_NET_ADMIN capabilities.

For most non-trivial stuff, OpenVPN with --user will run into problems,
be it route teardown, installing of new routes at renegotiation time,
...

So most people today just run 2.x as root, not getting any security 
benefits.

> For now, my opinion is that it is currently acceptable to have 
> CAP_NET_ADMIN available when running with ovpn-dco; to have a smooth 
> user experience.  OpenVPN is after all a network related process.

I'd even go for "keep CAP_NET_ADMIN for DCO and sitnl" - because it
means "all the route/interface manipulation *and cleanup* stuff can
be done properly, without having to carry root privileges".

> As a way forward after this, the aspect of how much to trust, 
> capabilities and privileges you put into a single process needs to be 
> better defined.  OpenVPN 2.x has a monolithic design, and the 
> architecture of privilege separation is lacking at best.

You might be surprised at what we have in 2.x :-) - with the service
pipe, we can run OpenVPN fully unprivileged, and do so on Windows.  

We just never had anyone bother to implement a backend for this for
"Unixy" platforms...

The benefit of that, securitywise, wouldn't be very large anyway,
compared to "CAP_NET_ADMIN + --user nobody" - the service is still
able to mess up routing and interface config, and that's about what
privileges remain in that combo... - so, dubious benefits, lots of
work.

gert
Timo Rothenpieler March 31, 2022, 12:29 a.m. UTC | #5
On 31.03.2022 13:02, Gert Doering wrote:
> Hi,
> 
> On Thu, Mar 31, 2022 at 12:06:06PM +0200, David Sommerseth wrote:
>> There is however another related challenge in OpenVPN 2.x, which became
>> even clearer than be fore with the sitnl implementation we switched over
>> to on Linux by default with v2.5.  When using --user/--group without
>> --persist-tun, a reconnect would tear down the interface but could not
>> recover again and the connection dies.  Using --persist-tun, it could
>> work a bit better *unless* it needs to change the IP address of the tun
>> interface.  I'm not sure how well, OpenVPN 2.x works if new routes are
>> being pushed (OpenVPN 3 supports that as well).  This challenge is also
>> resolved by granting the process CAP_NET_ADMIN capabilities.
> 
> For most non-trivial stuff, OpenVPN with --user will run into problems,
> be it route teardown, installing of new routes at renegotiation time,
> ...
> 
> So most people today just run 2.x as root, not getting any security
> benefits.
> 
>> For now, my opinion is that it is currently acceptable to have
>> CAP_NET_ADMIN available when running with ovpn-dco; to have a smooth
>> user experience.  OpenVPN is after all a network related process.
> 
> I'd even go for "keep CAP_NET_ADMIN for DCO and sitnl" - because it
> means "all the route/interface manipulation *and cleanup* stuff can
> be done properly, without having to carry root privileges".

That's exactly what the patch does.
Only difference is that for sitnl, to avoid breaking existing setups, it 
will fall back to the old approach of switching user if the capability 
retaining approach failed.
Gert Doering March 31, 2022, 12:34 a.m. UTC | #6
Hi,

On Thu, Mar 31, 2022 at 01:29:28PM +0200, Timo Rothenpieler wrote:
> That's exactly what the patch does.

Which I very much like :-)  (I said that on IRC already, repeating here
for the list archive)

> Only difference is that for sitnl, to avoid breaking existing setups, it 
> will fall back to the old approach of switching user if the capability 
> retaining approach failed.

I'm a bit undecided if this is really something to worry about... but
then, in an existing and working systemd environment with reduced
capabilities it might break setups going 2.5 -> 2.6, so maybe "being
careful about things" is the better way :-)

gert
David Sommerseth March 31, 2022, 12:39 a.m. UTC | #7
On 31/03/2022 13:34, Gert Doering wrote:
> Hi,

> 

> On Thu, Mar 31, 2022 at 01:29:28PM +0200, Timo Rothenpieler wrote:

>> That's exactly what the patch does.

> 

> Which I very much like :-)  (I said that on IRC already, repeating here

> for the list archive)

> 

>> Only difference is that for sitnl, to avoid breaking existing setups, it

>> will fall back to the old approach of switching user if the capability

>> retaining approach failed.

> 

> I'm a bit undecided if this is really something to worry about... but

> then, in an existing and working systemd environment with reduced

> capabilities it might break setups going 2.5 -> 2.6, so maybe "being

> careful about things" is the better way :-)


Yeah, I agree with this.  For v2.6, the time is too short to be dare too 
much potential breakage now.  But we can consider further steps with v2.7.


-- 
kind regards,

David Sommerseth
OpenVPN Inc
David Sommerseth March 31, 2022, 2:20 a.m. UTC | #8
On 30/03/2022 22:55, Timo Rothenpieler wrote:
> ---

> Using libcap-ng now

> 

> 

>   configure.ac                              | 19 +++++

>   distro/systemd/openvpn-client@.service.in |  2 +-

>   distro/systemd/openvpn-server@.service.in |  2 +-

>   src/openvpn/init.c                        | 25 ++++++-

>   src/openvpn/platform.c                    | 91 +++++++++++++++++++++++

>   src/openvpn/platform.h                    |  5 ++

>   6 files changed, 140 insertions(+), 4 deletions(-)

> 

Since I worked closely with Timo on this patch version, I don't feel I 
should give it an ACK verdict alone.  But I believe this is the right 
patch to include.

I will just suggest a commit message:

----------------------------------------------
platform: Retain CAP_NET_ADMIN when dropping privileges

On Linux, when dropping privileges, interaction with the network 
configuration, such as tearing down routes or ovpn-dco interfaces
will fail when --user/--group are used.

This patch set sets the CAP_NET_ADMIN capability, which grants the 
needed privileges during the lifetime of the OpenVPN process when 
dropping root privileges.

Signed-off-by: Timo Rothenpieler <timo@rothenpieler.org>

Reviewed-By: David Sommerseth <davids@openvpn.net>

----------------------------------------------


I have otherwise tested this patch on a Rocky Linux 8 distribution.
Client test cases I ran when testing this was:

   * from the command line, with and without DCO
   * via systemd, with and without DCO

With these 4 test cases, each of them were run with combinations of

   * no --user/--group
   * only --user
   * only --group
   * both --user and --group

I've also run a few tests using an --up script which modified 
/etc/resolv.conf, which also worked as expected with capabilities enabled.

There were no unexpected behavior with this final patch set, with one 
special exception which is outside the scope of this patch - SELinux.

SELinux on Fedora and RHEL (which Rocky Linux inherits) denies the 
OpenVPN process when run via systemd to use the SET_PCAP capability.  In 
addition, the SELinux reference policy also denies all interactions with 
the Generic Netlink interfaces used by ovpn-dco.  I will follow up this 
with the upstream SELinux reference policy maintainers.

Package maintainers needing SELinux can in the mean time, until an 
updated SELinux policy is available, provide an additional SELinux 
module which grants the needed privileges to openvpn_t labelled processes.


-- 
kind regards,

David Sommerseth
OpenVPN Inc
Gert Doering March 31, 2022, 2:26 a.m. UTC | #9
Hi,

On Thu, Mar 31, 2022 at 03:20:59PM +0200, David Sommerseth wrote:
> I've also run a few tests using an --up script which modified 
> /etc/resolv.conf, which also worked as expected with capabilities enabled.

This is actually an interesting corner case.  As far as I understand,
--up runs before setuid, so that should always succeed - but if you do
that, cleaning up resolv.conf in --down won't succeed.

(But this is a totally independent problem of "network things without
root" that this patch addresses)

[..]
> SELinux on Fedora and RHEL (which Rocky Linux inherits) denies the 
> OpenVPN process when run via systemd to use the SET_PCAP capability.  In 
> addition, the SELinux reference policy also denies all interactions with 
> the Generic Netlink interfaces used by ovpn-dco.  I will follow up this 
> with the upstream SELinux reference policy maintainers.

This is a good find.  Thanks :-)

gert
David Sommerseth March 31, 2022, 3:38 a.m. UTC | #10
On 31/03/2022 15:26, Gert Doering wrote:
> Hi,

> 

> On Thu, Mar 31, 2022 at 03:20:59PM +0200, David Sommerseth wrote:

>> I've also run a few tests using an --up script which modified

>> /etc/resolv.conf, which also worked as expected with capabilities enabled.

> 

> This is actually an interesting corner case.  As far as I understand,

> --up runs before setuid, so that should always succeed - but if you do

> that, cleaning up resolv.conf in --down won't succeed.


That is actually correct, and to be honest I didn't think about the 
order of when running as client.

We could "fix" --down now, but I will not recommend it at all.  We could 
add the CAP_DAC_OVERRIDE capability.  But that's a massive sledge 
hammer, giving read/write access to any file on the system. Only 
security modules like SELinux, AppArmor and such can block access with 
this capability enabled.  So this is definitely not the right capability 
to have in the main OpenVPN process now.


-- 
kind regards,

David Sommerseth
OpenVPN Inc
Gert Doering March 31, 2022, 3:54 a.m. UTC | #11
Hi,

On Thu, Mar 31, 2022 at 04:38:06PM +0200, David Sommerseth wrote:
> We could "fix" --down now, but I will not recommend it at all.  We could 
> add the CAP_DAC_OVERRIDE capability.  But that's a massive sledge 
> hammer, giving read/write access to any file on the system. Only 
> security modules like SELinux, AppArmor and such can block access with 
> this capability enabled.  So this is definitely not the right capability 
> to have in the main OpenVPN process now.

I agree.  

This is not what I was suggesting (not at all), just pointing out that 
the combination of --up, --user and --down is not with its own set 
of surprises ;-)

gert
Antonio Quartulli April 5, 2022, 11:52 p.m. UTC | #12
Hi,

On 30/03/2022 22:55, Timo Rothenpieler wrote:
> ---
> Using libcap-ng now

A commit message would be good, but I see that David has already 
proposed one.

> 
> 
>   configure.ac                              | 19 +++++
>   distro/systemd/openvpn-client@.service.in |  2 +-
>   distro/systemd/openvpn-server@.service.in |  2 +-
>   src/openvpn/init.c                        | 25 ++++++-
>   src/openvpn/platform.c                    | 91 +++++++++++++++++++++++
>   src/openvpn/platform.h                    |  5 ++
>   6 files changed, 140 insertions(+), 4 deletions(-)
> 
> diff --git a/configure.ac b/configure.ac
> index 7199483a..168360d4 100644
> --- a/configure.ac
> +++ b/configure.ac
> @@ -794,6 +794,25 @@ dnl
>   	esac
>   fi
>   
> +dnl
> +dnl Depend on libcap-ng on Linux
> +dnl
> +case "$host" in
> +	*-*-linux*)
> +		PKG_CHECK_MODULES([LIBCAPNG],
> +				  [libcap-ng],
> +				  [have_libcapng="yes"],

do we really need have_libcapng? it seems it is not used further in 
configure.ac

> +				  [AC_MSG_ERROR([libcap-ng package not found. Is the development package and pkg-config installed?])]
> +		)
> +		AC_CHECK_HEADER([sys/prctl.h],,[AC_MSG_ERROR([sys/prctl.h not found!])])
> +
> +		CFLAGS="${CFLAGS} ${LIBCAPNG_CFALGS}"
> +		LIBS="${LIBS} ${LIBCAPNG_LIBS}"
> +		AC_DEFINE(HAVE_LIBCAPNG, 1, [Enable libcap-ng support])
> +	;;
> +esac
> +
> +
>   if test "${with_crypto_library}" = "openssl"; then
>   	AC_ARG_VAR([OPENSSL_CFLAGS], [C compiler flags for OpenSSL])
>   	AC_ARG_VAR([OPENSSL_LIBS], [linker flags for OpenSSL])
> diff --git a/distro/systemd/openvpn-client@.service.in b/distro/systemd/openvpn-client@.service.in
> index cbcef653..159fb4dc 100644
> --- a/distro/systemd/openvpn-client@.service.in
> +++ b/distro/systemd/openvpn-client@.service.in
> @@ -11,7 +11,7 @@ Type=notify
>   PrivateTmp=true
>   WorkingDirectory=/etc/openvpn/client
>   ExecStart=@sbindir@/openvpn --suppress-timestamps --nobind --config %i.conf
> -CapabilityBoundingSet=CAP_IPC_LOCK CAP_NET_ADMIN CAP_NET_RAW CAP_SETGID CAP_SETUID CAP_SYS_CHROOT CAP_DAC_OVERRIDE
> +CapabilityBoundingSet=CAP_IPC_LOCK CAP_NET_ADMIN CAP_NET_RAW CAP_SETGID CAP_SETUID CAP_SETPCAP CAP_SYS_CHROOT CAP_DAC_OVERRIDE
>   LimitNPROC=10
>   DeviceAllow=/dev/null rw
>   DeviceAllow=/dev/net/tun rw
> diff --git a/distro/systemd/openvpn-server@.service.in b/distro/systemd/openvpn-server@.service.in
> index d1cc72cb..6e8e7d94 100644
> --- a/distro/systemd/openvpn-server@.service.in
> +++ b/distro/systemd/openvpn-server@.service.in
> @@ -11,7 +11,7 @@ Type=notify
>   PrivateTmp=true
>   WorkingDirectory=/etc/openvpn/server
>   ExecStart=@sbindir@/openvpn --status %t/openvpn-server/status-%i.log --status-version 2 --suppress-timestamps --config %i.conf
> -CapabilityBoundingSet=CAP_IPC_LOCK CAP_NET_ADMIN CAP_NET_BIND_SERVICE CAP_NET_RAW CAP_SETGID CAP_SETUID CAP_SYS_CHROOT CAP_DAC_OVERRIDE CAP_AUDIT_WRITE
> +CapabilityBoundingSet=CAP_IPC_LOCK CAP_NET_ADMIN CAP_NET_BIND_SERVICE CAP_NET_RAW CAP_SETGID CAP_SETUID CAP_SETPCAP CAP_SYS_CHROOT CAP_DAC_OVERRIDE CAP_AUDIT_WRITE
>   LimitNPROC=10
>   DeviceAllow=/dev/null rw
>   DeviceAllow=/dev/net/tun rw
> diff --git a/src/openvpn/init.c b/src/openvpn/init.c
> index 8818ba6f..705eb92e 100644
> --- a/src/openvpn/init.c
> +++ b/src/openvpn/init.c
> @@ -1138,6 +1138,25 @@ possibly_become_daemon(const struct options *options)
>       return ret;
>   }
>   
> +/*
> + * Determine if we need to retain process capabilities. DCO and SITNL need it.
> + * Enforce it for DCO, but only try and soft-fail for SITNL to keep backwards compat.
> + */

Since this function is returning a tri-state value, may it be better to 
document what the returned values mean in the comment above?

> +static int
> +get_need_keep_caps(struct context *c)

I know this function is kinda a getter, but two verbs in the function 
name sounds weird. How about just "need_keep_caps"?

> +{
> +    if (dco_enabled(&c->options))
> +    {
> +        return 1;
> +    }
> +
> +#ifdef ENABLE_SITNL
> +    return -1;
> +#else
> +    return 0;
> +#endif
> +}

Since the check above is really platform specific, wouldn't it make 
sense to move its definition to platform.c?
At the end of the day this function is just invoked once to get a value 
and immediately pass it to platform_user_group_set().

> +
>   /*
>    * Actually do UID/GID downgrade, chroot and SELinux context switching, if requested.
>    */
> @@ -1167,8 +1186,10 @@ do_uid_gid_chroot(struct context *c, bool no_delay)
>           {
>               if (no_delay)
>               {
> -                platform_group_set(&c0->platform_state_group);
> -                platform_user_set(&c0->platform_state_user);

By removing the invocations above, these two functions are now defined 
and used only in platform.c. Therefore, they can be made static and 
their declarations can be removed from platform.h.

> +                int keep_caps = get_need_keep_caps(c);
> +                platform_user_group_set(&c0->platform_state_user,
> +                                        &c0->platform_state_group,
> +                                        keep_caps);
>               }
>               else if (c->first_time)
>               {
> diff --git a/src/openvpn/platform.c b/src/openvpn/platform.c
> index 450f28ba..4fce5a83 100644
> --- a/src/openvpn/platform.c
> +++ b/src/openvpn/platform.c
> @@ -43,6 +43,11 @@
>   #include <direct.h>
>   #endif
>   
> +#ifdef HAVE_LIBCAPNG
> +#include <cap-ng.h>
> +#include <sys/prctl.h>
> +#endif
> +
>   /* Redefine the top level directory of the filesystem
>    * to restrict access to files for security */
>   void
> @@ -155,6 +160,92 @@ platform_group_set(const struct platform_state_group *state)
>   #endif
>   }
>   

Should we add a comment here describing what this function is doing? I 
know it is not complex, but maybe we should say that we will try to set 
the uid/gid using libcap-ng because we will also try to retain the 
CAP_NET_ADMIN capa? If libcap-ng is not available, we will simply try to 
switch user/group, unless not allowed (i.e. SITNL is enabled)

> +void platform_user_group_set(const struct platform_state_user *user_state,
> +                             const struct platform_state_group *group_state,
> +                             int keep_caps)
> +{
> +    unsigned int err_flags = (keep_caps > 0) ? M_FATAL : M_NONFATAL;

Shrug - I really don't like this "OpenVPN way" of handling code flow - 
with a value passed to a print() function...but hey, this is how it's 
currently done in most of the code.

> +#ifdef HAVE_LIBCAPNG
> +    int new_gid = -1, new_uid = -1;
> +    int res;
> +
> +    if (keep_caps == 0)
> +    {
> +        goto fallback;
> +    }
> +
> +    /*
> +     * new_uid/new_gid defaults to -1, which will not make
> +     * libcap-ng change the UID/GID unless configured
> +     */
> +    if (group_state->groupname && group_state->gr)
> +    {
> +        new_gid = group_state->gr->gr_gid;
> +    }
> +    if (user_state->username && user_state->pw)
> +    {
> +        new_uid = user_state->pw->pw_uid;
> +    }
> +
> +    /* Prepare capabilities before dropping UID/GID */
> +    capng_clear(CAPNG_SELECT_BOTH);
> +    res = capng_update(CAPNG_ADD, CAPNG_EFFECTIVE | CAPNG_PERMITTED, CAP_NET_ADMIN);
> +    if (res < 0)
> +    {
> +        msg(err_flags, "capng_update(CAP_NET_ADMIN) failed: %d", res);
> +        goto fallback;
> +    }
> +
> +    /* Change to new UID/GID.
> +     * capng_change_id() internally calls capng_apply() to apply prepared capabilities.
> +     */
> +    res = capng_change_id(new_uid, new_gid, CAPNG_DROP_SUPP_GRP | CAPNG_CLEAR_BOUNDING);
> +    if (res == -4 || res == -6)

Argh, libcap-ng defines its own error codes...can we at least extend the 
comment above to explain what these magic -4 and -6 are? (this way we 
are sure to catch what the devel

> +    {
> +        msg(M_ERR, "capng_change_id('%s','%s') failed: %d",
> +            user_state->username, group_state->groupname, res);

If I understood correctly, in these two cases (-4 and -6) we failed to 
set either the uid or the gid, but the capas were retained?

If so, this means that the --user or --group options failed to be 
applied. Is it right to continue the execution? If I am not wrong 
OpenVPN currently aborts if the user/group couldn't be changed. 
Shouldn't we keep this behaviuor?

> +    }
> +    else if (res < 0)
> +    {
> +        if (res == -3)
> +        {
> +            msg(M_NONFATAL, "Following error likely due to missing capability CAP_SETPCAP.");
> +        }
> +        msg(err_flags | M_ERRNO, "capng_change_id('%s','%s') failed retaining capabilities: %d",

'man cap_change_id' does not mention setting errno at all.
What do we expect to see with M_ERRNO?

> +            user_state->username, group_state->groupname, res);
> +        goto fallback;
> +    }
> +
> +    if (new_uid >= 0)
> +    {
> +         msg(M_INFO, "UID set to %s", user_state->username);
> +    }
> +    if (new_gid >= 0)
> +    {
> +         msg(M_INFO, "GID set to %s", group_state->groupname);
> +    }
> +
> +    msg(M_INFO, "Capabilities retained: CAP_NET_ADMIN");
> +
> +    return;
> +fallback:
> +    /* capng_change_id() can leave this flag clobbered on failure */
> +    if (prctl(PR_GET_KEEPCAPS) && prctl(PR_SET_KEEPCAPS, 0) < 0)

What does this flag mean and why do we need to reset it to 0?
And what happens if the flag is not reset? (It seems we don't care much 
and just continue the execution)

> +    {
> +        msg(M_ERR, "Clearing KEEPCAPS flag failed");
> +    }
> +#endif  /* HAVE_LIBCAPNG */
> +    if (keep_caps)

for my poor eyes..add an empty line after the endif :-)

> +    {
> +        msg(err_flags, "Unable to retain capabilities");
> +    }
> +
> +    platform_group_set(group_state);
> +    platform_user_set(user_state);
> +}
> +
> +
> +

I think only one empty line is enough :D

>   /* Change process priority */
>   void
>   platform_nice(int niceval)
> diff --git a/src/openvpn/platform.h b/src/openvpn/platform.h
> index a3eec298..b163f093 100644
> --- a/src/openvpn/platform.h
> +++ b/src/openvpn/platform.h
> @@ -85,6 +85,11 @@ bool platform_group_get(const char *groupname, struct platform_state_group *stat
>   
>   void platform_group_set(const struct platform_state_group *state);
>   
> +void platform_user_group_set(const struct platform_state_user *user_state,
> +                             const struct platform_state_group *group_state,
> +                             int keep_caps);
> +
> +
>   /*
>    * Extract UID or GID
>    */


Regards,
Timo Rothenpieler April 6, 2022, 2:44 a.m. UTC | #13
On 06.04.2022 11:52, Antonio Quartulli wrote:
> Hi,
> 
> On 30/03/2022 22:55, Timo Rothenpieler wrote:
>> ---
>> Using libcap-ng now
> 
> A commit message would be good, but I see that David has already 
> proposed one.

The latest rebased version of this patch already has that message.
Just seemed silly to re-send it just because of that:
https://github.com/BtbN/openvpn/tree/dco


>>
>>
>>   configure.ac                              | 19 +++++
>>   distro/systemd/openvpn-client@.service.in |  2 +-
>>   distro/systemd/openvpn-server@.service.in |  2 +-
>>   src/openvpn/init.c                        | 25 ++++++-
>>   src/openvpn/platform.c                    | 91 +++++++++++++++++++++++
>>   src/openvpn/platform.h                    |  5 ++
>>   6 files changed, 140 insertions(+), 4 deletions(-)
>>
>> diff --git a/configure.ac b/configure.ac
>> index 7199483a..168360d4 100644
>> --- a/configure.ac
>> +++ b/configure.ac
>> @@ -794,6 +794,25 @@ dnl
>>       esac
>>   fi
>> +dnl
>> +dnl Depend on libcap-ng on Linux
>> +dnl
>> +case "$host" in
>> +    *-*-linux*)
>> +        PKG_CHECK_MODULES([LIBCAPNG],
>> +                  [libcap-ng],
>> +                  [have_libcapng="yes"],
> 
> do we really need have_libcapng? it seems it is not used further in 
> configure.ac

I have little to no experience with autotools, and I think this is 
straight up copied from the openvpn3 setup.
Can I just leave the line empty? Or put empty [] there?

>> +                  [AC_MSG_ERROR([libcap-ng package not found. Is the 
>> development package and pkg-config installed?])]
>> +        )
>> +        AC_CHECK_HEADER([sys/prctl.h],,[AC_MSG_ERROR([sys/prctl.h not 
>> found!])])
>> +
>> +        CFLAGS="${CFLAGS} ${LIBCAPNG_CFALGS}"
>> +        LIBS="${LIBS} ${LIBCAPNG_LIBS}"
>> +        AC_DEFINE(HAVE_LIBCAPNG, 1, [Enable libcap-ng support])
>> +    ;;
>> +esac
>> +
>> +
>>   if test "${with_crypto_library}" = "openssl"; then
>>       AC_ARG_VAR([OPENSSL_CFLAGS], [C compiler flags for OpenSSL])
>>       AC_ARG_VAR([OPENSSL_LIBS], [linker flags for OpenSSL])
>> diff --git a/distro/systemd/openvpn-client@.service.in 
>> b/distro/systemd/openvpn-client@.service.in
>> index cbcef653..159fb4dc 100644
>> --- a/distro/systemd/openvpn-client@.service.in
>> +++ b/distro/systemd/openvpn-client@.service.in
>> @@ -11,7 +11,7 @@ Type=notify
>>   PrivateTmp=true
>>   WorkingDirectory=/etc/openvpn/client
>>   ExecStart=@sbindir@/openvpn --suppress-timestamps --nobind --config 
>> %i.conf
>> -CapabilityBoundingSet=CAP_IPC_LOCK CAP_NET_ADMIN CAP_NET_RAW 
>> CAP_SETGID CAP_SETUID CAP_SYS_CHROOT CAP_DAC_OVERRIDE
>> +CapabilityBoundingSet=CAP_IPC_LOCK CAP_NET_ADMIN CAP_NET_RAW 
>> CAP_SETGID CAP_SETUID CAP_SETPCAP CAP_SYS_CHROOT CAP_DAC_OVERRIDE
>>   LimitNPROC=10
>>   DeviceAllow=/dev/null rw
>>   DeviceAllow=/dev/net/tun rw
>> diff --git a/distro/systemd/openvpn-server@.service.in 
>> b/distro/systemd/openvpn-server@.service.in
>> index d1cc72cb..6e8e7d94 100644
>> --- a/distro/systemd/openvpn-server@.service.in
>> +++ b/distro/systemd/openvpn-server@.service.in
>> @@ -11,7 +11,7 @@ Type=notify
>>   PrivateTmp=true
>>   WorkingDirectory=/etc/openvpn/server
>>   ExecStart=@sbindir@/openvpn --status %t/openvpn-server/status-%i.log 
>> --status-version 2 --suppress-timestamps --config %i.conf
>> -CapabilityBoundingSet=CAP_IPC_LOCK CAP_NET_ADMIN CAP_NET_BIND_SERVICE 
>> CAP_NET_RAW CAP_SETGID CAP_SETUID CAP_SYS_CHROOT CAP_DAC_OVERRIDE 
>> CAP_AUDIT_WRITE
>> +CapabilityBoundingSet=CAP_IPC_LOCK CAP_NET_ADMIN CAP_NET_BIND_SERVICE 
>> CAP_NET_RAW CAP_SETGID CAP_SETUID CAP_SETPCAP CAP_SYS_CHROOT 
>> CAP_DAC_OVERRIDE CAP_AUDIT_WRITE
>>   LimitNPROC=10
>>   DeviceAllow=/dev/null rw
>>   DeviceAllow=/dev/net/tun rw
>> diff --git a/src/openvpn/init.c b/src/openvpn/init.c
>> index 8818ba6f..705eb92e 100644
>> --- a/src/openvpn/init.c
>> +++ b/src/openvpn/init.c
>> @@ -1138,6 +1138,25 @@ possibly_become_daemon(const struct options 
>> *options)
>>       return ret;
>>   }
>> +/*
>> + * Determine if we need to retain process capabilities. DCO and SITNL 
>> need it.
>> + * Enforce it for DCO, but only try and soft-fail for SITNL to keep 
>> backwards compat.
>> + */
> 
> Since this function is returning a tri-state value, may it be better to 
> document what the returned values mean in the comment above?
> 
>> +static int
>> +get_need_keep_caps(struct context *c)
> 
> I know this function is kinda a getter, but two verbs in the function 
> name sounds weird. How about just "need_keep_caps"?
> 
>> +{
>> +    if (dco_enabled(&c->options))
>> +    {
>> +        return 1;
>> +    }
>> +
>> +#ifdef ENABLE_SITNL
>> +    return -1;
>> +#else
>> +    return 0;
>> +#endif
>> +}
> 
> Since the check above is really platform specific, wouldn't it make 
> sense to move its definition to platform.c?
> At the end of the day this function is just invoked once to get a value 
> and immediately pass it to platform_user_group_set().

The reason it ended up here is that it needs access to the context, to 
find out if it's using the dco or not.
Should the context just be passed to the platform function instead? I 
see no other one of them doing that.

>> +
>>   /*
>>    * Actually do UID/GID downgrade, chroot and SELinux context 
>> switching, if requested.
>>    */
>> @@ -1167,8 +1186,10 @@ do_uid_gid_chroot(struct context *c, bool 
>> no_delay)
>>           {
>>               if (no_delay)
>>               {
>> -                platform_group_set(&c0->platform_state_group);
>> -                platform_user_set(&c0->platform_state_user);
> 
> By removing the invocations above, these two functions are now defined 
> and used only in platform.c. Therefore, they can be made static and 
> their declarations can be removed from platform.h.

That was intentional, since it seemed out of scope of this patch.
I can easily add it though if preferred.

>> +                int keep_caps = get_need_keep_caps(c);
>> +                platform_user_group_set(&c0->platform_state_user,
>> +                                        &c0->platform_state_group,
>> +                                        keep_caps);
>>               }
>>               else if (c->first_time)
>>               {
>> diff --git a/src/openvpn/platform.c b/src/openvpn/platform.c
>> index 450f28ba..4fce5a83 100644
>> --- a/src/openvpn/platform.c
>> +++ b/src/openvpn/platform.c
>> @@ -43,6 +43,11 @@
>>   #include <direct.h>
>>   #endif
>> +#ifdef HAVE_LIBCAPNG
>> +#include <cap-ng.h>
>> +#include <sys/prctl.h>
>> +#endif
>> +
>>   /* Redefine the top level directory of the filesystem
>>    * to restrict access to files for security */
>>   void
>> @@ -155,6 +160,92 @@ platform_group_set(const struct 
>> platform_state_group *state)
>>   #endif
>>   }
> 
> Should we add a comment here describing what this function is doing? I 
> know it is not complex, but maybe we should say that we will try to set 
> the uid/gid using libcap-ng because we will also try to retain the 
> CAP_NET_ADMIN capa? If libcap-ng is not available, we will simply try to 
> switch user/group, unless not allowed (i.e. SITNL is enabled)
> 
>> +void platform_user_group_set(const struct platform_state_user 
>> *user_state,
>> +                             const struct platform_state_group 
>> *group_state,
>> +                             int keep_caps)
>> +{
>> +    unsigned int err_flags = (keep_caps > 0) ? M_FATAL : M_NONFATAL;
> 
> Shrug - I really don't like this "OpenVPN way" of handling code flow - 
> with a value passed to a print() function...but hey, this is how it's 
> currently done in most of the code.
> 
>> +#ifdef HAVE_LIBCAPNG
>> +    int new_gid = -1, new_uid = -1;
>> +    int res;
>> +
>> +    if (keep_caps == 0)
>> +    {
>> +        goto fallback;
>> +    }
>> +
>> +    /*
>> +     * new_uid/new_gid defaults to -1, which will not make
>> +     * libcap-ng change the UID/GID unless configured
>> +     */
>> +    if (group_state->groupname && group_state->gr)
>> +    {
>> +        new_gid = group_state->gr->gr_gid;
>> +    }
>> +    if (user_state->username && user_state->pw)
>> +    {
>> +        new_uid = user_state->pw->pw_uid;
>> +    }
>> +
>> +    /* Prepare capabilities before dropping UID/GID */
>> +    capng_clear(CAPNG_SELECT_BOTH);
>> +    res = capng_update(CAPNG_ADD, CAPNG_EFFECTIVE | CAPNG_PERMITTED, 
>> CAP_NET_ADMIN);
>> +    if (res < 0)
>> +    {
>> +        msg(err_flags, "capng_update(CAP_NET_ADMIN) failed: %d", res);
>> +        goto fallback;
>> +    }
>> +
>> +    /* Change to new UID/GID.
>> +     * capng_change_id() internally calls capng_apply() to apply 
>> prepared capabilities.
>> +     */
>> +    res = capng_change_id(new_uid, new_gid, CAPNG_DROP_SUPP_GRP | 
>> CAPNG_CLEAR_BOUNDING);
>> +    if (res == -4 || res == -6)
> 
> Argh, libcap-ng defines its own error codes...can we at least extend the 
> comment above to explain what these magic -4 and -6 are? (this way we 
> are sure to catch what the devel
> 
>> +    {
>> +        msg(M_ERR, "capng_change_id('%s','%s') failed: %d",
>> +            user_state->username, group_state->groupname, res);
> 
> If I understood correctly, in these two cases (-4 and -6) we failed to 
> set either the uid or the gid, but the capas were retained?
> 
> If so, this means that the --user or --group options failed to be 
> applied. Is it right to continue the execution? If I am not wrong 
> OpenVPN currently aborts if the user/group couldn't be changed. 
> Shouldn't we keep this behaviuor?

That's the whole point of this check, it aborts (via M_ERR) if 
setuid/setgid failed, since there is no fallback to that and it'd fail 
again anyway if openvpn tried again.

A comment explaining the -4 and -6 return codes is definitely a good idea.

>> +    }
>> +    else if (res < 0)
>> +    {
>> +        if (res == -3)
>> +        {
>> +            msg(M_NONFATAL, "Following error likely due to missing 
>> capability CAP_SETPCAP.");
>> +        }
>> +        msg(err_flags | M_ERRNO, "capng_change_id('%s','%s') failed 
>> retaining capabilities: %d",
> 
> 'man cap_change_id' does not mention setting errno at all.
> What do we expect to see with M_ERRNO?

Every function it internally calls sets errno, so in case of failure 
errno will reflect what went wrong. Like, for example EPERM will be the 
most common cause of failure.

>> +            user_state->username, group_state->groupname, res);
>> +        goto fallback;
>> +    }
>> +
>> +    if (new_uid >= 0)
>> +    {
>> +         msg(M_INFO, "UID set to %s", user_state->username);
>> +    }
>> +    if (new_gid >= 0)
>> +    {
>> +         msg(M_INFO, "GID set to %s", group_state->groupname);
>> +    }
>> +
>> +    msg(M_INFO, "Capabilities retained: CAP_NET_ADMIN");
>> +
>> +    return;
>> +fallback:
>> +    /* capng_change_id() can leave this flag clobbered on failure */
>> +    if (prctl(PR_GET_KEEPCAPS) && prctl(PR_SET_KEEPCAPS, 0) < 0)
> 
> What does this flag mean and why do we need to reset it to 0?
> And what happens if the flag is not reset? (It seems we don't care much 
> and just continue the execution)

See https://github.com/stevegrubb/libcap-ng/issues/33

It's basically working around that issue, where capng_change_id() 
failing can leave the KEEPCAPS flag set, which would lead to our 
fallback setuid to retain rootlike capabilities, so we have to ensure 
it's unset before attempting to setuid.

It's fixed in upstream libcap-ng now, but it will take potentially 
months until that makes it into a release, and then even longer until it 
hits distros, so implementing a workaround seemed in order.
It has no ill effects and will just be a no-op with the patch in place.

>> +    {
>> +        msg(M_ERR, "Clearing KEEPCAPS flag failed");
>> +    }
>> +#endif  /* HAVE_LIBCAPNG */
>> +    if (keep_caps)
> 
> for my poor eyes..add an empty line after the endif :-)
> 
>> +    {
>> +        msg(err_flags, "Unable to retain capabilities");
>> +    }
>> +
>> +    platform_group_set(group_state);
>> +    platform_user_set(user_state);
>> +}
>> +
>> +
>> +
> 
> I think only one empty line is enough :D
> 
>>   /* Change process priority */
>>   void
>>   platform_nice(int niceval)
>> diff --git a/src/openvpn/platform.h b/src/openvpn/platform.h
>> index a3eec298..b163f093 100644
>> --- a/src/openvpn/platform.h
>> +++ b/src/openvpn/platform.h
>> @@ -85,6 +85,11 @@ bool platform_group_get(const char *groupname, 
>> struct platform_state_group *stat
>>   void platform_group_set(const struct platform_state_group *state);
>> +void platform_user_group_set(const struct platform_state_user 
>> *user_state,
>> +                             const struct platform_state_group 
>> *group_state,
>> +                             int keep_caps);
>> +
>> +
>>   /*
>>    * Extract UID or GID
>>    */
> 
> 
> Regards,
>
David Sommerseth April 6, 2022, 3:34 a.m. UTC | #14
On 06/04/2022 14:44, Timo Rothenpieler wrote:
>>>
>>
>> 'man cap_change_id' does not mention setting errno at all.
>> What do we expect to see with M_ERRNO?
> 
> Every function it internally calls sets errno, so in case of failure 
> errno will reflect what went wrong. Like, for example EPERM will be the 
> most common cause of failure.

capng_change_id() does several other lower level calls, like prctl(), 
setgroups(), setresgid() and setresuid().  They all set errno if an 
error occurs.  The return code of capng_change_id() just reflects in 
which phase of dropping privileges/uid/gid it failed.

For more details of the capng_change_id(), the implementation itself 
isn't that hard to read (but it does a several steps to harden the 
privilege drop): 
<https://github.com/stevegrubb/libcap-ng/blob/03b8572843b36bf071776a311c61f8d1dcfc4d53/src/cap-ng.c#L960>
David Sommerseth April 6, 2022, 3:41 a.m. UTC | #15
On 06/04/2022 14:44, Timo Rothenpieler wrote:
>>>
>>> --- a/configure.ac
>>> +++ b/configure.ac
>>> @@ -794,6 +794,25 @@ dnl
>>>       esac
>>>   fi
>>> +dnl
>>> +dnl Depend on libcap-ng on Linux
>>> +dnl
>>> +case "$host" in
>>> +    *-*-linux*)
>>> +        PKG_CHECK_MODULES([LIBCAPNG],
>>> +                  [libcap-ng],
>>> +                  [have_libcapng="yes"],
>>
>> do we really need have_libcapng? it seems it is not used further in 
>> configure.ac
> 
> I have little to no experience with autotools, and I think this is 
> straight up copied from the openvpn3 setup.

That's probably right.

> Can I just leave the line empty? Or put empty [] there?

Yes, [] should suffice.  That said, it makes no big difference if it is 
there or not.  If needed to check ("test") if libcap-ng is available or 
not, this checks needs to be added again though.

That said, we do not have a consistent way of using PKG_CHECK_MODULES() 
in general.  We have at least 4 different ways in use today.

Probably something to clean-up some day later.

Patch

diff --git a/configure.ac b/configure.ac
index 7199483a..168360d4 100644
--- a/configure.ac
+++ b/configure.ac
@@ -794,6 +794,25 @@  dnl
 	esac
 fi
 
+dnl
+dnl Depend on libcap-ng on Linux
+dnl
+case "$host" in
+	*-*-linux*)
+		PKG_CHECK_MODULES([LIBCAPNG],
+				  [libcap-ng],
+				  [have_libcapng="yes"],
+				  [AC_MSG_ERROR([libcap-ng package not found. Is the development package and pkg-config installed?])]
+		)
+		AC_CHECK_HEADER([sys/prctl.h],,[AC_MSG_ERROR([sys/prctl.h not found!])])
+
+		CFLAGS="${CFLAGS} ${LIBCAPNG_CFALGS}"
+		LIBS="${LIBS} ${LIBCAPNG_LIBS}"
+		AC_DEFINE(HAVE_LIBCAPNG, 1, [Enable libcap-ng support])
+	;;
+esac
+
+
 if test "${with_crypto_library}" = "openssl"; then
 	AC_ARG_VAR([OPENSSL_CFLAGS], [C compiler flags for OpenSSL])
 	AC_ARG_VAR([OPENSSL_LIBS], [linker flags for OpenSSL])
diff --git a/distro/systemd/openvpn-client@.service.in b/distro/systemd/openvpn-client@.service.in
index cbcef653..159fb4dc 100644
--- a/distro/systemd/openvpn-client@.service.in
+++ b/distro/systemd/openvpn-client@.service.in
@@ -11,7 +11,7 @@  Type=notify
 PrivateTmp=true
 WorkingDirectory=/etc/openvpn/client
 ExecStart=@sbindir@/openvpn --suppress-timestamps --nobind --config %i.conf
-CapabilityBoundingSet=CAP_IPC_LOCK CAP_NET_ADMIN CAP_NET_RAW CAP_SETGID CAP_SETUID CAP_SYS_CHROOT CAP_DAC_OVERRIDE
+CapabilityBoundingSet=CAP_IPC_LOCK CAP_NET_ADMIN CAP_NET_RAW CAP_SETGID CAP_SETUID CAP_SETPCAP CAP_SYS_CHROOT CAP_DAC_OVERRIDE
 LimitNPROC=10
 DeviceAllow=/dev/null rw
 DeviceAllow=/dev/net/tun rw
diff --git a/distro/systemd/openvpn-server@.service.in b/distro/systemd/openvpn-server@.service.in
index d1cc72cb..6e8e7d94 100644
--- a/distro/systemd/openvpn-server@.service.in
+++ b/distro/systemd/openvpn-server@.service.in
@@ -11,7 +11,7 @@  Type=notify
 PrivateTmp=true
 WorkingDirectory=/etc/openvpn/server
 ExecStart=@sbindir@/openvpn --status %t/openvpn-server/status-%i.log --status-version 2 --suppress-timestamps --config %i.conf
-CapabilityBoundingSet=CAP_IPC_LOCK CAP_NET_ADMIN CAP_NET_BIND_SERVICE CAP_NET_RAW CAP_SETGID CAP_SETUID CAP_SYS_CHROOT CAP_DAC_OVERRIDE CAP_AUDIT_WRITE
+CapabilityBoundingSet=CAP_IPC_LOCK CAP_NET_ADMIN CAP_NET_BIND_SERVICE CAP_NET_RAW CAP_SETGID CAP_SETUID CAP_SETPCAP CAP_SYS_CHROOT CAP_DAC_OVERRIDE CAP_AUDIT_WRITE
 LimitNPROC=10
 DeviceAllow=/dev/null rw
 DeviceAllow=/dev/net/tun rw
diff --git a/src/openvpn/init.c b/src/openvpn/init.c
index 8818ba6f..705eb92e 100644
--- a/src/openvpn/init.c
+++ b/src/openvpn/init.c
@@ -1138,6 +1138,25 @@  possibly_become_daemon(const struct options *options)
     return ret;
 }
 
+/*
+ * Determine if we need to retain process capabilities. DCO and SITNL need it.
+ * Enforce it for DCO, but only try and soft-fail for SITNL to keep backwards compat.
+ */
+static int
+get_need_keep_caps(struct context *c)
+{
+    if (dco_enabled(&c->options))
+    {
+        return 1;
+    }
+
+#ifdef ENABLE_SITNL
+    return -1;
+#else
+    return 0;
+#endif
+}
+
 /*
  * Actually do UID/GID downgrade, chroot and SELinux context switching, if requested.
  */
@@ -1167,8 +1186,10 @@  do_uid_gid_chroot(struct context *c, bool no_delay)
         {
             if (no_delay)
             {
-                platform_group_set(&c0->platform_state_group);
-                platform_user_set(&c0->platform_state_user);
+                int keep_caps = get_need_keep_caps(c);
+                platform_user_group_set(&c0->platform_state_user,
+                                        &c0->platform_state_group,
+                                        keep_caps);
             }
             else if (c->first_time)
             {
diff --git a/src/openvpn/platform.c b/src/openvpn/platform.c
index 450f28ba..4fce5a83 100644
--- a/src/openvpn/platform.c
+++ b/src/openvpn/platform.c
@@ -43,6 +43,11 @@ 
 #include <direct.h>
 #endif
 
+#ifdef HAVE_LIBCAPNG
+#include <cap-ng.h>
+#include <sys/prctl.h>
+#endif
+
 /* Redefine the top level directory of the filesystem
  * to restrict access to files for security */
 void
@@ -155,6 +160,92 @@  platform_group_set(const struct platform_state_group *state)
 #endif
 }
 
+void platform_user_group_set(const struct platform_state_user *user_state,
+                             const struct platform_state_group *group_state,
+                             int keep_caps)
+{
+    unsigned int err_flags = (keep_caps > 0) ? M_FATAL : M_NONFATAL;
+#ifdef HAVE_LIBCAPNG
+    int new_gid = -1, new_uid = -1;
+    int res;
+
+    if (keep_caps == 0)
+    {
+        goto fallback;
+    }
+
+    /*
+     * new_uid/new_gid defaults to -1, which will not make
+     * libcap-ng change the UID/GID unless configured
+     */
+    if (group_state->groupname && group_state->gr)
+    {
+        new_gid = group_state->gr->gr_gid;
+    }
+    if (user_state->username && user_state->pw)
+    {
+        new_uid = user_state->pw->pw_uid;
+    }
+
+    /* Prepare capabilities before dropping UID/GID */
+    capng_clear(CAPNG_SELECT_BOTH);
+    res = capng_update(CAPNG_ADD, CAPNG_EFFECTIVE | CAPNG_PERMITTED, CAP_NET_ADMIN);
+    if (res < 0)
+    {
+        msg(err_flags, "capng_update(CAP_NET_ADMIN) failed: %d", res);
+        goto fallback;
+    }
+
+    /* Change to new UID/GID.
+     * capng_change_id() internally calls capng_apply() to apply prepared capabilities.
+     */
+    res = capng_change_id(new_uid, new_gid, CAPNG_DROP_SUPP_GRP | CAPNG_CLEAR_BOUNDING);
+    if (res == -4 || res == -6)
+    {
+        msg(M_ERR, "capng_change_id('%s','%s') failed: %d",
+            user_state->username, group_state->groupname, res);
+    }
+    else if (res < 0)
+    {
+        if (res == -3)
+        {
+            msg(M_NONFATAL, "Following error likely due to missing capability CAP_SETPCAP.");
+        }
+        msg(err_flags | M_ERRNO, "capng_change_id('%s','%s') failed retaining capabilities: %d",
+            user_state->username, group_state->groupname, res);
+        goto fallback;
+    }
+
+    if (new_uid >= 0)
+    {
+         msg(M_INFO, "UID set to %s", user_state->username);
+    }
+    if (new_gid >= 0)
+    {
+         msg(M_INFO, "GID set to %s", group_state->groupname);
+    }
+
+    msg(M_INFO, "Capabilities retained: CAP_NET_ADMIN");
+
+    return;
+fallback:
+    /* capng_change_id() can leave this flag clobbered on failure */
+    if (prctl(PR_GET_KEEPCAPS) && prctl(PR_SET_KEEPCAPS, 0) < 0)
+    {
+        msg(M_ERR, "Clearing KEEPCAPS flag failed");
+    }
+#endif  /* HAVE_LIBCAPNG */
+    if (keep_caps)
+    {
+        msg(err_flags, "Unable to retain capabilities");
+    }
+
+    platform_group_set(group_state);
+    platform_user_set(user_state);
+}
+
+
+
 /* Change process priority */
 void
 platform_nice(int niceval)
diff --git a/src/openvpn/platform.h b/src/openvpn/platform.h
index a3eec298..b163f093 100644
--- a/src/openvpn/platform.h
+++ b/src/openvpn/platform.h
@@ -85,6 +85,11 @@  bool platform_group_get(const char *groupname, struct platform_state_group *stat
 
 void platform_group_set(const struct platform_state_group *state);
 
+void platform_user_group_set(const struct platform_state_user *user_state,
+                             const struct platform_state_group *group_state,
+                             int keep_caps);
+
+
 /*
  * Extract UID or GID
  */