From: Antonio Quartulli <antonio@openvpn.net>
Since commit 64f5f0062d77 ("ovpn: netlink - check CAP_NET_ADMIN in
source namespace only") an unprivileged userns owner can drive every
ovpn genetlink op and, without further mitigation, pin arbitrary
amounts of kernel memory by creating interfaces, peers and key slots
in a loop.
Tag the affected allocations with __GFP_ACCOUNT so they are charged
to the caller's cgroup and capped by memory.max: ovpn_peer,
ovpn_socket, the ~128 KiB MP peer hashtable, the crypto key slot,
the per-peer dst_cache and netdev trackers, and ovpn_bind when
allocated from process context.
ovpn_bind_from_sockaddr() and ovpn_peer_reset_sockaddr() now take an
explicit gfp_t. The RX float path runs in softirq, where 'current'
is unrelated to the userns owner, so it passes plain GFP_ATOMIC and
accepts that those binds go unaccounted. Per-packet datapath
allocations are out of scope: short-lived and traffic-driven.
Signed-off-by: Antonio Quartulli <antonio@openvpn.net>
---
drivers/net/ovpn/bind.c | 12 ++++++++++--
drivers/net/ovpn/bind.h | 3 ++-
drivers/net/ovpn/crypto_aead.c | 2 +-
drivers/net/ovpn/main.c | 2 +-
drivers/net/ovpn/netlink.c | 3 ++-
drivers/net/ovpn/peer.c | 18 ++++++++++++------
drivers/net/ovpn/peer.h | 2 +-
drivers/net/ovpn/socket.c | 4 ++--
8 files changed, 31 insertions(+), 15 deletions(-)
@@ -17,10 +17,18 @@
/**
* ovpn_bind_from_sockaddr - retrieve binding matching sockaddr
* @ss: the sockaddr to match
+ * @gfp: GFP flags for the allocation. All callers reach this function with
+ * peer->lock (a spinlock) held, so the GFP must be atomic regardless of
+ * the outer context. Callers in process context (e.g. netlink handlers)
+ * should additionally OR in __GFP_ACCOUNT so the charge lands on the
+ * caller's memcg; callers on the RX float path (softirq, where current
+ * is unrelated to the userns owner) must pass plain GFP_ATOMIC to avoid
+ * mis-accounting to a random cgroup.
*
* Return: the bind matching the passed sockaddr if found, NULL otherwise
*/
-struct ovpn_bind *ovpn_bind_from_sockaddr(const struct sockaddr_storage *ss)
+struct ovpn_bind *ovpn_bind_from_sockaddr(const struct sockaddr_storage *ss,
+ gfp_t gfp)
{
struct ovpn_bind *bind;
size_t sa_len;
@@ -32,7 +40,7 @@ struct ovpn_bind *ovpn_bind_from_sockaddr(const struct sockaddr_storage *ss)
else
return ERR_PTR(-EAFNOSUPPORT);
- bind = kzalloc_obj(*bind, GFP_ATOMIC);
+ bind = kzalloc_obj(*bind, gfp);
if (unlikely(!bind))
return ERR_PTR(-ENOMEM);
@@ -95,7 +95,8 @@ static inline bool ovpn_bind_skb_src_match(const struct ovpn_bind *bind,
return true;
}
-struct ovpn_bind *ovpn_bind_from_sockaddr(const struct sockaddr_storage *sa);
+struct ovpn_bind *ovpn_bind_from_sockaddr(const struct sockaddr_storage *sa,
+ gfp_t gfp);
void ovpn_bind_reset(struct ovpn_peer *peer, struct ovpn_bind *bind);
#endif /* _NET_OVPN_OVPNBIND_H_ */
@@ -414,7 +414,7 @@ ovpn_aead_crypto_key_slot_new(const struct ovpn_key_config *kc)
return ERR_PTR(-EINVAL);
/* build the key slot */
- ks = kmalloc_obj(*ks);
+ ks = kmalloc_obj(*ks, GFP_KERNEL_ACCOUNT);
if (!ks)
return ERR_PTR(-ENOMEM);
@@ -57,7 +57,7 @@ static int ovpn_mp_alloc(struct ovpn_priv *ovpn)
/* the peer container is fairly large, therefore we allocate it only in
* MP mode
*/
- ovpn->peers = kzalloc_obj(*ovpn->peers);
+ ovpn->peers = kzalloc_obj(*ovpn->peers, GFP_KERNEL_ACCOUNT);
if (!ovpn->peers)
return -ENOMEM;
@@ -295,7 +295,8 @@ static int ovpn_nl_peer_modify(struct ovpn_peer *peer, struct genl_info *info,
local_ip = ovpn_nl_attr_local_ip(attrs);
/* set peer sockaddr */
- ret = ovpn_peer_reset_sockaddr(peer, &ss, local_ip);
+ ret = ovpn_peer_reset_sockaddr(peer, &ss, local_ip,
+ GFP_ATOMIC | __GFP_ACCOUNT);
if (ret < 0) {
NL_SET_ERR_MSG_FMT_MOD(info->extack,
"cannot set peer sockaddr: %d",
@@ -95,7 +95,7 @@ struct ovpn_peer *ovpn_peer_new(struct ovpn_priv *ovpn, u32 id)
int ret;
/* alloc and init peer object */
- peer = kzalloc_obj(*peer);
+ peer = kzalloc_obj(*peer, GFP_KERNEL_ACCOUNT);
if (!peer)
return ERR_PTR(-ENOMEM);
@@ -117,7 +117,7 @@ struct ovpn_peer *ovpn_peer_new(struct ovpn_priv *ovpn, u32 id)
ovpn_peer_stats_init(&peer->link_stats);
INIT_WORK(&peer->keepalive_work, ovpn_peer_keepalive_send);
- ret = dst_cache_init(&peer->dst_cache, GFP_KERNEL);
+ ret = dst_cache_init(&peer->dst_cache, GFP_KERNEL_ACCOUNT);
if (ret < 0) {
netdev_err(ovpn->dev,
"cannot initialize dst cache for peer %u\n",
@@ -126,7 +126,7 @@ struct ovpn_peer *ovpn_peer_new(struct ovpn_priv *ovpn, u32 id)
return ERR_PTR(ret);
}
- netdev_hold(ovpn->dev, &peer->dev_tracker, GFP_KERNEL);
+ netdev_hold(ovpn->dev, &peer->dev_tracker, GFP_KERNEL_ACCOUNT);
return peer;
}
@@ -136,12 +136,14 @@ struct ovpn_peer *ovpn_peer_new(struct ovpn_priv *ovpn, u32 id)
* @peer: peer to recreate the binding for
* @ss: sockaddr to use as remote endpoint for the binding
* @local_ip: local IP for the binding
+ * @gfp: GFP flags for the bind allocation. See ovpn_bind_from_sockaddr() for
+ * the accounting trade-off between process-context and softirq callers.
*
* Return: 0 on success or a negative error code otherwise
*/
int ovpn_peer_reset_sockaddr(struct ovpn_peer *peer,
const struct sockaddr_storage *ss,
- const void *local_ip)
+ const void *local_ip, gfp_t gfp)
{
struct ovpn_bind *bind;
size_t ip_len;
@@ -149,7 +151,7 @@ int ovpn_peer_reset_sockaddr(struct ovpn_peer *peer,
lockdep_assert_held(&peer->lock);
/* create new ovpn_bind object */
- bind = ovpn_bind_from_sockaddr(ss);
+ bind = ovpn_bind_from_sockaddr(ss, gfp);
if (IS_ERR(bind))
return PTR_ERR(bind);
@@ -281,9 +283,13 @@ void ovpn_peer_endpoints_update(struct ovpn_peer *peer, struct sk_buff *skb)
if (likely(!salen))
goto unlock;
+ /* RX float path runs in softirq context: __GFP_ACCOUNT would charge
+ * whatever cgroup is on-CPU when the packet arrives, not the userns
+ * owner, so pass plain GFP_ATOMIC and skip accounting on this path.
+ */
if (unlikely(ovpn_peer_reset_sockaddr(peer,
(struct sockaddr_storage *)&ss,
- local_ip) < 0))
+ local_ip, GFP_ATOMIC) < 0))
goto unlock;
net_dbg_ratelimited("%s: peer %d floated to %pIScp",
@@ -159,6 +159,6 @@ void ovpn_peer_keepalive_work(struct work_struct *work);
void ovpn_peer_endpoints_update(struct ovpn_peer *peer, struct sk_buff *skb);
int ovpn_peer_reset_sockaddr(struct ovpn_peer *peer,
const struct sockaddr_storage *ss,
- const void *local_ip);
+ const void *local_ip, gfp_t gfp);
#endif /* _NET_OVPN_OVPNPEER_H_ */
@@ -191,7 +191,7 @@ struct ovpn_socket *ovpn_socket_new(struct socket *sock, struct ovpn_peer *peer)
/* socket is not owned: attach to this ovpn instance */
- ovpn_sock = kzalloc_obj(*ovpn_sock);
+ ovpn_sock = kzalloc_obj(*ovpn_sock, GFP_KERNEL_ACCOUNT);
if (!ovpn_sock) {
ovpn_sock = ERR_PTR(-ENOMEM);
goto sock_release;
@@ -213,7 +213,7 @@ struct ovpn_socket *ovpn_socket_new(struct socket *sock, struct ovpn_peer *peer)
*/
ovpn_sock->ovpn = peer->ovpn;
netdev_hold(peer->ovpn->dev, &ovpn_sock->dev_tracker,
- GFP_KERNEL);
+ GFP_KERNEL_ACCOUNT);
}
/* the newly created ovpn_socket is holding reference to sk,