[Openvpn-devel,ovpn,net,3/9] ovpn: fix data race reading cached local endpoint on TX path

Message ID 20260526231850.2511369-3-a@unstable.cc
State New
Headers
Series [Openvpn-devel,ovpn,net,1/9] ovpn: skip rehash for peers already removed from by_id |

Commit Message

Antonio Quartulli May 26, 2026, 11:18 p.m. UTC
  From: Antonio Quartulli <antonio@openvpn.net>

The cached local source address bind->local is updated in place on a
live, RCU-published ovpn_bind while holding peer->lock: the UDP output
error paths reset it (ovpn_udp4_output()/ovpn_udp6_output()) and the RX
float path learns it (ovpn_peer_endpoints_update()). The UDP TX fast
path and the netlink dump, however, read bind->local holding only
rcu_read_lock(), never peer->lock.

For bind->local.ipv6 this is a torn read: struct in6_addr is 128 bit and
is copied as multiple words, so a concurrent in-place update can make a
reader observe a mix of the old and new address. The mangled source
address then feeds ip6_dst_lookup_flow() and udp_tunnel6_xmit_skb(). For
bind->local.ipv4 (a single aligned word) it is a data race without
tearing.

A spinlock on the per-packet TX path is not acceptable, and
READ_ONCE()/WRITE_ONCE() cannot atomically access the 128-bit IPv6
address (the >8-byte access is rejected at build time and per-word
accesses still can't yield a consistent snapshot).

Serialize the IPv6 field with a per-peer seqcount_spinlock_t tied to the
existing peer->lock: the in-place writers (already under peer->lock) bump
it, and readers take a lock-free read_seqcount_begin()/retry() snapshot
via the new ovpn_peer_local_ipv6() helper. The single-word IPv4 field is
handled with plain READ_ONCE()/WRITE_ONCE(). bind->remote is untouched:
it is immutable for a given bind object (only swapped via whole-bind RCU
replacement), so reading it locklessly remains safe.

Fixes: 08857b5ec5d9 ("ovpn: implement basic TX path (UDP)")
Signed-off-by: Antonio Quartulli <antonio@openvpn.net>
---
 drivers/net/ovpn/netlink.c | 13 +++++++++++--
 drivers/net/ovpn/peer.c    | 26 +++++++++++++++++++++++++-
 drivers/net/ovpn/peer.h    |  6 ++++++
 drivers/net/ovpn/udp.c     | 17 +++++++++++++----
 4 files changed, 55 insertions(+), 7 deletions(-)
  

Comments

Antonio Quartulli May 29, 2026, 12:38 p.m. UTC | #1
On 27/05/2026 01:18, Antonio Quartulli wrote:
> From: Antonio Quartulli <antonio@openvpn.net>
> 
> The cached local source address bind->local is updated in place on a
> live, RCU-published ovpn_bind while holding peer->lock: the UDP output
> error paths reset it (ovpn_udp4_output()/ovpn_udp6_output()) and the RX
> float path learns it (ovpn_peer_endpoints_update()). The UDP TX fast
> path and the netlink dump, however, read bind->local holding only
> rcu_read_lock(), never peer->lock.
> 
> For bind->local.ipv6 this is a torn read: struct in6_addr is 128 bit and
> is copied as multiple words, so a concurrent in-place update can make a
> reader observe a mix of the old and new address. The mangled source
> address then feeds ip6_dst_lookup_flow() and udp_tunnel6_xmit_skb(). For
> bind->local.ipv4 (a single aligned word) it is a data race without
> tearing.
> 
> A spinlock on the per-packet TX path is not acceptable, and
> READ_ONCE()/WRITE_ONCE() cannot atomically access the 128-bit IPv6
> address (the >8-byte access is rejected at build time and per-word
> accesses still can't yield a consistent snapshot).
> 
> Serialize the IPv6 field with a per-peer seqcount_spinlock_t tied to the
> existing peer->lock: the in-place writers (already under peer->lock) bump
> it, and readers take a lock-free read_seqcount_begin()/retry() snapshot
> via the new ovpn_peer_local_ipv6() helper. The single-word IPv4 field is
> handled with plain READ_ONCE()/WRITE_ONCE(). bind->remote is untouched:
> it is immutable for a given bind object (only swapped via whole-bind RCU
> replacement), so reading it locklessly remains safe.
> 
> Fixes: 08857b5ec5d9 ("ovpn: implement basic TX path (UDP)")
> Signed-off-by: Antonio Quartulli <antonio@openvpn.net>

Using seqcount_spinlock may be a bit overkill and complicated.

Ralf is attempting a fix by turning the ovpn_bind object to an RCU 
protected pointer.

This way any modification will be made by creating a new bind object and 
purging the old one, rather than modifying the current one.

We'll see how large the patch happens to be and possibly replace this one.

Regards,
  

Patch

diff --git a/drivers/net/ovpn/netlink.c b/drivers/net/ovpn/netlink.c
index 4dad85294198..8e21fa3e7822 100644
--- a/drivers/net/ovpn/netlink.c
+++ b/drivers/net/ovpn/netlink.c
@@ -610,14 +610,23 @@  static int ovpn_nl_send_peer(struct sk_buff *skb, const struct genl_info *info,
 	bind = rcu_dereference(peer->bind);
 	if (bind) {
 		if (bind->remote.in4.sin_family == AF_INET) {
+			/* bind->local is updated in place under peer->lock;
+			 * READ_ONCE() pairs with the WRITE_ONCE() updaters
+			 */
 			if (nla_put_in_addr(skb, OVPN_A_PEER_REMOTE_IPV4,
 					    bind->remote.in4.sin_addr.s_addr) ||
 			    nla_put_net16(skb, OVPN_A_PEER_REMOTE_PORT,
 					  bind->remote.in4.sin_port) ||
 			    nla_put_in_addr(skb, OVPN_A_PEER_LOCAL_IPV4,
-					    bind->local.ipv4.s_addr))
+					    READ_ONCE(bind->local.ipv4.s_addr)))
 				goto err_unlock;
 		} else if (bind->remote.in4.sin_family == AF_INET6) {
+			struct in6_addr local_ipv6;
+
+			/* read the 128-bit local address under the peer
+			 * seqcount to avoid a torn read
+			 */
+			ovpn_peer_local_ipv6(peer, bind, &local_ipv6);
 			if (nla_put_in6_addr(skb, OVPN_A_PEER_REMOTE_IPV6,
 					     &bind->remote.in6.sin6_addr) ||
 			    nla_put_u32(skb, OVPN_A_PEER_REMOTE_IPV6_SCOPE_ID,
@@ -625,7 +634,7 @@  static int ovpn_nl_send_peer(struct sk_buff *skb, const struct genl_info *info,
 			    nla_put_net16(skb, OVPN_A_PEER_REMOTE_PORT,
 					  bind->remote.in6.sin6_port) ||
 			    nla_put_in6_addr(skb, OVPN_A_PEER_LOCAL_IPV6,
-					     &bind->local.ipv6))
+					     &local_ipv6))
 				goto err_unlock;
 		}
 	}
diff --git a/drivers/net/ovpn/peer.c b/drivers/net/ovpn/peer.c
index 8aa07560bb30..bbb1946fa5b4 100644
--- a/drivers/net/ovpn/peer.c
+++ b/drivers/net/ovpn/peer.c
@@ -112,6 +112,7 @@  struct ovpn_peer *ovpn_peer_new(struct ovpn_priv *ovpn, u32 id)
 	RCU_INIT_POINTER(peer->bind, NULL);
 	ovpn_crypto_state_init(&peer->crypto);
 	spin_lock_init(&peer->lock);
+	seqcount_spinlock_init(&peer->bind_local_seq, &peer->lock);
 	kref_init(&peer->refcount);
 	ovpn_peer_stats_init(&peer->vpn_stats);
 	ovpn_peer_stats_init(&peer->link_stats);
@@ -175,6 +176,27 @@  int ovpn_peer_reset_sockaddr(struct ovpn_peer *peer,
 	return 0;
 }
 
+/**
+ * ovpn_peer_local_ipv6 - read the cached local IPv6 endpoint of a peer
+ * @peer: the peer owning the binding
+ * @bind: the binding to read the local address from
+ * @dst: where the local IPv6 address is copied to
+ *
+ * bind->local is updated in place under peer->lock (TX error path and RX
+ * float path). Read the 128-bit address under the peer seqcount so that
+ * lockless readers never observe a torn value.
+ */
+void ovpn_peer_local_ipv6(const struct ovpn_peer *peer,
+			  const struct ovpn_bind *bind, struct in6_addr *dst)
+{
+	unsigned int seq;
+
+	do {
+		seq = read_seqcount_begin(&peer->bind_local_seq);
+		*dst = bind->local.ipv6;
+	} while (read_seqcount_retry(&peer->bind_local_seq, seq));
+}
+
 /* variable name __tbl2 needs to be different from __tbl1
  * in the macro below to avoid confusing clang
  */
@@ -237,7 +259,7 @@  void ovpn_peer_endpoints_update(struct ovpn_peer *peer, struct sk_buff *skb)
 					    netdev_name(peer->ovpn->dev),
 					    peer->id, &bind->local.ipv4.s_addr,
 					    &ip_hdr(skb)->daddr);
-			bind->local.ipv4.s_addr = ip_hdr(skb)->daddr;
+			WRITE_ONCE(bind->local.ipv4.s_addr, ip_hdr(skb)->daddr);
 			reset_cache = true;
 		}
 		break;
@@ -268,7 +290,9 @@  void ovpn_peer_endpoints_update(struct ovpn_peer *peer, struct sk_buff *skb)
 					    netdev_name(peer->ovpn->dev),
 					    peer->id, &bind->local.ipv6,
 					    &ipv6_hdr(skb)->daddr);
+			write_seqcount_begin(&peer->bind_local_seq);
 			bind->local.ipv6 = ipv6_hdr(skb)->daddr;
+			write_seqcount_end(&peer->bind_local_seq);
 			reset_cache = true;
 		}
 		break;
diff --git a/drivers/net/ovpn/peer.h b/drivers/net/ovpn/peer.h
index dfa5c0037e02..c0994c606554 100644
--- a/drivers/net/ovpn/peer.h
+++ b/drivers/net/ovpn/peer.h
@@ -10,6 +10,7 @@ 
 #ifndef _NET_OVPN_OVPNPEER_H_
 #define _NET_OVPN_OVPNPEER_H_
 
+#include <linux/seqlock.h>
 #include <net/dst_cache.h>
 #include <net/strparser.h>
 
@@ -56,6 +57,8 @@ 
  * @link_stats: per-peer link/transport TX/RX stats
  * @delete_reason: why peer was deleted (i.e. timeout, transport error, ..)
  * @lock: protects binding to peer (bind) and keepalive* fields
+ * @bind_local_seq: seqcount serializing in-place updates of bind->local
+ *		    (done under @lock) against lockless readers on the TX path
  * @refcount: reference counter
  * @rcu: used to free peer in an RCU safe way
  * @release_entry: entry for the socket release list
@@ -110,6 +113,7 @@  struct ovpn_peer {
 	struct ovpn_peer_stats link_stats;
 	enum ovpn_del_peer_reason delete_reason;
 	spinlock_t lock; /* protects bind  and keepalive* */
+	seqcount_spinlock_t bind_local_seq; /* protects bind->local */
 	struct kref refcount;
 	struct rcu_head rcu;
 	struct llist_node release_entry;
@@ -151,6 +155,8 @@  struct ovpn_peer *ovpn_peer_get_by_dst(struct ovpn_priv *ovpn,
 				       struct sk_buff *skb);
 void ovpn_peer_hash_vpn_ip(struct ovpn_peer *peer);
 void ovpn_peer_hash_transp_addr(struct ovpn_peer *peer);
+void ovpn_peer_local_ipv6(const struct ovpn_peer *peer,
+			  const struct ovpn_bind *bind, struct in6_addr *dst);
 bool ovpn_peer_check_by_src(struct ovpn_priv *ovpn, struct sk_buff *skb,
 			    struct ovpn_peer *peer);
 
diff --git a/drivers/net/ovpn/udp.c b/drivers/net/ovpn/udp.c
index 8811aa9eedeb..60d32dc5af4a 100644
--- a/drivers/net/ovpn/udp.c
+++ b/drivers/net/ovpn/udp.c
@@ -147,7 +147,10 @@  static int ovpn_udp4_output(struct ovpn_peer *peer, struct ovpn_bind *bind,
 {
 	struct rtable *rt;
 	struct flowi4 fl = {
-		.saddr = bind->local.ipv4.s_addr,
+		/* bind->local is updated in place under peer->lock; a single
+		 * aligned word is read/written atomically via {READ,WRITE}_ONCE
+		 */
+		.saddr = READ_ONCE(bind->local.ipv4.s_addr),
 		.daddr = bind->remote.in4.sin_addr.s_addr,
 		.fl4_sport = inet_sk(sk)->inet_sport,
 		.fl4_dport = bind->remote.in4.sin_port,
@@ -169,7 +172,7 @@  static int ovpn_udp4_output(struct ovpn_peer *peer, struct ovpn_bind *bind,
 		 */
 		fl.saddr = 0;
 		spin_lock_bh(&peer->lock);
-		bind->local.ipv4.s_addr = 0;
+		WRITE_ONCE(bind->local.ipv4.s_addr, 0);
 		spin_unlock_bh(&peer->lock);
 		dst_cache_reset(cache);
 	}
@@ -178,7 +181,7 @@  static int ovpn_udp4_output(struct ovpn_peer *peer, struct ovpn_bind *bind,
 	if (IS_ERR(rt) && PTR_ERR(rt) == -EINVAL) {
 		fl.saddr = 0;
 		spin_lock_bh(&peer->lock);
-		bind->local.ipv4.s_addr = 0;
+		WRITE_ONCE(bind->local.ipv4.s_addr, 0);
 		spin_unlock_bh(&peer->lock);
 		dst_cache_reset(cache);
 
@@ -224,7 +227,6 @@  static int ovpn_udp6_output(struct ovpn_peer *peer, struct ovpn_bind *bind,
 	int ret;
 
 	struct flowi6 fl = {
-		.saddr = bind->local.ipv6,
 		.daddr = bind->remote.in6.sin6_addr,
 		.fl6_sport = inet_sk(sk)->inet_sport,
 		.fl6_dport = bind->remote.in6.sin6_port,
@@ -233,6 +235,11 @@  static int ovpn_udp6_output(struct ovpn_peer *peer, struct ovpn_bind *bind,
 		.flowi6_oif = bind->remote.in6.sin6_scope_id,
 	};
 
+	/* bind->local is updated in place under peer->lock; read the 128-bit
+	 * address under the peer seqcount to avoid a torn read
+	 */
+	ovpn_peer_local_ipv6(peer, bind, &fl.saddr);
+
 	local_bh_disable();
 	dst = dst_cache_get_ip6(cache, &fl.saddr);
 	if (dst)
@@ -245,7 +252,9 @@  static int ovpn_udp6_output(struct ovpn_peer *peer, struct ovpn_bind *bind,
 		 */
 		fl.saddr = in6addr_any;
 		spin_lock_bh(&peer->lock);
+		write_seqcount_begin(&peer->bind_local_seq);
 		bind->local.ipv6 = in6addr_any;
+		write_seqcount_end(&peer->bind_local_seq);
 		spin_unlock_bh(&peer->lock);
 		dst_cache_reset(cache);
 	}