[Openvpn-devel,ovpn,net-next,v4,1/3] tcp: factor out TCP MSS option clamping

Message ID 20260520091613.158891-1-ralf@mandelbit.com
State New
Headers show
Series [Openvpn-devel,ovpn,net-next,v4,1/3] tcp: factor out TCP MSS option clamping | expand

Commit Message

Ralf Lici May 20, 2026, 9:16 a.m. UTC
xt_TCPMSS has local logic for scanning TCP options and lowering an
existing MSS option without increasing it.

Move that scan-and-clamp logic into a TCP helper so other networking
code can reuse it without duplicating TCP option parsing. Keep xt_TCPMSS
responsible for the policy-specific behavior of adding a missing MSS
option.

Linux TCP option parsing keeps scanning after an MSS option, so a later
duplicate can override an earlier value at the receiver. The helper
therefore keeps walking the TCP option block after a valid MSS option and
clamps later MSS options as well, still never increasing an MSS value.
Unlike the old xt_TCPMSS loop, which stepped over EOL as a one-byte
option, the helper stops scanning at the first EOL option.

The helper returns 0 when at least one MSS option was handled, whether
or not any value had to be lowered. It returns -ENOENT when no MSS
option is present and -EINVAL when option parsing fails before any MSS
option is found. If parsing fails after an MSS option was handled, the
helper still returns 0 so callers do not treat the packet as missing an
MSS option after the skb may already have been modified.

Signed-off-by: Ralf Lici <ralf@mandelbit.com>
---
Changes since v3 https://lore.kernel.org/openvpn-devel/20260519080500.120724-1-ralf@mandelbit.com/
- keep scanning the TCP option block and clamp all MSS options

No changes since v2 https://lore.kernel.org/openvpn-devel/20260518085908.135570-1-ralf@mandelbit.com/

No changes since v1 https://lore.kernel.org/openvpn-devel/20260515075941.102225-1-ralf@mandelbit.com/

 include/net/tcp.h         |  2 ++
 net/ipv4/tcp.c            | 70 +++++++++++++++++++++++++++++++++++++++
 net/netfilter/xt_TCPMSS.c | 36 ++------------------
 3 files changed, 74 insertions(+), 34 deletions(-)

Patch

diff --git a/include/net/tcp.h b/include/net/tcp.h
index 3c4e6adb0dbd..e722c7d936bf 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -538,6 +538,8 @@  void tcp_parse_options(const struct net *net, const struct sk_buff *skb,
 		       struct tcp_options_received *opt_rx,
 		       int estab, struct tcp_fastopen_cookie *foc);
 
+int tcp_clamp_mss_option(struct sk_buff *skb, struct tcphdr *th, u16 maxmss);
+
 /*
  *	BPF SKB-less helpers
  */
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 21ece4c71612..521aa63f5958 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -721,6 +721,76 @@  static inline void tcp_mark_urg(struct tcp_sock *tp, int flags)
 		tp->snd_up = tp->write_seq;
 }
 
+/**
+ * tcp_clamp_mss_option - clamp every existing TCP MSS option
+ * @skb: skb containing the TCP segment
+ * @th: TCP header in @skb
+ * @maxmss: upper bound for the TCP MSS option value
+ *
+ * Walk the TCP option block, stopping at the first EOL option, and clamp to
+ * @maxmss every MSS option of length TCPOLEN_MSS holding a larger value. MSS
+ * values are never increased. Each rewrite also updates the checksum in @th.
+ *
+ * The caller must ensure that @th and the complete TCP option block are
+ * present in the linear data area and writable.
+ *
+ * Return: 0 when at least one MSS option was handled, even if it needed no
+ * change or parsing failed after it; -ENOENT when no MSS option is present;
+ * -EINVAL when the option block is malformed before any MSS option is found.
+ */
+int tcp_clamp_mss_option(struct sk_buff *skb, struct tcphdr *th, u16 maxmss)
+{
+	int length = th->doff * 4 - sizeof(*th);
+	u8 *ptr = (u8 *)(th + 1);
+	bool found = false;
+	int ret = -ENOENT;
+	u16 oldmss;
+
+	while (length > 0) {
+		int opcode = *ptr++;
+		int opsize;
+
+		switch (opcode) {
+		case TCPOPT_EOL:	/* end of list: stop scanning */
+			ret = -ENOENT;
+			goto out;
+		case TCPOPT_NOP:	/* one-byte option, no length byte */
+			length--;
+			continue;
+		default:
+			if (length < 2) {	/* no room for a length byte */
+				ret = -EINVAL;
+				goto out;
+			}
+
+			opsize = *ptr++;
+			if (opsize < 2 || opsize > length) {
+				ret = -EINVAL;
+				goto out;
+			}
+
+			if (opcode == TCPOPT_MSS && opsize == TCPOLEN_MSS) {
+				found = true;
+				oldmss = get_unaligned_be16(ptr);
+				if (oldmss && oldmss > maxmss) {
+					put_unaligned_be16(maxmss, ptr);
+					inet_proto_csum_replace2(&th->check,
+								 skb,
+								 htons(oldmss),
+								 htons(maxmss),
+								 false);
+				}
+			}
+
+			ptr += opsize - 2;	/* kind/len bytes consumed */
+			length -= opsize;
+		}
+	}
+out:
+	return found ? 0 : ret;	/* a handled MSS option masks later errors */
+}
+EXPORT_SYMBOL_GPL(tcp_clamp_mss_option);
+
 /* If a not yet filled skb is pushed, do not send it if
  * we have data packets in Qdisc or NIC queues :
  * Because TX completion will happen shortly, it gives a chance
diff --git a/net/netfilter/xt_TCPMSS.c b/net/netfilter/xt_TCPMSS.c
index 80e1634bc51f..70983b757229 100644
--- a/net/netfilter/xt_TCPMSS.c
+++ b/net/netfilter/xt_TCPMSS.c
@@ -30,16 +30,6 @@  MODULE_DESCRIPTION("Xtables: TCP Maximum Segment Size (MSS) adjustment");
 MODULE_ALIAS("ipt_TCPMSS");
 MODULE_ALIAS("ip6t_TCPMSS");
 
-static inline unsigned int
-optlen(const u_int8_t *opt, unsigned int offset)
-{
-	/* Beware zero-length options: make finite progress */
-	if (opt[offset] <= TCPOPT_NOP || opt[offset+1] == 0)
-		return 1;
-	else
-		return opt[offset+1];
-}
-
 static u_int32_t tcpmss_reverse_mtu(struct net *net,
 				    const struct sk_buff *skb,
 				    unsigned int family)
@@ -77,7 +67,6 @@  tcpmss_mangle_packet(struct sk_buff *skb,
 	const struct xt_tcpmss_info *info = par->targinfo;
 	struct tcphdr *tcph;
 	int len, tcp_hdrlen;
-	unsigned int i;
 	__be16 oldval;
 	u16 newmss;
 	u8 *opt;
@@ -113,29 +102,8 @@  tcpmss_mangle_packet(struct sk_buff *skb,
 	} else
 		newmss = info->mss;
 
-	opt = (u_int8_t *)tcph;
-	for (i = sizeof(struct tcphdr); i <= tcp_hdrlen - TCPOLEN_MSS; i += optlen(opt, i)) {
-		if (opt[i] == TCPOPT_MSS && opt[i+1] == TCPOLEN_MSS) {
-			u_int16_t oldmss;
-
-			oldmss = (opt[i+2] << 8) | opt[i+3];
-
-			/* Never increase MSS, even when setting it, as
-			 * doing so results in problems for hosts that rely
-			 * on MSS being set correctly.
-			 */
-			if (oldmss <= newmss)
-				return 0;
-
-			opt[i+2] = (newmss & 0xff00) >> 8;
-			opt[i+3] = newmss & 0x00ff;
-
-			inet_proto_csum_replace2(&tcph->check, skb,
-						 htons(oldmss), htons(newmss),
-						 false);
-			return 0;
-		}
-	}
+	if (tcp_clamp_mss_option(skb, tcph, newmss) == 0)
+		return 0;
 
 	/* There is data after the header so the option can't be added
 	 * without moving it, and doing so may make the SYN packet