@@ -538,6 +538,8 @@ void tcp_parse_options(const struct net *net, const struct sk_buff *skb,
struct tcp_options_received *opt_rx,
int estab, struct tcp_fastopen_cookie *foc);
+int tcp_clamp_mss_option(struct sk_buff *skb, struct tcphdr *th, u16 maxmss);
+
/*
* BPF SKB-less helpers
*/
@@ -721,6 +721,76 @@ static inline void tcp_mark_urg(struct tcp_sock *tp, int flags)
tp->snd_up = tp->write_seq;
}
+/**
+ * tcp_clamp_mss_option - clamp every TCP MSS option in a segment
+ * @skb: skb containing the TCP segment
+ * @th: TCP header in @skb
+ * @maxmss: upper bound for the TCP MSS option value
+ *
+ * Walk the TCP option block until EOL, a malformed option or the end of the
+ * header. Every MSS option of length TCPOLEN_MSS above @maxmss is lowered to
+ * @maxmss, updating the TCP checksum in @th. MSS is never increased.
+ *
+ * The caller must ensure that @th and the complete TCP option block are
+ * present in the linear data area and writable.
+ *
+ * Return: 0 when at least one MSS option was handled, even if the scan fails
+ * later; -ENOENT when no such option is found; -EINVAL when the option block
+ * is malformed before any MSS option is found.
+ */
+int tcp_clamp_mss_option(struct sk_buff *skb, struct tcphdr *th, u16 maxmss)
+{
+ int length = th->doff * 4 - sizeof(*th); /* option bytes left to scan */
+ u8 *ptr = (u8 *)(th + 1); /* first byte past the fixed header */
+ bool found = false;
+ int ret = -ENOENT;
+ u16 oldmss;
+
+ while (length > 0) {
+ int opcode = *ptr++;
+ int opsize;
+
+ switch (opcode) {
+ case TCPOPT_EOL: /* end of option list: stop scanning */
+ ret = -ENOENT;
+ goto out;
+ case TCPOPT_NOP: /* single-byte option, no length field */
+ length--;
+ continue;
+ default:
+ if (length < 2) { /* no room left for the length byte */
+ ret = -EINVAL;
+ goto out;
+ }
+
+ opsize = *ptr++;
+ if (opsize < 2 || opsize > length) { /* opsize covers kind + length bytes */
+ ret = -EINVAL;
+ goto out;
+ }
+
+ if (opcode == TCPOPT_MSS && opsize == TCPOLEN_MSS) {
+ found = true;
+ oldmss = get_unaligned_be16(ptr);
+ if (oldmss && oldmss > maxmss) { /* only ever lower the value */
+ put_unaligned_be16(maxmss, ptr);
+ inet_proto_csum_replace2(&th->check,
+ skb,
+ htons(oldmss),
+ htons(maxmss),
+ false);
+ }
+ }
+
+ ptr += opsize - 2; /* kind and length bytes were already consumed */
+ length -= opsize;
+ }
+ }
+out:
+ return found ? 0 : ret; /* a handled MSS option outranks any later error */
+}
+EXPORT_SYMBOL_GPL(tcp_clamp_mss_option);
+
/* If a not yet filled skb is pushed, do not send it if
* we have data packets in Qdisc or NIC queues :
* Because TX completion will happen shortly, it gives a chance
@@ -30,16 +30,6 @@ MODULE_DESCRIPTION("Xtables: TCP Maximum Segment Size (MSS) adjustment");
MODULE_ALIAS("ipt_TCPMSS");
MODULE_ALIAS("ip6t_TCPMSS");
-static inline unsigned int
-optlen(const u_int8_t *opt, unsigned int offset)
-{
- /* Beware zero-length options: make finite progress */
- if (opt[offset] <= TCPOPT_NOP || opt[offset+1] == 0)
- return 1;
- else
- return opt[offset+1];
-}
-
static u_int32_t tcpmss_reverse_mtu(struct net *net,
const struct sk_buff *skb,
unsigned int family)
@@ -77,7 +67,6 @@ tcpmss_mangle_packet(struct sk_buff *skb,
const struct xt_tcpmss_info *info = par->targinfo;
struct tcphdr *tcph;
int len, tcp_hdrlen;
- unsigned int i;
__be16 oldval;
u16 newmss;
u8 *opt;
@@ -113,29 +102,8 @@ tcpmss_mangle_packet(struct sk_buff *skb,
} else
newmss = info->mss;
- opt = (u_int8_t *)tcph;
- for (i = sizeof(struct tcphdr); i <= tcp_hdrlen - TCPOLEN_MSS; i += optlen(opt, i)) {
- if (opt[i] == TCPOPT_MSS && opt[i+1] == TCPOLEN_MSS) {
- u_int16_t oldmss;
-
- oldmss = (opt[i+2] << 8) | opt[i+3];
-
- /* Never increase MSS, even when setting it, as
- * doing so results in problems for hosts that rely
- * on MSS being set correctly.
- */
- if (oldmss <= newmss)
- return 0;
-
- opt[i+2] = (newmss & 0xff00) >> 8;
- opt[i+3] = newmss & 0x00ff;
-
- inet_proto_csum_replace2(&tcph->check, skb,
- htons(oldmss), htons(newmss),
- false);
- return 0;
- }
- }
+ if (tcp_clamp_mss_option(skb, tcph, newmss) == 0)
+ return 0;
/* There is data after the header so the option can't be added
* without moving it, and doing so may make the SYN packet
xt_TCPMSS has local logic for scanning TCP options and lowering an
existing MSS option without increasing it.

Move that scan-and-clamp logic into a TCP helper so other networking
code can reuse it without duplicating TCP option parsing. Keep
xt_TCPMSS responsible for the policy-specific behavior of adding a
missing MSS option.

Linux TCP option parsing keeps scanning after an MSS option, so a
later duplicate can override an earlier value at the receiver. While
factoring out the scan, keep walking the TCP option block after a valid
MSS option and clamp later MSS options as well, while preserving the
rule that MSS values are never increased.

The helper returns 0 when at least one MSS option was handled, whether
or not any value had to be lowered. It returns -ENOENT when no MSS
option is present and -EINVAL when option parsing fails before any MSS
option is found. If parsing fails after an MSS option was handled, the
helper still returns 0 so callers do not treat the packet as missing an
MSS option after the skb may already have been modified.

Signed-off-by: Ralf Lici <ralf@mandelbit.com>
---
Changes since v3
https://lore.kernel.org/openvpn-devel/20260519080500.120724-1-ralf@mandelbit.com/
- keep scanning the TCP option block and clamp all MSS options

No changes since v2
https://lore.kernel.org/openvpn-devel/20260518085908.135570-1-ralf@mandelbit.com/

No changes since v1
https://lore.kernel.org/openvpn-devel/20260515075941.102225-1-ralf@mandelbit.com/

 include/net/tcp.h         |  2 ++
 net/ipv4/tcp.c            | 70 +++++++++++++++++++++++++++++++++++++++
 net/netfilter/xt_TCPMSS.c | 36 ++------------------
 3 files changed, 74 insertions(+), 34 deletions(-)