@@ -70,37 +70,56 @@ static void ovpn_tcp_to_userspace(struct ovpn_peer *peer, struct sock *sk,
peer->tcp.sk_cb.sk_data_ready(sk);
}
-static void ovpn_tcp_rcv(struct strparser *strp, struct sk_buff *skb)
+static struct sk_buff *ovpn_tcp_skb_packet(const struct ovpn_peer *peer,
+ struct sk_buff *orig_skb,
+ const int pkt_len, const int pkt_off)
{
- struct ovpn_peer *peer = container_of(strp, struct ovpn_peer, tcp.strp);
- struct strp_msg *msg = strp_msg(skb);
- size_t pkt_len = msg->full_len - 2;
- size_t off = msg->offset + 2;
- u8 opcode;
+ struct sk_buff *ovpn_skb;
+ int err;
- /* ensure skb->data points to the beginning of the openvpn packet */
- if (!pskb_pull(skb, off)) {
- net_warn_ratelimited("%s: packet too small for peer %u\n",
- netdev_name(peer->ovpn->dev), peer->id);
+ /* create a new skb with only the content of the current packet */
+ ovpn_skb = netdev_alloc_skb(peer->ovpn->dev, pkt_len);
+ if (unlikely(!ovpn_skb))
goto err;
- }
- /* strparser does not trim the skb for us, therefore we do it now */
- if (pskb_trim(skb, pkt_len) != 0) {
- net_warn_ratelimited("%s: trimming skb failed for peer %u\n",
+ skb_copy_header(ovpn_skb, orig_skb);
+ err = skb_copy_bits(orig_skb, pkt_off, skb_put(ovpn_skb, pkt_len),
+ pkt_len);
+ if (unlikely(err)) {
+ net_warn_ratelimited("%s: skb_copy_bits failed for peer %u\n",
netdev_name(peer->ovpn->dev), peer->id);
+ kfree_skb(ovpn_skb);
goto err;
}
- /* we need the first 4 bytes of data to be accessible
+ consume_skb(orig_skb);
+ return ovpn_skb;
+err:
+ kfree_skb(orig_skb);
+ return NULL;
+}
+
+static void ovpn_tcp_rcv(struct strparser *strp, struct sk_buff *skb)
+{
+ struct ovpn_peer *peer = container_of(strp, struct ovpn_peer, tcp.strp);
+ struct strp_msg *msg = strp_msg(skb);
+ int pkt_len = msg->full_len - 2;
+ u8 opcode;
+
+ /* we need at least 4 bytes of data in the packet
* to extract the opcode and the key ID later on
*/
- if (!pskb_may_pull(skb, OVPN_OPCODE_SIZE)) {
+ if (unlikely(pkt_len < OVPN_OPCODE_SIZE)) {
net_warn_ratelimited("%s: packet too small to fetch opcode for peer %u\n",
netdev_name(peer->ovpn->dev), peer->id);
goto err;
}
+ /* extract the packet into a new skb */
+ skb = ovpn_tcp_skb_packet(peer, skb, pkt_len, msg->offset + 2);
+ if (unlikely(!skb))
+ goto err;
+
/* DATA_V2 packets are handled in kernel, the rest goes to user space */
opcode = ovpn_opcode_from_skb(skb, 0);
if (unlikely(opcode != OVPN_DATA_V2)) {
@@ -113,7 +132,7 @@ static void ovpn_tcp_rcv(struct strparser *strp, struct sk_buff *skb)
/* The packet size header must be there when sending the packet
* to userspace, therefore we put it back
*/
- skb_push(skb, 2);
+ *(__be16 *)__skb_push(skb, sizeof(u16)) = htons(pkt_len);
ovpn_tcp_to_userspace(peer, strp->sk, skb);
return;
}
When processing TCP stream data in ovpn_tcp_recv, we receive large cloned skbs from __strp_rcv that may contain multiple coalesced packets. The current implementation has two bugs: 1. Header offset overflow: Using pskb_pull with large offsets on coalesced skbs causes skb->data - skb->head to exceed the u16 storage of skb->network_header. This causes skb_reset_network_header to fail on the inner decapsulated packet, resulting in packet drops. 2. Unaligned protocol headers: Extracting packets from arbitrary positions within the coalesced TCP stream provides no alignment guarantees for the packet data causing performance penalties on architectures without efficient unaligned access. Additionally, openvpn's 2-byte length prefix on TCP packets causes the subsequent 4-byte opcode and packet ID fields to be inherently misaligned. Fix both issues by allocating a new skb for each openvpn packet and using skb_copy_bits to extract only the packet content into the new buffer, skipping the 2-byte length prefix. Also, check the length before invoking the function that performs the allocation to avoid creating an invalid skb. If the packet has to be forwarded to userspace the 2-byte prefix can be pushed to the head safely, without misalignment. As a side effect, this approach also avoids the expensive linearization that pskb_pull triggers on cloned skbs with page fragments. In testing, this resulted in TCP throughput improvements of up to 74%. Fixes: 11851cbd60ea ("ovpn: implement TCP transport") Signed-off-by: Ralf Lici <ralf@mandelbit.com> --- Changes since v1: - updated the __skb_push usage for consistency with similar operations, such as in ovpn_tcp_send_skb drivers/net/ovpn/tcp.c | 53 ++++++++++++++++++++++++++++-------------- 1 file changed, 36 insertions(+), 17 deletions(-)