@@ -105,6 +105,80 @@ static void ovpn_netdev_write(struct ovpn_peer *peer, struct sk_buff *skb)
local_bh_enable();
}
+/**
+ * ovpn_mcast_mld_offset - locate the ICMPv6 header of a potential MLD packet
+ * @skb: the IPv6 packet to inspect
+ * @offsetp: where to store the offset of the ICMPv6 header, measured from
+ *	     the network header
+ *
+ * MLD packets may be preceded by a Hop-by-Hop options header containing
+ * the Router Alert option. When such a header is present, step over it,
+ * then verify that the header found at the resulting offset is ICMPv6.
+ *
+ * Caller must ensure that the IPv6 header is linearized. @offsetp is only
+ * written on success.
+ *
+ * Return: true if the offset was stored in @offsetp, false if the
+ * Hop-by-Hop header could not be pulled or the next header is not ICMPv6
+ */
+static bool ovpn_mcast_mld_offset(struct sk_buff *skb, unsigned int *offsetp)
+{
+	unsigned int hdr_off = sizeof(struct ipv6hdr);
+	u8 proto = ipv6_hdr(skb)->nexthdr;
+	struct ipv6_opt_hdr *hbh;
+
+	if (proto == IPPROTO_HOPOPTS) {
+		/* only the fixed part of the options header is read here */
+		if (!pskb_may_pull(skb, hdr_off + sizeof(*hbh)))
+			return false;
+
+		hbh = (struct ipv6_opt_hdr *)(skb_network_header(skb) + hdr_off);
+		proto = hbh->nexthdr;
+		hdr_off += ipv6_optlen(hbh);
+	}
+
+	if (proto == IPPROTO_ICMPV6) {
+		*offsetp = hdr_off;
+		return true;
+	}
+
+	return false;
+}
+
+/**
+ * ovpn_mcast_is_control - tell whether an skb carries IGMP or MLD traffic
+ * @skb: the packet to inspect
+ *
+ * IPv4 packets are classified by looking at the protocol field of the IP
+ * header only. IPv6 packets must carry an ICMPv6 header (possibly behind a
+ * Hop-by-Hop options header) whose type is one of the MLD query, report,
+ * reduction or MLDv2 report types.
+ *
+ * Caller must ensure that IP/IPv6 headers are linearized.
+ *
+ * Return: true if the skb contains IGMP or MLD control traffic,
+ * false otherwise
+ */
+static bool ovpn_mcast_is_control(struct sk_buff *skb)
+{
+	const struct icmp6hdr *icmp6;
+	unsigned int icmp6_off;
+
+	switch (skb->protocol) {
+	case htons(ETH_P_IP):
+		return ip_hdr(skb)->protocol == IPPROTO_IGMP;
+	case htons(ETH_P_IPV6):
+		break;
+	default:
+		return false;
+	}
+
+	/* find the ICMPv6 header first, then make sure it can be read */
+	if (!ovpn_mcast_mld_offset(skb, &icmp6_off) ||
+	    !pskb_may_pull(skb, icmp6_off + sizeof(*icmp6)))
+		return false;
+
+	icmp6 = (struct icmp6hdr *)(skb_network_header(skb) + icmp6_off);
+
+	return icmp6->icmp6_type == ICMPV6_MGM_QUERY ||
+	       icmp6->icmp6_type == ICMPV6_MGM_REPORT ||
+	       icmp6->icmp6_type == ICMPV6_MGM_REDUCTION ||
+	       icmp6->icmp6_type == ICMPV6_MLD2_REPORT;
+}
+
void ovpn_decrypt_post(void *data, int ret)
{
struct ovpn_crypto_key_slot *ks;
@@ -183,8 +257,13 @@ void ovpn_decrypt_post(void *data, int ret)
}
skb->protocol = proto;
- /* perform Reverse Path Filtering (RPF) */
- if (unlikely(!ovpn_peer_check_by_src(peer->ovpn, skb, peer))) {
+ /* perform Reverse Path Filtering (RPF).
+ * IGMP/MLD protocols may use source addresses
+ * that differ from the peer's VPN address
+ * so we bypass RPF in that case
+ */
+ if (unlikely(!ovpn_mcast_is_control(skb) &&
+ !ovpn_peer_check_by_src(peer->ovpn, skb, peer))) {
if (skb->protocol == htons(ETH_P_IPV6))
net_dbg_ratelimited("%s: RPF dropped packet from peer %u, src: %pI6c\n",
netdev_name(peer->ovpn->dev),
@@ -351,6 +430,70 @@ static void ovpn_send(struct ovpn_priv *ovpn, struct sk_buff *skb,
ovpn_peer_put(peer);
}
+/**
+ * ovpn_bcast_work - flood queued broadcast/multicast packets to all peers
+ * @work: the work item embedded in the bcast member of struct ovpn_priv
+ *
+ * Runs in process context. For every skb found in the broadcast queue, a
+ * reference is taken on each peer of the by_id table and one copy of the
+ * packet is handed to each of them: every peer but the last one gets a
+ * clone, the last one gets the original skb. If there is no peer at all, or
+ * a clone cannot be allocated, the packet is accounted as a TX drop.
+ */
+static void ovpn_bcast_work(struct work_struct *work)
+{
+	struct ovpn_priv *ovpn = container_of_const(work, struct ovpn_priv, bcast.work);
+	struct sk_buff *skb, *to_send;
+	struct llist_head peer_list;
+	struct llist_node *node, *n;
+	struct ovpn_peer *peer;
+	int bkt;
+
+	while ((skb = skb_dequeue(&ovpn->bcast.queue))) {
+		skb_mark_not_on_list(skb);
+		init_llist_head(&peer_list);
+
+		/* snapshot the peers (holding a reference on each) so that the
+		 * GFP_KERNEL clone below happens outside of the RCU read-side
+		 * section
+		 */
+		rcu_read_lock();
+		hash_for_each_rcu(ovpn->peers->by_id, bkt, peer, hash_entry_id) {
+			if (likely(ovpn_peer_hold(peer)))
+				llist_add(&peer->bcast_entry, &peer_list);
+		}
+		rcu_read_unlock();
+
+		if (unlikely(llist_empty(&peer_list))) {
+			/* the per-CPU device stats must not be updated from
+			 * preemptible context
+			 */
+			local_bh_disable();
+			dev_dstats_tx_dropped(ovpn->dev);
+			local_bh_enable();
+			skb_tx_error(skb);
+			kfree_skb(skb);
+			continue;
+		}
+
+		llist_for_each_safe(node, n, peer_list.first) {
+			peer = llist_entry(node, struct ovpn_peer, bcast_entry);
+
+			/* the allocation may sleep: keep it out of the
+			 * BH-disabled section below
+			 */
+			if (likely(n))
+				to_send = skb_clone(skb, GFP_KERNEL);
+			else
+				to_send = skb;
+
+			/* ovpn_send() is otherwise reached from ndo_start_xmit,
+			 * which runs with BH disabled: provide the same context
+			 * here, also for the per-CPU drop counter
+			 */
+			local_bh_disable();
+			if (likely(to_send)) {
+				ovpn_peer_stats_increment_tx(&peer->vpn_stats, skb->len);
+				/* ovpn_send() releases the peer reference */
+				ovpn_send(ovpn, to_send, peer);
+			} else {
+				dev_dstats_tx_dropped(ovpn->dev);
+				ovpn_peer_put(peer);
+			}
+			local_bh_enable();
+		}
+
+		/* the queue can be refilled while it is being drained: give
+		 * other tasks a chance to run between packets
+		 */
+		cond_resched();
+	}
+}
+
+/**
+ * ovpn_bcast_init - set up the broadcast transmission state of an instance
+ * @ovpn: the ovpn instance to initialize
+ *
+ * Initialize the broadcast skb queue and its work item, then allocate the
+ * ordered workqueue ("ovpn-bcast-<netdev name>") that the work is queued on.
+ *
+ * Return: 0 on success, -ENOMEM if the workqueue cannot be allocated
+ */
+int ovpn_bcast_init(struct ovpn_priv *ovpn)
+{
+	struct ovpn_bcast *bcast = &ovpn->bcast;
+
+	skb_queue_head_init(&bcast->queue);
+	INIT_WORK(&bcast->work, ovpn_bcast_work);
+
+	bcast->wq = alloc_ordered_workqueue("ovpn-bcast-%s", WQ_MEM_RECLAIM,
+					    netdev_name(ovpn->dev));
+
+	return bcast->wq ? 0 : -ENOMEM;
+}
+
+/**
+ * ovpn_bcast_exit - stop broadcast transmission on an instance
+ * @ovpn: the ovpn instance being torn down
+ *
+ * Cancel the broadcast work, waiting for it if it is running, then free
+ * every skb still sitting in the broadcast queue. The workqueue itself is
+ * not destroyed here.
+ */
+void ovpn_bcast_exit(struct ovpn_priv *ovpn)
+{
+	struct ovpn_bcast *bcast = &ovpn->bcast;
+
+	/* stop the worker first, then drop what it did not get to send */
+	cancel_work_sync(&bcast->work);
+	skb_queue_purge(&bcast->queue);
+}
+
/* Send user data to the network
*/
netdev_tx_t ovpn_net_xmit(struct sk_buff *skb, struct net_device *dev)
@@ -362,6 +505,7 @@ netdev_tx_t ovpn_net_xmit(struct sk_buff *skb, struct net_device *dev)
struct ovpn_peer *peer;
__be16 proto;
int ret;
+ bool bcast = false;
/* reset netfilter state */
nf_reset_ct(skb);
@@ -372,8 +516,8 @@ netdev_tx_t ovpn_net_xmit(struct sk_buff *skb, struct net_device *dev)
goto drop_no_peer;
/* retrieve peer serving the destination IP of this packet */
- peer = ovpn_peer_get_by_dst(ovpn, skb);
- if (unlikely(!peer)) {
+ peer = ovpn_peer_get_by_dst(ovpn, skb, &bcast);
+ if (unlikely(!peer && !bcast)) {
switch (skb->protocol) {
case htons(ETH_P_IP):
net_dbg_ratelimited("%s: no peer to send data to dst=%pI4\n",
@@ -418,11 +562,31 @@ netdev_tx_t ovpn_net_xmit(struct sk_buff *skb, struct net_device *dev)
continue;
}
+ if (unlikely(bcast)) {
+ spin_lock_bh(&ovpn->bcast.queue.lock);
+ if (unlikely(skb_queue_len(&ovpn->bcast.queue) >= OVPN_BCAST_MAX_QLEN)) {
+ spin_unlock_bh(&ovpn->bcast.queue.lock);
+ dev_dstats_tx_dropped(ovpn->dev);
+ skb_tx_error(curr);
+ kfree_skb(curr);
+ continue;
+ }
+ __skb_queue_tail(&ovpn->bcast.queue, curr);
+ spin_unlock_bh(&ovpn->bcast.queue.lock);
+ continue;
+ }
+
/* only count what we actually send */
tx_bytes += curr->len;
__skb_queue_tail(&skb_list, curr);
}
+ if (unlikely(bcast)) {
+ if (!skb_queue_empty(&ovpn->bcast.queue))
+ queue_work(ovpn->bcast.wq, &ovpn->bcast.work);
+ return NETDEV_TX_OK;
+ }
+
/* no segments survived: don't jump to 'drop' because we already
* incremented the counter for each failure in the loop
*/
@@ -438,7 +602,8 @@ netdev_tx_t ovpn_net_xmit(struct sk_buff *skb, struct net_device *dev)
return NETDEV_TX_OK;
drop:
- ovpn_peer_put(peer);
+ if (peer)
+ ovpn_peer_put(peer);
drop_no_peer:
dev_dstats_tx_dropped(ovpn->dev);
skb_tx_error(skb);
@@ -31,4 +31,7 @@ void ovpn_xmit_special(struct ovpn_peer *peer, const void *data,
void ovpn_encrypt_post(void *data, int ret);
void ovpn_decrypt_post(void *data, int ret);
+int ovpn_bcast_init(struct ovpn_priv *ovpn);
+void ovpn_bcast_exit(struct ovpn_priv *ovpn);
+
#endif /* _NET_OVPN_OVPN_H_ */
@@ -30,6 +30,8 @@ static void ovpn_priv_free(struct net_device *net)
{
struct ovpn_priv *ovpn = netdev_priv(net);
+ if (ovpn->bcast.wq)
+ destroy_workqueue(ovpn->bcast.wq);
kfree(ovpn->peers);
}
@@ -155,7 +157,7 @@ static void ovpn_setup(struct net_device *dev)
dev->max_mtu = IP_MAX_MTU - OVPN_HEAD_ROOM;
dev->type = ARPHRD_NONE;
- dev->flags = IFF_POINTOPOINT | IFF_NOARP;
+ dev->flags = IFF_POINTOPOINT | IFF_NOARP | IFF_MULTICAST | IFF_BROADCAST;
dev->priv_flags |= IFF_NO_QUEUE;
/* when routing packets to a LAN behind a client, we rely on the
* route entry that originally brought the packet into ovpn, so
@@ -192,6 +194,9 @@ static int ovpn_newlink(struct net_device *dev,
spin_lock_init(&ovpn->lock);
INIT_DELAYED_WORK(&ovpn->keepalive_work, ovpn_peer_keepalive_work);
+ if (ovpn_bcast_init(ovpn))
+ return -ENOMEM;
+
/* Set carrier explicitly after registration, this way state is
* clearly defined.
*
@@ -212,6 +217,7 @@ static void ovpn_dellink(struct net_device *dev, struct list_head *head)
{
struct ovpn_priv *ovpn = netdev_priv(dev);
+ ovpn_bcast_exit(ovpn);
cancel_delayed_work_sync(&ovpn->keepalive_work);
ovpn_peers_free(ovpn, NULL, OVPN_DEL_PEER_REASON_TEARDOWN);
unregister_netdevice_queue(dev, head);
@@ -32,6 +32,14 @@ struct ovpn_peer_collection {
struct hlist_nulls_head by_transp_addr[1 << 12];
};
+#define OVPN_BCAST_MAX_QLEN 1000
+/**
+ * struct ovpn_bcast - state used to flood broadcast/multicast packets
+ * @queue: packets waiting to be sent to all peers; filled by
+ *	   ovpn_net_xmit() as long as it holds fewer than
+ *	   OVPN_BCAST_MAX_QLEN entries and drained by ovpn_bcast_work()
+ * @work: work item running ovpn_bcast_work()
+ * @wq: ordered workqueue that @work is queued on; allocated by
+ *	ovpn_bcast_init()
+ */
+struct ovpn_bcast {
+ struct sk_buff_head queue;
+ struct work_struct work;
+ struct workqueue_struct *wq;
+};
+
/**
* struct ovpn_priv - per ovpn interface state
* @dev: the actual netdev representing the tunnel
@@ -41,6 +49,7 @@ struct ovpn_peer_collection {
* @peer: in P2P mode, this is the only remote peer
* @gro_cells: pointer to the Generic Receive Offload cell
* @keepalive_work: struct used to schedule keepalive periodic job
+ * @bcast: struct used to queue and transmit broadcast messages
*/
struct ovpn_priv {
struct net_device *dev;
@@ -50,6 +59,7 @@ struct ovpn_priv {
struct ovpn_peer __rcu *peer;
struct gro_cells gro_cells;
struct delayed_work keepalive_work;
+ struct ovpn_bcast bcast;
};
#endif /* _NET_OVPN_OVPNSTRUCT_H_ */
@@ -722,6 +722,8 @@ static void ovpn_peer_remove(struct ovpn_peer *peer,
* ovpn_peer_get_by_dst - Lookup peer to send skb to
* @ovpn: the private data representing the current VPN session
* @skb: the skb to extract the destination address from
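+ * @bcast: pointer to a bool that is set to true when no peer matches the
+ * destination and the destination is a broadcast or multicast address.
+ * It is never set to false here, so the caller must initialize it.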
*
* This function takes a tunnel packet and looks up the peer to send it to
* after encapsulation. The skb is expected to be the in-tunnel packet, without
@@ -731,10 +733,11 @@ static void ovpn_peer_remove(struct ovpn_peer *peer,
*
* Return: the peer if found or NULL otherwise.
*/
-struct ovpn_peer *ovpn_peer_get_by_dst(struct ovpn_priv *ovpn,
- struct sk_buff *skb)
+struct ovpn_peer *ovpn_peer_get_by_dst(struct ovpn_priv *ovpn, struct sk_buff *skb,
+ bool *bcast)
{
struct ovpn_peer *peer = NULL;
+ unsigned int addr_type;
struct in6_addr addr6;
__be32 addr4;
@@ -755,11 +758,23 @@ struct ovpn_peer *ovpn_peer_get_by_dst(struct ovpn_priv *ovpn,
case htons(ETH_P_IP):
addr4 = ovpn_nexthop_from_skb4(skb);
peer = ovpn_peer_get_by_vpn_addr4(ovpn, addr4);
+
+ if (peer)
+ break;
+
+ addr_type = inet_dev_addr_type(dev_net(ovpn->dev), ovpn->dev, addr4);
+ if (addr_type == RTN_MULTICAST || addr_type == RTN_BROADCAST)
+ *bcast = true;
break;
case htons(ETH_P_IPV6):
addr6 = ovpn_nexthop_from_skb6(skb);
peer = ovpn_peer_get_by_vpn_addr6(ovpn, &addr6);
- break;
+
+ if (peer)
+ break;
+
+ if (ipv6_addr_is_multicast(&addr6))
+ *bcast = true;
}
if (unlikely(peer && !ovpn_peer_hold(peer)))
@@ -59,6 +59,7 @@
* @refcount: reference counter
* @rcu: used to free peer in an RCU safe way
* @release_entry: entry for the socket release list
+ * @bcast_entry: entry for the broadcast peers list
* @keepalive_work: used to schedule keepalive sending
*/
struct ovpn_peer {
@@ -113,6 +114,7 @@ struct ovpn_peer {
struct kref refcount;
struct rcu_head rcu;
struct llist_node release_entry;
+ struct llist_node bcast_entry;
struct work_struct keepalive_work;
};
@@ -148,8 +150,8 @@ void ovpn_peers_free(struct ovpn_priv *ovpn, struct sock *sock,
struct ovpn_peer *ovpn_peer_get_by_transp_addr(struct ovpn_priv *ovpn,
struct sk_buff *skb);
struct ovpn_peer *ovpn_peer_get_by_id(struct ovpn_priv *ovpn, u32 peer_id);
-struct ovpn_peer *ovpn_peer_get_by_dst(struct ovpn_priv *ovpn,
- struct sk_buff *skb);
+struct ovpn_peer *ovpn_peer_get_by_dst(struct ovpn_priv *ovpn, struct sk_buff *skb,
+ bool *bcast);
void ovpn_peer_hash_vpn_ip(struct ovpn_peer *peer);
bool ovpn_peer_check_by_src(struct ovpn_priv *ovpn, struct sk_buff *skb,
struct ovpn_peer *peer);
The ovpn DCO driver currently drops all multicast/broadcast packets
because it does not set IFF_MULTICAST and IFF_BROADCAST on the netdevice
and always performs a unicast peer lookup in ovpn_net_xmit().

This prevents multicast routing daemons such as smcroute from using an
ovpn interface as a multicast VIF and makes it impossible to forward
multicast and broadcast traffic to VPN clients.

Add the minimal infrastructure needed to get multicast/broadcast working:
- Set IFF_MULTICAST and IFF_BROADCAST in ovpn_setup().
- Detect multicast and broadcast destinations in ovpn_peer_get_by_dst()
  and set the bcast flag to true.
- Introduce ovpn_bcast_work() to transmit enqueued broadcast messages.
- Allow all IGMP/MLD packets to bypass the RPF check in the RX path.

Multicast traffic is treated as broadcast and flooded to all peers.

Signed-off-by: Marco Baffo <marco@mandelbit.com>
---
Changes in v2:
- Replace broadcast path with a deferred workqueue, avoiding GFP_ATOMIC:
  introduce struct ovpn_bcast (queue, work, wq) embedded in ovpn_priv.
- Add struct llist_node bcast_entry to ovpn_peer to build a lockless peer
  snapshot under RCU without allocating peer list nodes.
- Process broadcast packets in an ordered workqueue so the entire send
  path runs in process context and can use GFP_KERNEL.
- Queue broadcast skbs directly to bcast.queue inside the main
  ovpn_net_xmit() loop instead of building a temporary skb_list.

 drivers/net/ovpn/io.c       | 175 ++++++++++++++++++++++++++++++++++--
 drivers/net/ovpn/io.h       |   3 +
 drivers/net/ovpn/main.c     |   8 +-
 drivers/net/ovpn/ovpnpriv.h |  10 +++
 drivers/net/ovpn/peer.c     |  21 ++++-
 drivers/net/ovpn/peer.h     |   6 +-
 6 files changed, 212 insertions(+), 11 deletions(-)