不管是收到報文轉發還是本機發送報文,最后都會調用dst_output
/* Output packet to network from transport. */ static inline int dst_output(struct net *net, struct sock *sk, struct sk_buff *skb) { /* * 如果是單播數據包,設置的是ip_output(), * 如果是組播數據包,設置的是ip_mc_output().dev_queue_xmit */ return skb_dst(skb)->output(net, sk, skb); }
單播:
/*
 * ip_output - output handler for unicast packets.
 *
 * According to the original notes, this is the function stored as the
 * output interface of the destination route cache entry for unicast
 * traffic.
 *
 * Steps:
 *   1. account the packet in the IPSTATS_MIB_OUT counter,
 *   2. stamp the skb with the egress device taken from its dst entry
 *      and with the ETH_P_IP protocol type,
 *   3. pass it through the NF_INET_POST_ROUTING netfilter hook with
 *      ip_finish_output() as the continuation.
 *
 * Returns whatever NF_HOOK_COND() returns.
 */
int ip_output(struct net *net, struct sock *sk, struct sk_buff *skb)
{
	struct net_device *out_dev = skb_dst(skb)->dev;

	IP_UPD_PO_STATS(net, IPSTATS_MIB_OUT, skb->len);

	/* Record the network-layer protocol and the output device on the skb. */
	skb->protocol = htons(ETH_P_IP);
	skb->dev = out_dev;

	/*
	 * The condition argument is false for packets already flagged
	 * IPSKB_REROUTED.
	 * NOTE(review): presumably this lets rerouted packets bypass the
	 * hook and go straight to ip_finish_output() - confirm against
	 * the NF_HOOK_COND() definition.
	 */
	return NF_HOOK_COND(NFPROTO_IPV4, NF_INET_POST_ROUTING,
			    net, sk, skb, NULL, out_dev,
			    ip_finish_output,
			    !(IPCB(skb)->flags & IPSKB_REROUTED));
}
/*
 * ip_finish_output - continue IPv4 output after the POST_ROUTING hook.
 *
 * Chooses one of four continuations:
 *   - dst_output() again, when the dst entry carries an xfrm state
 *     (only with CONFIG_NETFILTER and CONFIG_XFRM),
 *   - ip_finish_output_gso() for GSO skbs,
 *   - ip_fragment() when the packet is longer than the MTU or has
 *     IPSKB_FRAG_PMTU set,
 *   - ip_finish_output2() otherwise.
 *
 * Returns the result of whichever continuation is taken.
 */
static int ip_finish_output(struct net *net, struct sock *sk, struct sk_buff *skb)
{
	unsigned int mtu;

#if defined(CONFIG_NETFILTER) && defined(CONFIG_XFRM)
	/* Policy lookup after SNAT yielded a new policy */
	if (skb_dst(skb)->xfrm) {
		/* Mark as rerouted before re-entering the output path. */
		IPCB(skb)->flags |= IPSKB_REROUTED;
		return dst_output(net, sk, skb);
	}
#endif

	/*
	 * Notes carried over from the original annotations (not verifiable
	 * from this file alone - confirm before relying on them):
	 *   - Without TSO/GSO, TCP builds its skbs according to the MSS, so
	 *     they already fit the MTU; that is TCP segmentation, which is
	 *     distinct from IP fragmentation.
	 *   - A non-GSO skb whose length exceeds the MTU must be fragmented.
	 *   - On the receiving side, reassembly is said to happen in
	 *     netfilter's ipv4_conntrack_defrag.
	 */
	mtu = ip_skb_dst_mtu(sk, skb);

	if (skb_is_gso(skb))
		return ip_finish_output_gso(net, sk, skb, mtu);

	/* Fits within the MTU and no forced fragmentation: send as is. */
	if (skb->len <= mtu && !(IPCB(skb)->flags & IPSKB_FRAG_PMTU))
		return ip_finish_output2(net, sk, skb);

	/* Otherwise fragment; each fragment goes out via ip_finish_output2(). */
	return ip_fragment(net, sk, skb, mtu, ip_finish_output2);
}
/*
 * ip_finish_output2 - last IPv4 output step: hand the packet to the
 * neighbour subsystem.
 *
 * Updates the multicast/broadcast output counters according to the
 * route type, makes sure the skb has enough headroom for the link-layer
 * header (reallocating when it does not), then looks up - or creates -
 * the neighbour entry for the next hop and transmits through
 * dst_neigh_output().
 *
 * Returns the result of dst_neigh_output() on success, -ENOMEM when the
 * headroom reallocation fails, or -EINVAL when no neighbour entry could
 * be obtained. kfree_skb() is called on the skb on both error paths.
 */
static int ip_finish_output2(struct net *net, struct sock *sk, struct sk_buff *skb)
{
	struct dst_entry *dst = skb_dst(skb);
	struct rtable *rt = (struct rtable *)dst;
	struct net_device *dev = dst->dev;
	unsigned int hh_len = LL_RESERVED_SPACE(dev);
	struct neighbour *neigh;
	u32 nexthop;
	int res;

	switch (rt->rt_type) {
	case RTN_MULTICAST:
		IP_UPD_PO_STATS(net, IPSTATS_MIB_OUTMCAST, skb->len);
		break;
	case RTN_BROADCAST:
		IP_UPD_PO_STATS(net, IPSTATS_MIB_OUTBCAST, skb->len);
		break;
	default:
		break;
	}

	/* Be paranoid, rather than too clever. */
	/* The skb headroom is too small to hold the link-layer header. */
	if (unlikely(skb_headroom(skb) < hh_len && dev->header_ops)) {
		struct sk_buff *roomier;

		/* Allocate a replacement skb with enough headroom. */
		roomier = skb_realloc_headroom(skb, LL_RESERVED_SPACE(dev));
		if (!roomier) {
			kfree_skb(skb);
			return -ENOMEM;
		}

		/* Carry the owning socket, if any, over to the new skb. */
		if (skb->sk)
			skb_set_owner_w(roomier, skb->sk);

		/* Release the old skb and continue with the new one. */
		consume_skb(skb);
		skb = roomier;
	}

	rcu_read_lock_bh();

	/*
	 * Next-hop address for this route, obtained from rt_nexthop() using
	 * the destination address in the IP header.
	 */
	nexthop = (__force u32) rt_nexthop(rt, ip_hdr(skb)->daddr);

	/* Look up the neighbour entry; create one when the lookup finds none. */
	neigh = __ipv4_neigh_lookup_noref(dev, nexthop);
	if (unlikely(!neigh))
		neigh = __neigh_create(&arp_tbl, &nexthop, dev, false);

	if (IS_ERR(neigh)) {
		rcu_read_unlock_bh();
		net_dbg_ratelimited("%s: No header cache and no neighbour!\n",
				    __func__);
		kfree_skb(skb);
		return -EINVAL;
	}

	/* Transmit through the neighbour subsystem. */
	res = dst_neigh_output(dst, neigh, skb);
	rcu_read_unlock_bh();
	return res;
}

最后通過鄰居子系統,調用dev_queue_xmit 將數據報文發送給鏈路層驅動
