不管是轉發收到的報文,還是本機發送報文,最後都會調用 dst_output()。
/*
 * Output packet to network from transport.
 *
 * Invokes the output callback stored in the dst entry attached to @skb,
 * forwarding @net, @sk and @skb, and returns whatever that callback returns.
 *
 * Per the original author's note, the callback is ip_output() for unicast
 * packets and ip_mc_output() for multicast packets.
 */
static inline int dst_output(struct net *net, struct sock *sk, struct sk_buff *skb)
{
	int rc;

	rc = skb_dst(skb)->output(net, sk, skb);
	return rc;
}
單播:
/* * 對於單播數據包,目的路由緩存項中的輸出接口是ip_output(). */ int ip_output(struct net *net, struct sock *sk, struct sk_buff *skb) { struct net_device *dev = skb_dst(skb)->dev; IP_UPD_PO_STATS(net, IPSTATS_MIB_OUT, skb->len); /* * 設置數據包的輸出網絡設備和數據包網絡 * 層協議類型。 */ skb->dev = dev; skb->protocol = htons(ETH_P_IP); /* * 經netfilter處理后,調用ip_finish_output()繼續IP數據包的輸出 */ return NF_HOOK_COND(NFPROTO_IPV4, NF_INET_POST_ROUTING, net, sk, skb, NULL, dev, ip_finish_output, !(IPCB(skb)->flags & IPSKB_REROUTED)); }
static int ip_finish_output(struct net *net, struct sock *sk, struct sk_buff *skb) { unsigned int mtu; #if defined(CONFIG_NETFILTER) && defined(CONFIG_XFRM) /* Policy lookup after SNAT yielded a new policy */ if (skb_dst(skb)->xfrm) { IPCB(skb)->flags |= IPSKB_REROUTED; return dst_output(net, sk, skb); } #endif /* //如果不支持TSO或者GSO,tcp發送的時候是按照mss來組織skb的, 所以skb->len會等於mtu 所以TCP叫分段,和IP分片不一樣,只有UDP才有IP分片 //SKB不是gso類型,並且skb->len大於mtu則需要分片 對方接受后的分片重組在netfilter中的ipv4_conntrack_defrag */ mtu = ip_skb_dst_mtu(sk, skb); if (skb_is_gso(skb)) return ip_finish_output_gso(net, sk, skb, mtu); /* 如果數據包長度大於MTU,則調用ip_fragment() * 對IP數據包進行分片。 */ if (skb->len > mtu || (IPCB(skb)->flags & IPSKB_FRAG_PMTU)) return ip_fragment(net, sk, skb, mtu, ip_finish_output2); return ip_finish_output2(net, sk, skb); }
/* ip send the packet by ip_finish_output2*/ static int ip_finish_output2(struct net *net, struct sock *sk, struct sk_buff *skb) { struct dst_entry *dst = skb_dst(skb); struct rtable *rt = (struct rtable *)dst; struct net_device *dev = dst->dev; unsigned int hh_len = LL_RESERVED_SPACE(dev); struct neighbour *neigh; u32 nexthop; if (rt->rt_type == RTN_MULTICAST) { IP_UPD_PO_STATS(net, IPSTATS_MIB_OUTMCAST, skb->len); } else if (rt->rt_type == RTN_BROADCAST) IP_UPD_PO_STATS(net, IPSTATS_MIB_OUTBCAST, skb->len); /* Be paranoid, rather than too clever. */ /* skb頭部空間不能存儲鏈路頭 */ if (unlikely(skb_headroom(skb) < hh_len && dev->header_ops)) { struct sk_buff *skb2; /* 重新分配skb */ skb2 = skb_realloc_headroom(skb, LL_RESERVED_SPACE(dev)); if (!skb2) { kfree_skb(skb); return -ENOMEM; } if (skb->sk)/* 關聯控制塊 */ skb_set_owner_w(skb2, skb->sk); consume_skb(skb); /* 釋放skb */ skb = skb2; /* 指向新的skb */ } rcu_read_lock_bh(); /* 獲取下一跳 */ nexthop = (__force u32) rt_nexthop(rt, ip_hdr(skb)->daddr);// get the dst ip address (u32) neigh = __ipv4_neigh_lookup_noref(dev, nexthop); //根據目的IP查找鄰居項是否存在 //如果沒有則創建鄰居項,然后通過dst_neigh_output 發包 if (unlikely(!neigh)) neigh = __neigh_create(&arp_tbl, &nexthop, dev, false); if (!IS_ERR(neigh)) {/* 成功 */ int res = dst_neigh_output(dst, neigh, skb); /* 通過鄰居子系統輸出 */ rcu_read_unlock_bh(); return res; } rcu_read_unlock_bh(); net_dbg_ratelimited("%s: No header cache and no neighbour!\n", __func__); kfree_skb(skb); return -EINVAL; }
最後通過鄰居子系統調用 dev_queue_xmit,將數據報文發送給鏈路層驅動。