看代碼實現前,請先保證了解ipv6的概念,可以先看ipv6介紹一文。
code extract from 2.6.24.
在文件 net/ipv6/af_inet6.c 中包含了ipv6協議初始化的主函數。
/*
 * inet6_init - IPv6 protocol family initialisation (excerpt, Linux 2.6.24,
 * net/ipv6/af_inet6.c).
 *
 * Runs the registration steps below in order and returns 0 when it reaches
 * the end. A checked step that fails jumps to an unwind label and the
 * function returns the value left in err. The unwind labels themselves are
 * elided from this excerpt.
 */
static int __init inet6_init(void)
{
struct sk_buff *dummy_skb;
struct list_head *r;
int err;
// struct inet6_skb_parm must fit into the skb control buffer (cb)
BUILD_BUG_ON(sizeof(struct inet6_skb_parm) > sizeof(dummy_skb->cb));
// Register the TCPv6 proto (article note: fills in its slab-related fields and links it into the global proto_list)
err = proto_register(&tcpv6_prot, 1);
if (err)
goto out;
// Same for UDPv6
err = proto_register(&udpv6_prot, 1);
if (err)
goto out_unregister_tcp_proto;
// UDP-Lite transport, mainly used for multimedia; see Documentation/networking/udplite.txt in the kernel tree
err = proto_register(&udplitev6_prot, 1);
if (err)
goto out_unregister_udp_proto;
// Same for raw sockets
err = proto_register(&rawv6_prot, 1);
if (err)
goto out_unregister_udplite_proto;
/* Register the socket-side information for inet6_create. */
for(r = &inetsw6[0]; r < &inetsw6[SOCK_MAX]; ++r) // initialise every list head in the inetsw6 array
INIT_LIST_HEAD(r);
/* We MUST register RAW sockets before we create the ICMP6, IGMP6, or NDISC control sockets. */
// Article note: the protosw is added to the list array above according to the socket type it declares
inet6_register_protosw(&rawv6_protosw);
/* Register the family here so that the init calls below will be able to create sockets. (?? is this dangerous ??) */
// Register the IPv6 family, i.e. mainly its socket-creation operations
err = sock_register(&inet6_family_ops);
if (err)
goto out_unregister_raw_proto;
/* Initialise ipv6 mibs */
err = init_ipv6_mibs(); // all IPv6 statistics counters
if (err)
goto out_unregister_sock;
/* ipngwg API draft makes clear that the correct semantics for TCP and UDP is to consider one TCP and UDP instance in a host availiable by both INET and INET6 APIs and able to communicate via both network protocols. */
#ifdef CONFIG_SYSCTL
ipv6_sysctl_register(); // register the IPv6 sysctl entries
#endif
// ICMPv6 initialisation
err = icmpv6_init(&inet6_family_ops);
if (err)
goto icmp_fail;
// Neighbour discovery initialisation (the article compares it to ARP)
err = ndisc_init(&inet6_family_ops);
if (err)
goto ndisc_fail;
// IGMPv6 initialisation
err = igmp6_init(&inet6_family_ops);
if (err)
goto igmp_fail;
// netfilter initialisation for IPv6
err = ipv6_netfilter_init();
if (err)
goto netfilter_fail;
/* Create /proc/foo6 entries. */
#ifdef CONFIG_PROC_FS // register the per-protocol /proc output entries
// Any failing proc init below leaves with err == -ENOMEM
err = -ENOMEM;
if (raw6_proc_init())
goto proc_raw6_fail;
if (tcp6_proc_init())
goto proc_tcp6_fail;
if (udp6_proc_init())
goto proc_udp6_fail;
if (udplite6_proc_init())
goto proc_udplite6_fail;
if (ipv6_misc_proc_init())
goto proc_misc6_fail;
if (ac6_proc_init())
goto proc_anycast6_fail;
if (if6_proc_init())
goto proc_if6_fail;
#endif
ip6_route_init(); // IPv6 routing initialisation
ip6_flowlabel_init();// IPv6 flow labels (article note: registers the flow-label proc output)
// Article note: rtnetlink-related setup, some routing template fields and other features
err = addrconf_init();
if (err)
goto addrconf_fail;
/* Init v6 extension headers. */
// Handlers for the extension headers introduced by IPv6
ipv6_rthdr_init();
ipv6_frag_init();
ipv6_nodata_init();
ipv6_destopt_init();
/* Init v6 transport protocols. */
// The main transport protocols
udpv6_init();
udplitev6_init();
tcpv6_init();
// Finally register the IPv6 packet handler itself
ipv6_packet_init();
err = 0;
out:
return err;
...... // error-unwind labels follow (elided in this excerpt)
}
下面我們主要看ipv6協議部分流程,其他部分在各自相關文章中介紹。
ipv6擴展頭,路由包頭注冊
/*
 * Register the handler for the IPv6 Routing extension header
 * (IPPROTO_ROUTING). A registration failure is only logged.
 */
void __init ipv6_rthdr_init(void)
{
	if (inet6_add_protocol(&rthdr_protocol, IPPROTO_ROUTING) < 0)
		printk(KERN_ERR "ipv6_rthdr_init: Could not register protocol\n");
}
ipv6擴展頭,分片包頭注冊
/*
 * Register the handler for the IPv6 Fragment extension header
 * (IPPROTO_FRAGMENT), then fill in the ip6_frags descriptor and pass it to
 * inet_frags_init(). A registration failure is only logged.
 */
void __init ipv6_frag_init(void)
{
	if (inet6_add_protocol(&frag_protocol, IPPROTO_FRAGMENT) < 0) {
		printk(KERN_ERR "ipv6_frag_init: Could not register protocol\n");
	}

	/* Tunables and per-queue allocation size. */
	ip6_frags.qsize = sizeof(struct frag_queue);
	ip6_frags.ctl = &ip6_frags_ctl;

	/* Callbacks used by the generic inet_frag code. */
	ip6_frags.hashfn = ip6_hashfn;
	ip6_frags.match = ip6_frag_match;
	ip6_frags.constructor = ip6_frag_init;
	ip6_frags.frag_expire = ip6_frag_expire;

	/* No destructor or skb_free hook is installed. */
	ip6_frags.destructor = NULL;
	ip6_frags.skb_free = NULL;

	inet_frags_init(&ip6_frags);
}
/*
 * Register the handler for next-header value IPPROTO_NONE ("no next
 * header"). A registration failure is only logged.
 */
void __init ipv6_nodata_init(void)
{
	if (inet6_add_protocol(&nodata_protocol, IPPROTO_NONE) < 0) {
		printk(KERN_ERR "ipv6_nodata_init: Could not register protocol\n");
	}
}
ipv6擴展頭,目的選項包頭注冊
/*
 * Register the handler for the IPv6 Destination Options extension header
 * (IPPROTO_DSTOPTS). A registration failure is only logged.
 */
void __init ipv6_destopt_init(void)
{
	if (inet6_add_protocol(&destopt_protocol, IPPROTO_DSTOPTS) < 0) {
		printk(KERN_ERR "ipv6_destopt_init: Could not register protocol\n");
	}
}
注冊ipv6協議處理函數
/*
 * Hook ipv6_packet_type into the device layer via dev_add_pack().
 */
void __init ipv6_packet_init(void)
{
	dev_add_pack(&ipv6_packet_type);
}
當netif_receive_skb函數向上層遞交skb時會根據協議類型調用相關的協議處理函數,那么就會調用到 ipv6_rcv函數了。
/*
 * Packet-type descriptor for ETH_P_IPV6 frames: ipv6_rcv is the receive
 * entry point, and the two gso_* members point at the IPv6 GSO callbacks.
 */
static struct packet_type ipv6_packet_type = {
	.type		= __constant_htons(ETH_P_IPV6),
	.func		= ipv6_rcv,
	.gso_segment	= ipv6_gso_segment,
	.gso_send_check	= ipv6_gso_send_check,
};
ipv6協議處理函數
/*
 * ipv6_rcv - receive handler registered for ETH_P_IPV6 (see ipv6_packet_type).
 *
 * Validates the fixed IPv6 header, trims the skb to the advertised payload
 * length, parses a leading hop-by-hop options header if present, and then
 * passes the packet through the NF_IP6_PRE_ROUTING hook to ip6_rcv_finish.
 *
 * Returns the NF_HOOK result on the success path and 0 on every drop path.
 */
int ipv6_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev)
{
struct ipv6hdr *hdr;
u32 pkt_len;
struct inet6_dev *idev;
// Only devices belonging to init_net are handled; anything else is dropped
if (dev->nd_net != &init_net) {
kfree_skb(skb);
return 0;
}
// Frame whose link-layer address belongs to another host
if (skb->pkt_type == PACKET_OTHERHOST) {
kfree_skb(skb);
return 0;
}
rcu_read_lock();
// Per-device IPv6 configuration, used below for the statistics counters
idev = __in6_dev_get(skb->dev);
IP6_INC_STATS_BH(idev, IPSTATS_MIB_INRECEIVES);
// If the skb is shared, continue with a private clone; NULL means the clone failed
if ((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL) {
IP6_INC_STATS_BH(idev, IPSTATS_MIB_INDISCARDS);
rcu_read_unlock();
goto out;
}
// Clear the control block that stores the extension-header parse results
memset(IP6CB(skb), 0, sizeof(struct inet6_skb_parm));
// Record the incoming interface index: from the attached dst if there is one, otherwise from dev
IP6CB(skb)->iif = skb->dst ? ip6_dst_idev(skb->dst)->dev->ifindex : dev->ifindex;
// The fixed IPv6 header (40 bytes) must be available
if (unlikely(!pskb_may_pull(skb, sizeof(*hdr))))
goto err;
hdr = ipv6_hdr(skb); // fetch the header
if (hdr->version != 6) // verify the version field
goto err;
// The transport header (or first extension header) starts right after the fixed header
skb->transport_header = skb->network_header + sizeof(*hdr);
// Offset, within the IPv6 header, of the field naming the next header
IP6CB(skb)->nhoff = offsetof(struct ipv6hdr, nexthdr);
pkt_len = ntohs(hdr->payload_len); // IPv6 payload length
/* pkt_len may be zero if Jumbo payload option is present */
if (pkt_len || hdr->nexthdr != NEXTHDR_HOP) { // length is non-zero or there is no hop-by-hop header, so it can be checked now
if (pkt_len + sizeof(struct ipv6hdr) > skb->len) { // advertised length exceeds what was received
IP6_INC_STATS_BH(idev, IPSTATS_MIB_INTRUNCATEDPKTS);
goto drop;
}
// If skb->len > (pkt_len + sizeof(struct ipv6hdr)), trim the skb down to that length
// (article note: considerably simpler than the IPv4 equivalent)
if (pskb_trim_rcsum(skb, pkt_len + sizeof(struct ipv6hdr))) {
IP6_INC_STATS_BH(idev, IPSTATS_MIB_INHDRERRORS);
goto drop;
}
hdr = ipv6_hdr(skb); // re-fetch the header pointer after the trim
}
if (hdr->nexthdr == NEXTHDR_HOP) { // a hop-by-hop options header follows
// The skb is not freed here: the failure paths of ipv6_parse_hopopts/ip6_parse_tlv
// shown in this file already call kfree_skb or hand the skb to icmpv6_param_prob
if (ipv6_parse_hopopts(skb) < 0) {// parse the hop-by-hop options
IP6_INC_STATS_BH(idev, IPSTATS_MIB_INHDRERRORS);
rcu_read_unlock();
return 0;
}
}
rcu_read_unlock();
// Run the IPv6 netfilter PRE_ROUTING hook, then continue in ip6_rcv_finish
return NF_HOOK(PF_INET6,NF_IP6_PRE_ROUTING, skb, dev, NULL, ip6_rcv_finish);
err:
IP6_INC_STATS_BH(idev, IPSTATS_MIB_INHDRERRORS);
drop:
rcu_read_unlock();
kfree_skb(skb);
out:
return 0;
}
解析擴展頭逐個跳段中的巨量負載選項
/*
 * ipv6_parse_hopopts - parse the hop-by-hop options header that directly
 * follows the fixed IPv6 header.
 *
 * Returns 1 on success, after advancing skb->transport_header past the
 * options header and storing sizeof(struct ipv6hdr) in the control block's
 * nhoff. Returns -1 on failure. If the header cannot be pulled, the skb is
 * freed here. If ip6_parse_tlv() reports failure, the skb is not touched
 * again here.
 */
int ipv6_parse_hopopts(struct sk_buff *skb)
{
	struct inet6_skb_parm *opt = IP6CB(skb);

	/*
	 * skb_network_header(skb) is equal to skb->data, and
	 * skb_network_header_len(skb) is always equal to
	 * sizeof(struct ipv6hdr) by definition of hop-by-hop options.
	 */

	/* The first 8 bytes must be there before the length byte is read. */
	if (!pskb_may_pull(skb, sizeof(struct ipv6hdr) + 8))
		goto too_short;

	/* Then the whole header, whose length is (hdrlen byte + 1) * 8. */
	if (!pskb_may_pull(skb, (sizeof(struct ipv6hdr) +
				 ((skb_transport_header(skb)[1] + 1) << 3))))
		goto too_short;

	opt->hop = sizeof(struct ipv6hdr);	/* 40 bytes */

	if (!ip6_parse_tlv(tlvprochopopt_lst, skb))
		return -1;

	/* Step the transport header over the options header just parsed. */
	skb->transport_header += (skb_transport_header(skb)[1] + 1) << 3;
	opt = IP6CB(skb);
	/* The next-header byte is now the first byte of the options header. */
	opt->nhoff = sizeof(struct ipv6hdr);
	return 1;

too_short:
	kfree_skb(skb);
	return -1;
}
解析tlv編碼的擴展選項頭
/*
 * ip6_parse_tlv - walk the TLV-encoded options of the extension header at
 * skb_transport_header(skb).
 *
 * procs is a table of {type, func} handlers terminated by an entry with a
 * negative type. Each option that is not Pad1/PadN is dispatched to the
 * matching handler, or to ip6_tlvopt_unknown() when none matches.
 *
 * Returns 1 when all options were consumed exactly, 0 otherwise. On the
 * "bad" path the skb is freed here. When a handler returns 0 this function
 * returns 0 without touching the skb again.
 */
static int ip6_parse_tlv(struct tlvtype_proc *procs, struct sk_buff *skb)
{
struct tlvtype_proc *curr;
const unsigned char *nh = skb_network_header(skb); // start of the network header
int off = skb_network_header_len(skb); // length of the network header
int len = (skb_transport_header(skb)[1] + 1) << 3; // length of this extension header in bytes
if (skb_transport_offset(skb) + len > skb_headlen(skb)) // header extends past the linear data
goto bad;
off += 2; // skip the next-header and header-length bytes
len -= 2;
while (len > 0) {
int optlen = nh[off + 1] + 2; // option data length + 2 (the type and length bytes)
switch (nh[off]) { // option type
case IPV6_TLV_PAD0: // Pad1: a single byte with no length field
optlen = 1;
break;
case IPV6_TLV_PADN:// PadN: skipped using its length
break;
default: // any other option
if (optlen > len)
goto bad;
for (curr = procs; curr->type >= 0; curr++) {
if (curr->type == nh[off]) { // type matches: call its handler
/* type specific length/alignment checks will be performed in the func(). */
if (curr->func(skb, off) == 0)
return 0;
break;
}
}
// curr stopped on the terminating entry, so no handler matched
if (curr->type < 0) {
if (ip6_tlvopt_unknown(skb, off) == 0) // handle the unknown option
return 0;
}
break;
}
off += optlen; // advance to the next option
len -= optlen; // remaining bytes in the header
}
if (len == 0)
return 1; // parsed exactly to the end of the header
bad:
kfree_skb(skb);
return 0;
}
處理未知的選項
/*
 * ip6_tlvopt_unknown - deal with an option type that has no handler.
 *
 * The two highest-order bits of the option type select the action:
 *   0 - skip the option (return 1)
 *   1 - drop the packet
 *   2 - send an ICMPv6 parameter problem (ICMPV6_UNK_OPTION) pointing at
 *       the option
 *   3 - like 2, but only when the destination is not multicast; otherwise
 *       drop the packet
 *
 * Returns 1 only for action 0, otherwise 0. On the drop paths the skb is
 * freed here. On the ICMP paths it is handed to icmpv6_param_prob().
 */
static int ip6_tlvopt_unknown(struct sk_buff *skb, int optoff)
{
	const int action = (skb_network_header(skb)[optoff] & 0xC0) >> 6;

	if (action == 0)
		return 1;

	/*
	 * The multicast test is redundant (icmp_send rechecks it in any
	 * case), but it keeps the drop decision local.
	 */
	if (action == 2 ||
	    (action == 3 && !ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr))) {
		icmpv6_param_prob(skb, ICMPV6_UNK_OPTION, optoff);
		return 0;
	}

	kfree_skb(skb);
	return 0;
}
到這需要解釋一下,上面解析ipv6選項只是解析了第一層的擴展頭,在后面可能還有其他擴展頭會在后面解析。
/*
 * ip6_rcv_finish - continuation of ipv6_rcv after the PRE_ROUTING hook.
 * Performs a route lookup when the skb has no dst attached yet, then
 * passes the skb to dst_input().
 */
inline int ip6_rcv_finish(struct sk_buff *skb)
{
	if (!skb->dst)
		ip6_route_input(skb);

	return dst_input(skb);
}
/*
 * dst_input - call the input method of the skb's dst entry.
 * The call is repeated for as long as it returns NET_XMIT_BYPASS; any
 * other value, including 0, is returned to the caller.
 */
static inline int dst_input(struct sk_buff *skb)
{
	for (;;) {
		int err = skb->dst->input(skb);

		if (likely(err == 0) || unlikely(err != NET_XMIT_BYPASS))
			return err;
	}
}
現在我們假設包是到本地的,那么上面的input函數就是
/*
 * ip6_input - pass the skb through the NF_IP6_LOCAL_IN netfilter hook,
 * with ip6_input_finish as the continuation.
 */
int ip6_input(struct sk_buff *skb)
{
	return NF_HOOK(PF_INET6, NF_IP6_LOCAL_IN, skb, skb->dev, NULL,
		       ip6_input_finish);
}
/*
 * ip6_input_finish - deliver a locally destined packet to the handler
 * registered for its next-header value.
 *
 * Loops ("resubmit") while the handler returns a positive value, which is
 * how the extension-header handlers in this file (returning 1) chain to
 * the following header. Always returns 0.
 */
static int ip6_input_finish(struct sk_buff *skb)
{
struct inet6_protocol *ipprot;
struct sock *raw_sk;
unsigned int nhoff;
int nexthdr;
u8 hash;
struct inet6_dev *idev;
/* Parse extension headers */
rcu_read_lock();
resubmit:
idev = ip6_dst_idev(skb->dst);
// Advance skb->data to the current transport (or extension) header
if (!pskb_pull(skb, skb_transport_offset(skb)))
goto discard;
nhoff = IP6CB(skb)->nhoff;
nexthdr = skb_network_header(skb)[nhoff];// protocol number of the header being processed
// Raw sockets: look up the bucket for this protocol and deliver a copy if one is bound
raw_sk = sk_head(&raw_v6_htable[nexthdr & (MAX_INET_PROTOS - 1)]);
if (raw_sk && !ipv6_raw_deliver(skb, nexthdr))
raw_sk = NULL;
// Look up the registered upper-layer handler (TCP, UDP, ... and the extension headers registered at init)
hash = nexthdr & (MAX_INET_PROTOS - 1);
if ((ipprot = rcu_dereference(inet6_protos[hash])) != NULL) {
int ret;
if (ipprot->flags & INET6_PROTO_FINAL) {
struct ipv6hdr *hdr;
/* Free reference early: we don't need it any more,
and it may hold ip_conntrack module loaded indefinitely. */
nf_reset(skb);
skb_postpull_rcsum(skb, skb_network_header(skb), skb_network_header_len(skb));
hdr = ipv6_hdr(skb);
// Multicast destination that this device has not joined and that is not MLD traffic: discard
if (ipv6_addr_is_multicast(&hdr->daddr) && !ipv6_chk_mcast_addr(skb->dev, &hdr->daddr, &hdr->saddr)
&& !ipv6_is_mld(skb, nexthdr))
goto discard;
}
// IPsec (xfrm) inbound policy check, skipped for handlers flagged NOPOLICY
if (!(ipprot->flags & INET6_PROTO_NOPOLICY) && !xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb))
goto discard;
ret = ipprot->handler(skb); // protocol / extension-header handler
if (ret > 0)
goto resubmit; // a header was consumed: process the next one
else if (ret == 0)
IP6_INC_STATS_BH(idev, IPSTATS_MIB_INDELIVERS);
} else { // no handler registered for this protocol
if (!raw_sk) {
// No raw socket took it either: report "unknown next header" if policy allows
if (xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) {
IP6_INC_STATS_BH(idev, IPSTATS_MIB_INUNKNOWNPROTOS);
icmpv6_send(skb, ICMPV6_PARAMPROB, ICMPV6_UNK_NEXTHDR, nhoff, skb->dev);
}
} else
IP6_INC_STATS_BH(idev, IPSTATS_MIB_INDELIVERS);
kfree_skb(skb);
}
rcu_read_unlock();
return 0;
discard:
IP6_INC_STATS_BH(idev, IPSTATS_MIB_INDISCARDS);
rcu_read_unlock();
kfree_skb(skb);
return 0;
}
[ipv6選項處理]
/*
 * Handlers for the hop-by-hop options understood by ip6_parse_tlv().
 * The final entry (first member -1) terminates the table.
 */
static struct tlvtype_proc tlvprochopopt_lst[] = {
	{ .type = IPV6_TLV_ROUTERALERT,	.func = ipv6_hop_ra },
	{ .type = IPV6_TLV_JUMBO,	.func = ipv6_hop_jumbo },
	{ -1, }
};
解析路由警告選項
/*
 * ipv6_hop_ra - handle the Router Alert hop-by-hop option.
 *
 * The option's data-length byte must be 2. If it is, the option offset is
 * recorded in the control block's ra field and 1 is returned. Otherwise
 * the skb is freed and 0 is returned.
 */
static int ipv6_hop_ra(struct sk_buff *skb, int optoff)
{
	const unsigned char *nh = skb_network_header(skb);

	if (nh[optoff + 1] != 2) {
		LIMIT_NETDEBUG(KERN_DEBUG "ipv6_hop_ra: wrong RA length %d\n", nh[optoff + 1]);
		kfree_skb(skb);
		return 0;
	}

	/* Remember where the option sits, relative to the network header. */
	IP6CB(skb)->ra = optoff;
	return 1;
}
解析jumbo frame選項
/*
 * ipv6_hop_jumbo - handle the Jumbo Payload hop-by-hop option.
 *
 * Returns 1 when the option is valid and the skb has been trimmed to the
 * jumbo length, 0 otherwise. On the "drop" path the skb is freed here. On
 * the two ICMP paths it is handed to icmpv6_param_prob() instead.
 */
static int ipv6_hop_jumbo(struct sk_buff *skb, int optoff)
{
const unsigned char *nh = skb_network_header(skb);
u32 pkt_len;
// The option data length must be 4, and the option offset must satisfy (optoff & 3) == 2
// (this is an alignment check on the offset, not a check on the option type)
if (nh[optoff + 1] != 4 || (optoff & 3) != 2) {
LIMIT_NETDEBUG(KERN_DEBUG "ipv6_hop_jumbo: wrong jumbo opt length/alignment %d\n", nh[optoff+1]);
IP6_INC_STATS_BH(ipv6_skb_idev(skb), IPSTATS_MIB_INHDRERRORS);
goto drop;
}
pkt_len = ntohl(*(__be32 *)(nh + optoff + 2)); // 32-bit jumbo payload length
if (pkt_len <= IPV6_MAXPLEN) { // a jumbo length that fits the normal 16-bit field is invalid
IP6_INC_STATS_BH(ipv6_skb_idev(skb), IPSTATS_MIB_INHDRERRORS);
icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, optoff+2);
return 0;
}
if (ipv6_hdr(skb)->payload_len) { // the fixed header's payload length must be zero with this option
IP6_INC_STATS_BH(ipv6_skb_idev(skb), IPSTATS_MIB_INHDRERRORS);
icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, optoff);
return 0;
}
if (pkt_len > skb->len - sizeof(struct ipv6hdr)) { // advertised length exceeds what the skb holds
IP6_INC_STATS_BH(ipv6_skb_idev(skb), IPSTATS_MIB_INTRUNCATEDPKTS);
goto drop;
}
// Trim the skb to the advertised length if it is longer
if (pskb_trim_rcsum(skb, pkt_len + sizeof(struct ipv6hdr)))
goto drop;
return 1;
drop:
kfree_skb(skb);
return 0;
}
目的選項處理
/*
 * Handlers for the destination options understood by ip6_parse_tlv().
 * The Home Address option is only handled when Mobile IPv6 is configured.
 * The final entry terminates the table.
 */
static struct tlvtype_proc tlvprocdestopt_lst[] = {
#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
	{ .type = IPV6_TLV_HAO, .func = ipv6_dest_hao },
#endif
	{-1, NULL}
};
解析目的選項
/*
 * ipv6_dest_hao - handle the Home Address destination option
 * (IPV6_TLV_HAO in tlvprocdestopt_lst).
 *
 * After validation the IPv6 source address and the address carried in the
 * option are swapped. Returns 1 on success. On any failure the skb is freed
 * and 0 is returned.
 */
static int ipv6_dest_hao(struct sk_buff *skb, int optoff)
{
struct ipv6_destopt_hao *hao;
struct inet6_skb_parm *opt = IP6CB(skb);
struct ipv6hdr *ipv6h = ipv6_hdr(skb);
struct in6_addr tmp_addr;
int ret;
if (opt->dsthao) { // a Home Address option was already seen in this packet
LIMIT_NETDEBUG(KERN_DEBUG "hao duplicated\n");
goto discard;
}
opt->dsthao = opt->dst1;
opt->dst1 = 0;
// Locate the option inside the packet (optoff is relative to the network header)
hao = (struct ipv6_destopt_hao *)(skb_network_header(skb) + optoff);
if (hao->length != 16) { // option data must be exactly 16 bytes
LIMIT_NETDEBUG(KERN_DEBUG "hao invalid option length = %d\n", hao->length);
goto discard;
}
if (!(ipv6_addr_type(&hao->addr) & IPV6_ADDR_UNICAST)) { // the carried address must be unicast
LIMIT_NETDEBUG(KERN_DEBUG "hao is not an unicast addr: " NIP6_FMT "\n", NIP6(hao->addr));
goto discard;
}
// IPsec (xfrm) address check
ret = xfrm6_input_addr(skb, (xfrm_address_t *)&ipv6h->daddr, (xfrm_address_t *)&hao->addr, IPPROTO_DSTOPTS);
if (unlikely(ret < 0))
goto discard;
if (skb_cloned(skb)) { // the header is about to be modified, so a cloned skb needs its own copy
// Reallocate the header data
if (pskb_expand_head(skb, 0, 0, GFP_ATOMIC))
goto discard;
// Re-derive the pointers into the (possibly moved) header
hao = (struct ipv6_destopt_hao *)(skb_network_header(skb) + optoff);
ipv6h = ipv6_hdr(skb);
}
if (skb->ip_summed == CHECKSUM_COMPLETE)
skb->ip_summed = CHECKSUM_NONE;
// Swap the IPv6 source address with the address in the option
ipv6_addr_copy(&tmp_addr, &ipv6h->saddr);
ipv6_addr_copy(&ipv6h->saddr, &hao->addr);
ipv6_addr_copy(&hao->addr, &tmp_addr);
if (skb->tstamp.tv64 == 0)
__net_timestamp(skb); // stamp the packet if it has no timestamp yet
return 1;
discard:
kfree_skb(skb);
return 0;
}
[/ipv6選項處理]
[ipv6擴展頭處理]
我們只介紹跟ipv6擴展頭相關的實現;tcp、udp等雖然同樣通過「下一個頭」字段註冊處理函數,但它們是傳輸層(上層)協議頭而不是擴展頭,將在其他文章中介紹。
路由擴展首部
/*
 * Generic layout of the IPv6 Routing extension header. Type-specific data
 * follows the four fixed bytes.
 */
struct ipv6_rt_hdr {
__u8 nexthdr; /* type of the header following this one */
__u8 hdrlen; /* header length; the code computes bytes as (hdrlen + 1) << 3 */
__u8 type; /* routing type, e.g. IPV6_SRCRT_TYPE_2 */
__u8 segments_left; /* number of route segments still to be visited */
/* type specific data variable length field */
};
路由擴展首部處理結構
/*
 * Protocol descriptor for the Routing extension header, registered under
 * IPPROTO_ROUTING by ipv6_rthdr_init().
 */
static struct inet6_protocol rthdr_protocol = {
	.flags		= INET6_PROTO_GSO_EXTHDR | INET6_PROTO_NOPOLICY,
	.handler	= ipv6_rthdr_rcv,
};
/*
 * ipv6_rthdr_rcv - handler for the IPv6 Routing extension header.
 *
 * With segments_left == 0 the header is skipped: transport_header is
 * advanced past it, nhoff is pointed at its nexthdr byte, and 1 is
 * returned so that ip6_input_finish() resubmits the packet.
 *
 * Otherwise only type 2 (Mobile IPv6, when configured) is processed. The
 * destination address is swapped with the next address in the header, the
 * packet is re-routed and handed to dst_input(), or looped back through
 * this function when the new route is a loopback device. Every path other
 * than the skip case returns -1.
 */
static int ipv6_rthdr_rcv(struct sk_buff *skb)
{
struct inet6_skb_parm *opt = IP6CB(skb);
struct in6_addr *addr = NULL;
struct in6_addr daddr;
struct inet6_dev *idev;
int n, i;
struct ipv6_rt_hdr *hdr;
struct rt0_hdr *rthdr;
int accept_source_route = ipv6_devconf.accept_source_route;
idev = in6_dev_get(skb->dev); // inet6 device of the receiving interface (paired with in6_dev_put below)
if (idev) {
if (accept_source_route > idev->cnf.accept_source_route) // the per-device setting is lower than the global one: use the lower value
accept_source_route = idev->cnf.accept_source_route;
in6_dev_put(idev);
}
// The first 8 bytes, and then the whole routing header, must be pullable
if (!pskb_may_pull(skb, skb_transport_offset(skb) + 8) || !pskb_may_pull(skb, (skb_transport_offset(skb) +
((skb_transport_header(skb)[1] + 1) << 3)))) {
IP6_INC_STATS_BH(ip6_dst_idev(skb->dst), IPSTATS_MIB_INHDRERRORS);
kfree_skb(skb);
return -1;
}
hdr = (struct ipv6_rt_hdr *)skb_transport_header(skb); // the routing header
// Destination is multicast, or the frame was not addressed to this host at link level
if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr) || skb->pkt_type != PACKET_HOST) {
IP6_INC_STATS_BH(ip6_dst_idev(skb->dst), IPSTATS_MIB_INADDRERRORS);
kfree_skb(skb);
return -1;
}
looped_back:
if (hdr->segments_left == 0) { // no segments left: this node is the final destination for the header
switch (hdr->type) {
#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
case IPV6_SRCRT_TYPE_2:
/* Silently discard type 2 header unless it was processed by own */
// addr is only non-NULL after a pass through the swap code below
if (!addr) {
IP6_INC_STATS_BH(ip6_dst_idev(skb->dst), IPSTATS_MIB_INADDRERRORS);
kfree_skb(skb);
return -1;
}
break;
#endif
default:
break;
}
opt->lastopt = opt->srcrt = skb_network_header_len(skb);
skb->transport_header += (hdr->hdrlen + 1) << 3; // step over the routing header
opt->dst0 = opt->dst1;
opt->dst1 = 0;
opt->nhoff = (&hdr->nexthdr) - skb_network_header(skb); // offset of this header's nexthdr byte from the network header
return 1;
}
switch (hdr->type) {
#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
case IPV6_SRCRT_TYPE_2:
if (accept_source_route < 0)
goto unknown_rh;
/* Silently discard invalid RTH type 2 */
if (hdr->hdrlen != 2 || hdr->segments_left != 1) {
IP6_INC_STATS_BH(ip6_dst_idev(skb->dst), IPSTATS_MIB_INHDRERRORS);
kfree_skb(skb);
return -1;
}
break;
#endif
default:
goto unknown_rh;
}
/* This is the routing header forwarding algorithm from RFC 2460, page 16. */
n = hdr->hdrlen >> 1; // number of addresses carried in the header (hdrlen / 2)
if (hdr->segments_left > n) {
IP6_INC_STATS_BH(ip6_dst_idev(skb->dst), IPSTATS_MIB_INHDRERRORS);
icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, ((&hdr->segments_left) - skb_network_header(skb)));
return -1;
}
/* We are about to mangle packet header. Be careful!
Do not damage packets queued somewhere. */
if (skb_cloned(skb)) {
/* the copy is a forwarded packet */
if (pskb_expand_head(skb, 0, 0, GFP_ATOMIC)) {
IP6_INC_STATS_BH(ip6_dst_idev(skb->dst), IPSTATS_MIB_OUTDISCARDS);
kfree_skb(skb);
return -1;
}
// The header may have moved, so re-derive the pointer
hdr = (struct ipv6_rt_hdr *)skb_transport_header(skb);
}
if (skb->ip_summed == CHECKSUM_COMPLETE)
skb->ip_summed = CHECKSUM_NONE;
i = n - --hdr->segments_left; // 1-based index of the next address to visit; segments_left is decremented
rthdr = (struct rt0_hdr *) hdr;
addr = rthdr->addr; // start of the address list
addr += i - 1; // move to the next address to visit
switch (hdr->type) {
#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
case IPV6_SRCRT_TYPE_2:
if (xfrm6_input_addr(skb, (xfrm_address_t *)addr, (xfrm_address_t *)&ipv6_hdr(skb)->saddr, IPPROTO_ROUTING) < 0) {
IP6_INC_STATS_BH(ip6_dst_idev(skb->dst), IPSTATS_MIB_INADDRERRORS);
kfree_skb(skb);
return -1;
}
if (!ipv6_chk_home_addr(addr)) {
IP6_INC_STATS_BH(ip6_dst_idev(skb->dst), IPSTATS_MIB_INADDRERRORS);
kfree_skb(skb);
return -1;
}
break;
#endif
default:
break;
}
if (ipv6_addr_is_multicast(addr)) { // the next address must not be multicast
IP6_INC_STATS_BH(ip6_dst_idev(skb->dst), IPSTATS_MIB_INADDRERRORS);
kfree_skb(skb);
return -1;
}
// Swap the IPv6 destination address with the selected address
ipv6_addr_copy(&daddr, addr);
ipv6_addr_copy(addr, &ipv6_hdr(skb)->daddr);
ipv6_addr_copy(&ipv6_hdr(skb)->daddr, &daddr);
// Drop the old route and look up a new one for the new destination
dst_release(xchg(&skb->dst, NULL));
ip6_route_input(skb); // route lookup (covered in the article's routing part)
if (skb->dst->error) {
skb_push(skb, skb->data - skb_network_header(skb));
dst_input(skb);
return -1;
}
if (skb->dst->dev->flags & IFF_LOOPBACK) { // the new route points at a loopback device
if (ipv6_hdr(skb)->hop_limit <= 1) { // hop limit exhausted
IP6_INC_STATS_BH(ip6_dst_idev(skb->dst), IPSTATS_MIB_INHDRERRORS);
// Send ICMPv6 "Time Exceeded - hop limit exceeded" to the source and drop the packet
icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT, 0, skb->dev);
kfree_skb(skb);
return -1;
}
ipv6_hdr(skb)->hop_limit--;
goto looped_back;
}
// Move skb->data back to the network header
skb_push(skb, skb->data - skb_network_header(skb));
dst_input(skb); // hand over to the new route's input (article note: the packet is forwarded here)
return -1;
unknown_rh:
IP6_INC_STATS_BH(ip6_dst_idev(skb->dst), IPSTATS_MIB_INHDRERRORS);
icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, (&hdr->type) - skb_network_header(skb));
return -1;
}
ipv6分片包擴展首部處理
/*
 * Protocol descriptor for the Fragment extension header, registered under
 * IPPROTO_FRAGMENT by ipv6_frag_init().
 */
static struct inet6_protocol frag_protocol = {
	.flags		= INET6_PROTO_NOPOLICY,
	.handler	= ipv6_frag_rcv,
};
/*
 * ipv6_frag_rcv - handler for the IPv6 Fragment extension header.
 *
 * Returns 1 when the header carries no real fragmentation (it is simply
 * skipped), otherwise the result of ip6_frag_queue(). Returns -1 on errors
 * and when no reassembly queue could be found or created.
 */
static int ipv6_frag_rcv(struct sk_buff *skb)
{
struct frag_hdr *fhdr;
struct frag_queue *fq;
struct ipv6hdr *hdr = ipv6_hdr(skb);
IP6_INC_STATS_BH(ip6_dst_idev(skb->dst), IPSTATS_MIB_REASMREQDS);
/* Jumbo payload inhibits frag. header */
if (hdr->payload_len == 0) { // zero payload length means a jumbo payload, which must not be fragmented
IP6_INC_STATS(ip6_dst_idev(skb->dst), IPSTATS_MIB_INHDRERRORS);
icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, skb_network_header_len(skb));
return -1;
}
// The fragment header itself must be pullable
if (!pskb_may_pull(skb, (skb_transport_offset(skb) + sizeof(struct frag_hdr)))) {
IP6_INC_STATS(ip6_dst_idev(skb->dst), IPSTATS_MIB_INHDRERRORS);
icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, skb_network_header_len(skb));
return -1;
}
// Re-fetch after the pull above
hdr = ipv6_hdr(skb);
fhdr = (struct frag_hdr *)skb_transport_header(skb); // the fragment header
if (!(fhdr->frag_off & htons(0xFFF9))) { // offset is zero and the more-fragments bit is clear: not actually fragmented
/* It is not a fragmented frame */
skb->transport_header += sizeof(struct frag_hdr); // step the transport header over the fragment header
IP6_INC_STATS_BH(ip6_dst_idev(skb->dst), IPSTATS_MIB_REASMOKS);
IP6CB(skb)->nhoff = (u8 *)fhdr - skb_network_header(skb);
return 1;
}
if (atomic_read(&ip6_frags.mem) > ip6_frags_ctl.high_thresh) // reassembly memory is above the high threshold
ip6_evictor(ip6_dst_idev(skb->dst));
// Find or create the reassembly queue for (id, source, destination)
if ((fq = fq_find(fhdr->identification, &hdr->saddr, &hdr->daddr, ip6_dst_idev(skb->dst))) != NULL) {
int ret;
spin_lock(&fq->q.lock);
ret = ip6_frag_queue(fq, skb, fhdr, IP6CB(skb)->nhoff); // enqueue, and reassemble when complete
spin_unlock(&fq->q.lock);
fq_put(fq);
return ret;
}
IP6_INC_STATS_BH(ip6_dst_idev(skb->dst), IPSTATS_MIB_REASMFAILS);
kfree_skb(skb);
return -1;
}
/*
 * fq_find - find, or create, the reassembly queue for a fragment.
 *
 * The queue is keyed by fragment identification, source address and
 * destination address. Returns the enclosing struct frag_queue, or NULL
 * after bumping IPSTATS_MIB_REASMFAILS on idev when inet_frag_find()
 * yields nothing.
 */
static __inline__ struct frag_queue * fq_find(__be32 id, struct in6_addr *src, struct in6_addr *dst, struct inet6_dev *idev)
{
	struct ip6_create_arg arg;
	struct inet_frag_queue *found;
	unsigned int bucket;

	/* Build the lookup key and the hash bucket from the same triple. */
	arg.id = id;
	arg.src = src;
	arg.dst = dst;
	bucket = ip6qhashfn(id, src, dst);

	found = inet_frag_find(&ip6_frags, &arg, bucket);
	if (found == NULL) {
		/* Neither an existing queue nor a new one could be obtained. */
		IP6_INC_STATS_BH(idev, IPSTATS_MIB_REASMFAILS);
		return NULL;
	}

	return container_of(found, struct frag_queue, q);
}
/*
 * inet_frag_find - look up a fragment queue in hash bucket @hash, creating
 * one when no existing queue matches @key.
 *
 * On a hit the queue's refcnt is incremented before it is returned. On a
 * miss the result of inet_frag_create() is returned, which may be NULL.
 *
 * The read lock on f->lock is released on both paths. It must be dropped
 * before inet_frag_create() is called, because inet_frag_intern() takes the
 * same lock for writing.
 */
struct inet_frag_queue *inet_frag_find(struct inet_frags *f, void *key, unsigned int hash)
{
	struct inet_frag_queue *q;
	struct hlist_node *n;

	read_lock(&f->lock);
	hlist_for_each_entry(q, n, &f->hash[hash], list) {
		/* f->match is the protocol's comparison callback. */
		if (f->match(q, key)) {
			atomic_inc(&q->refcnt);
			read_unlock(&f->lock);
			return q;
		}
	}
	read_unlock(&f->lock);

	/* Nothing matched: allocate a new queue and insert it. */
	return inet_frag_create(f, key, hash);
}
創建分片隊列
/*
 * inet_frag_create - allocate a new fragment queue and insert it into the
 * hash table. Returns NULL when the allocation fails, otherwise whatever
 * inet_frag_intern() returns.
 */
static struct inet_frag_queue *inet_frag_create(struct inet_frags *f, void *arg, unsigned int hash)
{
	struct inet_frag_queue *fresh = inet_frag_alloc(f, arg);

	return fresh ? inet_frag_intern(fresh, f, hash, arg) : NULL;
}
/*
 * inet_frag_alloc - allocate and initialise one fragment queue.
 *
 * Allocates f->qsize zeroed bytes with GFP_ATOMIC, runs the protocol's
 * constructor on them, adds the size to f->mem, and sets up the expiry
 * timer, the lock and an initial refcnt of 1. Returns NULL when the
 * allocation fails.
 */
static struct inet_frag_queue *inet_frag_alloc(struct inet_frags *f, void *arg)
{
	struct inet_frag_queue *q = kzalloc(f->qsize, GFP_ATOMIC);

	if (!q)
		return NULL;

	/* Protocol-specific setup from the lookup key. */
	f->constructor(q, arg);
	atomic_add(f->qsize, &f->mem);

	setup_timer(&q->timer, f->frag_expire, (unsigned long)q);
	spin_lock_init(&q->lock);
	atomic_set(&q->refcnt, 1);

	return q;
}
/*
 * inet_frag_intern - insert a freshly allocated queue (qp_in) into the
 * hash table under the write lock.
 *
 * On SMP builds the bucket is searched again first. If a matching queue is
 * already there, qp_in is marked COMPLETE and released, and the existing
 * queue is returned with its refcnt incremented. Otherwise qp_in's timer is
 * armed, it is linked into the hash bucket and the LRU list, and it is
 * returned.
 */
static struct inet_frag_queue *inet_frag_intern(struct inet_frag_queue *qp_in, struct inet_frags *f, unsigned int hash, void *arg)
{
struct inet_frag_queue *qp;
#ifdef CONFIG_SMP
struct hlist_node *n;
#endif
write_lock(&f->lock);
#ifdef CONFIG_SMP
// Another CPU may have created the same queue in the meantime, so check again
hlist_for_each_entry(qp, n, &f->hash[hash], list) {
if (f->match(qp, arg)) { // already created
atomic_inc(&qp->refcnt);
write_unlock(&f->lock);
qp_in->last_in |= COMPLETE;
inet_frag_put(qp_in, f); // release the queue that was just allocated
return qp;
}
}
#endif
qp = qp_in;
// Arm the expiry timer; one more reference is taken when mod_timer() returns 0
if (!mod_timer(&qp->timer, jiffies + f->ctl->timeout))
atomic_inc(&qp->refcnt);
// Take a further reference, then link the queue into the hash table and LRU list
atomic_inc(&qp->refcnt);
hlist_add_head(&qp->list, &f->hash[hash]);
list_add_tail(&qp->lru_list, &f->lru_list);
f->nqueues++;
write_unlock(&f->lock);
return qp;
}
入隊重組
/*
 * ip6_frag_queue - add one fragment to its reassembly queue.
 *
 * @fq:    reassembly queue; ipv6_frag_rcv() calls this with fq->q.lock held
 * @skb:   the fragment
 * @fhdr:  the fragment header inside @skb
 * @nhoff: offset of the next-header byte that refers to the fragment header
 *
 * Validates the fragment's offset and length, resolves overlaps with
 * fragments already queued, and links the skb into the offset-sorted list.
 * When the first and last fragments are both present and the byte count
 * matches the datagram length, the result of ip6_frag_reasm() is returned.
 * Every other path returns -1. On the "err" path the skb is freed here.
 */
static int ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb, struct frag_hdr *fhdr, int nhoff)
{
	struct sk_buff *prev, *next;
	struct net_device *dev;
	int offset, end;

	/* Reassembly of this queue has already finished. */
	if (fq->q.last_in & COMPLETE)
		goto err;

	/* Start of this fragment; the low three bits are flags. */
	offset = ntohs(fhdr->frag_off) & ~0x7;
	/*
	 * End of this fragment within the datagram: the payload length
	 * minus everything up to and including the fragment header.
	 */
	end = offset + (ntohs(ipv6_hdr(skb)->payload_len) - ((u8 *)(fhdr + 1) - (u8 *)(ipv6_hdr(skb) + 1)));

	/* The reassembled payload would exceed the 16-bit maximum. */
	if ((unsigned int)end > IPV6_MAXPLEN) {
		IP6_INC_STATS_BH(ip6_dst_idev(skb->dst), IPSTATS_MIB_INHDRERRORS);
		icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, ((u8 *)&fhdr->frag_off - skb_network_header(skb)));
		return -1;
	}

	if (skb->ip_summed == CHECKSUM_COMPLETE) {
		const unsigned char *nh = skb_network_header(skb);

		/* Take the headers up to the fragment payload out of the sum. */
		skb->csum = csum_sub(skb->csum, csum_partial(nh, (u8 *)(fhdr + 1) - nh, 0));
	}

	if (!(fhdr->frag_off & htons(IP6_MF))) {
		/* Last fragment: more-fragments bit is clear. */
		/* If we already have some bits beyond end or have different end, the segment is corrupted. */
		if (end < fq->q.len || ((fq->q.last_in & LAST_IN) && end != fq->q.len))
			goto err;
		fq->q.last_in |= LAST_IN;
		fq->q.len = end;	/* total datagram length is now known */
	} else {
		/* Check if the fragment is rounded to 8 bytes. Required by the RFC. */
		if (end & 0x7) {
			/* RFC2460 says always send parameter problem in this case. -DaveM */
			IP6_INC_STATS_BH(ip6_dst_idev(skb->dst), IPSTATS_MIB_INHDRERRORS);
			icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, offsetof(struct ipv6hdr, payload_len));
			return -1;
		}
		if (end > fq->q.len) {
			/* Some bits beyond end -> corruption. */
			if (fq->q.last_in & LAST_IN)
				goto err;
			fq->q.len = end;	/* furthest byte seen so far */
		}
	}

	/* Empty fragment. */
	if (end == offset)
		goto err;

	/* Point skb->data at the payload following the fragment header. */
	if (!pskb_pull(skb, (u8 *) (fhdr + 1) - skb->data))
		goto err;

	/* Trim the skb to the fragment's payload length if it is longer. */
	if (pskb_trim_rcsum(skb, end - offset))
		goto err;

	/* Find the insertion point in the offset-sorted fragment list. */
	prev = NULL;
	for (next = fq->q.fragments; next != NULL; next = next->next) {
		if (FRAG6_CB(next)->offset >= offset)
			break;	/* bingo! */
		prev = next;
	}

	if (prev) {
		/* Bytes by which the previous fragment overlaps this one. */
		int i = (FRAG6_CB(prev)->offset + prev->len) - offset;

		if (i > 0) {
			/* Drop the overlapping head of this fragment. */
			offset += i;
			if (end <= offset)
				goto err;
			if (!pskb_pull(skb, i))
				goto err;
			if (skb->ip_summed != CHECKSUM_UNNECESSARY)
				skb->ip_summed = CHECKSUM_NONE;
		}
	}

	/* Resolve overlaps with the fragments that follow. */
	while (next && FRAG6_CB(next)->offset < end) {
		int i = end - FRAG6_CB(next)->offset; /* overlap is 'i' bytes */

		if (i < next->len) {
			/* Partial overlap: eat the head of the next fragment. */
			if (!pskb_pull(next, i))
				goto err;
			FRAG6_CB(next)->offset += i;
			fq->q.meat -= i;
			if (next->ip_summed != CHECKSUM_UNNECESSARY)
				next->ip_summed = CHECKSUM_NONE;
			break;
		} else {
			/* The next fragment is fully covered: unlink and free it. */
			struct sk_buff *free_it = next;

			next = next->next;
			if (prev)
				prev->next = next;
			else
				fq->q.fragments = next;
			fq->q.meat -= free_it->len;
			frag_kfree_skb(free_it, NULL);
		}
	}

	/* Remember this fragment's (possibly adjusted) start offset. */
	FRAG6_CB(skb)->offset = offset;

	/* Link the fragment into the list. */
	skb->next = next;
	if (prev)
		prev->next = skb;
	else
		fq->q.fragments = skb;

	dev = skb->dev;
	if (dev) {
		fq->iif = dev->ifindex;
		skb->dev = NULL;
	}
	fq->q.stamp = skb->tstamp;
	fq->q.meat += skb->len;	/* bytes collected so far */
	atomic_add(skb->truesize, &ip6_frags.mem);

	if (offset == 0) {
		/* First fragment: remember where its next-header byte lives. */
		fq->nhoffset = nhoff;
		fq->q.last_in |= FIRST_IN;
	}

	/* First and last seen, and no holes left: reassemble. */
	if (fq->q.last_in == (FIRST_IN | LAST_IN) && fq->q.meat == fq->q.len)
		return ip6_frag_reasm(fq, prev, dev);

	/* Still incomplete: move the queue to the tail of the LRU list. */
	write_lock(&ip6_frags.lock);
	list_move_tail(&fq->q.lru_list, &ip6_frags.lru_list);
	write_unlock(&ip6_frags.lock);
	return -1;

err:
	IP6_INC_STATS(ip6_dst_idev(skb->dst), IPSTATS_MIB_REASMFAILS);
	kfree_skb(skb);
	return -1;
}
重組ip頭
/*
 * ip6_frag_reasm - build the complete datagram from a fully collected
 * fragment queue.
 *
 * prev is the fragment preceding the one just received (NULL when that one
 * is the list head) and dev is the receiving device. The fragment header is
 * removed from the head skb, the remaining fragments are attached as its
 * frag_list, and lengths, checksum and payload_len are fixed up.
 *
 * Returns 1 on the success path shown here. The out_oom and out_oversize
 * error labels are elided from this excerpt.
 */
static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev, struct net_device *dev)
{
struct sk_buff *fp, *head = fq->q.fragments;
int payload_len;
unsigned int nhoff;
fq_kill(fq); // take this reassembly queue out of service
/* Make the one we just received the head. */
if (prev) {
// Clone the just-received skb into fp and let fp take its place in the list
head = prev->next;
fp = skb_clone(head, GFP_ATOMIC);
if (!fp)
goto out_oom;
fp->next = head->next;
prev->next = fp;
// Morph the just-received skb into the real first fragment
skb_morph(head, fq->q.fragments);
head->next = fq->q.fragments->next;
kfree_skb(fq->q.fragments);// free the original first fragment
fq->q.fragments = head;
}
/* Unfragmented part is taken from the first segment. */
// Total payload length: unfragmentable part of the head + reassembled data - fragment header
payload_len = ((head->data - skb_network_header(head)) - sizeof(struct ipv6hdr) + fq->q.len - sizeof(struct frag_hdr));
if (payload_len > IPV6_MAXPLEN) // larger than the 16-bit payload length allows
goto out_oversize;
/* Head of list must not be cloned. */
// If the head is cloned, give it a private copy of its data
if (skb_cloned(head) && pskb_expand_head(head, 0, 0, GFP_ATOMIC))
goto out_oom;
/* If the first fragment is fragmented itself, we split it to two chunks: the first with data and paged part * and the second, holding only fragments. */
if (skb_shinfo(head)->frag_list) {// the head already carries a frag_list of its own
struct sk_buff *clone;
int i, plen = 0;
if ((clone = alloc_skb(0, GFP_ATOMIC)) == NULL)
goto out_oom;
// Insert the new skb right after the head
clone->next = head->next;
head->next = clone;
// Move the head's frag_list over to it
skb_shinfo(clone)->frag_list = skb_shinfo(head)->frag_list;
skb_shinfo(head)->frag_list = NULL;
// Sum the head's paged data so the lengths can be split between head and clone
for (i = 0; i < skb_shinfo(head)->nr_frags; i++)
plen += skb_shinfo(head)->frags[i].size;
clone->len = clone->data_len = head->data_len - plen;
head->data_len -= clone->len;
head->len -= clone->len;
clone->csum = 0;
clone->ip_summed = head->ip_summed;
atomic_add(clone->truesize, &ip6_frags.mem);
}
/* We have to remove fragment header from datagram and to relocate
* header in order to calculate ICV correctly. */
nhoff = fq->nhoffset;
// Copy the fragment header's next-header value into the preceding header's next-header byte
skb_network_header(head)[nhoff] = skb_transport_header(head)[0];
// Shift the preceding headers forward by sizeof(struct frag_hdr), overwriting the fragment header
memmove(head->head + sizeof(struct frag_hdr), head->head, (head->data - head->head) - sizeof(struct frag_hdr));
// Adjust the header offsets to match the shifted data
head->mac_header += sizeof(struct frag_hdr);
head->network_header += sizeof(struct frag_hdr);
skb_shinfo(head)->frag_list = head->next; // remaining fragments become the head's frag_list
skb_reset_transport_header(head);// reset the transport header (it now refers to the header after the removed fragment header)
skb_push(head, head->data - skb_network_header(head));// make head->data point at the network header
atomic_sub(head->truesize, &ip6_frags.mem);
for (fp = head->next; fp; fp = fp->next) { // fold every fragment's length, checksum and truesize into the head
head->data_len += fp->len;
head->len += fp->len;
if (head->ip_summed != fp->ip_summed)
head->ip_summed = CHECKSUM_NONE;
else if (head->ip_summed == CHECKSUM_COMPLETE)
head->csum = csum_add(head->csum, fp->csum); // accumulate the fragment checksums
head->truesize += fp->truesize;
atomic_sub(fp->truesize, &ip6_frags.mem);
}
head->next = NULL;
head->dev = dev;
head->tstamp = fq->q.stamp;
ipv6_hdr(head)->payload_len = htons(payload_len); // final payload length
IP6CB(head)->nhoff = nhoff;
/* Yes, and fold redundant checksum back. 8) */
if (head->ip_summed == CHECKSUM_COMPLETE) // add the network header back into the checksum
head->csum = csum_partial(skb_network_header(head), skb_network_header_len(head), head->csum);
rcu_read_lock();
IP6_INC_STATS_BH(__in6_dev_get(dev), IPSTATS_MIB_REASMOKS);
rcu_read_unlock();
fq->q.fragments = NULL;
return 1;
...... // error handling follows (elided in this excerpt)
}
無數據擴展頭
/*
 * Protocol descriptor for next-header value IPPROTO_NONE, registered by
 * ipv6_nodata_init().
 */
static struct inet6_protocol nodata_protocol = {
	.flags		= INET6_PROTO_NOPOLICY,
	.handler	= ipv6_nodata_rcv,
};
/*
 * ipv6_nodata_rcv - handler for "no next header": frees the skb and
 * returns 0.
 */
static int ipv6_nodata_rcv(struct sk_buff *skb)
{
	kfree_skb(skb);

	return 0;
}
目的選項首部處理
/*
 * Protocol descriptor for the Destination Options extension header,
 * registered under IPPROTO_DSTOPTS by ipv6_destopt_init().
 */
static struct inet6_protocol destopt_protocol = {
	.flags		= INET6_PROTO_GSO_EXTHDR | INET6_PROTO_NOPOLICY,
	.handler	= ipv6_destopt_rcv,
};
/*
 * ipv6_destopt_rcv - handler for the Destination Options extension header.
 *
 * Returns 1 after the options were parsed and transport_header was advanced
 * past the header, so that ip6_input_finish() resubmits the packet. Returns
 * -1 on failure. The skb is freed here only when the header cannot be
 * pulled.
 */
static int ipv6_destopt_rcv(struct sk_buff *skb)
{
struct inet6_skb_parm *opt = IP6CB(skb);
#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
__u16 dstbuf;
#endif
struct dst_entry *dst;
// The first 8 bytes, and then the whole options header, must be pullable
if (!pskb_may_pull(skb, skb_transport_offset(skb) + 8) || !pskb_may_pull(skb, (skb_transport_offset(skb) +
((skb_transport_header(skb)[1] + 1) << 3)))) {
IP6_INC_STATS_BH(ip6_dst_idev(skb->dst), IPSTATS_MIB_INHDRERRORS);
kfree_skb(skb);
return -1;
}
opt->lastopt = opt->dst1 = skb_network_header_len(skb); // network header length
#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
// Saved because ipv6_dest_hao() resets opt->dst1 to 0 while parsing
dstbuf = opt->dst1;
#endif
dst = dst_clone(skb->dst); // keep a dst reference for the statistics on the failure path
// Parse the TLV options with the destination-option handler table
if (ip6_parse_tlv(tlvprocdestopt_lst, skb)) {
dst_release(dst);
skb->transport_header += (skb_transport_header(skb)[1] + 1) << 3; // step the transport header over this options header
opt = IP6CB(skb);
#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
opt->nhoff = dstbuf;
#else
opt->nhoff = opt->dst1;
#endif
return 1;
}
IP6_INC_STATS_BH(ip6_dst_idev(dst), IPSTATS_MIB_INHDRERRORS);
dst_release(dst);
return -1;
}
[/ipv6擴展頭處理]