看代碼實現前,請先保證了解ipv6的概念,可以先看ipv6介紹一文。
code extract from 2.6.24. 在文件 net/ipv6/af_inet6.c 中包含了ipv6協議初始化的主函數。 static int __init inet6_init(void) { struct sk_buff *dummy_skb; struct list_head *r; int err; //inet6_skb_parm必須小於等於skb中的cb BUILD_BUG_ON(sizeof(struct inet6_skb_parm) > sizeof(dummy_skb->cb)); //初始化tcpv6_prot結構中的一些與slab相關的字段,然后添加到 proto_list 全局鏈表 err = proto_register(&tcpv6_prot, 1); if (err) goto out; //udp協議同上 err = proto_register(&udpv6_prot, 1); if (err) goto out_unregister_tcp_proto; //udp-lite傳輸協議,主要用於多媒體傳輸,參考kernel中的 Documentation/networking/udplite.txt err = proto_register(&udplitev6_prot, 1); if (err) goto out_unregister_udp_proto; //原始套接字同上 err = proto_register(&rawv6_prot, 1); if (err) goto out_unregister_udplite_proto; /* Register the socket-side information for inet6_create. */ for(r = &inetsw6[0]; r < &inetsw6[SOCK_MAX]; ++r) //初始化一個協議鏈表數組 INIT_LIST_HEAD(r); /* We MUST register RAW sockets before we create the ICMP6, IGMP6, or NDISC control sockets. */ //根據參數數據結構中標識的協議類型,把這數據結構添加到上面的協議鏈表數組中 inet6_register_protosw(&rawv6_protosw); /* Register the family here so that the init calls below will be able to create sockets. (?? is this dangerous ??) */ //注冊ipv6協議族,主要是注冊socket創建函數 err = sock_register(&inet6_family_ops); if (err) goto out_unregister_raw_proto; /* Initialise ipv6 mibs */ err = init_ipv6_mibs(); //所有ipv6相關的統計信息 if (err) goto out_unregister_sock; /* ipngwg API draft makes clear that the correct semantics for TCP and UDP is to consider one TCP and UDP instance in a host availiable by both INET and INET6 APIs and able to communicate via both network protocols. */ #ifdef CONFIG_SYSCTL ipv6_sysctl_register(); // ipv6協議sysctl條目(/proc/sys下)初始化 #endif //icmp協議注冊 err = icmpv6_init(&inet6_family_ops); if (err) goto icmp_fail; //鄰居發現協議(作用相當於ipv4的arp)初始化 err = ndisc_init(&inet6_family_ops); if (err) goto ndisc_fail; //igmp協議初始化 err = igmp6_init(&inet6_family_ops); if (err) goto igmp_fail; //ipv6協議相關的 netfilter 初始化 err = ipv6_netfilter_init(); if (err) goto netfilter_fail; /* Create /proc/foo6 entries. 
*/ #ifdef CONFIG_PROC_FS //注冊/proc/中協議統計輸出項 err = -ENOMEM; if (raw6_proc_init()) goto proc_raw6_fail; if (tcp6_proc_init()) goto proc_tcp6_fail; if (udp6_proc_init()) goto proc_udp6_fail; if (udplite6_proc_init()) goto proc_udplite6_fail; if (ipv6_misc_proc_init()) goto proc_misc6_fail; if (ac6_proc_init()) goto proc_anycast6_fail; if (if6_proc_init()) goto proc_if6_fail; #endif ip6_route_init(); //ipv6 路由初始化 ip6_flowlabel_init();//ipv6 中流標記,注冊了輸出流標記的 proc //rtnetlink相關部分和路由模板中一些字段和其他一些功能的初始化 err = addrconf_init(); if (err) goto addrconf_fail; /* Init v6 extension headers. */ //ipv6 新添加的擴展頭初始化,參考ipv6介紹 ipv6_rthdr_init(); ipv6_frag_init(); ipv6_nodata_init(); ipv6_destopt_init(); /* Init v6 transport protocols. */ //最主要的傳輸層協議初始化 udpv6_init(); udplitev6_init(); tcpv6_init(); //最后注冊ipv6協議,注冊協議處理函數 ipv6_packet_init(); err = 0; out: return err; ...... //下面就是錯誤處理的過程 } 下面我們主要看ipv6協議部分流程,其他部分在各自相關文章中介紹。 ipv6擴展頭,路由包頭注冊 void __init ipv6_rthdr_init(void) { if (inet6_add_protocol(&rthdr_protocol, IPPROTO_ROUTING) < 0) printk(KERN_ERR "ipv6_rthdr_init: Could not register protocol\n"); }; ipv6擴展頭,分片包頭注冊 void __init ipv6_frag_init(void) { if (inet6_add_protocol(&frag_protocol, IPPROTO_FRAGMENT) < 0) printk(KERN_ERR "ipv6_frag_init: Could not register protocol\n"); ip6_frags.ctl = &ip6_frags_ctl; ip6_frags.hashfn = ip6_hashfn; ip6_frags.constructor = ip6_frag_init; ip6_frags.destructor = NULL; ip6_frags.skb_free = NULL; ip6_frags.qsize = sizeof(struct frag_queue); ip6_frags.match = ip6_frag_match; ip6_frags.frag_expire = ip6_frag_expire; inet_frags_init(&ip6_frags); } void __init ipv6_nodata_init(void) { if (inet6_add_protocol(&nodata_protocol, IPPROTO_NONE) < 0) printk(KERN_ERR "ipv6_nodata_init: Could not register protocol\n"); } ipv6擴展頭,目的選項包頭注冊 void __init ipv6_destopt_init(void) { if (inet6_add_protocol(&destopt_protocol, IPPROTO_DSTOPTS) < 0) printk(KERN_ERR "ipv6_destopt_init: Could not register protocol\n"); } 注冊ipv6協議處理函數 void __init ipv6_packet_init(void) { 
dev_add_pack(&ipv6_packet_type); } 當netif_receive_skb函數向上層遞交skb時會根據協議類型調用相關的協議處理函數,那么就會調用到 ipv6_rcv函數了。 static struct packet_type ipv6_packet_type = { .type = __constant_htons(ETH_P_IPV6), .func = ipv6_rcv, .gso_send_check = ipv6_gso_send_check, .gso_segment = ipv6_gso_segment, }; ipv6協議處理函數 int ipv6_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev) { struct ipv6hdr *hdr; u32 pkt_len; struct inet6_dev *idev; if (dev->nd_net != &init_net) { kfree_skb(skb); return 0; } //mac地址是其他主機的包 if (skb->pkt_type == PACKET_OTHERHOST) { kfree_skb(skb); return 0; } rcu_read_lock(); //獲取ipv6相關的配置結構 idev = __in6_dev_get(skb->dev); IP6_INC_STATS_BH(idev, IPSTATS_MIB_INRECEIVES); //是否共享,如果是,新clone一個 if ((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL) { IP6_INC_STATS_BH(idev, IPSTATS_MIB_INDISCARDS); rcu_read_unlock(); goto out; } //清空保存擴展頭解析結果的數據結構 memset(IP6CB(skb), 0, sizeof(struct inet6_skb_parm)); //保存接收這個數據包的設備索引 IP6CB(skb)->iif = skb->dst ? ip6_dst_idev(skb->dst)->dev->ifindex : dev->ifindex; //有足夠的頭長度,ipv6是40字節 if (unlikely(!pskb_may_pull(skb, sizeof(*hdr)))) goto err; hdr = ipv6_hdr(skb); //獲取頭 if (hdr->version != 6) //驗證版本 goto err; //傳輸頭(擴展頭)在網絡頭后面 skb->transport_header = skb->network_header + sizeof(*hdr); //保存下一個擴展頭協議在ipv6頭結構中的偏移 IP6CB(skb)->nhoff = offsetof(struct ipv6hdr, nexthdr); pkt_len = ntohs(hdr->payload_len); //ipv6負載數據長度 /* pkt_len may be zero if Jumbo payload option is present */ if (pkt_len || hdr->nexthdr != NEXTHDR_HOP) { //沒有使用擴展頭逐個跳段選項 if (pkt_len + sizeof(struct ipv6hdr) > skb->len) { //數據長度不對 IP6_INC_STATS_BH(idev, IPSTATS_MIB_INTRUNCATEDPKTS); goto drop; } //如果skb->len > (pkt_len + sizeof(struct ipv6hdr))試着縮小skb->len的長度 //相對ipv4來說簡單多了,自己看吧 if (pskb_trim_rcsum(skb, pkt_len + sizeof(struct ipv6hdr))) { IP6_INC_STATS_BH(idev, IPSTATS_MIB_INHDRERRORS); goto drop; } hdr = ipv6_hdr(skb); //重新獲取ip頭 } if (hdr->nexthdr == NEXTHDR_HOP) { //使用了擴展頭逐個跳段選項 if (ipv6_parse_hopopts(skb) < 0) {//處理這個選項 
IP6_INC_STATS_BH(idev, IPSTATS_MIB_INHDRERRORS); rcu_read_unlock(); return 0; } } rcu_read_unlock(); //進入ipv6的netfilter然后調用ip6_rcv_finish return NF_HOOK(PF_INET6,NF_IP6_PRE_ROUTING, skb, dev, NULL, ip6_rcv_finish); err: IP6_INC_STATS_BH(idev, IPSTATS_MIB_INHDRERRORS); drop: rcu_read_unlock(); kfree_skb(skb); out: return 0; } 解析擴展頭逐個跳段中的巨量負載選項 int ipv6_parse_hopopts(struct sk_buff *skb) { struct inet6_skb_parm *opt = IP6CB(skb); //獲取擴展頭結果結構 /* skb_network_header(skb) is equal to skb->data, and skb_network_header_len(skb) is always equal to * sizeof(struct ipv6hdr) by definition of hop-by-hop options. */ //驗證數據有足夠的長度 if (!pskb_may_pull(skb, sizeof(struct ipv6hdr) + 8) || !pskb_may_pull(skb, (sizeof(struct ipv6hdr) + //下面的意思是取得擴展首部中的長度 ((skb_transport_header(skb)[1] + 1) << 3)))) { kfree_skb(skb); return -1; } opt->hop = sizeof(struct ipv6hdr); //40字節 if (ip6_parse_tlv(tlvprochopopt_lst, skb)) { //實際的解析工作 //把傳輸頭移動到擴展首部之后 skb->transport_header += (skb_transport_header(skb)[1] + 1) << 3; opt = IP6CB(skb); opt->nhoff = sizeof(struct ipv6hdr); //進行了ipv6擴展頭解析,保存下一個擴展頭協議字段的偏移 return 1; } return -1; } 解析tlv編碼的擴展選項頭 static int ip6_parse_tlv(struct tlvtype_proc *procs, struct sk_buff *skb) { struct tlvtype_proc *curr; const unsigned char *nh = skb_network_header(skb); //獲取網絡頭 int off = skb_network_header_len(skb); //獲取網絡頭長度 int len = (skb_transport_header(skb)[1] + 1) << 3; //首部擴展頭長度 if (skb_transport_offset(skb) + len > skb_headlen(skb)) //長度錯誤 goto bad; off += 2; //跳過下一個首部和首部擴展長度這兩個字節 len -= 2; while (len > 0) { int optlen = nh[off + 1] + 2; //獲取選項數據長度 + 2 (2是選項類型和選項數據長度兩字節) switch (nh[off]) { //選項類型 case IPV6_TLV_PAD0: //Pad1選項 optlen = 1; break; case IPV6_TLV_PADN://PadN選項 break; default: //其他選項 if (optlen > len) goto bad; for (curr = procs; curr->type >= 0; curr++) { if (curr->type == nh[off]) { //類型匹配,調用參數函數處理,參考下面ipv6選項處理 /* type specific length/alignment checks will be performed in the func(). 
*/ if (curr->func(skb, off) == 0) return 0; break; } } if (curr->type < 0) { if (ip6_tlvopt_unknown(skb, off) == 0) //處理未知選項 return 0; } break; } off += optlen; //偏移增加,這樣到下一個選項 len -= optlen; //長度遞減 } if (len == 0) return 1; //正確解析完畢 bad: kfree_skb(skb); return 0; } 處理未知的選項 static int ip6_tlvopt_unknown(struct sk_buff *skb, int optoff) { //根據選項類型標識符的要求進行處理 switch ((skb_network_header(skb)[optoff] & 0xC0) >> 6) { case 0: /* ignore */ return 1; case 1: /* drop packet */ break; case 3: /* Send ICMP if not a multicast address and drop packet */ /* Actually, it is redundant check. icmp_send will recheck in any case. */ if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr)) //目的是多播地址 break; case 2: /* send ICMP PARM PROB regardless and drop packet */ //給包的源地址發送一個 ICMP "參數存在問題", 編碼 2 的報文, 指針指向無法識別的選項類型 icmpv6_param_prob(skb, ICMPV6_UNK_OPTION, optoff); return 0; } kfree_skb(skb); return 0; } 到這需要解釋一下,上面解析ipv6選項只是解析了第一層的擴展頭,在后面可能還有其他擴展頭會在后面解析。 inline int ip6_rcv_finish( struct sk_buff *skb) { if (skb->dst == NULL) //沒有路由,進行路由查找 ip6_route_input(skb); //路由部分將在路由實現文章中介紹 return dst_input(skb); } static inline int dst_input(struct sk_buff *skb) { int err; for (;;) { err = skb->dst->input(skb); //調用路由的輸入函數 if (likely(err == 0)) return err; /* Oh, Jamal... Seems, I will not forgive you this mess. 
:-) */ if (unlikely(err != NET_XMIT_BYPASS)) return err; } } 現在我們假設包是到本地的,那么上面的input函數就是 int ip6_input(struct sk_buff *skb) { //進入ipv6 netfilter NF_IP6_LOCAL_IN hook 然后調用 ip6_input_finish return NF_HOOK(PF_INET6, NF_IP6_LOCAL_IN, skb, skb->dev, NULL, ip6_input_finish); } static int ip6_input_finish(struct sk_buff *skb) { struct inet6_protocol *ipprot; struct sock *raw_sk; unsigned int nhoff; int nexthdr; u8 hash; struct inet6_dev *idev; /* Parse extension headers */ rcu_read_lock(); resubmit: idev = ip6_dst_idev(skb->dst); //將skb->data指針移動到傳輸層頭 if (!pskb_pull(skb, skb_transport_offset(skb))) goto discard; nhoff = IP6CB(skb)->nhoff; nexthdr = skb_network_header(skb)[nhoff];//下一個擴展頭協議 //處理原始sock raw_sk = sk_head(&raw_v6_htable[nexthdr & (MAX_INET_PROTOS - 1)]); if (raw_sk && !ipv6_raw_deliver(skb, nexthdr)) raw_sk = NULL; //向上層協議棧遞交數據,看初始化時注冊的一些協議,主要是tcp,udp等,還包括一些ip擴展頭的處理 hash = nexthdr & (MAX_INET_PROTOS - 1); if ((ipprot = rcu_dereference(inet6_protos[hash])) != NULL) { int ret; if (ipprot->flags & INET6_PROTO_FINAL) { struct ipv6hdr *hdr; /* Free reference early: we don't need it any more, and it may hold ip_conntrack module loaded indefinitely. 
*/ nf_reset(skb); skb_postpull_rcsum(skb, skb_network_header(skb), skb_network_header_len(skb)); hdr = ipv6_hdr(skb); if (ipv6_addr_is_multicast(&hdr->daddr) && !ipv6_chk_mcast_addr(skb->dev, &hdr->daddr, &hdr->saddr) && !ipv6_is_mld(skb, nexthdr)) goto discard; } //處理 IPSEC v6 的相關部分 if (!(ipprot->flags & INET6_PROTO_NOPOLICY) && !xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) goto discard; ret = ipprot->handler(skb); //上層協議處理,看下面ipv6擴展頭處理 if (ret > 0) goto resubmit; //重新處理 else if (ret == 0) IP6_INC_STATS_BH(idev, IPSTATS_MIB_INDELIVERS); } else { //沒有找到上層處理函數 if (!raw_sk) { if (xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) { IP6_INC_STATS_BH(idev, IPSTATS_MIB_INUNKNOWNPROTOS); icmpv6_send(skb, ICMPV6_PARAMPROB, ICMPV6_UNK_NEXTHDR, nhoff, skb->dev); } } else IP6_INC_STATS_BH(idev, IPSTATS_MIB_INDELIVERS); kfree_skb(skb); } rcu_read_unlock(); return 0; discard: IP6_INC_STATS_BH(idev, IPSTATS_MIB_INDISCARDS); rcu_read_unlock(); kfree_skb(skb); return 0; } [ipv6選項處理] static struct tlvtype_proc tlvprochopopt_lst[] = { { .type = IPV6_TLV_ROUTERALERT, .func = ipv6_hop_ra, }, { .type = IPV6_TLV_JUMBO, .func = ipv6_hop_jumbo, }, { -1, } }; 解析路由警告選項 static int ipv6_hop_ra(struct sk_buff *skb, int optoff) { const unsigned char *nh = skb_network_header(skb); //獲取網絡頭 if (nh[optoff + 1] == 2) { //路由警告選項長度必須是2 ? 
rfc 2711 規定選項數據長度字段為 2(加上類型、長度兩字節,整個選項共 4 字節) IP6CB(skb)->ra = optoff; //記錄路由警告選項的偏移 return 1; } LIMIT_NETDEBUG(KERN_DEBUG "ipv6_hop_ra: wrong RA length %d\n", nh[optoff + 1]); kfree_skb(skb); return 0; } 解析jumbo payload(巨量負載)選項 static int ipv6_hop_jumbo(struct sk_buff *skb, int optoff) { const unsigned char *nh = skb_network_header(skb); u32 pkt_len; //選項數據長度必須是4,且選項偏移必須滿足 4n+2 對齊(optoff & 3 必須是2) if (nh[optoff + 1] != 4 || (optoff & 3) != 2) { LIMIT_NETDEBUG(KERN_DEBUG "ipv6_hop_jumbo: wrong jumbo opt length/alignment %d\n", nh[optoff+1]); IP6_INC_STATS_BH(ipv6_skb_idev(skb), IPSTATS_MIB_INHDRERRORS); goto drop; } pkt_len = ntohl(*(__be32 *)(nh + optoff + 2)); //獲取整個負載長度 if (pkt_len <= IPV6_MAXPLEN) { //小於等於65535 是不對的 IP6_INC_STATS_BH(ipv6_skb_idev(skb), IPSTATS_MIB_INHDRERRORS); icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, optoff+2); return 0; } if (ipv6_hdr(skb)->payload_len) { //原ipv6頭中就不應該有負載長度了 IP6_INC_STATS_BH(ipv6_skb_idev(skb), IPSTATS_MIB_INHDRERRORS); icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, optoff); return 0; } if (pkt_len > skb->len - sizeof(struct ipv6hdr)) { //長度超出了 skb 的實際長度 IP6_INC_STATS_BH(ipv6_skb_idev(skb), IPSTATS_MIB_INTRUNCATEDPKTS); goto drop; } //如果必要試圖縮減 skb 的長度 if (pskb_trim_rcsum(skb, pkt_len + sizeof(struct ipv6hdr))) goto drop; return 1; drop: kfree_skb(skb); return 0; } 目的選項處理 static struct tlvtype_proc tlvprocdestopt_lst[] = { #if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE) { .type = IPV6_TLV_HAO, .func = ipv6_dest_hao, }, #endif {-1, NULL} }; 解析目的選項中的 HAO(家鄉地址)選項 static int ipv6_dest_hao(struct sk_buff *skb, int optoff) { struct ipv6_destopt_hao *hao; struct inet6_skb_parm *opt = IP6CB(skb); struct ipv6hdr *ipv6h = ipv6_hdr(skb); struct in6_addr tmp_addr; int ret; if (opt->dsthao) { //已經處理 LIMIT_NETDEBUG(KERN_DEBUG "hao duplicated\n"); goto discard; } opt->dsthao = opt->dst1; opt->dst1 = 0; //獲取網絡頭后面的選項部分 hao = (struct ipv6_destopt_hao *)(skb_network_header(skb) + optoff); if (hao->length != 16) { //長度要求 LIMIT_NETDEBUG(KERN_DEBUG "hao invalid option length = %d\n", hao->length); goto discard; 
} if (!(ipv6_addr_type(&hao->addr) & IPV6_ADDR_UNICAST)) { //地址不是單播 LIMIT_NETDEBUG(KERN_DEBUG "hao is not an unicast addr: " NIP6_FMT "\n", NIP6(hao->addr)); goto discard; } //IPSEC相關 ret = xfrm6_input_addr(skb, (xfrm_address_t *)&ipv6h->daddr, (xfrm_address_t *)&hao->addr, IPPROTO_DSTOPTS); if (unlikely(ret < 0)) goto discard; if (skb_cloned(skb)) { //如果包是cloned //分配新的內存數據 if (pskb_expand_head(skb, 0, 0, GFP_ATOMIC)) goto discard; //重新指向各頭 hao = (struct ipv6_destopt_hao *)(skb_network_header(skb) + optoff); ipv6h = ipv6_hdr(skb); } if (skb->ip_summed == CHECKSUM_COMPLETE) skb->ip_summed = CHECKSUM_NONE; //把ip頭中的源地址與選項中的地址交換 ipv6_addr_copy(&tmp_addr, &ipv6h->saddr); ipv6_addr_copy(&ipv6h->saddr, &hao->addr); ipv6_addr_copy(&hao->addr, &tmp_addr); if (skb->tstamp.tv64 == 0) __net_timestamp(skb); //記錄時間戳 return 1; discard: kfree_skb(skb); return 0; } [/ipv6選項處理] [ipv6擴展頭處理] 我們只介紹跟ipv6擴展頭相關的實現,像其他的擴展頭(tcp, udp)等雖然也是叫擴展頭但實際是傳輸層的內容,將在其他文章中介紹。 路由擴展首部 struct ipv6_rt_hdr { __u8 nexthdr; __u8 hdrlen; __u8 type; __u8 segments_left; /* type specific data variable length field */ }; 路由擴展首部處理結構 static struct inet6_protocol rthdr_protocol = { .handler = ipv6_rthdr_rcv, .flags = INET6_PROTO_NOPOLICY | INET6_PROTO_GSO_EXTHDR, }; static int ipv6_rthdr_rcv(struct sk_buff *skb) { struct inet6_skb_parm *opt = IP6CB(skb); struct in6_addr *addr = NULL; struct in6_addr daddr; struct inet6_dev *idev; int n, i; struct ipv6_rt_hdr *hdr; struct rt0_hdr *rthdr; int accept_source_route = ipv6_devconf.accept_source_route; idev = in6_dev_get(skb->dev); //包進入設備 if (idev) { if (accept_source_route > idev->cnf.accept_source_route) //全局默認值大於該設備(proc中可手動調節)的設置值時,取兩者中較小的 accept_source_route = idev->cnf.accept_source_route; in6_dev_put(idev); } //skb長度和內存空間正確 if (!pskb_may_pull(skb, skb_transport_offset(skb) + 8) || !pskb_may_pull(skb, (skb_transport_offset(skb) + ((skb_transport_header(skb)[1] + 1) << 3)))) { IP6_INC_STATS_BH(ip6_dst_idev(skb->dst), IPSTATS_MIB_INHDRERRORS); kfree_skb(skb); return -1; } hdr = 
(struct ipv6_rt_hdr *)skb_transport_header(skb); //路由擴展頭 //是到多播地址或硬件地址不是到本機的地址 if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr) || skb->pkt_type != PACKET_HOST) { IP6_INC_STATS_BH(ip6_dst_idev(skb->dst), IPSTATS_MIB_INADDRERRORS); kfree_skb(skb); return -1; } looped_back: if (hdr->segments_left == 0) { //根據rfc要求 分段剩余為0 switch (hdr->type) { #if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE) case IPV6_SRCRT_TYPE_2: /* Silently discard type 2 header unless it was processed by own */ if (!addr) { IP6_INC_STATS_BH(ip6_dst_idev(skb->dst), IPSTATS_MIB_INADDRERRORS); kfree_skb(skb); return -1; } break; #endif default: break; } opt->lastopt = opt->srcrt = skb_network_header_len(skb); skb->transport_header += (hdr->hdrlen + 1) << 3; //下一個傳輸頭的位置 opt->dst0 = opt->dst1; opt->dst1 = 0; opt->nhoff = (&hdr->nexthdr) - skb_network_header(skb); //記錄下一個頭數據相對網絡頭的偏移量 return 1; } switch (hdr->type) { #if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE) case IPV6_SRCRT_TYPE_2: if (accept_source_route < 0) goto unknown_rh; /* Silently discard invalid RTH type 2 */ if (hdr->hdrlen != 2 || hdr->segments_left != 1) { IP6_INC_STATS_BH(ip6_dst_idev(skb->dst), IPSTATS_MIB_INHDRERRORS); kfree_skb(skb); return -1; } break; #endif default: goto unknown_rh; } /* This is the routing header forwarding algorithm from RFC 2460, page 16. */ n = hdr->hdrlen >> 1; //計算路由首部中的地址數量 if (hdr->segments_left > n) { IP6_INC_STATS_BH(ip6_dst_idev(skb->dst), IPSTATS_MIB_INHDRERRORS); icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, ((&hdr->segments_left) - skb_network_header(skb))); return -1; } /* We are about to mangle packet header. Be careful! Do not damage packets queued somewhere. 
*/ if (skb_cloned(skb)) { /* the copy is a forwarded packet */ if (pskb_expand_head(skb, 0, 0, GFP_ATOMIC)) { IP6_INC_STATS_BH(ip6_dst_idev(skb->dst), IPSTATS_MIB_OUTDISCARDS); kfree_skb(skb); return -1; } hdr = (struct ipv6_rt_hdr *)skb_transport_header(skb); } if (skb->ip_summed == CHECKSUM_COMPLETE) skb->ip_summed = CHECKSUM_NONE; i = n - --hdr->segments_left; //計算地址向量(地址列表)中要"訪問"的下一個地址 rthdr = (struct rt0_hdr *) hdr; addr = rthdr->addr; //指向地址列表首部 addr += i - 1; //移動到下一個地址 switch (hdr->type) { #if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE) case IPV6_SRCRT_TYPE_2: if (xfrm6_input_addr(skb, (xfrm_address_t *)addr, (xfrm_address_t *)&ipv6_hdr(skb)->saddr, IPPROTO_ROUTING) < 0) { IP6_INC_STATS_BH(ip6_dst_idev(skb->dst), IPSTATS_MIB_INADDRERRORS); kfree_skb(skb); return -1; } if (!ipv6_chk_home_addr(addr)) { IP6_INC_STATS_BH(ip6_dst_idev(skb->dst), IPSTATS_MIB_INADDRERRORS); kfree_skb(skb); return -1; } break; #endif default: break; } if (ipv6_addr_is_multicast(addr)) { //這個地址是多播地址 IP6_INC_STATS_BH(ip6_dst_idev(skb->dst), IPSTATS_MIB_INADDRERRORS); kfree_skb(skb); return -1; } //交換 IPv6 目的地址和這個地址 ipv6_addr_copy(&daddr, addr); ipv6_addr_copy(addr, &ipv6_hdr(skb)->daddr); ipv6_addr_copy(&ipv6_hdr(skb)->daddr, &daddr); dst_release(xchg(&skb->dst, NULL)); ip6_route_input(skb); //路由查找處理,將在其他文章中介紹 if (skb->dst->error) { skb_push(skb, skb->data - skb_network_header(skb)); dst_input(skb); return -1; } if (skb->dst->dev->flags & IFF_LOOPBACK) { //路由查找后要發送到的目的設備是回環 if (ipv6_hdr(skb)->hop_limit <= 1) { //跳數限制小於1 IP6_INC_STATS_BH(ip6_dst_idev(skb->dst), IPSTATS_MIB_INHDRERRORS); //給源地址發送一個 ICMP "超時 – 傳輸超過跳數限制" 的報文, 並且拋棄此包 icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT, 0, skb->dev); kfree_skb(skb); return -1; } ipv6_hdr(skb)->hop_limit--; goto looped_back; } //將data之中移動到網絡頭 skb_push(skb, skb->data - skb_network_header(skb)); dst_input(skb); //這時包應該被轉發了 return -1; unknown_rh: IP6_INC_STATS_BH(ip6_dst_idev(skb->dst), IPSTATS_MIB_INHDRERRORS); 
icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, (&hdr->type) - skb_network_header(skb)); return -1; } ipv6分片包擴展首部處理 static struct inet6_protocol frag_protocol = { .handler = ipv6_frag_rcv, .flags = INET6_PROTO_NOPOLICY, }; static int ipv6_frag_rcv(struct sk_buff *skb) { struct frag_hdr *fhdr; struct frag_queue *fq; struct ipv6hdr *hdr = ipv6_hdr(skb); IP6_INC_STATS_BH(ip6_dst_idev(skb->dst), IPSTATS_MIB_REASMREQDS); /* Jumbo payload inhibits frag. header */ if (hdr->payload_len == 0) { //是Jumbo payload,不是分片包 IP6_INC_STATS(ip6_dst_idev(skb->dst), IPSTATS_MIB_INHDRERRORS); icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, skb_network_header_len(skb)); return -1; } //有碎片頭空間 if (!pskb_may_pull(skb, (skb_transport_offset(skb) + sizeof(struct frag_hdr)))) { IP6_INC_STATS(ip6_dst_idev(skb->dst), IPSTATS_MIB_INHDRERRORS); icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, skb_network_header_len(skb)); return -1; } hdr = ipv6_hdr(skb); fhdr = (struct frag_hdr *)skb_transport_header(skb); //分片頭 if (!(fhdr->frag_off & htons(0xFFF9))) { //分片偏移為0且沒有更多分片(M標誌為0),不是分片包 /* It is not a fragmented frame */ skb->transport_header += sizeof(struct frag_hdr); //傳輸頭向后移動到下一個頭 IP6_INC_STATS_BH(ip6_dst_idev(skb->dst), IPSTATS_MIB_REASMOKS); IP6CB(skb)->nhoff = (u8 *)fhdr - skb_network_header(skb); return 1; } if (atomic_read(&ip6_frags.mem) > ip6_frags_ctl.high_thresh) //內存使用超過限制 ip6_evictor(ip6_dst_idev(skb->dst)); //查找或創建分片隊列頭 if ((fq = fq_find(fhdr->identification, &hdr->saddr, &hdr->daddr, ip6_dst_idev(skb->dst))) != NULL) { int ret; spin_lock(&fq->q.lock); ret = ip6_frag_queue(fq, skb, fhdr, IP6CB(skb)->nhoff); //入隊重組 spin_unlock(&fq->q.lock); fq_put(fq); return ret; } IP6_INC_STATS_BH(ip6_dst_idev(skb->dst), IPSTATS_MIB_REASMFAILS); kfree_skb(skb); return -1; } static __inline__ struct frag_queue * fq_find(__be32 id, struct in6_addr *src, struct in6_addr *dst, struct inet6_dev *idev) { struct inet_frag_queue *q; struct ip6_create_arg arg; unsigned int hash; arg.id = id; arg.src = src; arg.dst = dst; hash = ip6qhashfn(id, 
src, dst); //id,源,目的進行 hash q = inet_frag_find(&ip6_frags, &arg, hash); //查找或創建 if (q == NULL) goto oom; return container_of(q, struct frag_queue, q); //成功返回 oom: //沒內存了 IP6_INC_STATS_BH(idev, IPSTATS_MIB_REASMFAILS); return NULL; } struct inet_frag_queue *inet_frag_find(struct inet_frags *f, void *key, unsigned int hash) { struct inet_frag_queue *q; struct hlist_node *n; read_lock(&f->lock); hlist_for_each_entry(q, n, &f->hash[hash], list) { //在hash桶中查找 if (f->match(q, key)) { //調用匹配函數進行匹配,具體函數很簡單參考初始化時的ipv6_frag_init函數 atomic_inc(&q->refcnt); read_unlock(&f->lock); return q; } } //沒有找到就創建一個 return inet_frag_create(f, key, hash); } 創建分片隊列 static struct inet_frag_queue *inet_frag_create(struct inet_frags *f, void *arg, unsigned int hash) { struct inet_frag_queue *q; q = inet_frag_alloc(f, arg); //分配一個 if (q == NULL) return NULL; //添加到 hash 表 return inet_frag_intern(q, f, hash, arg); } static struct inet_frag_queue *inet_frag_alloc(struct inet_frags *f, void *arg) { struct inet_frag_queue *q; q = kzalloc(f->qsize, GFP_ATOMIC); //分配一個隊列頭,大小是 sizeof(struct frag_queue) if (q == NULL) return NULL; f->constructor(q, arg); //拷貝地址和 id 到隊列頭結構中 atomic_add(f->qsize, &f->mem); setup_timer(&q->timer, f->frag_expire, (unsigned long)q); spin_lock_init(&q->lock); atomic_set(&q->refcnt, 1); return q; } static struct inet_frag_queue *inet_frag_intern(struct inet_frag_queue *qp_in, struct inet_frags *f, unsigned int hash, void *arg) { struct inet_frag_queue *qp; #ifdef CONFIG_SMP struct hlist_node *n; #endif write_lock(&f->lock); #ifdef CONFIG_SMP //其他cpu可能已經創建了一個,所以要再次檢查 hlist_for_each_entry(qp, n, &f->hash[hash], list) { if (f->match(qp, arg)) { //已經創建 atomic_inc(&qp->refcnt); write_unlock(&f->lock); qp_in->last_in |= COMPLETE; inet_frag_put(qp_in, f); //釋放新分配的 return qp; } } #endif qp = qp_in; if (!mod_timer(&qp->timer, jiffies + f->ctl->timeout)) //啟動定時器 atomic_inc(&qp->refcnt); //增加引用計數,然后添加到hash表 atomic_inc(&qp->refcnt); hlist_add_head(&qp->list, &f->hash[hash]); 
list_add_tail(&qp->lru_list, &f->lru_list); f->nqueues++; write_unlock(&f->lock); return qp; } 入隊重組 static int ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb, struct frag_hdr *fhdr, int nhoff) { struct sk_buff *prev, *next; struct net_device *dev; int offset, end; if (fq->q.last_in & COMPLETE) //重組已經完成 goto err; //分片開始位置 offset = ntohs(fhdr->frag_off) & ~0x7;//屏蔽低3位(保留位和M標誌),得到8字節對齊的字節偏移 //分片在整個包中的結束位置 = 偏移 + (包負載長度 - ipv6頭之后直到分片頭結束的長度) end = offset + (ntohs(ipv6_hdr(skb)->payload_len) - ((u8 *)(fhdr + 1) - (u8 *)(ipv6_hdr(skb) + 1))); //結束位置 > 65535 if ((unsigned int)end > IPV6_MAXPLEN) { IP6_INC_STATS_BH(ip6_dst_idev(skb->dst), IPSTATS_MIB_INHDRERRORS); icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, ((u8 *)&fhdr->frag_off - skb_network_header(skb))); return -1; } //校驗和已經完成 if (skb->ip_summed == CHECKSUM_COMPLETE) { const unsigned char *nh = skb_network_header(skb); //減去分片包頭的校驗和 skb->csum = csum_sub(skb->csum, csum_partial(nh, (u8 *)(fhdr + 1) - nh, 0)); } //最后一個碎片包 if (!(fhdr->frag_off & htons(IP6_MF))) { /* If we already have some bits beyond end or have different end, the segment is corrupted. */ if (end < fq->q.len || ((fq->q.last_in & LAST_IN) && end != fq->q.len)) //分片出現錯誤 goto err; fq->q.last_in |= LAST_IN; //標識最后一個分片 fq->q.len = end; //記錄包總長度 } else { /* Check if the fragment is rounded to 8 bytes. Required by the RFC. */ if (end & 0x7) { //碎片結尾也需要8字節對齊 /* RFC2460 says always send parameter problem in this case. -DaveM */ IP6_INC_STATS_BH(ip6_dst_idev(skb->dst), IPSTATS_MIB_INHDRERRORS); icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, offsetof(struct ipv6hdr, payload_len)); return -1; } if (end > fq->q.len) { /* Some bits beyond end -> corruption. 
*/ if (fq->q.last_in & LAST_IN) goto err; fq->q.len = end; //記錄已經得到的碎片的最大長度 } } if (end == offset) //開始 = 結束 goto err; //skb->data 指向碎片首部頭后數據部分 if (!pskb_pull(skb, (u8 *) (fhdr + 1) - skb->data)) goto err; //如果需要縮短skb的內存長度 if (pskb_trim_rcsum(skb, end - offset)) goto err; //找出碎片所在位置 prev = NULL; for(next = fq->q.fragments; next != NULL; next = next->next) { if (FRAG6_CB(next)->offset >= offset) break; /* bingo! */ prev = next; } if (prev) { //有前一個碎片 //前一個碎片 (開始 + 長度) - 這個碎片的開始. 計算出重疊部分 int i = (FRAG6_CB(prev)->offset + prev->len) - offset; if (i > 0) { //有重疊 offset += i; //調整這個碎片的開始位置 if (end <= offset) //調整后出錯 goto err; if (!pskb_pull(skb, i))//skb->data += i; goto err; if (skb->ip_summed != CHECKSUM_UNNECESSARY) skb->ip_summed = CHECKSUM_NONE; } } //有下一個碎片,且開始位置 < 這個碎片的結束位置 while (next && FRAG6_CB(next)->offset < end) { //這個碎片的結束位置 - 下一個碎片的開始位置,計算重疊 int i = end - FRAG6_CB(next)->offset; /* overlap is 'i' bytes */ if (i < next->len) { //重疊長度 < 下一個碎片的長度 if (!pskb_pull(next, i)) //next->data += i; goto err; FRAG6_CB(next)->offset += i; //下一個碎片開始位置調整 fq->q.meat -= i; //總長度減少 if (next->ip_summed != CHECKSUM_UNNECESSARY) next->ip_summed = CHECKSUM_NONE; break; } else { //這個碎片完全復蓋了下一個碎片 struct sk_buff *free_it = next; //釋放這個碎片 next = next->next;//調整下一個碎片指針 //調整隊列指針 if (prev) prev->next = next; else fq->q.fragments = next; fq->q.meat -= free_it->len; frag_kfree_skb(free_it, NULL); //釋放被復蓋的包 } } FRAG6_CB(skb)->offset = offset; //這個碎片包記錄自己的開始位置 //插入這個碎片到隊列 skb->next = next; if (prev) prev->next = skb; else fq->q.fragments = skb; dev = skb->dev; if (dev) { fq->iif = dev->ifindex; skb->dev = NULL; } fq->q.stamp = skb->tstamp; fq->q.meat += skb->len; //累加總長度 atomic_add(skb->truesize, &ip6_frags.mem); if (offset == 0) { //偏移為0 fq->nhoffset = nhoff; fq->q.last_in |= FIRST_IN; //標識開始碎片 } //碎片已經聚齊,記錄長度 = 包中標識的長度 if (fq->q.last_in == (FIRST_IN | LAST_IN) && fq->q.meat == fq->q.len) return ip6_frag_reasm(fq, prev, dev); //重組 //沒有聚齊,移動隊列連表到lru連表尾部 write_lock(&ip6_frags.lock); 
list_move_tail(&fq->q.lru_list, &ip6_frags.lru_list); write_unlock(&ip6_frags.lock); return -1; err: IP6_INC_STATS(ip6_dst_idev(skb->dst), IPSTATS_MIB_REASMFAILS); kfree_skb(skb); return -1; } 重組ip頭 static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev, struct net_device *dev) { struct sk_buff *fp, *head = fq->q.fragments; int payload_len; unsigned int nhoff; fq_kill(fq); //把這個重組隊列出隊 /* Make the one we just received the head. */ if (prev) { //下面是把head指向的skb復制到fp,然后把fp插入到head指向的位置 head = prev->next; fp = skb_clone(head, GFP_ATOMIC); if (!fp) goto out_oom; fp->next = head->next; prev->next = fp; //把真正的頭skb復制到head指針的skb skb_morph(head, fq->q.fragments); head->next = fq->q.fragments->next; kfree_skb(fq->q.fragments);//釋放原來的頭 fq->q.fragments = head; } /* Unfragmented part is taken from the first segment. */ //計算負載總長度 payload_len = ((head->data - skb_network_header(head)) - sizeof(struct ipv6hdr) + fq->q.len - sizeof(struct frag_hdr)); if (payload_len > IPV6_MAXPLEN) //超過65535 goto out_oversize; /* Head of list must not be cloned. */ //如果skb被克隆,從新分配他的data if (skb_cloned(head) && pskb_expand_head(head, 0, 0, GFP_ATOMIC)) goto out_oom; /* If the first fragment is fragmented itself, we split it to two chunks: the first with data and paged part * and the second, holding only fragments. 
*/ if (skb_shinfo(head)->frag_list) {//如果頭自己已經被分片 struct sk_buff *clone; int i, plen = 0; if ((clone = alloc_skb(0, GFP_ATOMIC)) == NULL) goto out_oom; //把這個clone插入到頭后 clone->next = head->next; head->next = clone; //把頭的分片給這個clone skb_shinfo(clone)->frag_list = skb_shinfo(head)->frag_list; skb_shinfo(head)->frag_list = NULL; //頭使用了頁面,計算總長度 for (i = 0; i < skb_shinfo(head)->nr_frags; i++) plen += skb_shinfo(head)->frags[i].size; clone->len = clone->data_len = head->data_len - plen; head->data_len -= clone->len; head->len -= clone->len; clone->csum = 0; clone->ip_summed = head->ip_summed; atomic_add(clone->truesize, &ip6_frags.mem); } /* We have to remove fragment header from datagram and to relocate * header in order to calculate ICV correctly. */ nhoff = fq->nhoffset; //把傳輸頭(分片頭)中的下一個頭字段值賦給網絡頭中的下一個頭字段 skb_network_header(head)[nhoff] = skb_transport_header(head)[0]; //把分片首部復蓋掉 memmove(head->head + sizeof(struct frag_hdr), head->head, (head->data - head->head) - sizeof(struct frag_hdr)); //調整相應的各個層的頭位置 head->mac_header += sizeof(struct frag_hdr); head->network_header += sizeof(struct frag_hdr); skb_shinfo(head)->frag_list = head->next; //保存碎片鏈表 skb_reset_transport_header(head);//重新設置傳輸頭,現在指向分片頭后的頭 skb_push(head, head->data - skb_network_header(head));//使head->data指向網絡頭 atomic_sub(head->truesize, &ip6_frags.mem); for (fp = head->next; fp; fp = fp->next) { //統計分片總長度 head->data_len += fp->len; head->len += fp->len; if (head->ip_summed != fp->ip_summed) head->ip_summed = CHECKSUM_NONE; else if (head->ip_summed == CHECKSUM_COMPLETE) head->csum = csum_add(head->csum, fp->csum); //添加各分片的累加和 head->truesize += fp->truesize; atomic_sub(fp->truesize, &ip6_frags.mem); } head->next = NULL; head->dev = dev; head->tstamp = fq->q.stamp; ipv6_hdr(head)->payload_len = htons(payload_len); //總長度 IP6CB(head)->nhoff = nhoff; /* Yes, and fold redundant checksum back. 
8) */ if (head->ip_summed == CHECKSUM_COMPLETE) //添加網絡頭累加和 head->csum = csum_partial(skb_network_header(head), skb_network_header_len(head), head->csum); rcu_read_lock(); IP6_INC_STATS_BH(__in6_dev_get(dev), IPSTATS_MIB_REASMOKS); rcu_read_unlock(); fq->q.fragments = NULL; return 1; ...... //下面是錯誤處理 } 無數據(無下一個頭,IPPROTO_NONE)擴展頭 static struct inet6_protocol nodata_protocol = { .handler = ipv6_nodata_rcv, .flags = INET6_PROTO_NOPOLICY, }; static int ipv6_nodata_rcv(struct sk_buff *skb) { kfree_skb(skb); return 0; } 目的選項首部處理 static struct inet6_protocol destopt_protocol = { .handler = ipv6_destopt_rcv, .flags = INET6_PROTO_NOPOLICY | INET6_PROTO_GSO_EXTHDR, }; static int ipv6_destopt_rcv(struct sk_buff *skb) { struct inet6_skb_parm *opt = IP6CB(skb); #if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE) __u16 dstbuf; #endif struct dst_entry *dst; //長度驗證 if (!pskb_may_pull(skb, skb_transport_offset(skb) + 8) || !pskb_may_pull(skb, (skb_transport_offset(skb) + ((skb_transport_header(skb)[1] + 1) << 3)))) { IP6_INC_STATS_BH(ip6_dst_idev(skb->dst), IPSTATS_MIB_INHDRERRORS); kfree_skb(skb); return -1; } opt->lastopt = opt->dst1 = skb_network_header_len(skb); //網絡頭長度 #if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE) dstbuf = opt->dst1; #endif dst = dst_clone(skb->dst); //增加dst的引用計數 //解析tlv,上面已經看到過了 if (ip6_parse_tlv(tlvprocdestopt_lst, skb)) { dst_release(dst); skb->transport_header += (skb_transport_header(skb)[1] + 1) << 3; //調整傳輸頭位置,跳過目的選項首部 opt = IP6CB(skb); #if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE) opt->nhoff = dstbuf; #else opt->nhoff = opt->dst1; #endif return 1; } IP6_INC_STATS_BH(ip6_dst_idev(dst), IPSTATS_MIB_INHDRERRORS); dst_release(dst); return -1; } [/ipv6擴展頭處理]