概述
在ip_local_deliver中,如果檢測到是分片包,則需要進行分片重組;
其涉及的函數調用關係如下所示:
/**
 * ip_local_deliver
 *  |-->ip_is_fragment              //判斷是否為分片包
 *  |-->ip_defrag                   //分片緩存&重組
 *       |-->ip_find                //查找ipq
 *       |    |-->inet_frag_find    //查找frag_queue
 *       |
 *       |-->ip_frag_queue          //分片接收組合
 *            |-->ip_frag_reasm     //接收完整的分片組成新的ip包
 */
函數源碼分析
ip_local_deliver
1 /* 2 * Deliver IP Packets to the higher protocol layers. 3 */ 4 int ip_local_deliver(struct sk_buff *skb) 5 { 6 /* 7 * Reassemble IP fragments. 8 */ 9 struct net *net = dev_net(skb->dev); 10 11 /* 分片重組 */ 12 if (ip_is_fragment(ip_hdr(skb))) { 13 if (ip_defrag(net, skb, IP_DEFRAG_LOCAL_DELIVER)) 14 return 0; 15 } 16 17 /* 經過LOCAL_IN鈎子點 */ 18 return NF_HOOK(NFPROTO_IPV4, NF_INET_LOCAL_IN, 19 net, NULL, skb, skb->dev, NULL, 20 ip_local_deliver_finish); 21 }
ip_is_fragment
1 /* 判斷是否為分片包 */ 2 static inline bool ip_is_fragment(const struct iphdr *iph) 3 { 4 /* 5 根據(n-1)(mtu-ip頭)計算值,第一片的offset=0,其余偏移為1480倍數 6 除最后一片外,其余片標記MF 7 */ 8 return (iph->frag_off & htons(IP_MF | IP_OFFSET)) != 0; 9 }
ip_defrag
1 /* Process an incoming IP datagram fragment. */ 2 int ip_defrag(struct net *net, struct sk_buff *skb, u32 user) 3 { 4 struct net_device *dev = skb->dev ? : skb_dst(skb)->dev; 5 int vif = l3mdev_master_ifindex_rcu(dev); 6 struct ipq *qp; 7 8 __IP_INC_STATS(net, IPSTATS_MIB_REASMREQDS); 9 skb_orphan(skb); 10 11 /* Lookup (or create) queue header */ 12 /* 查找或創建分片隊列 */ 13 qp = ip_find(net, ip_hdr(skb), user, vif); 14 15 /* 分片隊列存在 */ 16 if (qp) { 17 int ret; 18 19 spin_lock(&qp->q.lock); 20 21 /* 分片加入到隊列中,能重組則重組 */ 22 ret = ip_frag_queue(qp, skb); 23 24 spin_unlock(&qp->q.lock); 25 ipq_put(qp); 26 return ret; 27 } 28 29 /* 無法創建新的ip分片隊列,內存不足 */ 30 __IP_INC_STATS(net, IPSTATS_MIB_REASMFAILS); 31 kfree_skb(skb); 32 return -ENOMEM; 33 }
ip_find
1 /* Find the correct entry in the "incomplete datagrams" queue for 2 * this IP datagram, and create new one, if nothing is found. 3 */ 4 static struct ipq *ip_find(struct net *net, struct iphdr *iph, 5 u32 user, int vif) 6 { 7 struct inet_frag_queue *q; 8 struct ip4_create_arg arg; 9 unsigned int hash; 10 11 /* 記錄ip頭和輸入信息 */ 12 arg.iph = iph; 13 arg.user = user; 14 arg.vif = vif; 15 16 /* 通過id,源地址,目的地址,協議計算hash */ 17 hash = ipqhashfn(iph->id, iph->saddr, iph->daddr, iph->protocol); 18 19 /* 根據hash值查找或創建隊列 */ 20 q = inet_frag_find(&net->ipv4.frags, &ip4_frags, &arg, hash); 21 if (IS_ERR_OR_NULL(q)) { 22 inet_frag_maybe_warn_overflow(q, pr_fmt()); 23 return NULL; 24 } 25 26 /* 返回隊列q對應的ipq */ 27 return container_of(q, struct ipq, q); 28 }
inet_frag_find
1 struct inet_frag_queue *inet_frag_find(struct netns_frags *nf, 2 struct inet_frags *f, void *key, 3 unsigned int hash) 4 { 5 struct inet_frag_bucket *hb; 6 struct inet_frag_queue *q; 7 int depth = 0; 8 9 /* 分片內存已經超過了低限 */ 10 if (frag_mem_limit(nf) > nf->low_thresh) 11 /* 進行節點回收 */ 12 inet_frag_schedule_worker(f); 13 14 /* 找到hash桶 */ 15 hash &= (INETFRAGS_HASHSZ - 1); 16 hb = &f->hash[hash]; 17 18 spin_lock(&hb->chain_lock); 19 20 /* 遍歷鏈表 */ 21 hlist_for_each_entry(q, &hb->chain, list) { 22 23 /* 找到節點 */ 24 if (q->net == nf && f->match(q, key)) { 25 26 /* 增加引用計數 */ 27 atomic_inc(&q->refcnt); 28 spin_unlock(&hb->chain_lock); 29 30 /* 返回節點 */ 31 return q; 32 } 33 34 /* 記錄查找深度 */ 35 depth++; 36 } 37 spin_unlock(&hb->chain_lock); 38 39 /* 未找到的情況下 */ 40 41 /* 桶節點的鏈表深度不超過限定 */ 42 if (depth <= INETFRAGS_MAXDEPTH) 43 /* 創建節點返回 */ 44 return inet_frag_create(nf, f, key); 45 46 /* 如果已經超過了重建間隔時間,則重建 */ 47 if (inet_frag_may_rebuild(f)) { 48 /* 打重建標記 */ 49 if (!f->rebuild) 50 f->rebuild = true; 51 /* 進行節點回收 */ 52 inet_frag_schedule_worker(f); 53 } 54 55 return ERR_PTR(-ENOBUFS); 56 }
inet_frag_worker
1 static void inet_frag_worker(struct work_struct *work) 2 { 3 4 /* 本次回收的桶節點數 */ 5 unsigned int budget = INETFRAGS_EVICT_BUCKETS; 6 unsigned int i, evicted = 0; 7 struct inet_frags *f; 8 9 /* 找到hash表 */ 10 f = container_of(work, struct inet_frags, frags_work); 11 12 BUILD_BUG_ON(INETFRAGS_EVICT_BUCKETS >= INETFRAGS_HASHSZ); 13 14 local_bh_disable(); 15 16 /* 從上次回收完的下一個節點開始,進行回收 */ 17 for (i = ACCESS_ONCE(f->next_bucket); budget; --budget) { 18 19 /* 回收並統計回收數量 */ 20 evicted += inet_evict_bucket(f, &f->hash[i]); 21 22 /* 下一個未回收桶節點 */ 23 i = (i + 1) & (INETFRAGS_HASHSZ - 1); 24 25 /* 回收節點數超過最大值,停止 */ 26 if (evicted > INETFRAGS_EVICT_MAX) 27 break; 28 } 29 30 /* 記錄下次需要開始回收的桶節點 */ 31 f->next_bucket = i; 32 33 local_bh_enable(); 34 35 /* 如果需要重建,則重建 */ 36 if (f->rebuild && inet_frag_may_rebuild(f)) 37 inet_frag_secret_rebuild(f); 38 }
inet_evict_bucket
1 static unsigned int 2 inet_evict_bucket(struct inet_frags *f, struct inet_frag_bucket *hb) 3 { 4 struct inet_frag_queue *fq; 5 struct hlist_node *n; 6 unsigned int evicted = 0; 7 HLIST_HEAD(expired); 8 9 spin_lock(&hb->chain_lock); 10 11 /* 遍歷桶下的鏈表 */ 12 hlist_for_each_entry_safe(fq, n, &hb->chain, list) { 13 14 /* 未超過限定,無需回收 */ 15 if (!inet_fragq_should_evict(fq)) 16 continue; 17 18 /* 定時器無法刪除 */ 19 if (!del_timer(&fq->timer)) 20 continue; 21 22 /* 能夠回收的節點加入到臨時hash */ 23 hlist_add_head(&fq->list_evictor, &expired); 24 25 /* 記錄回收數量 */ 26 ++evicted; 27 } 28 29 spin_unlock(&hb->chain_lock); 30 31 /* 依次調用回收函數進行回收 */ 32 hlist_for_each_entry_safe(fq, n, &expired, list_evictor) 33 f->frag_expire((unsigned long) fq); 34 35 /* 返回回收節點數 */ 36 return evicted; 37 }
inet_frag_secret_rebuild
1 static void inet_frag_secret_rebuild(struct inet_frags *f) 2 { 3 int i; 4 5 write_seqlock_bh(&f->rnd_seqlock); 6 7 /* 無需重建 */ 8 if (!inet_frag_may_rebuild(f)) 9 goto out; 10 11 /* 獲取新的用於計算hash的隨機值 */ 12 get_random_bytes(&f->rnd, sizeof(u32)); 13 14 /* 遍歷hash表 */ 15 for (i = 0; i < INETFRAGS_HASHSZ; i++) { 16 struct inet_frag_bucket *hb; 17 struct inet_frag_queue *q; 18 struct hlist_node *n; 19 20 /* 取的桶節點 */ 21 hb = &f->hash[i]; 22 spin_lock(&hb->chain_lock); 23 24 /* 遍歷桶節點下面的鏈表 */ 25 hlist_for_each_entry_safe(q, n, &hb->chain, list) { 26 27 /* 計算hash */ 28 unsigned int hval = inet_frag_hashfn(f, q); 29 30 /* 節點不屬於當前桶 */ 31 if (hval != i) { 32 struct inet_frag_bucket *hb_dest; 33 34 /* 從當前桶中刪除該節點 */ 35 hlist_del(&q->list); 36 37 /* Relink to new hash chain. */ 38 /* 找到目標桶 */ 39 hb_dest = &f->hash[hval]; 40 41 /* This is the only place where we take 42 * another chain_lock while already holding 43 * one. As this will not run concurrently, 44 * we cannot deadlock on hb_dest lock below, if its 45 * already locked it will be released soon since 46 * other caller cannot be waiting for hb lock 47 * that we've taken above. 48 */ 49 spin_lock_nested(&hb_dest->chain_lock, 50 SINGLE_DEPTH_NESTING); 51 /* 節點加入目標桶的鏈表中 */ 52 hlist_add_head(&q->list, &hb_dest->chain); 53 spin_unlock(&hb_dest->chain_lock); 54 } 55 } 56 spin_unlock(&hb->chain_lock); 57 } 58 59 /* 設置重建標記和重建時間 */ 60 f->rebuild = false; 61 f->last_rebuild_jiffies = jiffies; 62 out: 63 write_sequnlock_bh(&f->rnd_seqlock); 64 }
ip_frag_queue
/*
 * Add new segment to existing queue.
 *
 * @qp:  reassembly queue the fragment is added to
 * @skb: the fragment; it is freed here on every error path
 *
 * Returns -EINPROGRESS when the fragment was stored and the datagram is
 * still incomplete, the result of ip_frag_reasm() when this fragment
 * completed the datagram, or a negative errno after freeing @skb
 * (-ENOENT, -EINVAL, -ENOMEM, or the error reported by ip_frag_reinit()
 * or pskb_trim_rcsum()).
 */
static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb)
{
	struct sk_buff *prev, *next;
	struct net_device *dev;
	unsigned int fragsize;
	int flags, offset;
	int ihl, end;
	int err = -ENOENT;
	u8 ecn;

	/* The queue is already marked complete: drop with -ENOENT. */
	if (qp->q.flags & INET_FRAG_COMPLETE)
		goto err;

	/*
	 * Unless IPSKB_FRAG_COMPLETE is set on the skb, consult
	 * ip_frag_too_far(); if it fires, try to re-initialise the queue,
	 * and if that fails kill the queue and drop the fragment with the
	 * error from ip_frag_reinit().
	 * NOTE(review): the original annotation describes this as DoS
	 * detection for packets not generated locally — confirm against
	 * ip_frag_too_far().
	 */
	if (!(IPCB(skb)->flags & IPSKB_FRAG_COMPLETE) &&
	    unlikely(ip_frag_too_far(qp)) &&
	    unlikely(err = ip_frag_reinit(qp))) {
		ipq_kill(qp);
		goto err;
	}

	ecn = ip4_frag_ecn(ip_hdr(skb)->tos);

	/* Read the fragment field in host byte order. */
	offset = ntohs(ip_hdr(skb)->frag_off);

	/* Keep only the flag bits (everything outside IP_OFFSET). */
	flags = offset & ~IP_OFFSET;

	/* Keep only the fragment offset bits. */
	offset &= IP_OFFSET;

	/* Convert to a byte offset. */
	offset <<= 3;		/* offset is in 8-byte chunks */

	/* IP header length. */
	ihl = ip_hdrlen(skb);

	/* Determine the position of this fragment. */

	/*
	 * End position of this fragment's payload within the datagram:
	 * start offset plus the skb length, minus the network offset and
	 * the IP header.
	 */
	end = offset + skb->len - skb_network_offset(skb) - ihl;
	err = -EINVAL;

	/* Is this the final fragment? */
	if ((flags & IP_MF) == 0) {
		/* If we already have some bits beyond end
		 * or have different end, the segment is corrupted.
		 */
		if (end < qp->q.len ||
		    ((qp->q.flags & INET_FRAG_LAST_IN) && end != qp->q.len))
			goto err;

		/* Record that the last fragment has arrived. */
		qp->q.flags |= INET_FRAG_LAST_IN;

		/* The datagram length is now known. */
		qp->q.len = end;
	}
	/* Not the final fragment. */
	else {
		/* Payload does not end on an 8-byte boundary. */
		if (end&7) {
			/* Round the end down to an 8-byte boundary. */
			end &= ~7;

			/* Data will be trimmed, so the checksum state is reset. */
			if (skb->ip_summed != CHECKSUM_UNNECESSARY)
				skb->ip_summed = CHECKSUM_NONE;
		}

		/* This fragment reaches past everything seen so far. */
		if (end > qp->q.len) {
			/* Some bits beyond end -> corruption. */
			if (qp->q.flags & INET_FRAG_LAST_IN)
				goto err;

			/* Track the largest end offset seen so far. */
			qp->q.len = end;
		}
	}

	/* Fragment carries no data: drop with -EINVAL. */
	if (end == offset)
		goto err;

	err = -ENOMEM;

	/* Strip everything up to and including the IP header. */
	if (!pskb_pull(skb, skb_network_offset(skb) + ihl))
		goto err;

	/* Trim the payload to end - offset bytes. */
	err = pskb_trim_rcsum(skb, end - offset);
	if (err)
		goto err;

	/* Find out which fragments are in front and at the back of us
	 * in the chain of fragments so far.  We must know where to put
	 * this fragment, right?
	 */
	/* Fast path: try appending after the current tail. */
	prev = qp->q.fragments_tail;

	/* Empty queue, or the tail starts before this fragment. */
	if (!prev || FRAG_CB(prev)->offset < offset) {
		next = NULL;
		goto found;
	}

	/* The fragment belongs somewhere before the tail: scan the list. */

	prev = NULL;
	for (next = qp->q.fragments; next != NULL; next = next->next) {
		/* First queued fragment starting at or after our offset. */
		if (FRAG_CB(next)->offset >= offset)
			break;	/* bingo! */

		prev = next;
	}

found:
	/* We found where to put this one.  Check for overlap with
	 * preceding fragment, and, if needed, align things so that
	 * any overlaps are eliminated.
	 */
	if (prev) {

		/* Bytes by which prev runs into this fragment. */
		int i = (FRAG_CB(prev)->offset + prev->len) - offset;

		if (i > 0) {
			/* Start after the overlapping bytes. */
			offset += i;
			err = -EINVAL;

			/* Nothing left once the overlap is removed. */
			if (end <= offset)
				goto err;
			err = -ENOMEM;
			/* Drop the overlapping bytes from the front. */
			if (!pskb_pull(skb, i))
				goto err;
			/* Data was removed, so the checksum state is reset. */
			if (skb->ip_summed != CHECKSUM_UNNECESSARY)
				skb->ip_summed = CHECKSUM_NONE;
		}
	}

	err = -ENOMEM;

	/* Resolve overlap with the fragments that follow. */
	while (next && FRAG_CB(next)->offset < end) {

		int i = end - FRAG_CB(next)->offset; /* overlap is 'i' bytes */

		/* Only the head of the next fragment is covered. */
		if (i < next->len) {
			/* Eat head of the next overlapped fragment
			 * and leave the loop. The next ones cannot overlap.
			 */
			if (!pskb_pull(next, i))
				goto err;
			FRAG_CB(next)->offset += i;

			/* Fewer bytes are now accounted as received. */
			qp->q.meat -= i;

			if (next->ip_summed != CHECKSUM_UNNECESSARY)
				next->ip_summed = CHECKSUM_NONE;
			break;
		}
		/* The next fragment is covered entirely. */
		else {

			struct sk_buff *free_it = next;

			/* Old fragment is completely overridden with
			 * new one drop it.
			 */
			next = next->next;

			/* Unlink free_it from the list. */
			if (prev)
				prev->next = next;
			else
				qp->q.fragments = next;

			/* Undo its contribution to the received byte count. */
			qp->q.meat -= free_it->len;

			/* Undo its contribution to the memory accounting. */
			sub_frag_mem_limit(qp->q.net, free_it->truesize);

			kfree_skb(free_it);
		}

		/* Loop again: the new next may overlap as well. */
	}

	/* Store the (possibly adjusted) start offset in the skb's cb. */
	FRAG_CB(skb)->offset = offset;

	/* Insert this fragment in the chain of fragments. */
	skb->next = next;
	if (!next)
		qp->q.fragments_tail = skb;
	if (prev)
		prev->next = skb;
	else
		qp->q.fragments = skb;

	/* Remember the input interface index and detach the device. */
	dev = skb->dev;
	if (dev) {
		qp->iif = dev->ifindex;
		skb->dev = NULL;
	}

	/* The queue takes the timestamp of the most recent fragment. */
	qp->q.stamp = skb->tstamp;
	/* Account the bytes received. */
	qp->q.meat += skb->len;
	qp->ecn |= ecn;
	/* Account the memory used. */
	add_frag_mem_limit(qp->q.net, skb->truesize);

	/* Offset 0 means the first fragment has arrived. */
	if (offset == 0)
		qp->q.flags |= INET_FRAG_FIRST_IN;

	/* Size of this fragment including its IP header. */
	fragsize = skb->len + ihl;

	/* Track the largest fragment seen. */
	if (fragsize > qp->q.max_size)
		qp->q.max_size = fragsize;

	/* Track the largest fragment seen that had DF set. */
	if (ip_hdr(skb)->frag_off & htons(IP_DF) &&
	    fragsize > qp->max_df_size)
		qp->max_df_size = fragsize;

	/*
	 * First and last fragments are both in (and no other flag is set)
	 * and the received bytes equal the datagram length: reassemble.
	 */
	if (qp->q.flags == (INET_FRAG_FIRST_IN | INET_FRAG_LAST_IN) &&
	    qp->q.meat == qp->q.len) {
		unsigned long orefdst = skb->_skb_refdst;

		/* _skb_refdst is cleared for the call and restored after. */
		skb->_skb_refdst = 0UL;
		err = ip_frag_reasm(qp, prev, dev);
		skb->_skb_refdst = orefdst;
		return err;
	}

	/* Fragment stays queued: release its dst reference. */
	skb_dst_drop(skb);

	/* The fragment has been cached. */
	return -EINPROGRESS;

err:
	kfree_skb(skb);
	return err;
}
ip_frag_reasm
/*
 * Build a new IP datagram from all its fragments.
 *
 * @qp:   queue holding the complete set of fragments
 * @prev: fragment preceding the one just received, or NULL when the
 *        fragment just received is already first in the list
 * @dev:  device stored in the reassembled skb's dev field
 *
 * Returns 0 on success.  On failure IPSTATS_MIB_REASMFAILS is bumped and
 * -EINVAL (ECN table lookup yields 0xff), -ENOMEM (clone/unclone/alloc
 * failed) or -E2BIG (header plus payload exceeds 65535) is returned.
 */

static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev,
			 struct net_device *dev)
{
	struct net *net = container_of(qp->q.net, struct net, ipv4.frags);
	struct iphdr *iph;
	struct sk_buff *fp, *head = qp->q.fragments;
	int len;
	int ihlen;
	int err;
	u8 ecn;

	/* Take the queue out of service. */
	ipq_kill(qp);

	ecn = ip_frag_ecn_table[qp->ecn];
	if (unlikely(ecn == 0xff)) {
		err = -EINVAL;
		goto out_fail;
	}
	/* Make the one we just received the head. */

	if (prev) {
		/* The fragment just received sits right after prev. */
		head = prev->next;

		/* A clone takes its place in the list. */
		fp = skb_clone(head, GFP_ATOMIC);
		if (!fp)
			goto out_nomem;

		fp->next = head->next;

		/* The clone is last in the list: update the tail pointer. */
		if (!fp->next)
			qp->q.fragments_tail = fp;

		/* Link the clone in after prev. */
		prev->next = fp;

		/*
		 * Morph head from the old first fragment, take over its
		 * next pointer, release the old first fragment and make
		 * head the new start of the list.
		 */
		skb_morph(head, qp->q.fragments);
		head->next = qp->q.fragments->next;

		consume_skb(qp->q.fragments);
		qp->q.fragments = head;
	}

	WARN_ON(!head);
	WARN_ON(FRAG_CB(head)->offset != 0);

	/* Allocate a new buffer for the datagram. */

	/* Total length: IP header of the head plus the payload length. */
	ihlen = ip_hdrlen(head);
	len = ihlen + qp->q.len;

	err = -E2BIG;

	/* The length is later stored with htons(), so cap it at 65535. */
	if (len > 65535)
		goto out_oversize;

	/* Head of list must not be cloned. */
	if (skb_unclone(head, GFP_ATOMIC))
		goto out_nomem;

	/* If the first fragment is fragmented itself, we split
	 * it to two chunks: the first with data and paged part
	 * and the second, holding only fragments. */
	if (skb_has_frag_list(head)) {
		struct sk_buff *clone;
		int i, plen = 0;

		clone = alloc_skb(0, GFP_ATOMIC);
		if (!clone)
			goto out_nomem;

		/* Insert the new skb right after head. */
		clone->next = head->next;
		head->next = clone;

		/* Move head's frag_list over to it. */
		skb_shinfo(clone)->frag_list = skb_shinfo(head)->frag_list;
		skb_frag_list_init(head);

		/*
		 * plen is the total size of head's page frags; the rest of
		 * head's data_len moves from head to the new skb.
		 */
		for (i = 0; i < skb_shinfo(head)->nr_frags; i++)
			plen += skb_frag_size(&skb_shinfo(head)->frags[i]);
		clone->len = clone->data_len = head->data_len - plen;
		head->data_len -= clone->len;
		head->len -= clone->len;
		clone->csum = 0;
		clone->ip_summed = head->ip_summed;
		add_frag_mem_limit(qp->q.net, clone->truesize);
	}

	/* Hang the remaining fragments off head's frag_list. */
	skb_shinfo(head)->frag_list = head->next;
	skb_push(head, head->data - skb_network_header(head));

	/* Fold every fragment's length, checksum and truesize into head. */
	for (fp=head->next; fp; fp = fp->next) {
		head->data_len += fp->len;
		head->len += fp->len;
		if (head->ip_summed != fp->ip_summed)
			head->ip_summed = CHECKSUM_NONE;
		else if (head->ip_summed == CHECKSUM_COMPLETE)
			head->csum = csum_add(head->csum, fp->csum);
		head->truesize += fp->truesize;
	}
	sub_frag_mem_limit(qp->q.net, head->truesize);

	/* Fill in the fields of the reassembled packet. */
	head->next = NULL;
	head->dev = dev;
	head->tstamp = qp->q.stamp;
	/*
	 * Record the larger of the two tracked fragment sizes.
	 * NOTE(review): per the original annotation, the fragmentation path
	 * uses frag_max_size as the fragment MTU when it is below the MTU —
	 * confirm against the output path.
	 */
	IPCB(head)->frag_max_size = max(qp->max_df_size, qp->q.max_size);

	iph = ip_hdr(head);
	iph->tot_len = htons(len);
	iph->tos |= ecn;

	/* When we set IP_DF on a refragmented skb we must also force a
	 * call to ip_fragment to avoid forwarding a DF-skb of size s while
	 * original sender only sent fragments of size f (where f < s).
	 *
	 * We only set DF/IPSKB_FRAG_PMTU if such DF fragment was the largest
	 * frag seen to avoid sending tiny DF-fragments in case skb was built
	 * from one very small df-fragment and one large non-df frag.
	 */
	if (qp->max_df_size == qp->q.max_size) {
		IPCB(head)->flags |= IPSKB_FRAG_PMTU;
		/* DF is the only bit left in frag_off. */
		iph->frag_off = htons(IP_DF);
	}
	/* Otherwise frag_off is cleared completely. */
	else {
		iph->frag_off = 0;
	}

	/* Recompute the IP header checksum. */
	ip_send_check(iph);

	__IP_INC_STATS(net, IPSTATS_MIB_REASMOKS);

	/* The queue no longer owns any fragments. */
	qp->q.fragments = NULL;
	qp->q.fragments_tail = NULL;
	return 0;

out_nomem:
	net_dbg_ratelimited("queue_glue: no memory for gluing queue %p\n", qp);
	err = -ENOMEM;
	goto out_fail;
out_oversize:
	net_info_ratelimited("Oversized IP packet from %pI4\n", &qp->saddr);
out_fail:
	__IP_INC_STATS(net, IPSTATS_MIB_REASMFAILS);
	return err;
}