TCP輸入 之 tcp_rcv_established


概述

tcp_rcv_established用於處理已連接狀態下的輸入,處理過程根據首部預測字段分為快速路徑和慢速路徑;

1. 在快路中,對是否有數據負荷進行不同處理:

(1) 若無數據,則處理輸入ack,釋放該skb,檢查是否有數據發送,有則發送;

(2) 若有數據,檢查當前是否處於讀取進程的上下文,並且該數據正是期望讀取的數據,若是則將數據複製到用戶空間;若不滿足直接複製到用戶空間的條件,或者複製失敗,則需要將數據段加入到接收隊列中,加入方式包括合併到已有數據段,或者加入隊列尾部,並喚醒用戶進程通知有數據可讀;

2. 在慢路中,會進行更詳細的校驗,然後處理ack,處理緊急數據,接收數據段,其中數據段可能包含亂序的情況,最後進行是否有數據和ack需要發送的檢查;

源碼分析
  1  *    The first three cases are guaranteed by proper pred_flags setting,
  2  *    the rest is checked inline. Fast processing is turned on in
  3  *    tcp_data_queue when everything is OK.
  4  */
  5 void tcp_rcv_established(struct sock *sk, struct sk_buff *skb,
  6              const struct tcphdr *th, unsigned int len)
  7 {
  8     struct tcp_sock *tp = tcp_sk(sk);
  9 
 10     skb_mstamp_get(&tp->tcp_mstamp);
 11     /* No cached rx dst on the socket: set it from this skb */
 12     if (unlikely(!sk->sk_rx_dst))
 13         inet_csk(sk)->icsk_af_ops->sk_rx_dst_set(sk, skb);
 14     /*
 15      *    Header prediction.
 16      *    The code loosely follows the one in the famous
 17      *    "30 instruction TCP receive" Van Jacobson mail.
 18      *
 19      *    Van's trick is to deposit buffers into socket queue
 20      *    on a device interrupt, to call tcp_recv function
 21      *    on the receive process context and checksum and copy
 22      *    the buffer to user space. smart...
 23      *
 24      *    Our current scheme is not silly either but we take the
 25      *    extra cost of the net_bh soft interrupt processing...
 26      *    We do checksum and copy also but from device to kernel.
 27      */
 28 
 29     tp->rx_opt.saw_tstamp = 0;
 30 
 31     /*    pred_flags is 0xS?10 << 16 + snd_wnd
 32      *    if header_prediction is to be made
 33      *    'S' will always be tp->tcp_header_len >> 2
 34      *    '?' will be 0 for the fast path, otherwise pred_flags is 0 to
 35      *  turn it off    (when there are holes in the receive
 36      *     space for instance)
 37      *    PSH flag is ignored.
 38      */
 39 
 40     /* Fast-path test: header bits match pred_flags && seq == rcv_nxt && ack_seq not beyond snd_nxt */
 41     if ((tcp_flag_word(th) & TCP_HP_BITS) == tp->pred_flags &&
 42         TCP_SKB_CB(skb)->seq == tp->rcv_nxt &&
 43         !after(TCP_SKB_CB(skb)->ack_seq, tp->snd_nxt)) {
 44         /* Expected TCP header length for this connection */
 45         int tcp_header_len = tp->tcp_header_len;
 46 
 47         /* Timestamp header prediction: tcp_header_len
 48          * is automatically equal to th->doff*4 due to pred_flags
 49          * match.
 50          */
 51 
 52         /* Check timestamp */
 53         /* Header length is base header plus an aligned timestamp option */
 54         if (tcp_header_len == sizeof(struct tcphdr) + TCPOLEN_TSTAMP_ALIGNED) {
 55             /* No? Slow path! */
 56             /* Could not parse the aligned timestamp option: take the slow path */
 57             if (!tcp_parse_aligned_timestamp(tp, th))
 58                 goto slow_path;
 59 
 60             /* If PAWS failed, check it more carefully in slow path */
 61             /* rcv_tsval is older than ts_recent (PAWS test failed): take the slow path */
 62             if ((s32)(tp->rx_opt.rcv_tsval - tp->rx_opt.ts_recent) < 0)
 63                 goto slow_path;
 64 
 65             /* DO NOT update ts_recent here, if checksum fails
 66              * and timestamp was corrupted part, it will result
 67              * in a hung connection since we will drop all
 68              * future packets due to the PAWS test.
 69              */
 70         }
 71 
 72         /* No payload: len does not exceed the header length */
 73         if (len <= tcp_header_len) {
 74             /* Bulk data transfer: sender */
 75             if (len == tcp_header_len) {
 76                 /* Predicted packet is in window by definition.
 77                  * seq == rcv_nxt and rcv_wup <= rcv_nxt.
 78                  * Hence, check seq<=rcv_wup reduces to:
 79                  */
 80                 /*
 81                     Timestamp option present
 82                     && rcv_nxt == rcv_wup:
 83                     record the timestamp via tcp_store_ts_recent()
 84                   */
 85                 if (tcp_header_len ==
 86                     (sizeof(struct tcphdr) + TCPOLEN_TSTAMP_ALIGNED) &&
 87                     tp->rcv_nxt == tp->rcv_wup)
 88                     tcp_store_ts_recent(tp);
 89 
 90                 /* We know that such packets are checksummed
 91                  * on entry.
 92                  */
 93                 /* Process the incoming ACK */
 94                 tcp_ack(sk, skb, 0);
 95                 /* Free the skb */
 96                 __kfree_skb(skb);
 97 
 98                 /* Check for pending data to send (presumably also checks send-buffer space; confirm in tcp_data_snd_check) */
 99                 tcp_data_snd_check(sk);
100                 return;
101             }
102             /* Segment is shorter than its own header: count an input error and drop it */
103             else { /* Header too small */
104                 TCP_INC_STATS(sock_net(sk), TCP_MIB_INERRS);
105                 goto discard;
106             }
107         }
108         /* Segment carries payload */
109         else {
110             int eaten = 0;
111             bool fragstolen = false;
112 
113             /* Running in the context of the task registered in ucopy */
114             if (tp->ucopy.task == current &&
115                 /* Next sequence to copy to the user equals the next expected sequence */
116                 tp->copied_seq == tp->rcv_nxt &&
117                 /* Payload length fits within ucopy.len */
118                 len - tcp_header_len <= tp->ucopy.len &&
119                 /* Socket is currently owned by the user */
120                 sock_owned_by_user(sk)) {
121 
122                 /* Mark the current task TASK_RUNNING (NOTE(review): rationale not evident from this listing) */
123                 __set_current_state(TASK_RUNNING);
124 
125                 /* Copy the payload to the user's iovec; a zero return is treated as success */
126                 if (!tcp_copy_to_iovec(sk, skb, tcp_header_len)) {
127                     /* Predicted packet is in window by definition.
128                      * seq == rcv_nxt and rcv_wup <= rcv_nxt.
129                      * Hence, check seq<=rcv_wup reduces to:
130                      */
131                     /* Timestamp option present && rcv_nxt == rcv_wup: store the timestamp */
132                     if (tcp_header_len ==
133                         (sizeof(struct tcphdr) +
134                          TCPOLEN_TSTAMP_ALIGNED) &&
135                         tp->rcv_nxt == tp->rcv_wup)
136                         tcp_store_ts_recent(tp);
137 
138                     /* Receiver-side RTT estimation */
139                     tcp_rcv_rtt_measure_ts(sk, skb);
140 
141                     __skb_pull(skb, tcp_header_len);
142 
143                     /* Advance rcv_nxt to the end sequence of this segment */
144                     tcp_rcv_nxt_update(tp, TCP_SKB_CB(skb)->end_seq);
145                     NET_INC_STATS(sock_net(sk),
146                             LINUX_MIB_TCPHPHITSTOUSER);
147                     eaten = 1;
148                 }
149             }
150 
151             /* Data was not copied to user space, or the copy failed */
152             if (!eaten) {
153                 /* Verify the checksum */
154                 if (tcp_checksum_complete(skb))
155                     goto csum_error;
156 
157                 /* skb truesize exceeds sk_forward_alloc: leave the fast path and continue at step5 */
158                 if ((int)skb->truesize > sk->sk_forward_alloc)
159                     goto step5;
160 
161                 /* Predicted packet is in window by definition.
162                  * seq == rcv_nxt and rcv_wup <= rcv_nxt.
163                  * Hence, check seq<=rcv_wup reduces to:
164                  */
165                 /* Timestamp option present && rcv_nxt == rcv_wup: store the timestamp */
166                 if (tcp_header_len ==
167                     (sizeof(struct tcphdr) + TCPOLEN_TSTAMP_ALIGNED) &&
168                     tp->rcv_nxt == tp->rcv_wup)
169                     tcp_store_ts_recent(tp);
170 
171                 /* RTT measurement */
172                 tcp_rcv_rtt_measure_ts(sk, skb);
173 
174                 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPHPHITS);
175 
176                 /* Bulk data transfer: receiver */
177                 /* Queue the data on the receive queue; the result decides whether the skb is released below */
178                 eaten = tcp_queue_rcv(sk, skb, tcp_header_len,
179                               &fragstolen);
180             }
181 
182             tcp_event_data_recv(sk, skb);
183 
184             /* ack_seq differs from snd_una, so the ACK carried by this segment must be processed */
185             if (TCP_SKB_CB(skb)->ack_seq != tp->snd_una) {
186                 /* Well, only one small jumplet in fast path... */
187                 /* Process the ACK */
188                 tcp_ack(sk, skb, FLAG_DATA);
189                 /* Check for pending data to send; send it if needed */
190                 tcp_data_snd_check(sk);
191                 /* No ACK is scheduled: skip the ACK send check */
192                 if (!inet_csk_ack_scheduled(sk))
193                     goto no_ack;
194             }
195 
196             /* Check whether an ACK needs to be sent; send it if so */
197             __tcp_ack_snd_check(sk, 0);
198 no_ack:
199             /* skb was consumed (eaten != 0: copied to user, or as reported by tcp_queue_rcv): release it */
200             if (eaten)
201                 kfree_skb_partial(skb, fragstolen);
202 
203             /* Invoke the socket's data-ready callback so the reader learns data is available */
204             sk->sk_data_ready(sk);
205             return;
206         }
207     }
208 
209 slow_path:
210     /* Length shorter than the header's data offset, or bad checksum */
211     if (len < (th->doff << 2) || tcp_checksum_complete(skb))
212         goto csum_error;
213 
214     /* None of ACK, RST or SYN is set: drop the segment */
215     if (!th->ack && !th->rst && !th->syn)
216         goto discard;
217 
218     /*
219      *    Standard slow path.
220      */
221     /* Full validation of the incoming segment; on failure return without touching skb here */
222     if (!tcp_validate_incoming(sk, skb, th, 1))
223         return;
224 
225 step5:
226     /* Process the ACK; a negative result means the segment is dropped */
227     if (tcp_ack(sk, skb, FLAG_SLOWPATH | FLAG_UPDATE_TS_RECENT) < 0)
228         goto discard;
229 
230     /* RTT measurement */
231     tcp_rcv_rtt_measure_ts(sk, skb);
232 
233     /* Process urgent data. */
234     /* Handle urgent data */
235     tcp_urg(sk, skb, th);
236 
237     /* step 7: process the segment text */
238     /* Process the data segment */
239     tcp_data_queue(sk, skb);
240 
241     /* Check for data to send; send it if there is any */
242     tcp_data_snd_check(sk);
243 
244     /* Check for an ACK to send; send it if needed */
245     tcp_ack_snd_check(sk);
246     return;
247 
248 csum_error:
249     TCP_INC_STATS(sock_net(sk), TCP_MIB_CSUMERRORS);
250     TCP_INC_STATS(sock_net(sk), TCP_MIB_INERRS);
251 
252 discard:
253     tcp_drop(sk, skb);
254 }

 


免責聲明!

本站轉載的文章為個人學習借鑒使用,本站對版權不負任何法律責任。如果侵犯了您的隱私權益,請聯系本站郵箱yoyou2525@163.com刪除。



 
粵ICP備18138465號   © 2018-2025 CODEPRJ.COM