概述
tcp_rcv_established用於處理已連接狀態下的輸入,處理過程根據首部預測字段分為快速路徑和慢速路徑;
1. 在快路中,根據是否有數據負荷進行不同處理:
(1) 若無數據,則處理輸入ack,釋放該skb,檢查是否有數據發送,有則發送;
(2) 若有數據,檢查當前是否處於讀取進程的上下文,並且是期望讀取的數據,若是則將數據複製到用戶空間;若不滿足直接複製到用戶空間的條件,或者複製失敗,則需要將數據段加入到接收隊列中,加入方式包括合併到已有數據段,或者加入隊列尾部,並喚醒用戶進程通知有數據可讀;
2. 在慢路中,會進行更詳細的校驗,然後處理ack,處理緊急數據,接收數據段,其中數據段可能包含亂序的情況,最後進行是否有數據和ack的發送檢查;
源碼分析
1 he first three cases are guaranteed by proper pred_flags setting, 2 * the rest is checked inline. Fast processing is turned on in 3 * tcp_data_queue when everything is OK. 4 */ 5 void tcp_rcv_established(struct sock *sk, struct sk_buff *skb, 6 const struct tcphdr *th, unsigned int len) 7 { 8 struct tcp_sock *tp = tcp_sk(sk); 9 10 skb_mstamp_get(&tp->tcp_mstamp); 11 /* 路由為空,則重新設置路由 */ 12 if (unlikely(!sk->sk_rx_dst)) 13 inet_csk(sk)->icsk_af_ops->sk_rx_dst_set(sk, skb); 14 /* 15 * Header prediction. 16 * The code loosely follows the one in the famous 17 * "30 instruction TCP receive" Van Jacobson mail. 18 * 19 * Van's trick is to deposit buffers into socket queue 20 * on a device interrupt, to call tcp_recv function 21 * on the receive process context and checksum and copy 22 * the buffer to user space. smart... 23 * 24 * Our current scheme is not silly either but we take the 25 * extra cost of the net_bh soft interrupt processing... 26 * We do checksum and copy also but from device to kernel. 27 */ 28 29 tp->rx_opt.saw_tstamp = 0; 30 31 /* pred_flags is 0xS?10 << 16 + snd_wnd 32 * if header_prediction is to be made 33 * 'S' will always be tp->tcp_header_len >> 2 34 * '?' will be 0 for the fast path, otherwise pred_flags is 0 to 35 * turn it off (when there are holes in the receive 36 * space for instance) 37 * PSH flag is ignored. 38 */ 39 40 /* 快路檢查&& 序號正確 && ack序號正確 */ 41 if ((tcp_flag_word(th) & TCP_HP_BITS) == tp->pred_flags && 42 TCP_SKB_CB(skb)->seq == tp->rcv_nxt && 43 !after(TCP_SKB_CB(skb)->ack_seq, tp->snd_nxt)) { 44 /* tcp頭部長度 */ 45 int tcp_header_len = tp->tcp_header_len; 46 47 /* Timestamp header prediction: tcp_header_len 48 * is automatically equal to th->doff*4 due to pred_flags 49 * match. 50 */ 51 52 /* Check timestamp */ 53 /* 有時間戳選項 */ 54 if (tcp_header_len == sizeof(struct tcphdr) + TCPOLEN_TSTAMP_ALIGNED) { 55 /* No? Slow path! 
*/ 56 /* 解析時間戳選項失敗,執行慢路 */ 57 if (!tcp_parse_aligned_timestamp(tp, th)) 58 goto slow_path; 59 60 /* If PAWS failed, check it more carefully in slow path */ 61 /* 序號回轉,執行慢路 */ 62 if ((s32)(tp->rx_opt.rcv_tsval - tp->rx_opt.ts_recent) < 0) 63 goto slow_path; 64 65 /* DO NOT update ts_recent here, if checksum fails 66 * and timestamp was corrupted part, it will result 67 * in a hung connection since we will drop all 68 * future packets due to the PAWS test. 69 */ 70 } 71 72 /* 無數據 */ 73 if (len <= tcp_header_len) { 74 /* Bulk data transfer: sender */ 75 if (len == tcp_header_len) { 76 /* Predicted packet is in window by definition. 77 * seq == rcv_nxt and rcv_wup <= rcv_nxt. 78 * Hence, check seq<=rcv_wup reduces to: 79 */ 80 /* 81 有時間戳選項 82 && 所有接收的數據段均確認完畢 83 保存時間戳 84 */ 85 if (tcp_header_len == 86 (sizeof(struct tcphdr) + TCPOLEN_TSTAMP_ALIGNED) && 87 tp->rcv_nxt == tp->rcv_wup) 88 tcp_store_ts_recent(tp); 89 90 /* We know that such packets are checksummed 91 * on entry. 92 */ 93 /* 輸入ack處理 */ 94 tcp_ack(sk, skb, 0); 95 /* 釋放skb */ 96 __kfree_skb(skb); 97 98 /* 檢查是否有數據要發送,並檢查發送緩沖區大小 */ 99 tcp_data_snd_check(sk); 100 return; 101 } 102 /* 數據多小,比頭部都小,錯包 */ 103 else { /* Header too small */ 104 TCP_INC_STATS(sock_net(sk), TCP_MIB_INERRS); 105 goto discard; 106 } 107 } 108 /* 有數據 */ 109 else { 110 int eaten = 0; 111 bool fragstolen = false; 112 113 /* 讀取進程上下文 */ 114 if (tp->ucopy.task == current && 115 /* 期待讀取的和期待接收的序號一致 */ 116 tp->copied_seq == tp->rcv_nxt && 117 /* 數據<= 待讀取長度 */ 118 len - tcp_header_len <= tp->ucopy.len && 119 /* 控制塊被用戶空間鎖定 */ 120 sock_owned_by_user(sk)) { 121 122 /* 設置狀態為running??? */ 123 __set_current_state(TASK_RUNNING); 124 125 /* 拷貝數據到msghdr */ 126 if (!tcp_copy_to_iovec(sk, skb, tcp_header_len)) { 127 /* Predicted packet is in window by definition. 128 * seq == rcv_nxt and rcv_wup <= rcv_nxt. 
129 * Hence, check seq<=rcv_wup reduces to: 130 */ 131 /* 有時間戳選項&& 收到的數據段均已確認,更新時間戳 */ 132 if (tcp_header_len == 133 (sizeof(struct tcphdr) + 134 TCPOLEN_TSTAMP_ALIGNED) && 135 tp->rcv_nxt == tp->rcv_wup) 136 tcp_store_ts_recent(tp); 137 138 /* 接收端RTT估算 */ 139 tcp_rcv_rtt_measure_ts(sk, skb); 140 141 __skb_pull(skb, tcp_header_len); 142 143 /* 更新期望接收的序號 */ 144 tcp_rcv_nxt_update(tp, TCP_SKB_CB(skb)->end_seq); 145 NET_INC_STATS(sock_net(sk), 146 LINUX_MIB_TCPHPHITSTOUSER); 147 eaten = 1; 148 } 149 } 150 151 /* 未拷貝數據到用戶空間,或者拷貝失敗 */ 152 if (!eaten) { 153 /* 檢查校驗和 */ 154 if (tcp_checksum_complete(skb)) 155 goto csum_error; 156 157 /* skb長度> 預分配長度 */ 158 if ((int)skb->truesize > sk->sk_forward_alloc) 159 goto step5; 160 161 /* Predicted packet is in window by definition. 162 * seq == rcv_nxt and rcv_wup <= rcv_nxt. 163 * Hence, check seq<=rcv_wup reduces to: 164 */ 165 /* 有時間戳選項,且數據均已確認完畢,則更新時間戳 */ 166 if (tcp_header_len == 167 (sizeof(struct tcphdr) + TCPOLEN_TSTAMP_ALIGNED) && 168 tp->rcv_nxt == tp->rcv_wup) 169 tcp_store_ts_recent(tp); 170 171 /* 計算RTT */ 172 tcp_rcv_rtt_measure_ts(sk, skb); 173 174 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPHPHITS); 175 176 /* Bulk data transfer: receiver */ 177 /* 數據加入接收隊列 */ 178 eaten = tcp_queue_rcv(sk, skb, tcp_header_len, 179 &fragstolen); 180 } 181 182 tcp_event_data_recv(sk, skb); 183 184 /* 確認序號確認了數據 */ 185 if (TCP_SKB_CB(skb)->ack_seq != tp->snd_una) { 186 /* Well, only one small jumplet in fast path... 
*/ 187 /* 處理ack */ 188 tcp_ack(sk, skb, FLAG_DATA); 189 /* 檢查是否有數據要發送,需要則發送 */ 190 tcp_data_snd_check(sk); 191 /* 沒有ack要發送 */ 192 if (!inet_csk_ack_scheduled(sk)) 193 goto no_ack; 194 } 195 196 /* 檢查是否有ack要發送,需要則發送 */ 197 __tcp_ack_snd_check(sk, 0); 198 no_ack: 199 /* skb已經復制到用戶空間,則釋放之 */ 200 if (eaten) 201 kfree_skb_partial(skb, fragstolen); 202 203 /* 喚醒用戶進程有數據讀取 */ 204 sk->sk_data_ready(sk); 205 return; 206 } 207 } 208 209 slow_path: 210 /* 長度錯誤|| 校驗和錯誤 */ 211 if (len < (th->doff << 2) || tcp_checksum_complete(skb)) 212 goto csum_error; 213 214 /* 無ack,無rst,無syn */ 215 if (!th->ack && !th->rst && !th->syn) 216 goto discard; 217 218 /* 219 * Standard slow path. 220 */ 221 /* 種種校驗 */ 222 if (!tcp_validate_incoming(sk, skb, th, 1)) 223 return; 224 225 step5: 226 /* 處理ack */ 227 if (tcp_ack(sk, skb, FLAG_SLOWPATH | FLAG_UPDATE_TS_RECENT) < 0) 228 goto discard; 229 230 /* 計算rtt */ 231 tcp_rcv_rtt_measure_ts(sk, skb); 232 233 /* Process urgent data. */ 234 /* 處理緊急數據 */ 235 tcp_urg(sk, skb, th); 236 237 /* step 7: process the segment text */ 238 /* 數據段處理 */ 239 tcp_data_queue(sk, skb); 240 241 /* 發送數據檢查,有則發送 */ 242 tcp_data_snd_check(sk); 243 244 /* 發送ack檢查,有則發送 */ 245 tcp_ack_snd_check(sk); 246 return; 247 248 csum_error: 249 TCP_INC_STATS(sock_net(sk), TCP_MIB_CSUMERRORS); 250 TCP_INC_STATS(sock_net(sk), TCP_MIB_INERRS); 251 252 discard: 253 tcp_drop(sk, skb); 254 }
