假定客戶端執行主動打開,服務器執行被動打開,客戶端發送syn包到服務器,服務器接收該包,進行建立連接請求的相關處理,即第一次握手;本文主要分析第一次握手中被動打開端的處理流程,主動打開端的處理請查閱本博客內另外的文章;
IPv4攜帶的TCP報文最終會進入到tcp_v4_do_rcv函數,服務器准備接收連接請求時,是處於LISTEN狀態的,所以我們只關心這部分的相關處理;函數中LISTEN條件分支中,主要是對啟用了syn cookies的檢查,我們暫且不做分析;主要看tcp_rcv_state_process這個函數,syn連接請求最終會進入到該函數中進行處理;
1 int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb) 2 { 3 struct sock *rsk; 4 5 if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */ 6 struct dst_entry *dst = sk->sk_rx_dst; 7 8 sock_rps_save_rxhash(sk, skb); 9 sk_mark_napi_id(sk, skb); 10 if (dst) { 11 if (inet_sk(sk)->rx_dst_ifindex != skb->skb_iif || 12 !dst->ops->check(dst, 0)) { 13 dst_release(dst); 14 sk->sk_rx_dst = NULL; 15 } 16 } 17 tcp_rcv_established(sk, skb, tcp_hdr(skb), skb->len); 18 return 0; 19 } 20 21 if (tcp_checksum_complete(skb)) 22 goto csum_err; 23 24 /* LISTEN狀態處理 */ 25 if (sk->sk_state == TCP_LISTEN) { 26 27 /* syn cookies檢查 */ 28 struct sock *nsk = tcp_v4_cookie_check(sk, skb); 29 30 if (!nsk) 31 goto discard; 32 if (nsk != sk) { 33 if (tcp_child_process(sk, nsk, skb)) { 34 rsk = nsk; 35 goto reset; 36 } 37 return 0; 38 } 39 } else 40 sock_rps_save_rxhash(sk, skb); 41 42 /* ESTABLISHED and TIME_WAIT狀態以外的其他狀態處理 */ 43 if (tcp_rcv_state_process(sk, skb)) { 44 rsk = sk; 45 goto reset; 46 } 47 return 0; 48 49 reset: 50 tcp_v4_send_reset(rsk, skb); 51 discard: 52 kfree_skb(skb); 53 /* Be careful here. If this function gets more complicated and 54 * gcc suffers from register pressure on the x86, sk (in %ebx) 55 * might be destroyed here. This current version compiles correctly, 56 * but you have been warned. 57 */ 58 return 0; 59 60 csum_err: 61 TCP_INC_STATS(sock_net(sk), TCP_MIB_CSUMERRORS); 62 TCP_INC_STATS(sock_net(sk), TCP_MIB_INERRS); 63 goto discard; 64 }
tcp_rcv_state_process對syn包進行處理,不接收ack包,丟棄含有rst和fin的包,對於合格的syn請求包,則繼續調用conn_request回調進行處理,TCPv4中對應的函數為tcp_v4_conn_request;
1 int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb) 2 { 3 /* 省略了一些無關代碼 */ 4 5 switch (sk->sk_state) { 6 case TCP_CLOSE: 7 goto discard; 8 9 case TCP_LISTEN: 10 /* 不接收ack */ 11 if (th->ack) 12 return 1; 13 14 /* 丟棄帶有rst標記的包 */ 15 if (th->rst) 16 goto discard; 17 18 /* 處理syn請求包 */ 19 if (th->syn) { 20 /* 丟棄帶有fin標志的包 */ 21 if (th->fin) 22 goto discard; 23 /* It is possible that we process SYN packets from backlog, 24 * so we need to make sure to disable BH right there. 25 */ 26 local_bh_disable(); 27 /* 進入連接請求處理 */ 28 acceptable = icsk->icsk_af_ops->conn_request(sk, skb) >= 0; 29 local_bh_enable(); 30 31 /* 連接失敗 */ 32 if (!acceptable) 33 return 1; 34 35 /* 連接成功 */ 36 consume_skb(skb); 37 return 0; 38 } 39 goto discard; 40 } 41 /* 省略了一些無關代碼 */ 42 }
tcp_v4_conn_request函數對傳入包的路由類型進行檢查,如果是發往廣播或者組播的,則丟棄該包,合法包進入tcp_conn_request函數繼續進行請求處理,其中參數傳入了請求控制塊操作函數結構指針;
1 int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) 2 { 3 /* Never answer to SYNs send to broadcast or multicast */ 4 if (skb_rtable(skb)->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST)) 5 goto drop; 6 7 return tcp_conn_request(&tcp_request_sock_ops, 8 &tcp_request_sock_ipv4_ops, sk, skb); 9 10 drop: 11 tcp_listendrop(sk); 12 return 0; 13 }
tcp_conn_request函數為syn請求的核心處理流程,我們暫且忽略其中的syn cookies和fastopen相關流程,其核心功能為分析請求參數,新建連接請求控制塊;注意,新建請求控制塊的操作中會將連接狀態更新為TCP_NEW_SYN_RECV,並初始化相關成員;初始化完畢之後,將請求控制塊加入半連接隊列(經inet_csk_reqsk_queue_hash_add加入ehash並啟動請求重傳定時器;只有fastopen的子控制塊才會直接進入accept queue),然後回復syn+ack包給客戶端;
/*
 * Core handling of an incoming SYN on a listening socket.
 *
 * @rsk_ops: request-sock operations used when allocating the request
 * @af_ops:  address-family specific TCP request operations
 * @sk:      the listening socket
 * @skb:     the received SYN segment
 *
 * Allocates a request sock, fills it in from the SYN's options and
 * addresses, picks an initial sequence number, and sends a SYN+ACK
 * (normal, SYN-cookie, or fastopen variant).
 *
 * Every visible path returns 0; the drop paths call tcp_listendrop()
 * before returning.
 */
int tcp_conn_request(struct request_sock_ops *rsk_ops,
		     const struct tcp_request_sock_ops *af_ops,
		     struct sock *sk, struct sk_buff *skb)
{
	struct tcp_fastopen_cookie foc = { .len = -1 };
	__u32 isn = TCP_SKB_CB(skb)->tcp_tw_isn;
	struct tcp_options_received tmp_opt;
	struct tcp_sock *tp = tcp_sk(sk);
	struct net *net = sock_net(sk);
	struct sock *fastopen_sk = NULL;
	struct dst_entry *dst = NULL;
	struct request_sock *req;
	bool want_cookie = false;
	struct flowi fl;

	/* TW buckets are converted to open requests without
	 * limitations, they conserve resources and peer is
	 * evidently real one.
	 */
	if ((net->ipv4.sysctl_tcp_syncookies == 2 ||
	     inet_csk_reqsk_queue_is_full(sk)) && !isn) {
		want_cookie = tcp_syn_flood_action(sk, skb, rsk_ops->slab_name);
		if (!want_cookie)
			goto drop;
	}

	/* Drop the SYN when the accept queue has reached its limit. */
	if (sk_acceptq_is_full(sk)) {
		NET_INC_STATS(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
		goto drop;
	}

	/*
	 * Allocate the request sock; its operations point at rsk_ops.
	 * NOTE(review): the original author states that inet_reqsk_alloc()
	 * also sets the request state to TCP_NEW_SYN_RECV; that is not
	 * visible in this listing -- confirm against inet_reqsk_alloc().
	 */
	req = inet_reqsk_alloc(rsk_ops, sk, !want_cookie);
	if (!req)
		goto drop;

	/* Record the address-family specific operations; start with no timestamp offset. */
	tcp_rsk(req)->af_specific = af_ops;
	tcp_rsk(req)->ts_off = 0;

	/* Clear the scratch structure that will hold the parsed TCP options. */
	tcp_clear_options(&tmp_opt);

	/* Seed the MSS clamp from the address family's value. */
	tmp_opt.mss_clamp = af_ops->mss_clamp;
	/* Carry over the MSS configured on the listening socket. */
	tmp_opt.user_mss = tp->rx_opt.user_mss;

	/*
	 * Parse the SYN's TCP options. The fastopen cookie is only collected
	 * when SYN cookies are not in use.
	 * NOTE(review): per the original author, parsing stores the smaller of
	 * user_mss and the peer's advertised MSS in mss_clamp -- confirm
	 * against tcp_parse_options().
	 */
	tcp_parse_options(skb, &tmp_opt, 0, want_cookie ? NULL : &foc);

	/* With SYN cookies and no timestamp option, discard the parsed options. */
	if (want_cookie && !tmp_opt.saw_tstamp)
		tcp_clear_options(&tmp_opt);

	/* Remember whether the SYN carried a timestamp option. */
	tmp_opt.tstamp_ok = tmp_opt.saw_tstamp;

	/* Initialise the request sock from the peer's SYN and parsed options. */
	tcp_openreq_init(req, &tmp_opt, skb, sk);

	/*
	 * NOTE(review): presumably disables the source-address check for
	 * transparent sockets; the original author was unsure as well -- confirm.
	 */
	inet_rsk(req)->no_srccheck = inet_sk(sk)->transparent;

	/* Note: tcp_v6_init_req() might override ir_iif for link locals */
	inet_rsk(req)->ir_iif = inet_request_bound_dev_if(sk, skb);

	/*
	 * Address-family specific initialisation of the request
	 * (per the original author: destination address, source address, IP options).
	 */
	af_ops->init_req(req, sk, skb);

	if (security_inet_conn_request(sk, skb, req))
		goto drop_and_free;

	/* A timestamp option was seen: obtain the timestamp offset from the address family. */
	if (tmp_opt.tstamp_ok)
		tcp_rsk(req)->ts_off = af_ops->init_ts_off(skb);

	/* No SYN cookie wanted and no initial sequence number chosen yet. */
	if (!want_cookie && !isn) {
		/* Kill the following clause, if you dislike this way. */
		/*
		 * SYN cookies disabled, fewer than a quarter of
		 * sysctl_max_syn_backlog slots remain, and the peer is not
		 * proven: drop the request. dst is still NULL at this point
		 * in the visible code.
		 */
		if (!net->ipv4.sysctl_tcp_syncookies &&
		    (net->ipv4.sysctl_max_syn_backlog - inet_csk_reqsk_queue_len(sk) <
		     (net->ipv4.sysctl_max_syn_backlog >> 2)) &&
		    !tcp_peer_is_proven(req, dst)) {
			/* Without syncookies last quarter of
			 * backlog is filled with destinations,
			 * proven to be alive.
			 * It means that we continue to communicate
			 * to destinations, already remembered
			 * to the moment of synflood.
			 */
			pr_drop_req(req, ntohs(tcp_hdr(skb)->source),
				    rsk_ops->family);
			goto drop_and_release;
		}

		/*
		 * Let the address family choose the initial sequence number
		 * (per the original author: derived from the addresses and ports).
		 */
		isn = af_ops->init_seq(skb);
	}

	/* No route yet: look one up for this request. */
	if (!dst) {
		dst = af_ops->route_req(sk, &fl, req);
		if (!dst)
			goto drop_and_free;
	}

	/* ECN-related setup of the request. */
	tcp_ecn_create_request(req, skb, sk, dst);

	/* SYN-cookie path: the cookie becomes the initial sequence number. */
	if (want_cookie) {
		isn = cookie_init_sequence(af_ops, sk, skb, &req->mss);
		req->cookie_ts = tmp_opt.tstamp_ok;
		if (!tmp_opt.tstamp_ok)
			inet_rsk(req)->ecn_ok = 0;
	}

	/* Record the sequence number that will be sent and a random tx hash. */
	tcp_rsk(req)->snt_isn = isn;
	tcp_rsk(req)->txhash = net_tx_rndhash();

	/* Receive-window related initialisation. */
	tcp_openreq_init_rwin(req, sk, dst);

	if (!want_cookie) {
		/* Record the SYN, then try to create a fastopen child. */
		tcp_reqsk_record_syn(sk, req, skb);
		fastopen_sk = tcp_try_fastopen(sk, skb, req, &foc, dst);
	}

	/* Fastopen: a child socket already exists. */
	if (fastopen_sk) {
		af_ops->send_synack(fastopen_sk, dst, &fl, req,
				    &foc, TCP_SYNACK_FASTOPEN);
		/* Add the child socket directly into the accept queue */
		inet_csk_reqsk_queue_add(sk, req, fastopen_sk);
		sk->sk_data_ready(sk);
		bh_unlock_sock(fastopen_sk);
		sock_put(fastopen_sk);
	} else {
		/* Not a fastopen request. */
		tcp_rsk(req)->tfo_listener = false;

		/*
		 * Hash the request with an initial timeout of TCP_TIMEOUT_INIT
		 * (per the original author: inserted into ehash and the
		 * retransmit timer is started). Skipped for SYN cookies.
		 */
		if (!want_cookie)
			inet_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT);

		/* Send the SYN+ACK. */
		af_ops->send_synack(sk, dst, &fl, req, &foc,
				    !want_cookie ? TCP_SYNACK_NORMAL :
						   TCP_SYNACK_COOKIE);
		/* With SYN cookies the request is freed immediately after sending. */
		if (want_cookie) {
			reqsk_free(req);
			return 0;
		}
	}
	reqsk_put(req);
	return 0;

drop_and_release:
	dst_release(dst);
drop_and_free:
	reqsk_free(req);
drop:
	tcp_listendrop(sk);
	return 0;
}