概述
在主動關閉方發送了FIN之后,進入FIN_WAIT_1狀態,在此狀態收到了ACK,則進入FIN_WAIT_2狀態,而FIN_WAIT_2后續要做的工作是等待接收對端發過來的FIN包,並且發送ACK,進而進入到TIME_WAIT狀態;本文主要關注從FIN_WAIT_1進入FIN_WAIT_2狀態,以及在FIN_WAIT_2狀態來包或者定時器觸發后的處理流程;
進入FIN_WAIT_2
tcp_rcv_state_process函數中對於ack的處理步驟中,假如連接處於FIN_WAIT_1,且數據均已經被確認完,則進入FIN_WAIT_2狀態;若套接口尚未處於DEAD狀態,則只喚醒等待進程,不再做后續處理;如果無需在該狀態等待(linger2<0),或者收到了帶有新數據的數據段(結束序號超過rcv_nxt),則直接關閉連接;如果需要等待,則需要判斷等待時間與TIMEWAIT時間的大小關系,若>TIMEWAIT_LEN,則添加FIN_WAIT_2定時器(超時時間為兩者之差);若<=TIMEWAIT_LEN,但當前數據段帶有FIN或者套接口被用戶進程鎖定,同樣添加FIN_WAIT_2定時器;否則直接進入TIME_WAIT接管(其子狀態仍然是FIN_WAIT_2),接管之后會添加TIME_WAIT定時器;
另,tcp_close函數調用時,如果當前狀態是FIN_WAIT_2也會用相似方式進入TIME_WAIT接管,不再單獨介紹;
/*
 * Excerpt of tcp_rcv_state_process(): the ACK-handling step for a socket in
 * TCP_FIN_WAIT1. The leading numbers are the listing's own line numbers.
 * tp, th, req and acceptable, and the discard label, are defined in parts of
 * the function that are not shown in this excerpt.
 */
1   int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb)
2   {
3       /* step 5: check the ACK field */
4       acceptable = tcp_ack(sk, skb, FLAG_SLOWPATH |
5                            FLAG_UPDATE_TS_RECENT) > 0;
6
7       switch (sk->sk_state) {
8       case TCP_FIN_WAIT1: {
9           int tmo;
10
11          /* If we enter the TCP_FIN_WAIT1 state and we are a
12           * Fast Open socket and this is the first acceptable
13           * ACK we have received, this would have acknowledged
14           * our SYNACK so stop the SYNACK timer.
15           */
16          if (req) {
17              /* Return RST if ack_seq is invalid.
18               * Note that RFC793 only says to generate a
19               * DUPACK for it but for TCP Fast Open it seems
20               * better to treat this case like TCP_SYN_RECV
21               * above.
22               */
23              if (!acceptable)
24                  return 1;
25              /* We no longer need the request sock. */
26              reqsk_fastopen_remove(sk, req, false);
27              tcp_rearm_rto(sk);
28          }
29
30          /* Not everything we sent has been acknowledged yet: stay in FIN_WAIT1 */
31          if (tp->snd_una != tp->write_seq)
32              break;
33
34          /* Move to the FIN_WAIT_2 state */
35          tcp_set_state(sk, TCP_FIN_WAIT2);
36
37          /* Mark the sending direction as shut down */
38          sk->sk_shutdown |= SEND_SHUTDOWN;
39
40          /* Confirm the cached route */
41          sk_dst_confirm(sk);
42
43          /* Socket is not DEAD: the state changed, so wake up waiting processes and stop here */
44          if (!sock_flag(sk, SOCK_DEAD)) {
45              /* Wake up lingering close() */
46              sk->sk_state_change(sk);
47              break;
48          }
49
50          /* linger2 < 0: no waiting in FIN_WAIT_2 at all */
51          if (tp->linger2 < 0) {
52              /* Close the connection */
53              tcp_done(sk);
54              NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPABORTONDATA);
55              return 1;
56          }
57
58          /* Segment is non-empty and, FIN aside, ends beyond rcv_nxt (data, fin) */
59          if (TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq &&
60              after(TCP_SKB_CB(skb)->end_seq - th->fin, tp->rcv_nxt)) {
61              /* Receive out of order FIN after close() */
62              if (tp->syn_fastopen && th->fin)
63                  tcp_fastopen_active_disable(sk);
64              /* Close the connection */
65              tcp_done(sk);
66              NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPABORTONDATA);
67              return 1;
68          }
69
70          /* Fetch how long to wait in FIN_WAIT_2 */
71          tmo = tcp_fin_time(sk);
72
73          /* Longer than TCP_TIMEWAIT_LEN: arm the FIN_WAIT_2 timer for the excess part */
74          if (tmo > TCP_TIMEWAIT_LEN) {
75              inet_csk_reset_keepalive_timer(sk, tmo - TCP_TIMEWAIT_LEN);
76          }
77          /* Segment carries FIN, or the socket is owned by a user process: arm the FIN_WAIT_2 timer */
78          else if (th->fin || sock_owned_by_user(sk)) {
79              /* Bad case. We could lose such FIN otherwise.
80               * It is not a big problem, but it looks confusing
81               * and not so rare event. We still can lose it now,
82               * if it spins in bh_lock_sock(), but it is really
83               * marginal case.
84               */
85              inet_csk_reset_keepalive_timer(sk, tmo);
86          }
87          /* Normal case, wait time does not exceed TCP_TIMEWAIT_LEN: hand over to timewait with substate FIN_WAIT2 */
88          else {
89              tcp_time_wait(sk, TCP_FIN_WAIT2, tmo);
90              goto discard;
91          }
92          break;
93      }
94      }
狀態轉換觸發
FIN_WAIT_2狀態的走向有以下幾個流程觸發點,(1)FIN_WAIT_2定時器未超時時間內,收到數據段觸發; (2)FIN_WAIT_2定時器超時觸發; (3)TIME_WAIT定時器未超時時間內,收到數據段觸發; (4)TIME_WAIT定時器超時觸發;
(1) FIN_WAIT_2定時器未超時時間內,收到數據段觸發,如果設置FIN標記,則直接進入TIME_WAIT狀態;
在函數tcp_rcv_state_process處理數據段的過程中,FIN_WAIT_2狀態最終會調用tcp_data_queue來處理數據段;
/*
 * Excerpt of tcp_rcv_state_process(): the segment-text step. For the states
 * listed below the segment is either rejected with a reset or handed to
 * tcp_data_queue(). The leading numbers are the listing's own line numbers;
 * tp, th and queued are defined outside this excerpt.
 */
1   int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb)
2   {
3       /* step 7: process the segment text */
4       switch (sk->sk_state) {
5       case TCP_CLOSE_WAIT:
6       case TCP_CLOSING:
7       case TCP_LAST_ACK:
8           if (!before(TCP_SKB_CB(skb)->seq, tp->rcv_nxt))
9               break;
            /* No break here: when seq is before rcv_nxt, control falls through to the FIN_WAIT cases */
10      case TCP_FIN_WAIT1:
11      case TCP_FIN_WAIT2:
12          /* RFC 793 says to queue data in these states,
13           * RFC 1122 says we MUST send a reset.
14           * BSD 4.4 also does reset.
15           */
16          if (sk->sk_shutdown & RCV_SHUTDOWN) {
                /* Receive side already shut down and the segment is non-empty, ending beyond rcv_nxt: reset */
17              if (TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq &&
18                  after(TCP_SKB_CB(skb)->end_seq - th->fin, tp->rcv_nxt)) {
19                  NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPABORTONDATA);
20                  tcp_reset(sk);
21                  return 1;
22              }
23          }
24          /* Fall through */
25      case TCP_ESTABLISHED:
            /* Hand the segment to tcp_data_queue() and record that it was queued */
26          tcp_data_queue(sk, skb);
27          queued = 1;
28          break;
29      }
30  }
tcp_data_queue在處理數據段的時候,有對FIN標記的檢查,如果有該標記,則進入tcp_fin函數;
/*
 * Excerpt of tcp_data_queue(): only the FIN check is shown, the rest of the
 * body is elided. The leading numbers are the listing's own line numbers.
 */
1   static void tcp_data_queue(struct sock *sk, struct sk_buff *skb)
2   {
3       /* ... */
        /* The segment has the FIN flag set: run FIN processing */
4       if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN)
5           tcp_fin(sk);
6       /* ... */
7   }
tcp_fin函數中,如果此時連接狀態為FIN_WAIT_2,則發送ACK,並且直接進入TIME_WAIT狀態;在tcp_time_wait函數處理中,會刪除當前控制塊,所以FIN_WAIT_2定時器也就不存在了;
/*
 * Excerpt of tcp_fin(): only the TCP_FIN_WAIT2 case of the state switch is
 * shown. The leading numbers are the listing's own line numbers.
 */
1   void tcp_fin(struct sock *sk)
2   {
3       /* ... */
4       switch (sk->sk_state) {
5       case TCP_FIN_WAIT2:
6           /* Received a FIN -- send ACK and enter TIME_WAIT. */
7           tcp_send_ack(sk);
            /* NOTE(review): the meaning of the 0 timeout argument is not visible here; confirm against tcp_time_wait() */
8           tcp_time_wait(sk, TCP_TIME_WAIT, 0);
9           break;
10      }
11      /* ... */
12  }
(2)FIN_WAIT_2定時器超時觸發,如果linger2<0,或者等待時間<=TIMEWAIT_LEN,直接發送reset關閉連接;如果linger2>=0,且等待時間>TIMEWAIT_LEN,則進入TIME_WAIT接管;
/*
 * Excerpt of tcp_keepalive_timer(): the branch taken when the timer fires for
 * a socket in FIN_WAIT2 that is flagged SOCK_DEAD. The leading numbers are the
 * listing's own line numbers; sk, tp and the out/death labels are defined
 * outside this excerpt.
 */
1   static void tcp_keepalive_timer (unsigned long data)
2   {
3       /*...*/
4       /* In FIN_WAIT2 with the socket flagged DEAD: the timer is acting as the FIN_WAIT_2 timer */
5       if (sk->sk_state == TCP_FIN_WAIT2 && sock_flag(sk, SOCK_DEAD)) {
6
7           /* A non-negative linger2 means waiting in FIN_WAIT_2 is allowed */
8           if (tp->linger2 >= 0) {
9               /* How much the FIN_WAIT_2 wait time exceeds TCP_TIMEWAIT_LEN */
10              const int tmo = tcp_fin_time(sk) - TCP_TIMEWAIT_LEN;
11
12              /* Positive difference (wait time > TCP_TIMEWAIT_LEN): hand over to timewait with substate FIN_WAIT2 */
13              if (tmo > 0) {
14                  tcp_time_wait(sk, TCP_FIN_WAIT2, tmo);
15                  goto out;
16              }
17          }
18
19          /* Otherwise send an RST and go to the death label */
20          tcp_send_active_reset(sk, GFP_ATOMIC);
21          goto death;
22      }
23      /*...*/
24  }
(3)TIME_WAIT定時器未超時時間內,收到數據段觸發,若收到合法的FIN,則進入真正的TIME_WAIT狀態;
tcp_v4_rcv收入數據段過程中,會對TIME_WAIT狀態做特別處理,而對於TIME_WAIT子狀態的處理在函數tcp_timewait_state_process中;
/*
 * Excerpt of tcp_v4_rcv(): the do_time_wait path, which dispatches on the
 * result of tcp_timewait_state_process(). The leading numbers are the
 * listing's own line numbers; sk, th, iph, refcounted and the process /
 * discard_it / csum_error labels are defined outside this excerpt.
 */
1   int tcp_v4_rcv(struct sk_buff *skb)
2   {
3       /*...*/
4   do_time_wait:
5       if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) {
6           inet_twsk_put(inet_twsk(sk));
7           goto discard_it;
8       }
9
10      /* Checksum error */
11      if (tcp_checksum_complete(skb)) {
12          inet_twsk_put(inet_twsk(sk));
13          goto csum_error;
14      }
15
16      /* Process the incoming packet against the timewait socket */
17      switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) {
18
19      /* A SYN was received */
20      case TCP_TW_SYN: {
21          /* Look up a listening socket */
22          struct sock *sk2 = inet_lookup_listener(dev_net(skb->dev),
23                                                  &tcp_hashinfo, skb,
24                                                  __tcp_hdrlen(th),
25                                                  iph->saddr, th->source,
26                                                  iph->daddr, th->dest,
27                                                  inet_iif(skb));
28
29          /* Listener found */
30          if (sk2) {
31              /* Remove the timewait socket */
32              inet_twsk_deschedule_put(inet_twsk(sk));
33              /* Continue with the listening socket */
34              sk = sk2;
35              refcounted = false;
36
37              /* Handle the segment as a new connection request */
38              goto process;
39          }
40          /* Fall through to ACK */
41      }
42
43      /* Send an ACK */
44      case TCP_TW_ACK:
45          tcp_v4_timewait_ack(sk, skb);
46          break;
47      /* Send an RST */
48      case TCP_TW_RST:
49          tcp_v4_send_reset(sk, skb);
50          /* Remove the timewait socket */
51          inet_twsk_deschedule_put(inet_twsk(sk));
52          goto discard_it;
53      /* Success: nothing further to do in the switch */
54      case TCP_TW_SUCCESS:;
55      }
56      goto discard_it;
57  }
tcp_timewait_state_process函數處理流程中,如果TIME_WAIT的子狀態為FIN_WAIT_2,並且收到了合法的FIN之后,會進入真正的TIME_WAIT狀態,即子狀態也為TIME_WAIT,並且設置TIME_WAIT定時器;
/*
 * Excerpt of tcp_timewait_state_process(): handling of a segment that arrives
 * for a timewait socket whose substate is still FIN_WAIT2. The returned
 * tcp_tw_status value is what the caller switches on. The leading numbers are
 * the listing's own line numbers; tcptw, tmp_opt, paws_reject and the kill
 * label are defined outside this excerpt.
 */
1   enum tcp_tw_status
2   tcp_timewait_state_process(struct inet_timewait_sock *tw, struct sk_buff *skb,
3                              const struct tcphdr *th)
4   {
5       /*...*/
6       /* Substate is FIN_WAIT2 */
7       if (tw->tw_substate == TCP_FIN_WAIT2) {
8           /* Just repeat all the checks of tcp_rcv_state_process() */
9
10          /* Out of window, send ACK */
11          /* paws_reject is set, or the segment lies outside [tw_rcv_nxt, tw_rcv_nxt + tw_rcv_wnd] */
12          if (paws_reject ||
13              !tcp_in_window(TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq,
14                             tcptw->tw_rcv_nxt,
15                             tcptw->tw_rcv_nxt + tcptw->tw_rcv_wnd))
16              return tcp_timewait_check_oow_rate_limit(
17                  tw, skb, LINUX_MIB_TCPACKSKIPPEDFINWAIT2);
18
19          /* RST: jump to kill (per the article, this deschedules and destroys the tw block; the label is not shown) */
20          if (th->rst)
21              goto kill;
22
23          /* SYN whose sequence number is not before tw_rcv_nxt: answer with RST */
24          if (th->syn && !before(TCP_SKB_CB(skb)->seq, tcptw->tw_rcv_nxt))
25              return TCP_TW_RST;
26
27          /* Dup ACK? */
28          /* No ACK flag, or nothing beyond tw_rcv_nxt, or an empty segment: drop the reference and finish */
29          if (!th->ack ||
30              !after(TCP_SKB_CB(skb)->end_seq, tcptw->tw_rcv_nxt) ||
31              TCP_SKB_CB(skb)->end_seq == TCP_SKB_CB(skb)->seq) {
32              inet_twsk_put(tw);
33              return TCP_TW_SUCCESS;
34          }
35
36          /* New data or FIN. If new data arrive after half-duplex close,
37           * reset.
38           */
39          /* Not a FIN, or end_seq is not exactly tw_rcv_nxt + 1 (the FIN carries data) */
40          if (!th->fin ||
41              TCP_SKB_CB(skb)->end_seq != tcptw->tw_rcv_nxt + 1)
42              return TCP_TW_RST;
43
44          /* FIN arrived, enter true time-wait state. */
45          /* The substate itself now becomes TIME_WAIT */
46          tw->tw_substate = TCP_TIME_WAIT;
47
48          /* Advance the next expected receive sequence number */
49          tcptw->tw_rcv_nxt = TCP_SKB_CB(skb)->end_seq;
50
51          /* Record the timestamp when the segment carried one */
52          if (tmp_opt.saw_tstamp) {
53              tcptw->tw_ts_recent_stamp = get_seconds();
54              tcptw->tw_ts_recent = tmp_opt.rcv_tsval;
55          }
56
57          /* Re-arm the timewait timer with TCP_TIMEWAIT_LEN */
58          inet_twsk_reschedule(tw, TCP_TIMEWAIT_LEN);
59
60          /* Tell the caller to send an ACK */
61          return TCP_TW_ACK;
62      }
63      /*...*/
64  }
(4)TIME_WAIT定時器超時觸發:定時器超時后,將tw控制塊從ehash和bhash中刪除;此后再收到該連接的數據段時會發送reset;
定時器超時會進入到tw_timer_handler處理函數,該函數在統計信息之后,調用inet_twsk_kill;
/*
 * Timer callback for a timewait socket: bumps one of two MIB counters
 * depending on tw->tw_kill, then calls inet_twsk_kill(). The leading numbers
 * are the listing's own line numbers.
 *
 * data: the struct inet_timewait_sock pointer, passed as an unsigned long.
 */
1   static void tw_timer_handler(unsigned long data)
2   {
3       struct inet_timewait_sock *tw = (struct inet_timewait_sock *)data;
4
        /* Count the expiry as either "killed" or ordinary "timewaited" */
5       if (tw->tw_kill)
6           __NET_INC_STATS(twsk_net(tw), LINUX_MIB_TIMEWAITKILLED);
7       else
8           __NET_INC_STATS(twsk_net(tw), LINUX_MIB_TIMEWAITED);
9       inet_twsk_kill(tw);
10  }
inet_twsk_kill從ehash和bhash中把tw控制塊刪除,並且釋放之;
/*
 * Unlinks a timewait socket from both hash tables, decrements the timewait
 * count and drops a reference. Each table is modified under its own spinlock.
 * The leading numbers are the listing's own line numbers.
 */
1   static void inet_twsk_kill(struct inet_timewait_sock *tw)
2   {
3       struct inet_hashinfo *hashinfo = tw->tw_dr->hashinfo;
4       spinlock_t *lock = inet_ehash_lockp(hashinfo, tw->tw_hash);
5       struct inet_bind_hashbucket *bhead;
6
        /* Remove the node while holding the ehash lock obtained for tw->tw_hash */
7       spin_lock(lock);
8       sk_nulls_del_node_init_rcu((struct sock *)tw);
9       spin_unlock(lock);
10
11      /* Disassociate with bind bucket. */
12      bhead = &hashinfo->bhash[inet_bhashfn(twsk_net(tw), tw->tw_num,
13                                            hashinfo->bhash_size)];
14
        /* Unhash from the bind table under that bucket's lock */
15      spin_lock(&bhead->lock);
16      inet_twsk_bind_unhash(tw, hashinfo);
17      spin_unlock(&bhead->lock);
18
        /* One timewait socket fewer, then release a reference on tw */
19      atomic_dec(&tw->tw_dr->tw_count);
20      inet_twsk_put(tw);
21  }