深入理解TCP協議及其源代碼——TCP三次握手中客戶端TCP層SYN包的發送


1、TCP基本概念

傳輸控制協議TCP是一種面向連接的、可靠的、基於字節流的運輸層通信協議。TCP層是位於IP層之上,應用層之下的傳輸層。

2、TCP連接時三次握手示意

 

 

3、TCP協議棧從上到下提供的接口

 

 

 

創建socket

創建TCP socket調用接口

 

 在創建socket套接字描述符時,sys_socket內核函數會根據指定的協議(例如socket(PF_INET, SOCK_DGRAM, IPPROTO_UDP))掛載對應的協議處理函數

250 static int inet_create(struct net *net, struct socket *sock, int protocol,int kern)
251{
...
262     /* Look for the requested type/protocol pair. */
263     lookup_protocol:
264     err = -ESOCKTNOSUPPORT;
265     rcu_read_lock();

           // TCP套接字、UDP套接字、原始套接字的inet_protosw實例都在inetsw_array數組中定義,
           //這些實例會調用inet_register_protosw()注冊到inetsw中
          //根據protocol查找要創建的套接字對應的四層傳輸協議。
266     list_for_each_entry_rcu(answer, &inetsw[sock->type], list) {
268           ...
283     }
284
           //如果沒有找到,則調用request_module()來嘗試加載協議所屬的模塊,正常情況下不會發生。
285     if (unlikely(err)) {
286             if (try_loading_module < 2) {
287                     rcu_read_unlock();
...
}

  三次握手

 結構體變量struct proto tcp_prot指定了TCP協議棧的訪問接口函數

 

 首先客戶端發送SYN報文

調用tcp_v4_connect函數建立與服務器聯系並發送SYN段:

tcp_v4_connect函數

 

 

140/* This will initiate an outgoing connection. */
141int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
142{
...
171    rt = ip_route_connect(fl4, nexthop, inet->inet_saddr,
172                          RT_CONN_FLAGS(sk), sk->sk_bound_dev_if,
173                          IPPROTO_TCP,
174                          orig_sport, orig_dport, sk);
...
214
215    /* Socket identity is still unknown (sport may be zero).
216     * However we set state to SYN-SENT and not releasing socket
217     * lock select source port, enter ourselves into the hash tables and
218     * complete initialization after this.
219     */
220   tcp_set_state(sk, TCP_SYN_SENT);
...
227    rt = ip_route_newports(fl4, rt, orig_sport, orig_dport,
228                           inet->inet_sport, inet->inet_dport, sk);
...
246    err = tcp_connect(sk);
...
}
265EXPORT_SYMBOL(tcp_v4_connect);

此函數前面部分是確定socket的源端口,目的ip及端口。目的IP和目的端口是由connect系統調用的入參指定。tcp_connect函數用於構建並發送一個SYN請求。

tcp_connect函數

  • 構造一個攜帶SYN標志位的TCP頭,tcp_init_nondata_skb函數實現
  • 發送帶有SYN的TCP報文,tcp_transmit_skb函數實現
  • 設置計時器超時重發,inet_csk_reset_xmit_timer函數實現

 

 

3090/* Build a SYN and send it off. */
3091int tcp_connect(struct sock *sk)
3092{
...
3108       /* Reserve space for headers. */
3109       skb_reserve(buff, MAX_TCP_HEADER);
3110
3111       tcp_init_nondata_skb(buff, tp->write_seq++, TCPHDR_SYN);
3112       tp->retrans_stamp = tcp_time_stamp;
3113       tcp_connect_queue_skb(sk, buff);
3114       tcp_ecn_send_syn(sk, buff);
3115
3116       /* Send off SYN; include data in Fast Open. */
3117       err = tp->fastopen_req ? tcp_send_syn_data(sk, buff) :
3118             tcp_transmit_skb(sk, buff, 1, sk->sk_allocation);
...
3129       /* Timer for repeating the SYN until an answer. */
3130      inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
3131                                 inet_csk(sk)->icsk_rto, TCP_RTO_MAX);
...
}
3134EXPORT_SYMBOL(tcp_connect);

tcp_transmit_skb函數

__tcp_transmit_skb函數的主要任務是向ip層發送數據包,其中包括

初始化TCP協議頭等數據結構

根據clone_it判斷是否需要克隆Socket Buffer:因為Socket Buffer可能正被其他進程使用,這時就要克隆一份

構建TCP協議選項

擁塞控制,確定網絡上同時有多少數據包在途最為合適

構建TCP協議頭主要的數據域:源端口、目的端口、數據段初始序列號,計算窗口大小;如果是SYN請求包,就不調用tcp_select_window()計算窗口,而是直接填入未經縮放的tp->rcv_wnd(上限65535)

發送數據包到IP層。注意狀態機的切換發生在發送之前:tcp_v4_connect在調用tcp_connect發送SYN包之前,已通過tcp_set_state(sk, TCP_SYN_SENT)把狀態切換為SYN_SENT

// net/ipv4/tcp_output.c
/*
 * Convenience wrapper around __tcp_transmit_skb(): forwards every argument
 * unchanged and supplies the socket's current rcv_nxt as the last argument.
 * Returns whatever __tcp_transmit_skb() returns.
 */
static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
                gfp_t gfp_mask)
{
    const u32 rcv_nxt = tcp_sk(sk)->rcv_nxt;

    return __tcp_transmit_skb(sk, skb, clone_it, gfp_mask, rcv_nxt);
}

tcp_transmit_skb是對__tcp_transmit_skb的封裝,繼續調用,進入__tcp_transmit_skb發送SYN報文

__tcp_transmit_skb函數

// net/ipv4/tcp_output.c
/*
 * Build the TCP header in front of @skb's payload and hand the segment to
 * the address-family transmit hook (icsk->icsk_af_ops->queue_xmit).
 *
 * @sk:       socket the segment is sent on
 * @skb:      segment to send; must be non-NULL with a non-zero
 *            tcp_skb_pcount() (enforced by BUG_ON)
 * @clone_it: when non-zero, a clone of @skb (or a pskb_copy() if @skb is
 *            already cloned) is transmitted instead of @skb itself
 * @gfp_mask: allocation flags used for the clone/copy
 * @rcv_nxt:  value written (in network byte order) into the ack_seq field
 *
 * Returns 0 on success, -ENOBUFS if the clone/copy could not be allocated,
 * otherwise the queue_xmit() result (positive results are first mapped
 * through net_xmit_eval()).
 */
static int __tcp_transmit_skb(struct sock *sk, struct sk_buff *skb,
                  int clone_it, gfp_t gfp_mask, u32 rcv_nxt)
{
    const struct inet_connection_sock *icsk = inet_csk(sk);
    struct inet_sock *inet;
    struct tcp_sock *tp;
    struct tcp_skb_cb *tcb;
    struct tcp_out_options opts;
    unsigned int tcp_options_size, tcp_header_size;
    struct sk_buff *oskb = NULL;
    struct tcp_md5sig_key *md5;
    struct tcphdr *th;
    u64 prior_wstamp;
    int err;

    BUG_ON(!skb || !tcp_skb_pcount(skb));
    tp = tcp_sk(sk);

    if (clone_it) {
        /* Clone the socket buffer: the clone is what gets sent, the
         * original is remembered in oskb for the bookkeeping done
         * after a successful transmit.
         */
        TCP_SKB_CB(skb)->tx.in_flight = TCP_SKB_CB(skb)->end_seq
            - tp->snd_una;
        oskb = skb;

        tcp_skb_tsorted_save(oskb) {
            if (unlikely(skb_cloned(oskb)))
                skb = pskb_copy(oskb, gfp_mask);
            else
                skb = skb_clone(oskb, gfp_mask);
        } tcp_skb_tsorted_restore(oskb);

        if (unlikely(!skb))
            return -ENOBUFS;
    }

    /* Save the previous write stamp (passed to
     * tcp_update_skb_after_send() below) before advancing it.
     */
    prior_wstamp = tp->tcp_wstamp_ns;
    tp->tcp_wstamp_ns = max(tp->tcp_wstamp_ns, tp->tcp_clock_cache);

    skb->skb_mstamp_ns = tp->tcp_wstamp_ns;

    inet = inet_sk(sk);
    tcb = TCP_SKB_CB(skb);
    memset(&opts, 0, sizeof(opts));

    /* SYN segments get the SYN option set; everything else gets the
     * established-state options. Both also report the MD5 key via md5.
     */
    if (unlikely(tcb->tcp_flags & TCPHDR_SYN))
        tcp_options_size = tcp_syn_options(sk, skb, &opts, &md5);
    else
        tcp_options_size = tcp_established_options(sk, skb, &opts,
                               &md5);
    tcp_header_size = tcp_options_size + sizeof(struct tcphdr);

    /* if no packet is in qdisc/device queue, then allow XPS to select
     * another queue. We can be called from tcp_tsq_handler()
     * which holds one reference to sk.
     *
     * TODO: Ideally, in-flight pure ACK packets should not matter here.
     * One way to get this would be to set skb->truesize = 2 on them.
     */
    skb->ooo_okay = sk_wmem_alloc_get(sk) < SKB_TRUESIZE(1);

    /* If we had to use memory reserve to allocate this skb,
     * this might cause drops if packet is looped back :
     * Other socket might not have SOCK_MEMALLOC.
     * Packets not looped back do not care about pfmemalloc.
     */
    skb->pfmemalloc = 0;

    /* Make room for the header (base header + options) in front of the
     * payload and mark that position as the transport header.
     */
    skb_push(skb, tcp_header_size);
    skb_reset_transport_header(skb);

    /* Re-own the skb: attach it to sk, pick the destructor depending on
     * whether it is a pure ACK, and account its truesize in sk_wmem_alloc.
     */
    skb_orphan(skb);
    skb->sk = sk;
    skb->destructor = skb_is_tcp_pure_ack(skb) ? __sock_wfree : tcp_wfree;
    skb_set_hash_from_sk(skb, sk);
    refcount_add(skb->truesize, &sk->sk_wmem_alloc);

    skb_set_dst_pending_confirm(skb, sk->sk_dst_pending_confirm);

    /* Build TCP header and checksum it. */
    th = (struct tcphdr *)skb->data;
    th->source      = inet->inet_sport;
    th->dest        = inet->inet_dport;
    th->seq         = htonl(tcb->seq);
    th->ack_seq     = htonl(rcv_nxt);
    /* 16-bit word at byte offset 12 of the header: header length in
     * 32-bit words in the top 4 bits, tcb->tcp_flags OR-ed in below.
     */
    *(((__be16 *)th) + 6)   = htons(((tcp_header_size >> 2) << 12) |
                    tcb->tcp_flags);

    th->check       = 0;
    th->urg_ptr     = 0;

    /* The urg_mode check is necessary during a below snd_una win probe */
    if (unlikely(tcp_urg_mode(tp) && before(tcb->seq, tp->snd_up))) {
        if (before(tp->snd_up, tcb->seq + 0x10000)) {
            th->urg_ptr = htons(tp->snd_up - tcb->seq);
            th->urg = 1;
        } else if (after(tcb->seq + 0xFFFF, tp->snd_nxt)) {
            th->urg_ptr = htons(0xFFFF);
            th->urg = 1;
        }
    }

    /* Options are written immediately after the fixed header. */
    tcp_options_write((__be32 *)(th + 1), tp, &opts);
    skb_shinfo(skb)->gso_type = sk->sk_gso_type;
    if (likely(!(tcb->tcp_flags & TCPHDR_SYN))) {
        th->window      = htons(tcp_select_window(sk));
        tcp_ecn_send(sk, skb, th, tcp_header_size);
    } else {
        /* RFC1323: The window in SYN & SYN/ACK segments
         * is never scaled.
         */
        th->window  = htons(min(tp->rcv_wnd, 65535U));
    }
#ifdef CONFIG_TCP_MD5SIG
    /* Calculate the MD5 hash, as we have all we need now */
    if (md5) {
        sk_nocaps_add(sk, NETIF_F_GSO_MASK);
        tp->af_specific->calc_md5_hash(opts.hash_location,
                           md5, sk, skb);
    }
#endif

    icsk->icsk_af_ops->send_check(sk, skb);

    if (likely(tcb->tcp_flags & TCPHDR_ACK))
        tcp_event_ack_sent(sk, tcp_skb_pcount(skb), rcv_nxt);

    /* skb->len larger than the header means the segment carries payload. */
    if (skb->len != tcp_header_size) {
        tcp_event_data_sent(tp, sk);
        tp->data_segs_out += tcp_skb_pcount(skb);
        tp->bytes_sent += skb->len - tcp_header_size;
    }

    if (after(tcb->end_seq, tp->snd_nxt) || tcb->seq == tcb->end_seq)
        TCP_ADD_STATS(sock_net(sk), TCP_MIB_OUTSEGS,
                  tcp_skb_pcount(skb));

    tp->segs_out += tcp_skb_pcount(skb);
    /* OK, its time to fill skb_shinfo(skb)->gso_{segs|size} */
    skb_shinfo(skb)->gso_segs = tcp_skb_pcount(skb);
    skb_shinfo(skb)->gso_size = tcp_skb_mss(skb);

    /* Leave earliest departure time in skb->tstamp (skb->skb_mstamp_ns) */

    /* Cleanup our debris for IP stacks */
    memset(skb->cb, 0, max(sizeof(struct inet_skb_parm),
                   sizeof(struct inet6_skb_parm)));

    err = icsk->icsk_af_ops->queue_xmit(sk, skb, &inet->cork.fl);
    /* A positive result from queue_xmit() makes the socket enter CWR;
     * net_xmit_eval() then maps it to the value returned to the caller.
     */
    if (unlikely(err > 0)) {
        tcp_enter_cwr(sk);
        err = net_xmit_eval(err);
    }
    /* On success with a cloned skb, update the bookkeeping on the
     * original buffer rather than on the clone that was sent.
     */
    if (!err && oskb) {
        tcp_update_skb_after_send(sk, oskb, prior_wstamp);
        tcp_rate_skb_sent(sk, oskb);
    }
    return err;
}

至此,客戶端TCP層完成了SYN包的發送,報文經過下層傳輸到網卡。之後服務端接收客戶端發來的TCP報文,並回送SYN+ACK。

 

 

 

 


免責聲明!

本站轉載的文章為個人學習借鑒使用,本站對版權不負任何法律責任。如果侵犯了您的隱私權益,請聯系本站郵箱yoyou2525@163.com刪除。



 
粵ICP備18138465號   © 2018-2025 CODEPRJ.COM