深入理解TCP協議的三次握手及其源代碼


三次握手

相信大部分人都知道TCP三次握手的機制是什么,流程及客戶端和服務器端狀態如下圖:


關於客戶端、服務器狀態解析的可以參考之前寫的博文Linux下netstat命令詳解

TCP的三次握手從用戶程序的角度看就是客戶端connect和服務端accept建立起連接時背后完成的工作,在內核socket接口層這兩個socket API函數對應着sys_connect和sys_accept函數,進一步對應着sk->sk_prot->connect和sk->sk_prot->accept兩個函數指針(即struct proto中的.connect和.accept成員)。

進行源代碼分析,在net/ipv4/tcp_ipv4.c文件下的結構體變量struct proto tcp_prot查看了TCP協議棧的訪問接口函數,可以看到實際的調用關系,在TCP協議中這兩個函數指針對應着tcp_v4_connect函數和inet_csk_accept函數。

/*
 * tcp_prot: TCP's struct proto operations table (net/ipv4/tcp_ipv4.c).
 * This is where the generic socket layer's function pointers are bound
 * to the TCP implementation; in particular .connect and .accept are the
 * two entry points behind connect(2)/accept(2) for TCP sockets.
 */
struct proto tcp_prot = {
   .name			= "TCP",
   .owner			= THIS_MODULE,
   .close			= tcp_close,
   .pre_connect		= tcp_v4_pre_connect,
   .connect		= tcp_v4_connect,	/* active open: sends the SYN */
   .disconnect		= tcp_disconnect,
   .accept			= inet_csk_accept,	/* dequeues an established connection */
   .ioctl			= tcp_ioctl,
   .init			= tcp_v4_init_sock,
   .destroy		= tcp_v4_destroy_sock,
   .shutdown		= tcp_shutdown,
   .setsockopt		= tcp_setsockopt,
   .getsockopt		= tcp_getsockopt,
   .keepalive		= tcp_set_keepalive,
   .recvmsg		= tcp_recvmsg,
   .sendmsg		= tcp_sendmsg,
   .sendpage		= tcp_sendpage,
   .backlog_rcv		= tcp_v4_do_rcv,
   .release_cb		= tcp_release_cb,
   .hash			= inet_hash,
   .unhash			= inet_unhash,
   .get_port		= inet_csk_get_port,
   .enter_memory_pressure	= tcp_enter_memory_pressure,
   .leave_memory_pressure	= tcp_leave_memory_pressure,
   .stream_memory_free	= tcp_stream_memory_free,
   .sockets_allocated	= &tcp_sockets_allocated,
   .orphan_count		= &tcp_orphan_count,
   .memory_allocated	= &tcp_memory_allocated,
   .memory_pressure	= &tcp_memory_pressure,
   .sysctl_mem		= sysctl_tcp_mem,
   .sysctl_wmem_offset	= offsetof(struct net, ipv4.sysctl_tcp_wmem),
   .sysctl_rmem_offset	= offsetof(struct net, ipv4.sysctl_tcp_rmem),
   .max_header		= MAX_TCP_HEADER,
   .obj_size		= sizeof(struct tcp_sock),
   .slab_flags		= SLAB_TYPESAFE_BY_RCU,
   .twsk_prot		= &tcp_timewait_sock_ops,
   .rsk_prot		= &tcp_request_sock_ops,
   .h.hashinfo		= &tcp_hashinfo,
   .no_autobind		= true,
#ifdef CONFIG_COMPAT
   .compat_setsockopt	= compat_tcp_setsockopt,
   .compat_getsockopt	= compat_tcp_getsockopt,
#endif
   .diag_destroy		= tcp_abort,
};

通過源代碼分析三次握手過程

通過之前的gdb調試追蹤,我們知道相應的socket系統調用函數都在net/socket.c文件中,其中socket接口函數都定義在SYSCALL_DEFINE接口里,找到主要的相關SYSCALL_DEFINE定義如下:

/* socket(2): thin syscall wrapper delegating to __sys_socket(). */
SYSCALL_DEFINE3(socket, int, family, int, type, int, protocol)
{
    return __sys_socket(family, type, protocol);
}

/* bind(2): thin syscall wrapper delegating to __sys_bind(). */
SYSCALL_DEFINE3(bind, int, fd, struct sockaddr __user *, umyaddr, int, addrlen)
{
    return __sys_bind(fd, umyaddr, addrlen);
}

/* listen(2): thin syscall wrapper delegating to __sys_listen(). */
SYSCALL_DEFINE2(listen, int, fd, int, backlog)
{
    return __sys_listen(fd, backlog);
}


/* accept(2): implemented as accept4(2) with flags == 0. */
SYSCALL_DEFINE3(accept, int, fd, struct sockaddr __user *, upeer_sockaddr,
        int __user *, upeer_addrlen)
{
    return __sys_accept4(fd, upeer_sockaddr, upeer_addrlen, 0);
}

/* connect(2): thin syscall wrapper delegating to __sys_connect(). */
SYSCALL_DEFINE3(connect, int, fd, struct sockaddr __user *, uservaddr,
        int, addrlen)
{
    return __sys_connect(fd, uservaddr, addrlen);
}

/* getsockname(2): thin syscall wrapper delegating to __sys_getsockname(). */
SYSCALL_DEFINE3(getsockname, int, fd, struct sockaddr __user *, usockaddr,
        int __user *, usockaddr_len)
{
    return __sys_getsockname(fd, usockaddr, usockaddr_len);
}

/* getpeername(2): thin syscall wrapper delegating to __sys_getpeername(). */
SYSCALL_DEFINE3(getpeername, int, fd, struct sockaddr __user *, usockaddr,
        int __user *, usockaddr_len)
{
    return __sys_getpeername(fd, usockaddr, usockaddr_len);
}


/* send(2): implemented as sendto(2) with a NULL destination address. */
SYSCALL_DEFINE4(send, int, fd, void __user *, buff, size_t, len,
        unsigned int, flags)
{
    return __sys_sendto(fd, buff, len, flags, NULL, 0);
}

/* recv(2): implemented as recvfrom(2) with a NULL source address. */
SYSCALL_DEFINE4(recv, int, fd, void __user *, ubuf, size_t, size,
        unsigned int, flags)
{
    return __sys_recvfrom(fd, ubuf, size, flags, NULL, NULL);
}

第一次握手

客戶端發起SYN請求,是通過調用__sys_connect發起連接,源碼如下:

/*
 * __sys_connect - kernel-side implementation of the connect(2) syscall.
 * Resolves the fd to its struct socket, copies the user-space address
 * into the kernel, runs the LSM security hook, then dispatches to the
 * address-family connect operation.
 */
int __sys_connect(int fd, struct sockaddr __user *uservaddr, int addrlen)
{
	struct socket *sock;
	struct sockaddr_storage address;
	int err, fput_needed;
        /* Look up the struct socket behind the file descriptor */
	sock = sockfd_lookup_light(fd, &err, &fput_needed);
	if (!sock)
		goto out;
	err = move_addr_to_kernel(uservaddr, addrlen, &address);
	if (err < 0)
		goto out_put;

	err =
	    security_socket_connect(sock, (struct sockaddr *)&address, addrlen);
	if (err)
		goto out_put;

        /* For stream sockets, sock->ops is inet_stream_ops -> inet_stream_connect.
         * For datagram sockets, sock->ops is inet_dgram_ops -> inet_dgram_connect. */
	err = sock->ops->connect(sock, (struct sockaddr *)&address, addrlen,
				 sock->file->f_flags);
out_put:
	fput_light(sock->file, fput_needed);
out:
	return err;
}

該函數完成:

  1. 根據文件描述符找到指定的socket對象;

  2. 將地址信息從用戶空間拷貝到內核空間;

  3. 針對TCP/UDP協議,調用流式/數據報套接字的connect函數。

TCP對應的是流式套接字,其connect函數為inet_stream_connect,源代碼:

/*
 * inet_stream_connect - connect operation for AF_INET stream (TCP) sockets.
 * Serializes on the socket lock around __inet_stream_connect().
 *
 * NOTE(review): this excerpt passes 4 arguments while the
 * __inet_stream_connect() definition quoted below takes 5 (is_sendmsg);
 * the two snippets appear to come from different kernel versions —
 * verify against a single tree.
 */
int inet_stream_connect(struct socket *sock, struct sockaddr *uaddr,
            int addr_len, int flags)
{
    int err;
 
    lock_sock(sock->sk);
    err = __inet_stream_connect(sock, uaddr, addr_len, flags);
    release_sock(sock->sk);
    return err;
}
 
 
/*
 *    Connect to a remote host. There is regrettably still a little
 *    TCP 'magic' in here. This is the protocol-independent half of
 *    connect() for stream sockets: it validates state, invokes the
 *    transport's ->connect() (tcp_v4_connect for TCP, which sends the
 *    SYN), and optionally blocks until the handshake finishes.
 */
int __inet_stream_connect(struct socket *sock, struct sockaddr *uaddr,
              int addr_len, int flags, int is_sendmsg)
{
    struct sock *sk = sock->sk;
    int err;
    long timeo;

    /*
     * uaddr can be NULL and addr_len can be 0 if:
     * sk is a TCP fastopen active socket and
     * TCP_FASTOPEN_CONNECT sockopt is set and
     * we already have a valid cookie for this socket.
     * In this case, user can call write() after connect().
     * write() will invoke tcp_sendmsg_fastopen() which calls
     * __inet_stream_connect().
     */
    /* An address was supplied by the caller */
    if (uaddr) {

        /* Sanity-check the address length */
        if (addr_len < sizeof(uaddr->sa_family))
            return -EINVAL;

        /* AF_UNSPEC is special: it means "disconnect" */
        if (uaddr->sa_family == AF_UNSPEC) {
            err = sk->sk_prot->disconnect(sk, flags);
            sock->state = err ? SS_DISCONNECTING : SS_UNCONNECTED;
            goto out;
        }
    }

    /* Dispatch on the current (BSD socket layer) state */
    switch (sock->state) {
    default:
        err = -EINVAL;
        goto out;
        /* Already connected */
    case SS_CONNECTED:
        err = -EISCONN;
        goto out;
        /* A connection attempt is already in progress */
    case SS_CONNECTING:
        /* TCP Fast Open (defer_connect): sendmsg path gets EINPROGRESS */
        if (inet_sk(sk)->defer_connect)
            err = is_sendmsg ? -EINPROGRESS : -EISCONN;
        else
            err = -EALREADY;
        /* Fall out of switch with err, set for this state */
        break;
        /* Not yet connected */
    case SS_UNCONNECTED:
        err = -EISCONN;
        /* The transport-layer state must still be CLOSED */
        if (sk->sk_state != TCP_CLOSE)
            goto out;

        /* Transport-layer connect: for TCP this is tcp_v4_connect(),
         * which builds and transmits the SYN (first handshake). */
        err = sk->sk_prot->connect(sk, uaddr, addr_len);
        if (err < 0)
            goto out;

        /* Mark the socket as connecting */
        sock->state = SS_CONNECTING;

        /* fastopen */
        if (!err && inet_sk(sk)->defer_connect)
            goto out;

        /* Just entered SS_CONNECTING state; the only
         * difference is that return value in non-blocking
         * case is EINPROGRESS, rather than EALREADY.
         */
        /* Non-blocking callers see EINPROGRESS, blocking ones EALREADY */
        err = -EINPROGRESS;
        break;
    }

    /* Blocking sockets get the send timeout here; non-blocking get 0 */
    timeo = sock_sndtimeo(sk, flags & O_NONBLOCK);

    /* SYN already sent or received: handshake still in flight */
    if ((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV)) {
        int writebias = (sk->sk_protocol == IPPROTO_TCP) &&
                tcp_sk(sk)->fastopen_req &&
                tcp_sk(sk)->fastopen_req->data ? 1 : 0;

        /* Error code is set above */
        /* Non-blocking: return immediately. Blocking: wait for the
         * handshake; if the remaining timeout reaches 0, bail out. */
        if (!timeo || !inet_wait_for_connect(sk, timeo, writebias))
            goto out;

        /* Interrupted by a signal (or timed out) while waiting */
        err = sock_intr_errno(timeo);
        if (signal_pending(current))
            goto out;
    }

    /* Connection was closed by RST, timeout, ICMP error
     * or another process disconnected us.
     */
    /* Transport state fell back to CLOSED: the connect failed */
    if (sk->sk_state == TCP_CLOSE)
        goto sock_error;

    /* sk->sk_err may be not zero now, if RECVERR was ordered by user
     * and error was received after socket entered established state.
     * Hence, it is handled normally after connect() return successfully.
     */

    /* Handshake finished: mark the BSD socket as connected */
    sock->state = SS_CONNECTED;
    err = 0;
out:
    return err;

sock_error:
    err = sock_error(sk) ? : -ECONNABORTED;
    /* Back to the unconnected state */
    sock->state = SS_UNCONNECTED;
    if (sk->sk_prot->disconnect(sk, flags))
        sock->state = SS_DISCONNECTING;
    goto out;
}

該函數完成:

  1. 檢查socket地址長度和使用的協議族。
  2. 檢查socket的狀態,必須是SS_UNCONNECTED或SS_CONNECTING。
  3. 調用tcp_v4_connect()來發送SYN包。
  4. 等待后續握手的完成。

由tcp_v4_connect()->tcp_connect()->tcp_transmit_skb()發送,並置為TCP_SYN_SENT.下面是tcp_v4_connect()和tcp_connect()的源代碼

/*
 * tcp_v4_connect - TCP/IPv4 active open (client side of the first
 * handshake). Validates the destination, finds a route, picks the
 * source address/port, moves the socket to TCP_SYN_SENT, chooses the
 * initial sequence number and finally calls tcp_connect() to build
 * and transmit the SYN. Returns 0 on success or a negative errno.
 */
int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
{
	struct sockaddr_in *usin = (struct sockaddr_in *)uaddr;
	struct inet_sock *inet = inet_sk(sk);
	struct tcp_sock *tp = tcp_sk(sk);
	__be16 orig_sport, orig_dport;
	__be32 daddr, nexthop;
	struct flowi4 *fl4;
	struct rtable *rt;
	int err;
	struct ip_options_rcu *inet_opt;
	struct inet_timewait_death_row *tcp_death_row = &sock_net(sk)->ipv4.tcp_death_row;

	if (addr_len < sizeof(struct sockaddr_in))
		return -EINVAL;

	if (usin->sin_family != AF_INET)
		return -EAFNOSUPPORT;

	/* With strict source routing the next hop is the first router
	 * in the option list, not the final destination. */
	nexthop = daddr = usin->sin_addr.s_addr;
	inet_opt = rcu_dereference_protected(inet->inet_opt,
					     lockdep_sock_is_held(sk));
	if (inet_opt && inet_opt->opt.srr) {
		if (!daddr)
			return -EINVAL;
		nexthop = inet_opt->opt.faddr;
	}

	orig_sport = inet->inet_sport;
	orig_dport = usin->sin_port;
	fl4 = &inet->cork.fl.u.ip4;
	/* Route lookup towards the (next-hop) destination */
	rt = ip_route_connect(fl4, nexthop, inet->inet_saddr,
			      RT_CONN_FLAGS(sk), sk->sk_bound_dev_if,
			      IPPROTO_TCP,
			      orig_sport, orig_dport, sk);
	if (IS_ERR(rt)) {
		err = PTR_ERR(rt);
		if (err == -ENETUNREACH)
			IP_INC_STATS(sock_net(sk), IPSTATS_MIB_OUTNOROUTES);
		return err;
	}

	/* TCP connections to multicast/broadcast addresses are invalid */
	if (rt->rt_flags & (RTCF_MULTICAST | RTCF_BROADCAST)) {
		ip_rt_put(rt);
		return -ENETUNREACH;
	}

	if (!inet_opt || !inet_opt->opt.srr)
		daddr = fl4->daddr;

	/* If no source address was bound, take the one the route chose */
	if (!inet->inet_saddr)
		inet->inet_saddr = fl4->saddr;
	sk_rcv_saddr_set(sk, inet->inet_saddr);

	if (tp->rx_opt.ts_recent_stamp && inet->inet_daddr != daddr) {
		/* Reset inherited state */
		tp->rx_opt.ts_recent	   = 0;
		tp->rx_opt.ts_recent_stamp = 0;
		if (likely(!tp->repair))
			tp->write_seq	   = 0;
	}

	inet->inet_dport = usin->sin_port;
	sk_daddr_set(sk, daddr);

	inet_csk(sk)->icsk_ext_hdr_len = 0;
	if (inet_opt)
		inet_csk(sk)->icsk_ext_hdr_len = inet_opt->opt.optlen;

	tp->rx_opt.mss_clamp = TCP_MSS_DEFAULT;

	/* Socket identity is still unknown (sport may be zero).
	 * However we set state to SYN-SENT and not releasing socket
	 * lock select source port, enter ourselves into the hash tables and
	 * complete initialization after this.
	 */
	tcp_set_state(sk, TCP_SYN_SENT);
	err = inet_hash_connect(tcp_death_row, sk);
	if (err)
		goto failure;

	sk_set_txhash(sk);

	/* Re-validate the route now that the 4-tuple is final */
	rt = ip_route_newports(fl4, rt, orig_sport, orig_dport,
			       inet->inet_sport, inet->inet_dport, sk);
	if (IS_ERR(rt)) {
		err = PTR_ERR(rt);
		rt = NULL;
		goto failure;
	}
	/* OK, now commit destination to socket.  */
	sk->sk_gso_type = SKB_GSO_TCPV4;
	sk_setup_caps(sk, &rt->dst);
	rt = NULL;

	/* Choose a secure initial sequence number from the 4-tuple */
	if (likely(!tp->repair)) {
		if (!tp->write_seq)
			tp->write_seq = secure_tcp_seq(inet->inet_saddr,
						       inet->inet_daddr,
						       inet->inet_sport,
						       usin->sin_port);
		tp->tsoffset = secure_tcp_ts_off(sock_net(sk),
						 inet->inet_saddr,
						 inet->inet_daddr);
	}

	inet->inet_id = tp->write_seq ^ jiffies;

	/* TCP Fast Open may defer the actual SYN until sendmsg() */
	if (tcp_fastopen_defer_connect(sk, &err))
		return err;
	if (err)
		goto failure;

	/* Build and transmit the SYN (first handshake) */
	err = tcp_connect(sk);

	if (err)
		goto failure;

	return 0;

failure:
	/*
	 * This unhashes the socket and releases the local port,
	 * if necessary.
	 */
	tcp_set_state(sk, TCP_CLOSE);
	ip_rt_put(rt);
	sk->sk_route_caps = 0;
	inet->inet_dport = 0;
	return err;
}
/*
 * tcp_connect - build the SYN segment and hand it to the IP layer.
 * Called from tcp_v4_connect() after routing and the initial sequence
 * number have been set up. Queues the SYN, transmits it, and arms the
 * retransmission timer.
 */
int tcp_connect(struct sock *sk)
{
    struct tcp_sock *tp = tcp_sk(sk);
    struct sk_buff *buff;
    int err;
 
    /* Initialise the connection-related fields of the control block */
    tcp_connect_init(sk);
 
    if (unlikely(tp->repair)) {
        tcp_finish_connect(sk, NULL);
        return 0;
    }
    /* Allocate and initialise an skb for the SYN segment */
    buff = sk_stream_alloc_skb(sk, 0, sk->sk_allocation, true);
    if (unlikely(!buff))
        return -ENOBUFS;
    
    /* write_seq was already seeded with a secure ISN in tcp_v4_connect() */
    tcp_init_nondata_skb(buff, tp->write_seq++, TCPHDR_SYN);
    
    tp->retrans_stamp = tcp_time_stamp;
 
    /* Queue the segment on the socket's write queue */
    tcp_connect_queue_skb(sk, buff);
 
    /* ECN negotiation for the SYN */
    tcp_ecn_send_syn(sk, buff);
 
    err = tp->fastopen_req ? tcp_send_syn_data(sk, buff) :
 
          /* Build the TCP/IP headers and transmit */
          tcp_transmit_skb(sk, buff, 1, sk->sk_allocation);
    if (err == -ECONNREFUSED)
        return err;
 
    tp->snd_nxt = tp->write_seq;
    tp->pushed_seq = tp->write_seq;
    TCP_INC_STATS(sock_net(sk), TCP_MIB_ACTIVEOPENS);
 
    /* Arm the SYN retransmission timer */
    inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
                  inet_csk(sk)->icsk_rto, TCP_RTO_MAX);
    return 0;
}

該函數完成:

  1. 初始化套接字跟連接相關的字段。
  2. 申請sk_buff空間。
  3. 將sk_buff初始化為syn報文,實質是操作tcp_skb_cb,在初始化TCP頭的時候會用到。
  4. 調用tcp_connect_queue_skb()函數將報文sk_buff添加到發送隊列sk->sk_write_queue。
  5. 調用tcp_transmit_skb()函數構造tcp頭,然后交給網絡層。
  6. 初始化重傳定時器

tcp_connect_queue_skb()函數的原理主要是移動sk_buff的data指針,然后填充TCP頭。再然后將報文交給網絡層,將報文發出。

這樣,三次握手中的第一次握手在客戶端的層面完成,報文到達服務端,由服務端處理完畢后,第一次握手完成,客戶端socket狀態變為TCP_SYN_SENT。

第二次握手

當服務端收到客戶端發送的報文之后,處理第二次握手,調用tcp_v4_rcv,代碼中重要的部分如下

/*
 * tcp_v4_rcv - entry point for every TCP segment delivered by IPv4
 * (abridged excerpt). Looks up the owning socket and dispatches the
 * segment to it.
 */
int tcp_v4_rcv(struct sk_buff *skb)
{
    ...
    /* Drop anything that is not addressed to this host */
    if (skb->pkt_type != PACKET_HOST)
        goto discard_it;
 
    /* Count it even if it's bad */
    __TCP_INC_STATS(net, TCP_MIB_INSEGS);
 
 
    /* Make sure the packet is at least as long as a TCP header */
    if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
        goto discard_it;
 
    ...
    /* Look up the sock object by source port, destination port and
     * receiving interface; a listening socket is found in the
     * listen hash table. */
    sk = __inet_lookup_skb(&tcp_hashinfo, skb, __tcp_hdrlen(th), th->source,
                   th->dest, &refcounted);
    
    /* No socket wants this segment: drop it */
    if (!sk)
        goto no_tcp_socket;

  ...

    /* Listening socket: this is where an incoming SYN ends up */
    if (sk->sk_state == TCP_LISTEN) {
        ret = tcp_v4_do_rcv(sk, skb);
        goto put_and_return;
    }
 
    sk_incoming_cpu_update(sk);
 
    bh_lock_sock_nested(sk);
    tcp_segs_in(tcp_sk(sk), skb);
    ret = 0;
    
    /* Is the sock currently locked by a user-space process?
     * If sock_owned_by_user() is true, the sock state must not be
     * changed here, so the segment goes to the backlog instead. */
    if (!sock_owned_by_user(sk)) {
        if (!tcp_prequeue(sk, skb))
            ret = tcp_v4_do_rcv(sk, skb);
    } else if (tcp_add_backlog(sk, skb)) {
        goto discard_and_relse;
    }
    bh_unlock_sock(sk);
 
put_and_return:
    if (refcounted)
        sock_put(sk);
 
    return ret;
 
  ...
}

該函數完成

  1. 根據tcp頭部信息查到報文的socket對象,
  2. 檢查socket狀態從而做出不同處理(這里是狀態是TCP_LISTEN,直接調用函數tcp_v4_do_rcv,接着是調用tcp_rcv_state_process)

tcp_rcv_state_process源代碼的重要部分如下

/*
 * tcp_rcv_state_process - TCP state machine for sockets that are not
 * in ESTABLISHED state (abridged excerpt). Nearly every handshake
 * transition runs through here.
 */
int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb)
{
    ...switch (sk->sk_state) {
        
    /* Closed socket: discard the segment */
    case TCP_CLOSE:
        goto discard;
 
    /* Server side, first handshake: an incoming SYN on a listener */
    case TCP_LISTEN:
        if (th->ack)
            return 1;
        if (th->rst)
            goto discard;
        if (th->syn) {
            if (th->fin)
                goto discard;
            /* conn_request is tcp_v4_conn_request -> tcp_conn_request */
            if (icsk->icsk_af_ops->conn_request(sk, skb) < 0)
                return 1;
            consume_skb(skb);
            return 0;
        }
        goto discard;
 
    /* Client side, second handshake: SYN+ACK arriving in SYN_SENT */
    case TCP_SYN_SENT:
        tp->rx_opt.saw_tstamp = 0;
 
        /* Process the segment received while in SYN_SENT */
        queued = tcp_rcv_synsent_state_process(sk, skb, th);
        if (queued >= 0)
            return queued;
 
        /* After the second handshake, handle any urgent (OOB) data */
        tcp_urg(sk, skb, th);
        __kfree_skb(skb);
 
        /* Check whether there is data waiting to be sent */
        tcp_data_snd_check(sk);
        return 0;
    }
 
   ...switch (sk->sk_state) {
    case TCP_SYN_RECV:
        if (!acceptable)
            return 1;
 
        if (!tp->srtt_us)
            tcp_synack_rtt_meas(sk, req);
 
        if (req) {
            inet_csk(sk)->icsk_retransmits = 0;
            reqsk_fastopen_remove(sk, req, false);
        } else {
            /* Rebuild the route and init the congestion-control module */
            icsk->icsk_af_ops->rebuild_header(sk);
            tcp_init_congestion_control(sk);
 
            tcp_mtup_init(sk);
            tp->copied_seq = tp->rcv_nxt;
            tcp_init_buffer_space(sk);
        }
        smp_mb();
        /* Normal third handshake: mark the connection ESTABLISHED */
        tcp_set_state(sk, TCP_ESTABLISHED);
        sk->sk_state_change(sk);
 
        /* Connection is up: wake up any waiting threads */
        if (sk->sk_socket)
            sk_wake_async(sk, SOCK_WAKE_IO, POLL_OUT);
 
        ...
 
        /* Record the time of the most recent transmission */
        tp->lsndtime = tcp_time_stamp;
 
        tcp_initialize_rcv_mss(sk);
 
        /* Compute the TCP header-prediction flags */
        tcp_fast_path_on(tp);
        break;
 
    ...
}

這是TCP建立連接的關鍵,幾乎所有狀態的套接字,在收到報文時都會在這里完成處理。對於服務端來說,收到第一次握手報文時的狀態為TCP_LISTEN,接下來將由tcp_v4_conn_request函數處理,該函數實際調用的是tcp_conn_request,如下:

/*
 * tcp_conn_request - handle an incoming SYN on a listening socket
 * (abridged excerpt). Allocates a request_sock representing the nascent
 * connection and leads to the SYN+ACK being sent (second handshake).
 */
int tcp_conn_request(struct request_sock_ops *rsk_ops,
             const struct tcp_request_sock_ops *af_ops,
             struct sock *sk, struct sk_buff *skb)
{
    ...
 
    /* SYN-flood protection: fall back to syncookies when the SYN queue
     * is full (or syncookies are forced on via sysctl == 2). */

    if ((net->ipv4.sysctl_tcp_syncookies == 2 ||
         inet_csk_reqsk_queue_is_full(sk)) && !isn) {
        want_cookie = tcp_syn_flood_action(sk, skb, rsk_ops->slab_name);
        if (!want_cookie)
            goto drop;
    }
 
    if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1) {
        NET_INC_STATS(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
        goto drop;
    }
 
    req = inet_reqsk_alloc(rsk_ops, sk, !want_cookie);
    if (!req)
        goto drop;
 
    /* Protocol-specific operations for this request_sock */
    tcp_rsk(req)->af_specific = af_ops;
 
    tcp_clear_options(&tmp_opt);
    tmp_opt.mss_clamp = af_ops->mss_clamp;
    tmp_opt.user_mss  = tp->rx_opt.user_mss;
    tcp_parse_options(skb, &tmp_opt, 0, want_cookie ? NULL : &foc);
 
    if (want_cookie && !tmp_opt.saw_tstamp)
        tcp_clear_options(&tmp_opt);
 
    tmp_opt.tstamp_ok = tmp_opt.saw_tstamp;
    /* Initialise the connection request block: request_sock,
     * inet_request_sock and tcp_request_sock */
    tcp_openreq_init(req, &tmp_opt, skb, sk);
    
    inet_rsk(req)->no_srccheck = inet_sk(sk)->transparent;
    inet_rsk(req)->ir_iif = inet_request_bound_dev_if(sk, skb);

    af_ops->init_req(req, sk, skb);
 
    .../* Initialise the receive window */
    tcp_openreq_init_rwin(req, sk, dst);
    if (!want_cookie) {
        tcp_reqsk_record_syn(sk, req, skb);
        fastopen_sk = tcp_try_fastopen(sk, skb, req, &foc, dst);
    }

    ...


drop_and_release:
    dst_release(dst);
drop_and_free:
    reqsk_free(req);
drop:
    tcp_listendrop(sk);
    return 0;
}

調用該函數后,會分配一個request_sock對象來代表這次連接請求(狀態為TCP_NEW_SYN_RECV),再調用tcp_v4_send_synack回復客戶端SYN+ACK,開啟第二次握手

/*
 * tcp_v4_send_synack - send the SYN+ACK (server side of the second
 * handshake) for a given connection request. Returns 0 on success,
 * negative on failure.
 */
static int tcp_v4_send_synack(const struct sock *sk, struct dst_entry *dst,
                  struct flowi *fl,
                  struct request_sock *req,
                  struct tcp_fastopen_cookie *foc,
                  enum tcp_synack_type synack_type)
{
    const struct inet_request_sock *ireq = inet_rsk(req);
    struct flowi4 fl4;
    int err = -1;
    struct sk_buff *skb;
 
    /* First, grab a route. */
 
    /* Find the route back to the client */
    if (!dst && (dst = inet_csk_route_req(sk, &fl4, req)) == NULL)
        return -1;
 
    /* Build the SYN+ACK segment from the route, the control block
     * and the connection request block */
    skb = tcp_make_synack(sk, dst, req, foc, synack_type);
 
    /* SYN+ACK segment built successfully */
    if (skb) {
        /* Compute the checksum */
        __tcp_v4_send_check(skb, ireq->ir_loc_addr, ireq->ir_rmt_addr);
 
        /* Wrap it in an IP datagram and transmit it */
        err = ip_build_and_send_pkt(skb, sk, ireq->ir_loc_addr,
                        ireq->ir_rmt_addr,
                        ireq->opt);
        err = net_xmit_eval(err);
    }
    return err;
}

查找客戶端路由,構造SYN+ACK包,然后調用ip_build_and_send_pkt,通過網絡層將數據報發出去。
至此,第二次握手(SYN+ACK)已由服務端發出。此時客戶端socket狀態仍為TCP_SYN_SENT,服務端代表該連接的request_sock狀態為TCP_NEW_SYN_RECV;客戶端要等收到並處理完SYN+ACK后,才會進入TCP_ESTABLISHED。

第三次握手

1、客戶端收到SYN+ACK報文並回復。

        /* Client side: SYN+ACK received while in SYN_SENT
         * (excerpt from tcp_rcv_state_process). */
        case TCP_SYN_SENT:
        tp->rx_opt.saw_tstamp = 0;
 
        /* Process the segment received in SYN_SENT; the ACK that
         * completes the handshake is sent from within this call chain. */
        queued = tcp_rcv_synsent_state_process(sk, skb, th);
        if (queued >= 0)
            return queued;
 
        /* Do step6 onward by hand. */
 
        /* After the second handshake, handle any urgent (OOB) data */
        tcp_urg(sk, skb, th);
        __kfree_skb(skb);
 
        /* Check whether there is data waiting to be sent */
        tcp_data_snd_check(sk);
        return 0;
    }

2、服務器端接收ACK報文
最后服務器端還需要接受ACK報文並進入到ESTABLISHED狀態,三次握手才算最終完成。

        /* Server side: the final ACK (third handshake) arriving in
         * SYN_RECV (excerpt from tcp_rcv_state_process). */
        case TCP_SYN_RECV:
        if (!acceptable)
            return 1;
 
        if (!tp->srtt_us)
            tcp_synack_rtt_meas(sk, req);
 
        /* Once we leave TCP_SYN_RECV, we no longer need req
         * so release it.
         */
        if (req) {
            inet_csk(sk)->icsk_retransmits = 0;
            reqsk_fastopen_remove(sk, req, false);
        } else {
            /* Make sure socket is routed, for correct metrics. */
 
            /* Rebuild the route and init the congestion-control module */
            icsk->icsk_af_ops->rebuild_header(sk);
            tcp_init_congestion_control(sk);
 
            tcp_mtup_init(sk);
            tp->copied_seq = tp->rcv_nxt;
            tcp_init_buffer_space(sk);
        }
        smp_mb();
        /* Normal third handshake: mark the connection ESTABLISHED */
        tcp_set_state(sk, TCP_ESTABLISHED);
        sk->sk_state_change(sk);
 
        /* Note, that this wakeup is only for marginal crossed SYN case.
         * Passively open sockets are not waked up, because
         * sk->sk_sleep == NULL and sk->sk_socket == NULL.
         */
 
        /* Connection is up: wake up any waiting threads */
        if (sk->sk_socket)
            sk_wake_async(sk, SOCK_WAKE_IO, POLL_OUT);
 
        tp->snd_una = TCP_SKB_CB(skb)->ack_seq;
        tp->snd_wnd = ntohs(th->window) << tp->rx_opt.snd_wscale;
        tcp_init_wl(tp, TCP_SKB_CB(skb)->seq);
 
        if (tp->rx_opt.tstamp_ok)
            tp->advmss -= TCPOLEN_TSTAMP_ALIGNED;
 
        if (req) {
            /* Re-arm the timer because data may have been sent out.
             * This is similar to the regular data transmission case
             * when new data has just been ack'ed.
             *
             * (TFO) - we could try to be more aggressive and
             * retransmitting any data sooner based on when they
             * are sent out.
             */
            tcp_rearm_rto(sk);
        } else
            tcp_init_metrics(sk);
 
        if (!inet_csk(sk)->icsk_ca_ops->cong_control)
            tcp_update_pacing_rate(sk);
 
        /* Prevent spurious tcp_cwnd_restart() on first data packet */
 
        /* Record the time of the most recent transmission */
        tp->lsndtime = tcp_time_stamp;
 
        tcp_initialize_rcv_mss(sk);
 
        /* Compute the TCP header-prediction flags */
        tcp_fast_path_on(tp);
        break;


免責聲明!

本站轉載的文章為個人學習借鑒使用,本站對版權不負任何法律責任。如果侵犯了您的隱私權益,請聯系本站郵箱yoyou2525@163.com刪除。



 
粵ICP備18138465號   © 2018-2025 CODEPRJ.COM