Socket connect 等簡要分析


  1. connect 系統調用 分析

#include <sys/types.h> /* See NOTES */
#include <sys/socket.h>
int connect(int sockfd, const struct sockaddr *addr, socklen_t addrlen);

其中的參數解釋如下:
·int sockfd :套接字描述符。
·const struct sockaddr*addr :要連接的地址。
·socklen_t addrlen :要連接的地址長度。
返回值 0 表示成功, -1 表示失敗。

connect 的用途是使用指定的套接字去連接指定的地址。對於面向連接的協議(套接字類型為
SOCK_STREAM ), connect 只能成功一次(當然要如此,因為真正的連接已經建立了)。如果重複調
用 connect ,會返回 -1 表示失敗,同時錯誤碼為 EISCONN 。而對於非面向連接的協議(套接字類型為
SOCK_DGRAM ),則可以執行多次 connect (因為這時的 connect 僅僅是設置了默認的目的地址)。

對於 TCP 套接字來說, connect 實際上是要真正地進行三次握手,所以其默認是一個阻塞操作。那麼
是否可以寫一個非阻塞的 TCP connect 代碼呢?可以:從下面的內核實現可以看到,帶 O_NONBLOCK 標誌時
connect 不會等待握手完成,而是直接返回 -EINPROGRESS 。

/*
 *	Attempt to connect to a socket with the server address.  The address
 *	is in user space so we verify it is OK and move it to kernel space.
 *
 *	For 1003.1g we need to add clean support for a bind to AF_UNSPEC to
 *	break bindings
 *
 *	NOTE: 1003.1g draft 6.3 is broken with respect to AX.25/NetROM and
 *	other SEQPACKET protocols that take time to connect() as it doesn't
 *	include the -EINPROGRESS status for such sockets.
 */

SYSCALL_DEFINE3(connect, int, fd, struct sockaddr __user *, uservaddr,
		int, addrlen)
{
	struct sockaddr_storage kaddr;
	struct socket *sock;
	int fput_needed;
	int err;

	/*
	 * Resolve the file descriptor to its socket.  On failure the lookup
	 * has already stored the error code in err and there is no file
	 * reference to drop, so return straight away.
	 */
	sock = sockfd_lookup_light(fd, &err, &fput_needed);
	if (sock == NULL)
		return err;

	/* Copy the socket address from user space into kernel space. */
	err = move_addr_to_kernel(uservaddr, addrlen, &kaddr);

	/* Only a successfully copied address is shown to the security hook. */
	if (err >= 0)
		err = security_socket_connect(sock, (struct sockaddr *)&kaddr,
					      addrlen);

	/*
	 * Hand over to the socket-layer connect operation.  Per the original
	 * note, for SOCK_STREAM the ops table is inet_stream_ops and this
	 * ends up in inet_stream_connect().
	 */
	if (err == 0)
		err = sock->ops->connect(sock, (struct sockaddr *)&kaddr,
					 addrlen, sock->file->f_flags);

	/* Every path that found a socket releases the file reference. */
	fput_light(sock->file, fput_needed);
	return err;
}

 

/*
 * Locked entry point for a stream-socket connect: runs
 * __inet_stream_connect() with the socket lock held and passes its
 * result back unchanged.
 */
int inet_stream_connect(struct socket *sock, struct sockaddr *uaddr,
			int addr_len, int flags)
{
	struct sock *sk = sock->sk;
	int ret;

	lock_sock(sk);		/* enter the critical section */
	ret = __inet_stream_connect(sock, uaddr, addr_len, flags);
	release_sock(sk);	/* leave the critical section */

	return ret;
}

 

/*
 *	Connect to a remote host. There is regrettably still a little
 *	TCP 'magic' in here.
 *
 *	Moves sock->state from SS_UNCONNECTED through SS_CONNECTING to
 *	SS_CONNECTED and returns 0 on success or a negative errno.
 *	inet_stream_connect() calls this between lock_sock() and
 *	release_sock().
 */
int __inet_stream_connect(struct socket *sock, struct sockaddr *uaddr,
			  int addr_len, int flags)
{
	struct sock *sk = sock->sk;
	int err;
	long timeo;
	/* Length sanity check: addr_len must at least cover sa_family. */
	if (addr_len < sizeof(uaddr->sa_family))
		return -EINVAL;

	if (uaddr->sa_family == AF_UNSPEC) {/* AF_UNSPEC address: run the protocol's disconnect handler instead of connecting */
		err = sk->sk_prot->disconnect(sk, flags);
		/* Set the socket state according to whether the disconnect succeeded. */
		sock->state = err ? SS_DISCONNECTING : SS_UNCONNECTED;
		goto out;
	}

	switch (sock->state) {
	default:
		err = -EINVAL;
		goto out;
	/* This socket is already connected to a peer. */
	case SS_CONNECTED:
		err = -EISCONN;/* Transport endpoint is already connected */
		goto out;
	case SS_CONNECTING:/* a connection attempt is already under way */
		err = -EALREADY;/* Operation already in progress */
		/* Fall out of switch with err, set for this state */
		break;
	case SS_UNCONNECTED:
		err = -EISCONN;
		if (sk->sk_state != TCP_CLOSE)
			goto out;
		/* Per the original note: for TCP, sk_prot is tcp_prot and connect is tcp_v4_connect(). */
		err = sk->sk_prot->connect(sk, uaddr, addr_len);/* per the original note, this sends the SYN */
		if (err < 0)
			goto out;
		/* The protocol connect call succeeded: mark the socket as connecting. */
		sock->state = SS_CONNECTING;

		/* Just entered SS_CONNECTING state; the only
		 * difference is that return value in non-blocking
		 * case is EINPROGRESS, rather than EALREADY.
		 */
		err = -EINPROGRESS;
		break;
	}
	/* Send timeout of the socket; per the original note it is 0 when O_NONBLOCK is set. */
	timeo = sock_sndtimeo(sk, flags & O_NONBLOCK);
	/* Still in SYN_SENT/SYN_RECV: wait for the rest of the handshake. */
	if ((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV)) {
		int writebias = (sk->sk_protocol == IPPROTO_TCP) &&
				tcp_sk(sk)->fastopen_req &&
				tcp_sk(sk)->fastopen_req->data ? 1 : 0;
		/* With timeo == 0 (non-blocking) we return immediately with the
		 * err chosen in the switch above (-EINPROGRESS or -EALREADY).
		 * Otherwise inet_wait_for_connect() sleeps, and:
		 * 1. if it returns 0 (timeout expired), err from above is returned;
		 * 2. if a signal interrupted it, it returns the remaining time and
		 *    err becomes sock_intr_errno(timeo) (per the original note,
		 *    -ERESTARTSYS or -EINTR);
		 * 3. if the handshake completed and the sleeper was woken, we fall
		 *    through and eventually return 0.
		 */

		/* Error code is set above */
		if (!timeo || !inet_wait_for_connect(sk, timeo, writebias))
			goto out;

		err = sock_intr_errno(timeo);
		/* A signal is pending: return the errno computed just above.
		 * (Original note: with -ERESTARTSYS the connect() call is expected
		 * to be reissued; that happens outside this function.)
		 */

		if (signal_pending(current))
			goto out;
	}

	/* Connection was closed by RST, timeout, ICMP error
	 * or another process disconnected us.
	 */
	if (sk->sk_state == TCP_CLOSE)
		goto sock_error;

	/* sk->sk_err may be not zero now, if RECVERR was ordered by user
	 * and error was received after socket entered established state.
	 * Hence, it is handled normally after connect() return successfully.
	 */
	/* Mark the socket as connected. */
	sock->state = SS_CONNECTED;
	err = 0;
out:
	return err;

sock_error:
	/* Use the value from sock_error() if it is non-zero, otherwise -ECONNABORTED. */
	err = sock_error(sk) ? : -ECONNABORTED;
	sock->state = SS_UNCONNECTED;
	/* A failing disconnect leaves the socket in SS_DISCONNECTING. */
	if (sk->sk_prot->disconnect(sk, flags))
		sock->state = SS_DISCONNECTING;
	goto out;
}
EXPORT_SYMBOL(__inet_stream_connect);

 

/*
 * Sleep until the socket leaves the SYN_SENT/SYN_RECV states, the timeout
 * runs out, or a signal is pending.  Returns the remaining timeout (0 when
 * it expired).  The socket lock is dropped around each sleep and re-taken
 * afterwards, so the caller is expected to hold it on entry.
 */
static long inet_wait_for_connect(struct sock *sk, long timeo, int writebias)
{
	DEFINE_WAIT(wait);
	/* Queue the wait entry on the socket's wait queue and set the task state to TASK_INTERRUPTIBLE. */
	prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
	sk->sk_write_pending += writebias;

	/* Basic assumption: if someone sets sk->sk_err, he _must_
	 * change state of the socket from TCP_SYN_*.
	 * Connect() does not allow to get error notifications
	 * without closing the socket.
	 */
	/* Per the original note: once the three-way handshake completes the
	 * state becomes TCP_ESTABLISHED, which ends the loop.
	 */
	while ((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV)) {
		release_sock(sk);
		/* Sleep until the timeout expires, a signal arrives, or the task is
		 * woken.  Per the original note:
		 * 1. after a signal, timeo holds the remaining jiffies;
		 * 2. with SO_SNDTIMEO set, timeo is 0 once the timeout expired;
		 * 3. without SO_SNDTIMEO, timeo is MAX_SCHEDULE_TIMEOUT and comes
		 *    back unchanged, so the enclosing while loop is what keeps
		 *    the task blocked indefinitely.
		 */

		timeo = schedule_timeout(timeo);
		lock_sock(sk);
		/* Stop waiting if a signal is pending or the timeout ran out; the caller turns this into an error code. */
		if (signal_pending(current) || !timeo)
			break;
		prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
	}
	/* Done waiting: per the original note, this removes the entry from the wait queue and sets the task back to TASK_RUNNING. */
	finish_wait(sk_sleep(sk), &wait);
	sk->sk_write_pending -= writebias;
	return timeo;
}
/**/
進程的喚醒
 

三次握手中,當客戶端收到SYNACK、發出ACK后,連接就成功建立了。

此時連接的狀態從TCP_SYN_SENT或TCP_SYN_RECV變為TCP_ESTABLISHED,sock的狀態發生變化,

會調用sock_def_wakeup()來處理連接狀態變化事件,喚醒進程,connect()就能成功返回了。

sock_def_wakeup()的函數調用路徑如下:

tcp_v4_rcv

tcp_v4_do_rcv

tcp_rcv_state_process

tcp_rcv_synsent_state_process

tcp_finish_connect

sock_def_wakeup

wake_up_interruptible_all

__wake_up

 

void tcp_finish_connect(struct sock *sk, struct sk_buff *skb)
{
	struct tcp_sock *tp = tcp_sk(sk);
	struct inet_connection_sock *icsk = inet_csk(sk);

	tcp_set_state(sk, TCP_ESTABLISHED);

	------------------------
        ----------------------------   
	
	if (!sock_flag(sk, SOCK_DEAD)) {
		sk->sk_state_change(sk);---->// 指向sock_def_wakeup
/* 如果使用了異步通知,則發送SIGIO通知進程可寫 */
		sk_wake_async(sk, SOCK_WAKE_IO, POLL_OUT);
	}
}    

/*
 * Forward an async wake-up to the socket layer, but only for sockets that
 * have the SOCK_FASYNC flag set; otherwise do nothing.
 */
static inline void sk_wake_async(struct sock *sk, int how, int band)
{
	if (!sock_flag(sk, SOCK_FASYNC))
		return;

	sock_wake_async(sk->sk_socket, how, band);
}

/*
 * State-change callback: wake every interruptible sleeper on the socket's
 * wait queue, if wq_has_sleeper() reports that there is one.
 */
static void sock_def_wakeup(struct sock *sk)
{
	struct socket_wq *waitq;

	/* sk->sk_wq is read with rcu_dereference(), so stay inside an RCU read-side section. */
	rcu_read_lock();

	waitq = rcu_dereference(sk->sk_wq);
	if (wq_has_sleeper(waitq))
		wake_up_interruptible_all(&waitq->wait);

	rcu_read_unlock();
}

//最終調用__wake_up_common(),由於nr_exclusive為0,會把此socket上所有的等待進程都喚醒

udp_prot 是 UDP 協議中所有自定義操作函數的集合,其 connect 的實現函數為 ip4_datagram_connect ,
主要是設置目的 IP 、端口和路由信息。

/*
 * Datagram connect(): look up a route to the given IPv4 destination and
 * record destination address, destination port and route on the socket.
 * Returns 0 on success or a negative errno (-EINVAL for a short address,
 * -EAFNOSUPPORT for a non-AF_INET family, -EACCES for a broadcast route
 * without SOCK_BROADCAST, or the route lookup error).
 */
int ip4_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
{
	struct inet_sock *inet = inet_sk(sk);
	struct sockaddr_in *usin = (struct sockaddr_in *) uaddr;
	struct flowi4 *fl4;
	struct rtable *rt;
	__be32 saddr;
	int oif;
	int err;


	if (addr_len < sizeof(*usin))
		return -EINVAL;

	if (usin->sin_family != AF_INET)
		return -EAFNOSUPPORT;
	/* Reset the socket's cached route entry. */
	sk_dst_reset(sk);

	lock_sock(sk);
	/* Index of the network device the socket is bound to. */

	oif = sk->sk_bound_dev_if;
	saddr = inet->inet_saddr;
	/* Multicast destination: where oif or saddr is still unset, fall back
	 * to the socket's multicast values (mc_index / mc_addr).
	 */
	if (ipv4_is_multicast(usin->sin_addr.s_addr)) {
		if (!oif)
			oif = inet->mc_index;
		if (!saddr)
			saddr = inet->mc_addr;
	}
	fl4 = &inet->cork.fl.u.ip4;
	/*
	 * Look up a route with ip_route_connect(), keyed on the destination
	 * address, source address, source port, destination port, protocol
	 * and output device index.  If the lookup fails the error is
	 * returned.  If the route is a broadcast route and the socket lacks
	 * SOCK_BROADCAST, the route is released and -EACCES is returned.
	 * On success sk_state is set to TCP_ESTABLISHED and the route is
	 * stored on the socket with sk_dst_set().
	 */
	rt = ip_route_connect(fl4, usin->sin_addr.s_addr, saddr,
			      RT_CONN_FLAGS(sk), oif,
			      sk->sk_protocol,
			      inet->inet_sport, usin->sin_port, sk, true);
	if (IS_ERR(rt)) {
		err = PTR_ERR(rt);
		if (err == -ENETUNREACH)
			IP_INC_STATS_BH(sock_net(sk), IPSTATS_MIB_OUTNOROUTES);
		goto out;
	}
	/* Broadcast route on a socket without SOCK_BROADCAST: drop the route reference and refuse. */

	if ((rt->rt_flags & RTCF_BROADCAST) && !sock_flag(sk, SOCK_BROADCAST)) {
		ip_rt_put(rt);
		err = -EACCES;
		goto out;
	}
	if (!inet->inet_saddr)/* no source address yet: take the one from the route lookup */
		inet->inet_saddr = fl4->saddr;	/* Update source address */
	if (!inet->inet_rcv_saddr) {
		inet->inet_rcv_saddr = fl4->saddr;
		if (sk->sk_prot->rehash)
			sk->sk_prot->rehash(sk);
	}/* Record destination address and port; per the original note the source port is already set. */
	inet->inet_daddr = fl4->daddr;
	inet->inet_dport = usin->sin_port;
	sk->sk_state = TCP_ESTABLISHED;
	inet->inet_id = jiffies;

	sk_dst_set(sk, &rt->dst);
	err = 0;
out:
	release_sock(sk);
	return err;
}

 


免責聲明!

本站轉載的文章為個人學習借鑒使用,本站對版權不負任何法律責任。如果侵犯了您的隱私權益,請聯系本站郵箱yoyou2525@163.com刪除。



 
粵ICP備18138465號   © 2018-2025 CODEPRJ.COM