深入理解TCP協議及其源代碼-send和recv背后數據的收發過程


send和recv背后數據的收發過程

send和recv是TCP常用的發送數據和接收數據函數,這兩個函數在linux內核的代碼上具體是如何實現的呢?
ssize_t recv(int sockfd, void *buf, size_t len, int flags)
ssize_t send(int sockfd, const void *buf, size_t len, int flags)

理論分析

對於send函數,比較容易理解,捋一下計算機網絡的知識,可以大概得到實現的方法,首先TCP是面向連接的,會有三次握手,建立連接成功,即代表兩個進程可以用send和recv通信,作為發送信息的一方,肯定是接收到了從用戶程序發送數據的請求,即send函數的參數之一,接收到數據后,若數據的大小超過一定長度,肯定不可能直接發送出去,因此,首先要對數據分段,將數據分成一個個的數據段,其次,TCP協議位於傳輸層,有相應的頭部字段,在傳輸時肯定要加在數據前,數據也就被准備好了。當然,TCP是沒有能力直接通過物理鏈路發送出去的,要想數據正確傳輸,還需要一層一層的進行。所以,最后一步是將數據傳遞給網絡層,網絡層再封裝,然后鏈路層、物理層,最后被發送出去。總結一下就是:
1.數據分段
2.封裝頭部
3.傳遞給下一層。

對於recv函數,有一個不太能理解的就是,作為接收方,我是否是一直在等待其他進程給我發送數據,如果是,那么就應該是不停地判斷是否有數據到了,如果有,就把數據保存起來,然后執行send的逆過程即可。若沒有一直等,那就可能是進程被掛起了,如果有數據到達,內核通過中斷喚醒進程,然后接收數據。至於具體是哪種,可以通過代碼和調試得到結果。

源碼分析

首先,當調用send()函數時,內核封裝send()為sendto(),然后發起系統調用。其實也很好理解,send()就是sendto()的一種特殊情況,而sendto()在內核的系統調用服務程序為sys_sendto

/* Kernel entry point for sendto(2); send(2) is sendto(2) with addr == NULL.
 * Builds a struct msghdr from the user buffer and dispatches to the
 * protocol's sendmsg handler via sock_sendmsg().
 */
int __sys_sendto(int fd, void __user *buff, size_t len, unsigned int flags,
		 struct sockaddr __user *addr,  int addr_len)
{
	struct socket *sock;
	struct sockaddr_storage address;
	int err;
	struct msghdr msg;
	struct iovec iov;
	int fput_needed;
	/* Wrap the single user buffer into msg.msg_iter for the send path. */
	err = import_single_range(WRITE, buff, len, &iov, &msg.msg_iter);
	if (unlikely(err))
		return err;
	/* Resolve the file descriptor to its struct socket. */
	sock = sockfd_lookup_light(fd, &err, &fput_needed);
	if (!sock)
		goto out;

	msg.msg_name = NULL;
	msg.msg_control = NULL;
	msg.msg_controllen = 0;
	msg.msg_namelen = 0;
	/* send() passes addr == NULL; a non-NULL addr (sendto) is copied in. */
	if (addr) {
		err = move_addr_to_kernel(addr, addr_len, &address);
		if (err < 0)
			goto out_put;
		msg.msg_name = (struct sockaddr *)&address;
		msg.msg_namelen = addr_len;
	}
	/* A non-blocking socket forces MSG_DONTWAIT for this call. */
	if (sock->file->f_flags & O_NONBLOCK)
		flags |= MSG_DONTWAIT;
	msg.msg_flags = flags;
	/* Hand the assembled msghdr to the protocol layer (tcp_sendmsg for TCP). */
	err = sock_sendmsg(sock, &msg);

out_put:
	fput_light(sock->file, fput_needed);
out:
	return err;
}

這里定義了一個struct msghdr msg,它是用來表示要發送的數據的一些屬性。

/* Describes a message for sendmsg/recvmsg: destination address, data
 * iterator, and ancillary data.
 */
struct msghdr {
	void		*msg_name;	/* peer's struct sockaddr address (used for UDP) */
	int		msg_namelen;	/* length of the address in msg_name */
	struct iov_iter	msg_iter;	/* iterator over the I/O buffers */
	void		*msg_control;	/* address of the ancillary (control) data */
	__kernel_size_t	msg_controllen;	/* length of the ancillary data */
	unsigned int	msg_flags;	/* flags for the received message */
	struct kiocb	*msg_iocb;	/* ptr to iocb for async requests */
};

還有一個struct iovec,它被稱為io向量,顧名思義,用來表示io數據的一些信息。

/* An "I/O vector": one user-space buffer in a scatter/gather list. */
struct iovec
{
	void __user *iov_base;	/* user-space address of the data to transfer */
	__kernel_size_t iov_len; /* length of the data to transfer */
};

所以,__sys_sendto函數其實做了3件事:1.通過fd獲取了對應的struct socket;2.創建了用來描述要發送的數據的結構體struct msghdr;3.調用了sock_sendmsg來執行實際的發送。繼續追蹤這個函數,會看到最終調用的是sock->ops->sendmsg(sock, msg, msg_data_left(msg));,即socket在初始化時賦值給結構體struct proto tcp_prot的函數tcp_sendmsg

/* TCP's protocol operation table (excerpt); registered for
 * SOCK_STREAM/IPPROTO_TCP sockets.  sock_sendmsg()/sock_recvmsg()
 * ultimately dispatch to .sendmsg/.recvmsg below.
 */
struct proto tcp_prot = {
	.name			= "TCP",
	.owner			= THIS_MODULE,
	.close			= tcp_close,
	.pre_connect		= tcp_v4_pre_connect,
	.connect		= tcp_v4_connect,
	.disconnect		= tcp_disconnect,
	.accept			= inet_csk_accept,
	.ioctl			= tcp_ioctl,
	.init			= tcp_v4_init_sock,
	.destroy		= tcp_v4_destroy_sock,
	.shutdown		= tcp_shutdown,
	.setsockopt		= tcp_setsockopt,
	.getsockopt		= tcp_getsockopt,
	.keepalive		= tcp_set_keepalive,
	.recvmsg		= tcp_recvmsg,
	.sendmsg		= tcp_sendmsg,
	......

tcp_sendmsg實際上調用的是int tcp_sendmsg_locked(struct sock *sk, struct msghdr *msg, size_t size)

/* Excerpt: copies user data into the socket's write queue as skbs, then
 * pushes the queued data out via tcp_push().
 */
int tcp_sendmsg_locked(struct sock *sk, struct msghdr *msg, size_t size)
{
	struct tcp_sock *tp = tcp_sk(sk);/* forced type conversion (cast) from struct sock */
	struct sk_buff *skb;
	flags = msg->msg_flags;
	......
		if (copied)
			tcp_push(sk, flags & ~MSG_MORE, mss_now,
				 TCP_NAGLE_PUSH, size_goal);
}

tcp_sendmsg_locked中,完成的是將所有的數據組織成發送隊列,這個發送隊列是struct sock結構中的一個域sk_write_queue,這個隊列的每一個元素是一個skb,里面存放的就是待發送的數據。然后調用了tcp_push()函數。

struct sock{
	...
	struct sk_buff_head	sk_write_queue;/* head of the skb send queue */
	...
	struct sk_buff	*sk_send_head;/* first queue element not yet sent */
}

在tcp協議的頭部有幾個標志字段:URG、ACK、PSH、RST、SYN、FIN,tcp_push中會判斷這個skb的元素是否需要push,如果需要就將tcp頭部字段的push置一,置一的過程如下:

/* Decide whether the queued tail skb should be transmitted now: may mark
 * PSH/URG, autocork (defer while TX is in flight), or flush all pending
 * frames.
 */
static void tcp_push(struct sock *sk, int flags, int mss_now,
		     int nonagle, int size_goal)
{
	struct tcp_sock *tp = tcp_sk(sk);
	struct sk_buff *skb;

	/* Nothing queued means nothing to push. */
	skb = tcp_write_queue_tail(sk);
	if (!skb)
		return;
	/* Set PSH unless the caller signalled more data is coming (MSG_MORE). */
	if (!(flags & MSG_MORE) || forced_push(tp))
		tcp_mark_push(tp, skb);

	tcp_mark_urg(tp, flags);

	if (tcp_should_autocork(sk, skb, size_goal)) {

		/* avoid atomic op if TSQ_THROTTLED bit is already set */
		if (!test_bit(TSQ_THROTTLED, &sk->sk_tsq_flags)) {
			NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPAUTOCORKING);
			set_bit(TSQ_THROTTLED, &sk->sk_tsq_flags);
		}
		/* It is possible TX completion already happened
		 * before we set TSQ_THROTTLED.
		 */
		if (refcount_read(&sk->sk_wmem_alloc) > skb->truesize)
			return;
	}

	/* MSG_MORE behaves like TCP_CORK: hold back small segments. */
	if (flags & MSG_MORE)
		nonagle = TCP_NAGLE_CORK;

	__tcp_push_pending_frames(sk, mss_now, nonagle);
}

整個過程會有點繞,首先struct tcp_skb_cb結構體存放的是TCP層的控制信息(包括序號和頭部標志位等,構建真正的TCP頭部時會用到),通過TCP_SKB_CB宏將skb中的cb——也就是48個字節的數組——類型轉換為struct tcp_skb_cb,這樣位於skb的cb就被用作TCP的控制塊,tcp_mark_push在其中置上PSH標志。

/* Set PSH in the skb's TCP control block and record the sequence number
 * up to which data has been pushed.
 */
static inline void tcp_mark_push(struct tcp_sock *tp, struct sk_buff *skb)
{
	TCP_SKB_CB(skb)->tcp_flags |= TCPHDR_PSH;
	tp->pushed_seq = tp->write_seq;
}

...
#define TCP_SKB_CB(__skb)	((struct tcp_skb_cb *)&((__skb)->cb[0]))
...

struct sk_buff {
	...	
	char			cb[48] __aligned(8);	/* per-layer private control buffer */
	...
struct tcp_skb_cb {
	__u32		seq;		/* Starting sequence number	*/
	__u32		end_seq;	/* SEQ + FIN + SYN + datalen	*/
	__u8		tcp_flags;	/* TCP header flag bits; land in byte 13 of the wire header (tcp[13]) */
	......
};

然后,tcp_push調用了__tcp_push_pending_frames(sk, mss_now, nonagle);函數發送數據:

/* Push all queued segments out via tcp_write_xmit(); if nothing could be
 * sent (e.g. zero window), arm the zero-window probe timer.
 */
void __tcp_push_pending_frames(struct sock *sk, unsigned int cur_mss,
			       int nonagle)
{

	if (tcp_write_xmit(sk, cur_mss, nonagle, 0,
			   sk_gfp_mask(sk, GFP_ATOMIC)))
		tcp_check_probe_timer(sk);
}

隨后又調用了tcp_write_xmit來發送數據:

/* Excerpt: the main TCP transmit loop (net/ipv4/tcp_output.c).  Walks the
 * send queue, applies pacing, congestion-window and receive-window checks,
 * splits oversized skbs, and hands each segment to tcp_transmit_skb().
 */
static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
			   int push_one, gfp_t gfp)
{
	struct tcp_sock *tp = tcp_sk(sk);
	struct sk_buff *skb;
	unsigned int tso_segs, sent_pkts;
	int cwnd_quota;
	int result;
	bool is_cwnd_limited = false, is_rwnd_limited = false;
	u32 max_segs;
	/* total number of packets sent so far */
	sent_pkts = 0;
	......

	/* while there is an unsent segment at the head of the send queue */
	while ((skb = tcp_send_head(sk))) {
		unsigned int limit;

		if (unlikely(tp->repair) && tp->repair_queue == TCP_SEND_QUEUE) {
			/* "skb_mstamp_ns" is used as a start point for the retransmit timer */
			skb->skb_mstamp_ns = tp->tcp_wstamp_ns = tp->tcp_clock_cache;
			list_move_tail(&skb->tcp_tsorted_anchor, &tp->tsorted_sent_queue);
			tcp_init_tso_segs(skb, mss_now);
			goto repair; /* Skip network transmission */
		}

		if (tcp_pacing_check(sk))
			break;

		tso_segs = tcp_init_tso_segs(skb, mss_now);
		BUG_ON(!tso_segs);
		/* check the congestion-window quota */
		cwnd_quota = tcp_cwnd_test(tp, skb);
		if (!cwnd_quota) {
			if (push_one == 2)
				/* Force out a loss probe pkt. */
				cwnd_quota = 1;
			else
				break;
		}

		/* stop if the segment does not fit the peer's receive window */
		if (unlikely(!tcp_snd_wnd_test(tp, skb, mss_now))) {
			is_rwnd_limited = true;
			break;
		......
		limit = mss_now;
		if (tso_segs > 1 && !tcp_urg_mode(tp))
			limit = tcp_mss_split_point(sk, skb, mss_now,
						    min_t(unsigned int,
							  cwnd_quota,
							  max_segs),
						    nonagle);

		/* split the skb if it exceeds the allowed size */
		if (skb->len > limit &&
		    unlikely(tso_fragment(sk, TCP_FRAG_IN_WRITE_QUEUE,
					  skb, limit, mss_now, gfp)))
			break;

		if (tcp_small_queue_check(sk, skb, 0))
			break;

		/* transmit one segment; tcp_transmit_skb wraps __tcp_transmit_skb */
		if (unlikely(tcp_transmit_skb(sk, skb, 1, gfp)))
			break;
	......

tcp_write_xmit位於tcp_output.c中,它實現了tcp的擁塞控制,然后調用了tcp_transmit_skb(sk, skb, 1, gfp)傳輸數據,實際上調用的是__tcp_transmit_skb

/* Excerpt: builds the TCP header in front of the payload and hands the
 * segment to the network layer via icsk->icsk_af_ops->queue_xmit
 * (ip_queue_xmit for IPv4) — the last step of the transport layer.
 */
static int __tcp_transmit_skb(struct sock *sk, struct sk_buff *skb,
			      int clone_it, gfp_t gfp_mask, u32 rcv_nxt)
{
	
	skb_push(skb, tcp_header_size);
	skb_reset_transport_header(skb);
	......
	/* fill in the TCP header fields */
	th = (struct tcphdr *)skb->data;
	th->source		= inet->inet_sport;
	th->dest		= inet->inet_dport;
	th->seq			= htonl(tcb->seq);
	th->ack_seq		= htonl(rcv_nxt);

	tcp_options_write((__be32 *)(th + 1), tp, &opts);
	skb_shinfo(skb)->gso_type = sk->sk_gso_type;
	if (likely(!(tcb->tcp_flags & TCPHDR_SYN))) {
		th->window      = htons(tcp_select_window(sk));
		tcp_ecn_send(sk, skb, th, tcp_header_size);
	} else {
		/* RFC1323: The window in SYN & SYN/ACK segments
		 * is never scaled.
		 */
		th->window	= htons(min(tp->rcv_wnd, 65535U));
	}
	......
	/* compute the checksum (tcp_v4_send_check for IPv4) */
	icsk->icsk_af_ops->send_check(sk, skb);

	if (likely(tcb->tcp_flags & TCPHDR_ACK))
		tcp_event_ack_sent(sk, tcp_skb_pcount(skb), rcv_nxt);

	if (skb->len != tcp_header_size) {
		tcp_event_data_sent(tp, sk);
		tp->data_segs_out += tcp_skb_pcount(skb);
		tp->bytes_sent += skb->len - tcp_header_size;
	}

	if (after(tcb->end_seq, tp->snd_nxt) || tcb->seq == tcb->end_seq)
		TCP_ADD_STATS(sock_net(sk), TCP_MIB_OUTSEGS,
			      tcp_skb_pcount(skb));

	tp->segs_out += tcp_skb_pcount(skb);
	/* OK, its time to fill skb_shinfo(skb)->gso_{segs|size} */
	skb_shinfo(skb)->gso_segs = tcp_skb_pcount(skb);
	skb_shinfo(skb)->gso_size = tcp_skb_mss(skb);

	/* Leave earliest departure time in skb->tstamp (skb->skb_mstamp_ns) */

	/* Cleanup our debris for IP stacks */
	memset(skb->cb, 0, max(sizeof(struct inet_skb_parm),
			       sizeof(struct inet6_skb_parm)));

	/* hand the segment down to the network layer */
	err = icsk->icsk_af_ops->queue_xmit(sk, skb, &inet->cork.fl);
	......
}

tcp_transmit_skb是tcp發送數據位於傳輸層的最后一步,這里首先對TCP數據段的頭部進行了處理,然后調用了網絡層提供的發送接口icsk->icsk_af_ops->queue_xmit(sk, skb, &inet->cork.fl);實現了數據的發送,自此,數據離開了傳輸層,傳輸層的任務也就結束了。

對於recv函數,與send類似,自然也是recvfrom的特殊情況,調用的也就是__sys_recvfrom,整個函數的調用路徑與send非常類似:

/* Excerpt: kernel entry point for recvfrom(2); recv(2) is recvfrom(2)
 * with addr == NULL.  Mirrors __sys_sendto on the receive side.
 */
int __sys_recvfrom(int fd, void __user *ubuf, size_t size, unsigned int flags,
		   struct sockaddr __user *addr, int __user *addr_len)
{
	......
	/* Wrap the single user buffer into msg.msg_iter for the receive path. */
	err = import_single_range(READ, ubuf, size, &iov, &msg.msg_iter);
	if (unlikely(err))
		return err;
	sock = sockfd_lookup_light(fd, &err, &fput_needed);
	.....
	msg.msg_control = NULL;
	msg.msg_controllen = 0;
	/* Save some cycles and don't copy the address if not needed */
	msg.msg_name = addr ? (struct sockaddr *)&address : NULL;
	/* We assume all kernel code knows the size of sockaddr_storage */
	msg.msg_namelen = 0;
	msg.msg_iocb = NULL;
	msg.msg_flags = 0;
	if (sock->file->f_flags & O_NONBLOCK)
		flags |= MSG_DONTWAIT;
	/* Dispatch to the protocol's recvmsg (tcp_recvmsg for TCP). */
	err = sock_recvmsg(sock, &msg, flags);

	/* Copy the sender's address back to user space if requested. */
	if (err >= 0 && addr != NULL) {
		err2 = move_addr_to_user(&address,
					 msg.msg_namelen, addr, addr_len);
	.....
}

__sys_recvfrom調用了sock_recvmsg來接收數據,整個函數實際調用的是sock->ops->recvmsg(sock, msg, msg_data_left(msg), flags);,同樣,根據tcp_prot結構的初始化,調用的其實是tcp_recvmsg。
接收函數比發送函數要復雜得多,因為數據接收不僅僅只是接收,tcp的三次握手也是在接收函數實現的,所以收到數據后要判斷當前的狀態,是否正在建立連接等,根據發來的信息考慮狀態是否要改變,在這里,我們僅僅考慮在連接建立后數據的接收。

/* Excerpt: copies queued data from sk->sk_receive_queue to user space. */
int tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock,
		int flags, int *addr_len)
{
	......
	/* Busy-poll briefly when established but the receive queue is empty. */
	if (sk_can_busy_loop(sk) && skb_queue_empty(&sk->sk_receive_queue) &&
	    (sk->sk_state == TCP_ESTABLISHED))
		sk_busy_loop(sk, nonblock);

	lock_sock(sk);
	.....
		if (unlikely(tp->repair)) {
		err = -EPERM;
		if (!(flags & MSG_PEEK))
			goto out;

		if (tp->repair_queue == TCP_SEND_QUEUE)
			goto recv_sndq;

		err = -EINVAL;
		if (tp->repair_queue == TCP_NO_QUEUE)
			goto out;
	......
		/* Walk the receive queue and copy each skb's data to user space. */
		last = skb_peek_tail(&sk->sk_receive_queue);
		skb_queue_walk(&sk->sk_receive_queue, skb) {
			last = skb;
	......
			if (!(flags & MSG_TRUNC)) {
			err = skb_copy_datagram_msg(skb, offset, msg, used);
			if (err) {
				/* Exception. Bailout! */
				if (!copied)
					copied = -EFAULT;
				break;
			}
		}

		/* Advance the read sequence number and byte accounting. */
		*seq += used;
		copied += used;
		len -= used;

		tcp_rcv_space_adjust(sk);
	

這里共維護了三個隊列:prequeue、backlog、receive_queue,分別為預處理隊列,后備隊列和接收隊列,在連接建立后,若沒有數據到來,接收隊列為空,進程會在sk_busy_loop函數內循環等待,直到接收隊列不為空,並調用函數skb_copy_datagram_msg將接收到的數據拷貝到用戶態,實際調用的是__skb_datagram_iter,這里同樣用了struct msghdr *msg來實現。

/* Excerpt: copies an skb's payload into a user iov_iter through the cb
 * callback — first the linear head area, then the paged fragments.
 */
int __skb_datagram_iter(const struct sk_buff *skb, int offset,
			struct iov_iter *to, int len, bool fault_short,
			size_t (*cb)(const void *, size_t, void *, struct iov_iter *),
			void *data)
{
	int start = skb_headlen(skb);
	int i, copy = start - offset, start_off = offset, n;
	struct sk_buff *frag_iter;

	/* copy the linear (head) part of the skb */
	if (copy > 0) {
		if (copy > len)
			copy = len;
		n = cb(skb->data + offset, copy, data, to);
		offset += n;
		if (n != copy)
			goto short_copy;
		if ((len -= copy) == 0)
			return 0;
	}

	/* copy the paged fragments */
	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
		int end;
		const skb_frag_t *frag = &skb_shinfo(skb)->frags[i];

		WARN_ON(start > offset + len);

		end = start + skb_frag_size(frag);
		if ((copy = end - offset) > 0) {
			struct page *page = skb_frag_page(frag);
			u8 *vaddr = kmap(page);

			if (copy > len)
				copy = len;
			/* temporarily map the fragment page and copy from it */
			n = cb(vaddr + frag->page_offset +
				offset - start, copy, data, to);
			kunmap(page);
			offset += n;
			if (n != copy)
				goto short_copy;
			if (!(len -= copy))
				return 0;
		}
		start = end;
	}

拷貝完成后,函數返回,整個接收的過程也就完成了。
整體來講與我們的分析出入並不大,用一張函數間的相互調用圖可以表示

gdb調試分析

接下來用gdb調試驗證上面的分析,調試環境為linux5.0.1+menuos(64位),測試程序為hello/hi網絡聊天程序,將其植入到menuos,命令為client。
首先看send的調用關系,分別將斷點打在__sys_sendto、tcp_sendmsg_locked、tcp_push、__tcp_push_pending_frames、tcp_write_xmit、__tcp_transmit_skb。觀察函數的調用順序,與我們的分析是否一致。

(gdb) file vmlinux
Reading symbols from vmlinux...done.
warning: File "/home/netlab/netlab/linux-5.2.7/scripts/gdb/vmlinux-gdb.py" auto-loading has been declined by your `auto-load safe-path' set to "$debugdir:$datadir/auto-load".
To enable execution of this file add
	add-auto-load-safe-path /home/netlab/netlab/linux-5.2.7/scripts/gdb/vmlinux-gdb.py
line to your configuration file "/home/netlab/.gdbinit".
To completely disable this security protection add
	set auto-load safe-path /
line to your configuration file "/home/netlab/.gdbinit".
For more information about this security protection see the
"Auto-loading safe path" section in the GDB manual.  E.g., run from the shell:
	info "(gdb)Auto-loading safe path"
(gdb) target remote: 1234
Remote debugging using : 1234
0x0000000000000000 in fixed_percpu_data ()
(gdb) c
Continuing.
^C
Program received signal SIGINT, Interrupt.
default_idle () at arch/x86/kernel/process.c:581
581		trace_cpu_idle_rcuidle(PWR_EVENT_EXIT, smp_processor_id());
(gdb) b __sys_sendto
Breakpoint 1 at 0xffffffff817ef560: file net/socket.c, line 1929.
(gdb) b tcp_sendmsg_locked
Breakpoint 2 at 0xffffffff81895000: file net/ipv4/tcp.c, line 1158.
(gdb) b tcp_push
Breakpoint 3 at 0xffffffff818907c0: file ./include/linux/skbuff.h, line 1766.
(gdb) b __tcp_push_pending_frames
Breakpoint 4 at 0xffffffff818a44a0: file net/ipv4/tcp_output.c, line 2584.
(gdb) b tcp_wrtie_xmit
Function "tcp_wrtie_xmit" not defined.
Make breakpoint pending on future shared library load? (y or [n]) n
(gdb) b tcp_write_xmit
Breakpoint 5 at 0xffffffff818a32d0: file net/ipv4/tcp_output.c, line 2330.
(gdb) b __tcp_transmit_skb
Breakpoint 6 at 0xffffffff818a2830: file net/ipv4/tcp_output.c, line 1015.
(gdb) 

執行client命令,觀察程序暫停的位置:

Breakpoint 6, __tcp_transmit_skb (sk=0xffff888006478880, 
    skb=0xffff888006871400, clone_it=1, gfp_mask=3264, rcv_nxt=0)
    at net/ipv4/tcp_output.c:1015
1015	{
(gdb) 

並非我們預想的那樣,程序停在了__tcp_transmit_skb,但仔細分析,這應該是三次握手的過程,繼續調試

Breakpoint 6, __tcp_transmit_skb (sk=0xffff888006478880, 
    skb=0xffff888006871400, clone_it=1, gfp_mask=3264, rcv_nxt=0)
    at net/ipv4/tcp_output.c:1015
1015	{
(gdb) c
Continuing.

Breakpoint 6, __tcp_transmit_skb (sk=0xffff888006478880, 
    skb=0xffff88800757a100, clone_it=0, gfp_mask=0, rcv_nxt=1155786088)
    at net/ipv4/tcp_output.c:1015
1015	{
(gdb) c
Continuing.

又有兩次停在了這里,恰恰驗證了猜想,因為這個程序的服務端和客戶端都在同一台主機上,共用了同一個TCP協議棧,在TCP三次握手時,客戶端發送兩次,服務端發送一次,恰好三次。下面我們用客戶端向服務器端發送,分析程序的調用過程:

Breakpoint 1, __sys_sendto (fd=5, buff=0x7ffc33c54bc0, len=2, flags=0, 
    addr=0x0 <fixed_percpu_data>, addr_len=0) at net/socket.c:1929
1929	{
(gdb) c
Continuing.

Breakpoint 2, tcp_sendmsg_locked (sk=0xffff888006479100, 
    msg=0xffffc900001f7e28, size=2) at net/ipv4/tcp.c:1158
1158	{
(gdb) c
Continuing.

Breakpoint 3, tcp_push (sk=0xffff888006479100, flags=0, mss_now=32752, 
    nonagle=0, size_goal=32752) at net/ipv4/tcp.c:699
699		skb = tcp_write_queue_tail(sk);
(gdb) c
Continuing.

Breakpoint 4, __tcp_push_pending_frames (sk=0xffff888006479100, cur_mss=32752, 
    nonagle=0) at net/ipv4/tcp_output.c:2584
2584		if (unlikely(sk->sk_state == TCP_CLOSE))
(gdb) c
Continuing.

Breakpoint 5, tcp_write_xmit (sk=0xffff888006479100, mss_now=32752, nonagle=0, 
    push_one=0, gfp=2592) at net/ipv4/tcp_output.c:2330
2330	{
(gdb) c
Continuing.

Breakpoint 6, __tcp_transmit_skb (sk=0xffff888006479100, 
    skb=0xffff888006871400, clone_it=1, gfp_mask=2592, rcv_nxt=405537035)
    at net/ipv4/tcp_output.c:1015
1015	{
(gdb) c
Continuing.

Breakpoint 6, __tcp_transmit_skb (sk=0xffff888006478880, 
    skb=0xffff88800757a100, clone_it=0, gfp_mask=0, rcv_nxt=1155786090)
    at net/ipv4/tcp_output.c:1015
1015	{
(gdb) c

可以看到,與我們分析的順序是一致的,但是最后__tcp_transmit_skb調用了兩次,經過仔細分析,終於找到原因——這是接收方接收到數據后發送ACK使用的。
驗證完send,來驗證一下recv
將斷點分別設在:
__sys_recvfrom、sock_recvmsg、tcp_recvmsg、__skb_datagram_iter處,以同樣的方式觀察:

Breakpoint 1, __sys_recvfrom (fd=5, ubuf=0x7ffd9428d960, size=1024, flags=0, 
    addr=0x0 <fixed_percpu_data>, addr_len=0x0 <fixed_percpu_data>)
    at net/socket.c:1990
1990	{
(gdb) c
Continuing.

Breakpoint 2, sock_recvmsg (sock=0xffff888006df1900, msg=0xffffc900001f7e28, 
    flags=0) at net/socket.c:891
891	{
(gdb) c
Continuing.

Breakpoint 3, tcp_recvmsg (sk=0xffff888006479100, msg=0xffffc900001f7e28, 
    len=1024, nonblock=0, flags=0, addr_len=0xffffc900001f7df4)
    at net/ipv4/tcp.c:1933
1933	{
(gdb) c

在未發送之前,程序也會暫停在斷點處,根據之前的分析,這也是三次握手的過程,但是為什么沒有__skb_datagram_iter呢?,因為三次握手時,並沒有發送數據過來,所以並沒有數據被拷貝到用戶空間。
同樣,嘗試發送數據觀察調用過程。

Breakpoint 1, __sys_recvfrom (fd=5, ubuf=0x7ffd9428d960, size=1024, flags=0, 
    addr=0x0 <fixed_percpu_data>, addr_len=0x0 <fixed_percpu_data>)
    at net/socket.c:1990
1990	{
(gdb) c
Continuing.

Breakpoint 2, sock_recvmsg (sock=0xffff888006df1900, msg=0xffffc900001f7e28, 
    flags=0) at net/socket.c:891
891	{
(gdb) c
Continuing.

Breakpoint 3, tcp_recvmsg (sk=0xffff888006479100, msg=0xffffc900001f7e28, 
    len=1024, nonblock=0, flags=0, addr_len=0xffffc900001f7df4)
    at net/ipv4/tcp.c:1933
1933	{
(gdb) c
Continuing.

Breakpoint 4, __skb_datagram_iter (skb=0xffff8880068714e0, offset=0, 
    to=0xffffc900001efe38, len=2, fault_short=false, 
    cb=0xffffffff817ff860 <simple_copy_to_iter>, data=0x0 <fixed_percpu_data>)
    at net/core/datagram.c:414
414	{

驗證完畢。


免責聲明!

本站轉載的文章為個人學習借鑒使用,本站對版權不負任何法律責任。如果侵犯了您的隱私權益,請聯系本站郵箱yoyou2525@163.com刪除。



 
粵ICP備18138465號   © 2018-2025 CODEPRJ.COM