概述
tcp_transmit_skb的作用是克隆或者復制skb,構造skb中的tcp首部,並調用網絡層的發送函數發送skb;在發送前,首先需要克隆或者復制skb,因為在成功發送到網絡設備之后,skb會被釋放,而tcp層不能真正釋放它,需要等到收到對該數據段的ack才可以釋放;然后構造tcp首部和選項;最后調用網絡層提供的發送回調函數發送skb,ip層的回調函數為ip_queue_xmit;
源碼分析
1 static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it, 2 gfp_t gfp_mask) 3 { 4 const struct inet_connection_sock *icsk = inet_csk(sk); 5 struct inet_sock *inet; 6 struct tcp_sock *tp; 7 struct tcp_skb_cb *tcb; 8 struct tcp_out_options opts; 9 unsigned int tcp_options_size, tcp_header_size; 10 struct tcp_md5sig_key *md5; 11 struct tcphdr *th; 12 int err; 13 14 BUG_ON(!skb || !tcp_skb_pcount(skb)); 15 tp = tcp_sk(sk); 16 17 /* 需要克隆 */ 18 if (clone_it) { 19 skb_mstamp_get(&skb->skb_mstamp); 20 TCP_SKB_CB(skb)->tx.in_flight = TCP_SKB_CB(skb)->end_seq 21 - tp->snd_una; 22 tcp_rate_skb_sent(sk, skb); 23 24 /* 如果skb已經是被克隆過的,那么只能復制 */ 25 if (unlikely(skb_cloned(skb))) 26 skb = pskb_copy(skb, gfp_mask); 27 /* 未被克隆過,則克隆之 */ 28 else 29 skb = skb_clone(skb, gfp_mask); 30 31 /* 復制或者克隆失敗 */ 32 if (unlikely(!skb)) 33 return -ENOBUFS; 34 } 35 36 inet = inet_sk(sk); 37 tcb = TCP_SKB_CB(skb); 38 memset(&opts, 0, sizeof(opts)); 39 40 /* 計算syn包tcp選項長度 */ 41 if (unlikely(tcb->tcp_flags & TCPHDR_SYN)) 42 tcp_options_size = tcp_syn_options(sk, skb, &opts, &md5); 43 /* 計算已連接狀態tcp選項長度 */ 44 else 45 tcp_options_size = tcp_established_options(sk, skb, &opts, 46 &md5); 47 /* 計算tcp頭部長度 */ 48 tcp_header_size = tcp_options_size + sizeof(struct tcphdr); 49 50 /* if no packet is in qdisc/device queue, then allow XPS to select 51 * another queue. We can be called from tcp_tsq_handler() 52 * which holds one reference to sk_wmem_alloc. 53 * 54 * TODO: Ideally, in-flight pure ACK packets should not matter here. 55 * One way to get this would be to set skb->truesize = 2 on them. 56 */ 57 skb->ooo_okay = sk_wmem_alloc_get(sk) < SKB_TRUESIZE(1); 58 59 /* If we had to use memory reserve to allocate this skb, 60 * this might cause drops if packet is looped back : 61 * Other socket might not have SOCK_MEMALLOC. 62 * Packets not looped back do not care about pfmemalloc. 
63 */ 64 skb->pfmemalloc = 0; 65 66 /* 加入tcp頭 */ 67 skb_push(skb, tcp_header_size); 68 skb_reset_transport_header(skb); 69 70 /* 與控制塊解除關聯 */ 71 skb_orphan(skb); 72 73 /* 與控制塊建立關聯 */ 74 skb->sk = sk; 75 skb->destructor = skb_is_tcp_pure_ack(skb) ? __sock_wfree : tcp_wfree; 76 skb_set_hash_from_sk(skb, sk); 77 78 /* 增加分配的內存 */ 79 atomic_add(skb->truesize, &sk->sk_wmem_alloc); 80 81 skb_set_dst_pending_confirm(skb, sk->sk_dst_pending_confirm); 82 83 /* Build TCP header and checksum it. */ 84 /* 構造tcp頭 */ 85 th = (struct tcphdr *)skb->data; 86 th->source = inet->inet_sport; 87 th->dest = inet->inet_dport; 88 th->seq = htonl(tcb->seq); 89 th->ack_seq = htonl(tp->rcv_nxt); 90 *(((__be16 *)th) + 6) = htons(((tcp_header_size >> 2) << 12) | 91 tcb->tcp_flags); 92 93 th->check = 0; 94 th->urg_ptr = 0; 95 96 /* The urg_mode check is necessary during a below snd_una win probe */ 97 /* 緊急模式 */ 98 if (unlikely(tcp_urg_mode(tp) && before(tcb->seq, tp->snd_up))) { 99 if (before(tp->snd_up, tcb->seq + 0x10000)) { 100 th->urg_ptr = htons(tp->snd_up - tcb->seq); 101 th->urg = 1; 102 } else if (after(tcb->seq + 0xFFFF, tp->snd_nxt)) { 103 th->urg_ptr = htons(0xFFFF); 104 th->urg = 1; 105 } 106 } 107 108 /* 寫入tcp選項 */ 109 tcp_options_write((__be32 *)(th + 1), tp, &opts); 110 skb_shinfo(skb)->gso_type = sk->sk_gso_type; 111 112 /* syn需要選擇通告窗口 */ 113 if (likely(!(tcb->tcp_flags & TCPHDR_SYN))) { 114 th->window = htons(tcp_select_window(sk)); 115 tcp_ecn_send(sk, skb, th, tcp_header_size); 116 } 117 /* 其他需要設置接收窗口 */ 118 else { 119 /* RFC1323: The window in SYN & SYN/ACK segments 120 * is never scaled. 
121 */ 122 th->window = htons(min(tp->rcv_wnd, 65535U)); 123 } 124 #ifdef CONFIG_TCP_MD5SIG 125 /* Calculate the MD5 hash, as we have all we need now */ 126 if (md5) { 127 sk_nocaps_add(sk, NETIF_F_GSO_MASK); 128 tp->af_specific->calc_md5_hash(opts.hash_location, 129 md5, sk, skb); 130 } 131 #endif 132 /* 計算校驗和 */ 133 icsk->icsk_af_ops->send_check(sk, skb); 134 135 /* ack處理,快速模式數量-以及定時器清除 */ 136 if (likely(tcb->tcp_flags & TCPHDR_ACK)) 137 tcp_event_ack_sent(sk, tcp_skb_pcount(skb)); 138 139 /* 有數據要發送 */ 140 if (skb->len != tcp_header_size) { 141 tcp_event_data_sent(tp, sk); 142 tp->data_segs_out += tcp_skb_pcount(skb); 143 } 144 145 /* 統計分段數 */ 146 if (after(tcb->end_seq, tp->snd_nxt) || tcb->seq == tcb->end_seq) 147 TCP_ADD_STATS(sock_net(sk), TCP_MIB_OUTSEGS, 148 tcp_skb_pcount(skb)); 149 150 /* 發送的總分段數統計 */ 151 tp->segs_out += tcp_skb_pcount(skb); 152 153 /* OK, its time to fill skb_shinfo(skb)->gso_{segs|size} */ 154 /* skb中分段數統計 */ 155 skb_shinfo(skb)->gso_segs = tcp_skb_pcount(skb); 156 skb_shinfo(skb)->gso_size = tcp_skb_mss(skb); 157 158 /* Our usage of tstamp should remain private */ 159 skb->tstamp = 0; 160 161 /* Cleanup our debris for IP stacks */ 162 /* 清空tcb,ip層要使用 */ 163 memset(skb->cb, 0, max(sizeof(struct inet_skb_parm), 164 sizeof(struct inet6_skb_parm))); 165 166 /* 發送skb */ 167 err = icsk->icsk_af_ops->queue_xmit(sk, skb, &inet->cork.fl); 168 169 /* 發送成功或失敗 */ 170 if (likely(err <= 0)) 171 return err; 172 173 /* 擁塞控制 */ 174 175 /* 進入cwr */ 176 tcp_enter_cwr(sk); 177 178 /* 根據err返回成功與否 */ 179 return net_xmit_eval(err); 180 }