OVS 內核KEY值提取及匹配流表代碼分析


原文鏈接:http://ry0117.com/2016/12/24/OVS內核KEY值提取及匹配流表代碼分析/

 

當開啟OVS後,創建datapath類型為system的網橋並為其添加相關接口;OVS網橋內的接口在網卡接收到數據包後,數據包會先進入OVS的內核模塊openvswitch,從數據包上提取key值,並使用key值匹配OVS內核模塊中的流表,當匹配到相應的流表後,則執行流表上相應的動作;

當在OVS內核緩存中匹配不到流表,則將key值信息通過NetLink發送給用戶態的ovs-vswitchd守護進程,由其來決定如何處理數據包。

下面就Linux-3.19版本內核中OpenvSwitch內核模塊中的提取Key值、匹配流表及執行流表動作相關的代碼做一下分析。

 

 

提取KEY值(datapath/flow.c)

Key值信息是匹配流表的前提,key值中包括很多的信息,包括源MAC地址、目的MAC地址、VLAN信息、協議類型、源IP地址,目的IP地址、端口號等信息,所有的key值都可以從skb數據包中提取到。

  1 int
  2 ovs_flow_key_extract(const struct ovs_tunnel_info *tun_info,
  3              struct sk_buff *skb, struct sw_flow_key *key)
  4 {   /* Fills 'key' from tunnel info, skb metadata and (via key_extract) packet headers; returns key_extract()'s result. */
  5     /* Extract metadata from packet. */
  6     if (tun_info) {
  7         memcpy(&key->tun_key, &tun_info->tunnel, sizeof(key->tun_key));
  8         if (tun_info->options) {
  9             BUILD_BUG_ON((1 << (sizeof(tun_info->options_len) *
 10                            8)) - 1
 11                     > sizeof(key->tun_opts));
 12             memcpy(GENEVE_OPTS(key, tun_info->options_len),
 13                    tun_info->options, tun_info->options_len);
 14             key->tun_opts_len = tun_info->options_len;
 15         } else {
 16             key->tun_opts_len = 0;
 17         }
 18     } else  {
 19         key->tun_opts_len = 0;
 20         memset(&key->tun_key, 0, sizeof(key->tun_key));
 21     }
 22     /* Fill the key's physical-layer fields from skb metadata. */
 23     key->phy.priority = skb->priority;
 24     /* Set key->phy.in_port to the port number of the input vport. */
 25     key->phy.in_port = OVS_CB(skb)->input_vport->port_no;
 26     key->phy.skb_mark = skb->mark;
 27     key->ovs_flow_hash = 0;
 28     key->recirc_id = 0;
 29     /* Extract MAC, protocol, IP address, port, etc. from the packet headers. */
 30     return key_extract(skb, key);
 31 }
 32 /**
 33  * key_extract - extracts a flow key from an Ethernet frame.
 34  * @skb: sk_buff that contains the frame, with skb->data pointing to the
 35  * Ethernet header
 36  * @key: output flow key
 37  *
 38  * The caller must ensure that skb->len >= ETH_HLEN.
 39  *
 40  * Returns 0 if successful, otherwise a negative errno value.
 41  *
 42  * Initializes @skb header pointers as follows:
 43  *
 44  *    - skb->mac_header: the Ethernet header.
 45  *
 46  *    - skb->network_header: just past the Ethernet header, or just past the
 47  *      VLAN header, to the first byte of the Ethernet payload.
 48  *
 49  *    - skb->transport_header: If key->eth.type is ETH_P_IP or ETH_P_IPV6
 50  *      on output, then just past the IP header, if one is present and
 51  *      of a correct length, otherwise the same as skb->network_header.
 52  *      For other key->eth.type values it is left untouched.
 53  */
 54 static int
 55 key_extract(struct sk_buff *skb, struct sw_flow_key *key)
 56 {
 57     int error;
 58     struct ethhdr *eth;
 59     /* Flags are always used as part of stats */
 60     key->tp.flags = 0;
 61     /* Reset the L2 (MAC) header pointer. */
 62     skb_reset_mac_header(skb);
 63     /* Link layer.  We are guaranteed to have at least the 14 byte Ethernet
 64      * header in the linear data area.
 65      */
 66     /* Get the Ethernet header and copy the source and destination MAC into the key. */
 67     eth = eth_hdr(skb);
 68     ether_addr_copy(key->eth.src, eth->h_source);
 69     ether_addr_copy(key->eth.dst, eth->h_dest);
 70     /* Advance skb->data past the two MAC addresses:
 71     * if the frame carries an in-band VLAN tag, skb->data now points at that tag;
 72     * otherwise skb->data points at the EtherType field. */
 73     __skb_pull(skb, 2 * ETH_ALEN);
 74     /* We are going to push all headers that we pull, so no need to
 75      * update skb->csum here.
 76      */
 77     /* Extract VLAN information into the key. */
 78     key->eth.tci = 0;
 79     if (vlan_tx_tag_present(skb))
 80         key->eth.tci = htons(skb->vlan_tci);
 81     else if (eth->h_proto == htons(ETH_P_8021Q))
 82         if (unlikely(parse_vlan(skb, key)))
 83             return -ENOMEM;
 84     /* Extract the EtherType, e.g. ETH_P_IP, ETH_P_ARP, ETH_P_IPV6. */
 85     key->eth.type = parse_ethertype(skb);
 86     if (unlikely(key->eth.type == htons(0)))
 87         return -ENOMEM;
 88     /* Reset the L3 header pointer and the MAC length so skb->network_header points at the right place. */
 89     skb_reset_network_header(skb);
 90     skb_reset_mac_len(skb);
 91     __skb_push(skb, skb->data - skb_mac_header(skb));   /* move skb->data back to the MAC header */
 92     /* Network layer. */
 93     /* IPv4 packet. */
 94     if (key->eth.type == htons(ETH_P_IP)) {
 95         struct iphdr *nh;
 96         __be16 offset;
 97         /* Validate the IPv4 header; check_iphdr is defined elsewhere and, per the header comment, sets skb->transport_header when the header is valid. */
 98         error = check_iphdr(skb);
 99         if (unlikely(error)) {
100             /* Invalid IPv4 header. */
101             memset(&key->ip, 0, sizeof(key->ip));
102             memset(&key->ipv4, 0, sizeof(key->ipv4));
103             if (error == -EINVAL) {
104                 /* -EINVAL is not propagated: transport_header is set equal to
105                 * network_header and 0 is returned with key->ip and key->ipv4 zeroed. */
106                 skb->transport_header = skb->network_header;
107                 error = 0;
108             }
109             return error;
110         }
111         /* Get the L3 header and copy the source and destination IP into the key. */
112         nh = ip_hdr(skb);
113         key->ipv4.addr.src = nh->saddr;
114         key->ipv4.addr.dst = nh->daddr;
115         /* Copy the L4 protocol number, TOS and TTL into the key. */
116         key->ip.proto = nh->protocol;
117         key->ip.tos = nh->tos;
118         key->ip.ttl = nh->ttl;
119         /*
120         * Classify IP fragmentation: a non-zero fragment offset means a later
121         * fragment (return without L4 parsing); IP_MF or UDP GSO means first fragment.
122         */
123         offset = nh->frag_off & htons(IP_OFFSET);
124         if (offset) {
125             key->ip.frag = OVS_FRAG_TYPE_LATER;
126             return 0;
127         }
128         if (nh->frag_off & htons(IP_MF) ||
129             skb_shinfo(skb)->gso_type & SKB_GSO_UDP)
130             key->ip.frag = OVS_FRAG_TYPE_FIRST;
131         else
132             key->ip.frag = OVS_FRAG_TYPE_NONE;
133         /* Transport layer. */
134         /* TCP packet. */
135         if (key->ip.proto == IPPROTO_TCP) {
136             if (tcphdr_ok(skb)) {
137                 /* Get the TCP header; copy source port, destination port and flags into the key. */
138                 struct tcphdr *tcp = tcp_hdr(skb);
139                 key->tp.src = tcp->source;
140                 key->tp.dst = tcp->dest;
141                 key->tp.flags = TCP_FLAGS_BE16(tcp);
142             } else {
143                 memset(&key->tp, 0, sizeof(key->tp));
144             }
145         /* UDP packet. */
146         } else if (key->ip.proto == IPPROTO_UDP) {
147             if (udphdr_ok(skb)) {
148                 /* Get the UDP header; copy source and destination port into the key. */
149                 struct udphdr *udp = udp_hdr(skb);
150                 key->tp.src = udp->source;
151                 key->tp.dst = udp->dest;
152             } else {
153                 memset(&key->tp, 0, sizeof(key->tp));
154             }
155         /* SCTP packet. */
156         } else if (key->ip.proto == IPPROTO_SCTP) {
157             if (sctphdr_ok(skb)) {
158                 /* Get the SCTP header; copy source and destination port into the key. */
159                 struct sctphdr *sctp = sctp_hdr(skb);
160                 key->tp.src = sctp->source;
161                 key->tp.dst = sctp->dest;
162             } else {
163                 memset(&key->tp, 0, sizeof(key->tp));
164             }
165         /* ICMP packet. */
166         } else if (key->ip.proto == IPPROTO_ICMP) {
167             if (icmphdr_ok(skb)) {
168                 /* Get the ICMP header; copy the ICMP type and code into the key. */
169                 struct icmphdr *icmp = icmp_hdr(skb);
170                 /* The ICMP type and code fields use the 16-bit
171                  * transport port fields, so we need to store
172                  * them in 16-bit network byte order. */
173                 key->tp.src = htons(icmp->type);
174                 key->tp.dst = htons(icmp->code);
175             } else {
176                 memset(&key->tp, 0, sizeof(key->tp));
177             }
178         }
179     /* ARP or RARP packet. */
180     } else if (key->eth.type == htons(ETH_P_ARP) ||
181            key->eth.type == htons(ETH_P_RARP)) {
182         struct arp_eth_header *arp;
183         bool arp_available = arphdr_ok(skb);
184         /* Get the ARP header pointer. */
185         arp = (struct arp_eth_header *)skb_network_header(skb);
186         if (arp_available &&
187             arp->ar_hrd == htons(ARPHRD_ETHER) &&
188             arp->ar_pro == htons(ETH_P_IP) &&
189             arp->ar_hln == ETH_ALEN &&
190             arp->ar_pln == 4) {
191             /* Store the ARP opcode in key->ip.proto. */
192             /* We only match on the lower 8 bits of the opcode. */
193             if (ntohs(arp->ar_op) <= 0xff)
194                 key->ip.proto = ntohs(arp->ar_op);
195             else
196                 key->ip.proto = 0;
197             /* Copy sender/target IP and sender/target hardware addresses into the key. */
198             memcpy(&key->ipv4.addr.src, arp->ar_sip, sizeof(key->ipv4.addr.src));
199             memcpy(&key->ipv4.addr.dst, arp->ar_tip, sizeof(key->ipv4.addr.dst));
200             ether_addr_copy(key->ipv4.arp.sha, arp->ar_sha);
201             ether_addr_copy(key->ipv4.arp.tha, arp->ar_tha);
202         } else {
203             memset(&key->ip, 0, sizeof(key->ip));
204             memset(&key->ipv4, 0, sizeof(key->ipv4));
205         }
206     /* MPLS: copy the top label stack entry into the key. */
207     } else if (eth_p_mpls(key->eth.type)) {
208         size_t stack_len = MPLS_HLEN;
209         /* In the presence of an MPLS label stack the end of the L2
210          * header and the beginning of the L3 header differ.
211          *
212          * Advance network_header to the beginning of the L3
213          * header. mac_len corresponds to the end of the L2 header.
214          */
215         while (1) {
216             __be32 lse;
217             error = check_header(skb, skb->mac_len + stack_len);
218             if (unlikely(error))
219                 return 0;   /* a check_header failure is not reported to the caller */
220             memcpy(&lse, skb_network_header(skb), MPLS_HLEN);
221             if (stack_len == MPLS_HLEN)
222                 memcpy(&key->mpls.top_lse, &lse, MPLS_HLEN);
223             skb_set_network_header(skb, skb->mac_len + stack_len);
224             if (lse & htonl(MPLS_LS_S_MASK))
225                 break;
226             stack_len += MPLS_HLEN;
227         }
228     /* IPv6: extract IPv6 information into the key. */
229     } else if (key->eth.type == htons(ETH_P_IPV6)) {
230         int nh_len;             /* IPv6 Header + Extensions */
231         nh_len = parse_ipv6hdr(skb, key);
232         if (unlikely(nh_len < 0)) {
233             memset(&key->ip, 0, sizeof(key->ip));
234             memset(&key->ipv6.addr, 0, sizeof(key->ipv6.addr));
235             if (nh_len == -EINVAL) {
236                 skb->transport_header = skb->network_header;
237                 error = 0;
238             } else {
239                 error = nh_len;
240             }
241             return error;
242         }
243         if (key->ip.frag == OVS_FRAG_TYPE_LATER)
244             return 0;
245         if (skb_shinfo(skb)->gso_type & SKB_GSO_UDP)
246             key->ip.frag = OVS_FRAG_TYPE_FIRST;
247         /* Transport layer. */
248         if (key->ip.proto == NEXTHDR_TCP) {
249             if (tcphdr_ok(skb)) {
250                 struct tcphdr *tcp = tcp_hdr(skb);
251                 key->tp.src = tcp->source;
252                 key->tp.dst = tcp->dest;
253                 key->tp.flags = TCP_FLAGS_BE16(tcp);
254             } else {
255                 memset(&key->tp, 0, sizeof(key->tp));
256             }
257         } else if (key->ip.proto == NEXTHDR_UDP) {
258             if (udphdr_ok(skb)) {
259                 struct udphdr *udp = udp_hdr(skb);
260                 key->tp.src = udp->source;
261                 key->tp.dst = udp->dest;
262             } else {
263                 memset(&key->tp, 0, sizeof(key->tp));
264             }
265         } else if (key->ip.proto == NEXTHDR_SCTP) {
266             if (sctphdr_ok(skb)) {
267                 struct sctphdr *sctp = sctp_hdr(skb);
268                 key->tp.src = sctp->source;
269                 key->tp.dst = sctp->dest;
270             } else {
271                 memset(&key->tp, 0, sizeof(key->tp));
272             }
273         } else if (key->ip.proto == NEXTHDR_ICMP) {
274             if (icmp6hdr_ok(skb)) {
275                 error = parse_icmpv6(skb, key, nh_len);
276                 if (error)
277                     return error;
278             } else {
279                 memset(&key->tp, 0, sizeof(key->tp));
280             }
281         }
282     }
283     return 0;
284 }

 

根據KEY值匹配流表(datapath/datapath.c)

通過ovs_flow_key_extract函數及key_extract函數從skb中提取所有需要的key值後,下面就是使用key值來匹配OVS內核模塊openvswitch中緩存的流表信息,並在匹配到流表後執行流表中相應的動作處理數據包。若在內核中未匹配到流表,則通過Netlink消息將key值發送到用戶態ovs-vswitchd進程,由用戶態進程來決定如何處理數據包。

 1 /* Must be called with rcu_read_lock. */
 2 void
 3 ovs_dp_process_packet(struct sk_buff *skb, struct sw_flow_key *key)
 4 {
 5     const struct vport *p = OVS_CB(skb)->input_vport;
 6     struct datapath *dp = p->dp;
 7     struct sw_flow *flow;
 8     struct sw_flow_actions *sf_acts;
 9     struct dp_stats_percpu *stats;
10     u64 *stats_counter;
11     u32 n_mask_hit;
12     /* Get this CPU's instance of dp->stats_percpu. */
13     stats = this_cpu_ptr(dp->stats_percpu);
14     /* Look up flow. */
15     /* Search dp->table for a flow matching 'key'; n_mask_hit is filled in through the pointer. */
16     flow = ovs_flow_tbl_lookup_stats(&dp->table, key, &n_mask_hit);
17     if (unlikely(!flow)) {
18         /*
19         * No flow matched: hand the packet and key to ovs_dp_upcall()
20         * as an OVS_PACKET_CMD_MISS upcall. Per the surrounding article this
21         * reaches the userspace ovs-vswitchd process via Netlink, which decides how to handle the packet.
22         */
23         struct dp_upcall_info upcall;
24         int error;
25         upcall.cmd = OVS_PACKET_CMD_MISS;
26         upcall.userdata = NULL;
27         upcall.portid = ovs_vport_find_upcall_portid(p, skb);
28         upcall.egress_tun_info = NULL;
29         /* Issue the upcall; the skb is released afterwards whether or not it succeeded. */
30         error = ovs_dp_upcall(dp, skb, key, &upcall);
31         if (unlikely(error))
32             kfree_skb(skb);
33         else
34             consume_skb(skb);
35         stats_counter = &stats->n_missed;
36         goto out;
37     }
38     /* A flow matched: update that flow's statistics. */
39     ovs_flow_stats_update(flow, key->tp.flags, skb);
40     /* Fetch the matched flow's action list. */
41     sf_acts = rcu_dereference(flow->sf_acts);
42     /* Execute the matched flow's actions. */
43     ovs_execute_actions(dp, skb, sf_acts, key);
44     stats_counter = &stats->n_hit;
45 out:
46     /* Update datapath statistics. */
47     u64_stats_update_begin(&stats->syncp);
48     (*stats_counter)++;
49     stats->n_mask_hit += n_mask_hit;
50     u64_stats_update_end(&stats->syncp);
51 }

 

執行流表ACTION(datapath/actions.c)

匹配到對應的流表后,從流表中獲取流表的動作,循環遍歷所有的flow action,執行相應的action動作。

 

  1 /* Execute a list of actions against 'skb'. */
  2 int
  3 ovs_execute_actions(struct datapath *dp, struct sk_buff *skb,
  4             const struct sw_flow_actions *acts,
  5             struct sw_flow_key *key)
  6 {
  7     int level = this_cpu_read(exec_actions_level);   /* per-CPU nesting depth on entry */
  8     int err;
  9     this_cpu_inc(exec_actions_level);
 10     OVS_CB(skb)->egress_tun_info = NULL;
 11     /* Run the action list. */
 12     err = do_execute_actions(dp, skb, key,
 13                  acts->actions, acts->actions_len);
 14     /* Only the outermost invocation (level was 0 on entry) processes the deferred actions. */
 15     if (!level)
 16         process_deferred_actions(dp);
 17     this_cpu_dec(exec_actions_level);
 18     return err;
 19 }
 20 /* Execute a list of actions against 'skb'. */
 21 static int
 22 do_execute_actions(struct datapath *dp, struct sk_buff *skb,
 23                   struct sw_flow_key *key,
 24                   const struct nlattr *attr, int len)
 25 {
 26     /* Every output action needs a separate clone of 'skb', but the common
 27      * case is just a single output action, so that doing a clone and
 28      * then freeing the original skbuff is wasteful.  So the following code
 29      * is slightly obscure just to avoid that.
 30      */
 31     int prev_port = -1;
 32     const struct nlattr *a;
 33     int rem;
 34     for (a = attr, rem = len; rem > 0;
 35          a = nla_next(a, &rem)) {
 36         int err = 0;
 37         if (unlikely(prev_port != -1)) {
 38             /* An output port is pending and more actions follow: clone the skb
 39             * and send the clone out of prev_port. */
 40             struct sk_buff *out_skb = skb_clone(skb, GFP_ATOMIC);
 41             if (out_skb)
 42                 do_output(dp, out_skb, prev_port);
 43             prev_port = -1;
 44         }
 45         switch (nla_type(a)) {
 46         /* Record the output port; the actual send is deferred. */
 47         case OVS_ACTION_ATTR_OUTPUT:
 48             prev_port = nla_get_u32(a);
 49             break;
 50         /* Send the packet to the userspace process. */
 51         case OVS_ACTION_ATTR_USERSPACE:
 52             output_userspace(dp, skb, key, a);
 53             break;
 54         /* Hash action; per the article author it assigns key->ovs_flow_hash (execute_hash is not shown here). */
 55         case OVS_ACTION_ATTR_HASH:
 56             execute_hash(skb, key, a);
 57             break;
 58         /* MPLS push action. */
 59         case OVS_ACTION_ATTR_PUSH_MPLS:
 60             err = push_mpls(skb, key, nla_data(a));
 61             break;
 62         /* MPLS pop action. */
 63         case OVS_ACTION_ATTR_POP_MPLS:
 64             err = pop_mpls(skb, key, nla_get_be16(a));
 65             break;
 66         /* Push a VLAN tag. */
 67         case OVS_ACTION_ATTR_PUSH_VLAN:
 68             err = push_vlan(skb, key, nla_data(a));
 69             break;
 70         /* Pop the VLAN tag. */
 71         case OVS_ACTION_ATTR_POP_VLAN:
 72             err = pop_vlan(skb, key);
 73             break;
 74         /* Recirculation; per the article author the skb and key are added to the deferred actions (execute_recirc is not shown here). */
 75         case OVS_ACTION_ATTR_RECIRC:
 76             err = execute_recirc(dp, skb, key, a, rem);
 77             if (nla_is_last(a, rem)) {
 78                 /* If this is the last action, the skb has
 79                  * been consumed or freed.
 80                  * Return immediately.
 81                  */
 82                 return err;
 83             }
 84             break;
 85         /* Modify the packet as described by the SET action. */
 86         case OVS_ACTION_ATTR_SET:
 87             err = execute_set_action(skb, key, nla_data(a));
 88             break;
 89         case OVS_ACTION_ATTR_SAMPLE:
 90             err = sample(dp, skb, key, a);
 91             break;
 92         }
 93         if (unlikely(err)) {
 94             kfree_skb(skb);
 95             return err;
 96         }
 97     }
 98     if (prev_port != -1)
 99         do_output(dp, skb, prev_port);   /* final output uses the original skb, no clone */
100     else
101         consume_skb(skb);
102     return 0;
103 }

 

OUTPUT ACTION(datapath/actions.c)

流表的OUTPUT動作指定了數據包發送的出接口信息,調用do_output->ovs_vport_send->vport->ops->send發送函數將數據包從output action對應的接口發送出去。

 1 /* do_output: send the packet on out_port, or free it when no vport is found for that port. */
 2 static void
 3 do_output(struct datapath *dp, struct sk_buff *skb, int out_port)
 4 {
 5     struct vport *vport = ovs_vport_rcu(dp, out_port);
 6     if (likely(vport))
 7         ovs_vport_send(vport, skb);
 8     else
 9         kfree_skb(skb);
10 }
11 /**
12  *    ovs_vport_send - send a packet on a device
13  *
14  * @vport: vport on which to send the packet
15  * @skb: skb to send
16  *
17  * Sends the given packet and returns the length of data sent.  Either ovs
18  * lock or rcu_read_lock must be held.
19  */
20 int ovs_vport_send(struct vport *vport, struct sk_buff *skb)
21 {
22     /* Call the vport->ops->send callback to transmit the packet. */ 
23     int sent = vport->ops->send(vport, skb);
24     if (likely(sent > 0)) {
25         struct pcpu_sw_netstats *stats;
26         /* On success, update this CPU's TX packet and byte counters in vport->percpu_stats. */
27         stats = this_cpu_ptr(vport->percpu_stats);
28         u64_stats_update_begin(&stats->syncp);
29         stats->tx_packets++;
30         stats->tx_bytes += sent;
31         u64_stats_update_end(&stats->syncp);
32     } else if (sent < 0) {
33         ovs_vport_record_error(vport, VPORT_E_TX_ERROR);
34     } else {
35         ovs_vport_record_error(vport, VPORT_E_TX_DROPPED);
36     }
37     return sent;
38 }

 

當OVS接口類型為system時,vport->ops->send函數為netdev_send:

 

 1 /* Send function used when an OVS flow's output action transmits a packet; returns the skb length handed to the device, or 0 if dropped. */
 2 static int
 3 netdev_send(struct vport *vport, struct sk_buff *skb)
 4 {
 5     struct netdev_vport *netdev_vport = netdev_vport_priv(vport);
 6     int mtu = netdev_vport->dev->mtu;
 7     int len;
 8     /* Drop the packet if it is longer than the MTU and is not a GSO packet. */
 9     if (unlikely(packet_length(skb) > mtu && !skb_is_gso(skb))) {
10         net_warn_ratelimited("%s: dropped over-mtu packet: %d > %d\n",
11                      netdev_vport->dev->name,
12                      packet_length(skb), mtu);
13         goto drop;
14     }
15     /* Set skb->dev to the output action's network device. */
16     skb->dev = netdev_vport->dev;
17     len = skb->len;   /* saved before the skb is handed off */
18     /* Finally call dev_queue_xmit to transmit the packet; its return value is not checked. */
19     dev_queue_xmit(skb);
20     return len;
21 drop:
22     kfree_skb(skb);
23     return 0;
24 }

 

SET ACTION(datapath/actions.c)

流表SET動作會修改數據包中指定的信息,如skb->priority skb->mark等信息。

 1 static int
 2 execute_set_action(struct sk_buff *skb, struct sw_flow_key *key,
 3                   const struct nlattr *nested_attr)
 4 {   /* Applies one nested SET attribute to the packet; returns 0 or the error from the set_* helper. */
 5     int err = 0;
 6     switch (nla_type(nested_attr)) {   /* unhandled attribute types fall through and return 0 */
 7     case OVS_KEY_ATTR_PRIORITY:
 8         skb->priority = nla_get_u32(nested_attr);
 9         key->phy.priority = skb->priority;   /* keep the flow key in sync with the skb */
10         break;
11     case OVS_KEY_ATTR_SKB_MARK:
12         skb->mark = nla_get_u32(nested_attr);
13         key->phy.skb_mark = skb->mark;   /* keep the flow key in sync with the skb */
14         break;
15     case OVS_KEY_ATTR_TUNNEL_INFO:
16         OVS_CB(skb)->egress_tun_info = nla_data(nested_attr);   /* stores a pointer into the attribute payload, not a copy */
17         break;
18     case OVS_KEY_ATTR_ETHERNET:
19         err = set_eth_addr(skb, key, nla_data(nested_attr));
20         break;
21     case OVS_KEY_ATTR_IPV4:
22         err = set_ipv4(skb, key, nla_data(nested_attr));
23         break;
24     case OVS_KEY_ATTR_IPV6:
25         err = set_ipv6(skb, key, nla_data(nested_attr));
26         break;
27     case OVS_KEY_ATTR_TCP:
28         err = set_tcp(skb, key, nla_data(nested_attr));
29         break;
30     case OVS_KEY_ATTR_UDP:
31         err = set_udp(skb, key, nla_data(nested_attr));
32         break;
33     case OVS_KEY_ATTR_SCTP:
34         err = set_sctp(skb, key, nla_data(nested_attr));
35         break;
36     case OVS_KEY_ATTR_MPLS:
37         err = set_mpls(skb, key, nla_data(nested_attr));
38         break;
39     }
40     return err;
41 }

 

PUSH_VLAN ACTION(datapath/actions.c)

流表PUSH_VLAN動作會在數據包中添加對應的VLAN tag信息。

 1 static int
 2 push_vlan(struct sk_buff *skb, struct sw_flow_key *key,
 3              const struct ovs_action_push_vlan *vlan)
 4 {   /* Updates the flow key for the new tag, then pushes the tag via skb_vlan_push() and returns its result. */
 5     if (vlan_tx_tag_present(skb))
 6         invalidate_flow_key(key);   /* a tag is already present in skb metadata: the key is invalidated rather than updated */
 7     else
 8         key->eth.tci = vlan->vlan_tci;
 9     return skb_vlan_push(skb, vlan->vlan_tpid,
10                  ntohs(vlan->vlan_tci) & ~VLAN_TAG_PRESENT);   /* TCI passed in host order with the PRESENT bit cleared */
11 }
12 int
13 skb_vlan_push(struct sk_buff *skb, __be16 vlan_proto, u16 vlan_tci)
14 {   /* If a tag is already held in skb metadata, first write it into the packet data; then store the new tag in metadata. Returns 0 or the insert error. */
15     if (vlan_tx_tag_present(skb)) {
16         unsigned int offset = skb->data - skb_mac_header(skb);
17         int err;
18         /* __vlan_insert_tag expect skb->data pointing to mac header.
19          * So change skb->data before calling it and change back to
20          * original position later
21          */
22         __skb_push(skb, offset);
23         err = __vlan_insert_tag(skb, skb->vlan_proto,
24                     vlan_tx_tag_get(skb));
25         if (err)
26             return err;   /* NOTE(review): returns without the matching __skb_pull(skb, offset) - confirm callers tolerate this */
27         skb->protocol = skb->vlan_proto;
28         skb->mac_len += VLAN_HLEN;
29         __skb_pull(skb, offset);
30         if (skb->ip_summed == CHECKSUM_COMPLETE)
31             skb->csum = csum_add(skb->csum, csum_partial(skb->data
32                     + (2 * ETH_ALEN), VLAN_HLEN, 0));   /* add the VLAN_HLEN bytes at skb->data + 2 * ETH_ALEN to the checksum */
33     }
34     __vlan_hwaccel_put_tag(skb, vlan_proto, vlan_tci);
35     return 0;
36 }
37 static inline void
38 __vlan_hwaccel_put_tag(struct sk_buff *skb,
39                       __be16 vlan_proto, u16 vlan_tci)
40 {
41     /* Store the VLAN protocol and TCI in the skb's fields, with VLAN_TAG_PRESENT set in the TCI. */
42     skb->vlan_proto = vlan_proto;
43     skb->vlan_tci = VLAN_TAG_PRESENT | vlan_tci;
44 }

 

POP_VLAN ACTION(datapath/actions.c)

流表POP_VLAN動作移除數據包中的Vlan tag信息並更新數據包中的校驗和

 1 static int
 2 pop_vlan(struct sk_buff *skb, struct sw_flow_key *key)
 3 {   /* Pops one VLAN tag via skb_vlan_pop(), then updates the flow key; returns skb_vlan_pop()'s result. */
 4     int err;
 5     err = skb_vlan_pop(skb);
 6     if (vlan_tx_tag_present(skb))
 7         invalidate_flow_key(key);   /* a tag is still present after the pop: the key is invalidated */
 8     else
 9         key->eth.tci = 0;
10     return err;
11 }
12 int
13 skb_vlan_pop(struct sk_buff *skb)
14 {   /* Removes the outermost VLAN tag; if another tag follows in the packet data it is moved into skb metadata. Returns 0 or a __skb_vlan_pop() error. */
15     u16 vlan_tci;
16     __be16 vlan_proto;
17     int err;
18     if (likely(vlan_tx_tag_present(skb))) {
19         skb->vlan_tci = 0;   /* tag was held in skb metadata: clearing the field removes it */
20     } else {
21         if (unlikely((skb->protocol != htons(ETH_P_8021Q) &&
22                   skb->protocol != htons(ETH_P_8021AD)) ||
23                  skb->len < VLAN_ETH_HLEN))
24             return 0;   /* no VLAN header in the packet data: nothing to pop */
25         err = __skb_vlan_pop(skb, &vlan_tci);
26         if (err)
27             return err;
28     }
29     /* move next vlan tag to hw accel tag */
30     if (likely((skb->protocol != htons(ETH_P_8021Q) &&
31             skb->protocol != htons(ETH_P_8021AD)) ||
32            skb->len < VLAN_ETH_HLEN))
33         return 0;
34     vlan_proto = skb->protocol;   /* saved before __skb_vlan_pop() is called */
35     err = __skb_vlan_pop(skb, &vlan_tci);
36     if (unlikely(err))
37         return err;
38     __vlan_hwaccel_put_tag(skb, vlan_proto, vlan_tci);
39     return 0;
40 }
41 /* remove VLAN header from packet and update csum accordingly. */
42 static int __skb_vlan_pop(struct sk_buff *skb, u16 *vlan_tci)
43 {
44     struct vlan_hdr *vhdr;
45     unsigned int offset = skb->data - skb_mac_header(skb);
46     int err;
47     __skb_push(skb, offset);   /* move skb->data back to the MAC header */
48     err = skb_ensure_writable(skb, VLAN_ETH_HLEN);
49     if (unlikely(err))
50         goto pull;
51     
52     skb_postpull_rcsum(skb, skb->data + (2 * ETH_ALEN), VLAN_HLEN);
53     vhdr = (struct vlan_hdr *)(skb->data + ETH_HLEN);
54     *vlan_tci = ntohs(vhdr->h_vlan_TCI);   /* removed tag's TCI is returned in host byte order */
55     memmove(skb->data + VLAN_HLEN, skb->data, 2 * ETH_ALEN);   /* shift both MAC addresses forward by VLAN_HLEN */
56     __skb_pull(skb, VLAN_HLEN);
57     vlan_set_encap_proto(skb, vhdr);
58     skb->mac_header += VLAN_HLEN;
59     if (skb_network_offset(skb) < ETH_HLEN)
60         skb_set_network_header(skb, ETH_HLEN);
61     skb_reset_mac_len(skb);
62 pull:
63     __skb_pull(skb, offset);   /* restore the caller's skb->data offset, on success and on error */
64     return err;
65 }

 


免責聲明!

本站轉載的文章為個人學習借鑒使用,本站對版權不負任何法律責任。如果侵犯了您的隱私權益,請聯系本站郵箱yoyou2525@163.com刪除。



 
粵ICP備18138465號   © 2018-2025 CODEPRJ.COM