net_device結構是鏈路層(二層)中一個非常重要的結構,成員很多,包含了硬件信息、接口信息、其他輔助信息,以及設備操作函數等;下面對其中重要字段的含義進行了標註。目前仍在閱讀代碼的過程中,某些字段的含義會在後面進行補充或更新。
1 struct net_device { 2 /* 設備名稱,如eth0 */ 3 char name[IFNAMSIZ]; 4 /* 名稱hash */ 5 struct hlist_node name_hlist; 6 char *ifalias; 7 /* 8 * I/O specific fields 9 * FIXME: Merge these and struct ifmap into one 10 */ 11 /* 12 描述設備所用的共享內存,用於設備與內核溝通 13 其初始化和訪問只會在設備驅動程序內進行 14 */ 15 unsigned long mem_end; 16 unsigned long mem_start; 17 18 /* 設備自有內存映射到I/O內存的起始地址 */ 19 unsigned long base_addr; 20 21 /* 22 設備與內核對話的中斷編號,此值可由多個設備共享 23 驅動程序使用request_irq函數分配此變量,使用free_irq予以釋放 24 */ 25 int irq; 26 27 /* 偵測網絡狀態的改變次數 */ 28 atomic_t carrier_changes; 29 30 /* 31 * Some hardware also needs these fields (state,dev_list, 32 * napi_list,unreg_list,close_list) but they are not 33 * part of the usual set specified in Space.c. 34 */ 35 36 /* 37 網絡隊列子系統使用的一組標識 38 由__LINK_STATE_xxx標識 39 */ 40 unsigned long state; 41 42 struct list_head dev_list; 43 struct list_head napi_list; 44 struct list_head unreg_list; 45 struct list_head close_list; 46 47 /* 當前設備所有協議的鏈表 */ 48 struct list_head ptype_all; 49 /* 當前設備特定協議的鏈表 */ 50 struct list_head ptype_specific; 51 52 struct { 53 struct list_head upper; 54 struct list_head lower; 55 } adj_list; 56 57 /* 58 用於存在其他一些設備功能 59 可報告適配卡的功能,以便與CPU通信 60 使用NETIF_F_XXX標識功能特性 61 */ 62 netdev_features_t features; 63 netdev_features_t hw_features; 64 netdev_features_t wanted_features; 65 netdev_features_t vlan_features; 66 netdev_features_t hw_enc_features; 67 netdev_features_t mpls_features; 68 netdev_features_t gso_partial_features; 69 70 /* 網絡設備索引號 */ 71 int ifindex; 72 73 /* 設備組,默認都屬於0組 */ 74 int group; 75 76 struct net_device_stats stats; 77 78 atomic_long_t rx_dropped; 79 atomic_long_t tx_dropped; 80 atomic_long_t rx_nohandler; 81 82 #ifdef CONFIG_WIRELESS_EXT 83 const struct iw_handler_def *wireless_handlers; 84 struct iw_public_data *wireless_data; 85 #endif 86 /* 設備操作接口 */ 87 const struct net_device_ops *netdev_ops; 88 /* ethtool操作接口 */ 89 const struct ethtool_ops *ethtool_ops; 90 #ifdef CONFIG_NET_SWITCHDEV 91 const struct switchdev_ops *switchdev_ops; 92 #endif 
93 #ifdef CONFIG_NET_L3_MASTER_DEV 94 const struct l3mdev_ops *l3mdev_ops; 95 #endif 96 #if IS_ENABLED(CONFIG_IPV6) 97 const struct ndisc_ops *ndisc_ops; 98 #endif 99 100 #ifdef CONFIG_XFRM 101 const struct xfrmdev_ops *xfrmdev_ops; 102 #endif 103 104 /* 頭部一些操作,如鏈路層緩存,校驗等 */ 105 const struct header_ops *header_ops; 106 107 /* 標識接口特性,IFF_XXX,如IFF_UP */ 108 unsigned int flags; 109 110 /* 111 用於存儲用戶空間不可見的標識 112 由VLAN和Bridge虛擬設備使用 113 */ 114 unsigned int priv_flags; 115 116 /* 幾乎不使用,為了兼容保留 */ 117 unsigned short gflags; 118 119 /* 結構對齊填充 */ 120 unsigned short padded; 121 122 /* 與interface group mib中的IfOperStatus相關 */ 123 unsigned char operstate; 124 unsigned char link_mode; 125 126 /* 127 接口使用的端口類型 128 */ 129 unsigned char if_port; 130 131 /* 132 設備使用的DMA通道 133 並非所有設備都可以用DMA,有些總線不支持DMA 134 */ 135 unsigned char dma; 136 137 /* 138 最大傳輸單元,標識設備能處理幀的最大尺寸 139 Ethernet-1500 140 */ 141 unsigned int mtu; 142 /* 最小mtu,Ethernet-68 */ 143 unsigned int min_mtu; 144 /* 最大mut,Ethernet-65535 */ 145 unsigned int max_mtu; 146 147 /* 設備所屬類型 148 ARP模塊中,用type判斷接口的硬件地址類型 149 以太網接口為ARPHRD_ETHER 150 */ 151 unsigned short type; 152 /* 153 設備頭部長度 154 Ethernet報頭是ETH_HLEN=14字節 155 */ 156 unsigned short hard_header_len; 157 unsigned char min_header_len; 158 159 /* 必須的頭部空間 */ 160 unsigned short needed_headroom; 161 unsigned short needed_tailroom; 162 163 /* Interface address info. 
*/ 164 /* 硬件地址,通常在初始化過程中從硬件讀取 */ 165 unsigned char perm_addr[MAX_ADDR_LEN]; 166 unsigned char addr_assign_type; 167 /* 硬件地址長度 */ 168 unsigned char addr_len; 169 unsigned short neigh_priv_len; 170 unsigned short dev_id; 171 unsigned short dev_port; 172 spinlock_t addr_list_lock; 173 /* 設備名賦值類型,如NET_NAME_UNKNOWN */ 174 unsigned char name_assign_type; 175 bool uc_promisc; 176 struct netdev_hw_addr_list uc; 177 struct netdev_hw_addr_list mc; 178 struct netdev_hw_addr_list dev_addrs; 179 180 #ifdef CONFIG_SYSFS 181 struct kset *queues_kset; 182 #endif 183 /* 混雜模式開啟數量 */ 184 unsigned int promiscuity; 185 186 /* 非零值時,設備監聽所有多播地址 */ 187 unsigned int allmulti; 188 189 190 /* Protocol-specific pointers */ 191 /* 特定協議的指針 */ 192 #if IS_ENABLED(CONFIG_VLAN_8021Q) 193 struct vlan_info __rcu *vlan_info; 194 #endif 195 #if IS_ENABLED(CONFIG_NET_DSA) 196 struct dsa_switch_tree *dsa_ptr; 197 #endif 198 #if IS_ENABLED(CONFIG_TIPC) 199 struct tipc_bearer __rcu *tipc_ptr; 200 #endif 201 void *atalk_ptr; 202 /* ip指向in_device結構 */ 203 struct in_device __rcu *ip_ptr; 204 struct dn_dev __rcu *dn_ptr; 205 struct inet6_dev __rcu *ip6_ptr; 206 void *ax25_ptr; 207 struct wireless_dev *ieee80211_ptr; 208 struct wpan_dev *ieee802154_ptr; 209 #if IS_ENABLED(CONFIG_MPLS_ROUTING) 210 struct mpls_dev __rcu *mpls_ptr; 211 #endif 212 213 /* 214 * Cache lines mostly used on receive path (including eth_type_trans()) 215 */ 216 /* Interface address info used in eth_type_trans() */ 217 unsigned char *dev_addr; 218 219 #ifdef CONFIG_SYSFS 220 /* 接收隊列 */ 221 struct netdev_rx_queue *_rx; 222 223 /* 接收隊列數 */ 224 unsigned int num_rx_queues; 225 unsigned int real_num_rx_queues; 226 #endif 227 228 struct bpf_prog __rcu *xdp_prog; 229 unsigned long gro_flush_timeout; 230 231 /* 如網橋等的收包回調 */ 232 rx_handler_func_t __rcu *rx_handler; 233 /* 回調參數 */ 234 void __rcu *rx_handler_data; 235 236 #ifdef CONFIG_NET_CLS_ACT 237 struct tcf_proto __rcu *ingress_cl_list; 238 #endif 239 struct netdev_queue __rcu *ingress_queue; 
240 #ifdef CONFIG_NETFILTER_INGRESS 241 /* netfilter入口 */ 242 struct nf_hook_entry __rcu *nf_hooks_ingress; 243 #endif 244 245 /* 鏈路層廣播地址 */ 246 unsigned char broadcast[MAX_ADDR_LEN]; 247 #ifdef CONFIG_RFS_ACCEL 248 struct cpu_rmap *rx_cpu_rmap; 249 #endif 250 /* 接口索引hash */ 251 struct hlist_node index_hlist; 252 253 /* 254 * Cache lines mostly used on transmit path 255 */ 256 /* 發送隊列 */ 257 struct netdev_queue *_tx ____cacheline_aligned_in_smp; 258 /* 發送隊列數 */ 259 unsigned int num_tx_queues; 260 unsigned int real_num_tx_queues; 261 /* 排隊規則 */ 262 struct Qdisc *qdisc; 263 #ifdef CONFIG_NET_SCHED 264 DECLARE_HASHTABLE (qdisc_hash, 4); 265 #endif 266 /* 267 可在設備發送隊列中排隊的最大數據包數 268 */ 269 unsigned long tx_queue_len; 270 spinlock_t tx_global_lock; 271 272 /* 網絡層確定傳輸超時, 273 調用驅動程序tx_timeout接口的最短時間 274 */ 275 int watchdog_timeo; 276 277 #ifdef CONFIG_XPS 278 struct xps_dev_maps __rcu *xps_maps; 279 #endif 280 #ifdef CONFIG_NET_CLS_ACT 281 struct tcf_proto __rcu *egress_cl_list; 282 #endif 283 284 /* These may be needed for future network-power-down code. 
*/ 285 /* watchdog定時器 */ 286 struct timer_list watchdog_timer; 287 288 /* 引用計數 */ 289 int __percpu *pcpu_refcnt; 290 291 /* 網絡設備的注冊和除名以兩步進行, 292 該字段用於處理第二步 293 */ 294 struct list_head todo_list; 295 296 struct list_head link_watch_list; 297 298 /* 設備的注冊狀態 */ 299 enum { NETREG_UNINITIALIZED=0, 300 NETREG_REGISTERED, /* completed register_netdevice */ 301 NETREG_UNREGISTERING, /* called unregister_netdevice */ 302 NETREG_UNREGISTERED, /* completed unregister todo */ 303 NETREG_RELEASED, /* called free_netdev */ 304 NETREG_DUMMY, /* dummy device for NAPI poll */ 305 } reg_state:8; 306 307 /* 設備要被釋放標記 */ 308 bool dismantle; 309 310 enum { 311 RTNL_LINK_INITIALIZED, 312 RTNL_LINK_INITIALIZING, 313 } rtnl_link_state:16; 314 315 bool needs_free_netdev; 316 void (*priv_destructor)(struct net_device *dev); 317 318 #ifdef CONFIG_NETPOLL 319 struct netpoll_info __rcu *npinfo; 320 #endif 321 322 possible_net_t nd_net; 323 324 /* mid-layer private */ 325 union { 326 void *ml_priv; 327 struct pcpu_lstats __percpu *lstats; 328 struct pcpu_sw_netstats __percpu *tstats; 329 struct pcpu_dstats __percpu *dstats; 330 struct pcpu_vstats __percpu *vstats; 331 }; 332 333 #if IS_ENABLED(CONFIG_GARP) 334 struct garp_port __rcu *garp_port; 335 #endif 336 #if IS_ENABLED(CONFIG_MRP) 337 struct mrp_port __rcu *mrp_port; 338 #endif 339 340 struct device dev; 341 const struct attribute_group *sysfs_groups[4]; 342 const struct attribute_group *sysfs_rx_queue_group; 343 344 const struct rtnl_link_ops *rtnl_link_ops; 345 346 /* for setting kernel sock attribute on TCP connection setup */ 347 #define GSO_MAX_SIZE 65536 348 unsigned int gso_max_size; 349 #define GSO_MAX_SEGS 65535 350 u16 gso_max_segs; 351 352 #ifdef CONFIG_DCB 353 const struct dcbnl_rtnl_ops *dcbnl_ops; 354 #endif 355 u8 num_tc; 356 struct netdev_tc_txq tc_to_txq[TC_MAX_QUEUE]; 357 u8 prio_tc_map[TC_BITMASK + 1]; 358 359 #if IS_ENABLED(CONFIG_FCOE) 360 unsigned int fcoe_ddp_xid; 361 #endif 362 #if IS_ENABLED(CONFIG_CGROUP_NET_PRIO) 
363 struct netprio_map __rcu *priomap; 364 #endif 365 struct phy_device *phydev; 366 struct lock_class_key *qdisc_tx_busylock; 367 struct lock_class_key *qdisc_running_key; 368 bool proto_down; 369 };
上述net_device結構中的netdev_ops成員指向設備操作函數結構net_device_ops,用於完成設備的初始化、銷毀、開啟、關閉以及修改某些變量值等操作。結構如下,其中各函數並未逐一註釋,在閱讀代碼的過程中遇到具體實現時再具體分析:
1 struct net_device_ops { 2 int (*ndo_init)(struct net_device *dev); 3 void (*ndo_uninit)(struct net_device *dev); 4 int (*ndo_open)(struct net_device *dev); 5 int (*ndo_stop)(struct net_device *dev); 6 netdev_tx_t (*ndo_start_xmit)(struct sk_buff *skb, 7 struct net_device *dev); 8 netdev_features_t (*ndo_features_check)(struct sk_buff *skb, 9 struct net_device *dev, 10 netdev_features_t features); 11 u16 (*ndo_select_queue)(struct net_device *dev, 12 struct sk_buff *skb, 13 void *accel_priv, 14 select_queue_fallback_t fallback); 15 void (*ndo_change_rx_flags)(struct net_device *dev, 16 int flags); 17 void (*ndo_set_rx_mode)(struct net_device *dev); 18 int (*ndo_set_mac_address)(struct net_device *dev, 19 void *addr); 20 int (*ndo_validate_addr)(struct net_device *dev); 21 int (*ndo_do_ioctl)(struct net_device *dev, 22 struct ifreq *ifr, int cmd); 23 int (*ndo_set_config)(struct net_device *dev, 24 struct ifmap *map); 25 int (*ndo_change_mtu)(struct net_device *dev, 26 int new_mtu); 27 int (*ndo_neigh_setup)(struct net_device *dev, 28 struct neigh_parms *); 29 void (*ndo_tx_timeout) (struct net_device *dev); 30 31 void (*ndo_get_stats64)(struct net_device *dev, 32 struct rtnl_link_stats64 *storage); 33 bool (*ndo_has_offload_stats)(const struct net_device *dev, int attr_id); 34 int (*ndo_get_offload_stats)(int attr_id, 35 const struct net_device *dev, 36 void *attr_data); 37 struct net_device_stats* (*ndo_get_stats)(struct net_device *dev); 38 39 int (*ndo_vlan_rx_add_vid)(struct net_device *dev, 40 __be16 proto, u16 vid); 41 int (*ndo_vlan_rx_kill_vid)(struct net_device *dev, 42 __be16 proto, u16 vid); 43 #ifdef CONFIG_NET_POLL_CONTROLLER 44 void (*ndo_poll_controller)(struct net_device *dev); 45 int (*ndo_netpoll_setup)(struct net_device *dev, 46 struct netpoll_info *info); 47 void (*ndo_netpoll_cleanup)(struct net_device *dev); 48 #endif 49 int (*ndo_set_vf_mac)(struct net_device *dev, 50 int queue, u8 *mac); 51 int (*ndo_set_vf_vlan)(struct net_device *dev, 52 
int queue, u16 vlan, 53 u8 qos, __be16 proto); 54 int (*ndo_set_vf_rate)(struct net_device *dev, 55 int vf, int min_tx_rate, 56 int max_tx_rate); 57 int (*ndo_set_vf_spoofchk)(struct net_device *dev, 58 int vf, bool setting); 59 int (*ndo_set_vf_trust)(struct net_device *dev, 60 int vf, bool setting); 61 int (*ndo_get_vf_config)(struct net_device *dev, 62 int vf, 63 struct ifla_vf_info *ivf); 64 int (*ndo_set_vf_link_state)(struct net_device *dev, 65 int vf, int link_state); 66 int (*ndo_get_vf_stats)(struct net_device *dev, 67 int vf, 68 struct ifla_vf_stats 69 *vf_stats); 70 int (*ndo_set_vf_port)(struct net_device *dev, 71 int vf, 72 struct nlattr *port[]); 73 int (*ndo_get_vf_port)(struct net_device *dev, 74 int vf, struct sk_buff *skb); 75 int (*ndo_set_vf_guid)(struct net_device *dev, 76 int vf, u64 guid, 77 int guid_type); 78 int (*ndo_set_vf_rss_query_en)( 79 struct net_device *dev, 80 int vf, bool setting); 81 int (*ndo_setup_tc)(struct net_device *dev, 82 u32 handle, 83 __be16 protocol, 84 struct tc_to_netdev *tc); 85 #if IS_ENABLED(CONFIG_FCOE) 86 int (*ndo_fcoe_enable)(struct net_device *dev); 87 int (*ndo_fcoe_disable)(struct net_device *dev); 88 int (*ndo_fcoe_ddp_setup)(struct net_device *dev, 89 u16 xid, 90 struct scatterlist *sgl, 91 unsigned int sgc); 92 int (*ndo_fcoe_ddp_done)(struct net_device *dev, 93 u16 xid); 94 int (*ndo_fcoe_ddp_target)(struct net_device *dev, 95 u16 xid, 96 struct scatterlist *sgl, 97 unsigned int sgc); 98 int (*ndo_fcoe_get_hbainfo)(struct net_device *dev, 99 struct netdev_fcoe_hbainfo *hbainfo); 100 #endif 101 102 #if IS_ENABLED(CONFIG_LIBFCOE) 103 #define NETDEV_FCOE_WWNN 0 104 #define NETDEV_FCOE_WWPN 1 105 int (*ndo_fcoe_get_wwn)(struct net_device *dev, 106 u64 *wwn, int type); 107 #endif 108 109 #ifdef CONFIG_RFS_ACCEL 110 int (*ndo_rx_flow_steer)(struct net_device *dev, 111 const struct sk_buff *skb, 112 u16 rxq_index, 113 u32 flow_id); 114 #endif 115 int (*ndo_add_slave)(struct net_device *dev, 116 struct 
net_device *slave_dev); 117 int (*ndo_del_slave)(struct net_device *dev, 118 struct net_device *slave_dev); 119 netdev_features_t (*ndo_fix_features)(struct net_device *dev, 120 netdev_features_t features); 121 int (*ndo_set_features)(struct net_device *dev, 122 netdev_features_t features); 123 int (*ndo_neigh_construct)(struct net_device *dev, 124 struct neighbour *n); 125 void (*ndo_neigh_destroy)(struct net_device *dev, 126 struct neighbour *n); 127 128 int (*ndo_fdb_add)(struct ndmsg *ndm, 129 struct nlattr *tb[], 130 struct net_device *dev, 131 const unsigned char *addr, 132 u16 vid, 133 u16 flags); 134 int (*ndo_fdb_del)(struct ndmsg *ndm, 135 struct nlattr *tb[], 136 struct net_device *dev, 137 const unsigned char *addr, 138 u16 vid); 139 int (*ndo_fdb_dump)(struct sk_buff *skb, 140 struct netlink_callback *cb, 141 struct net_device *dev, 142 struct net_device *filter_dev, 143 int *idx); 144 145 int (*ndo_bridge_setlink)(struct net_device *dev, 146 struct nlmsghdr *nlh, 147 u16 flags); 148 int (*ndo_bridge_getlink)(struct sk_buff *skb, 149 u32 pid, u32 seq, 150 struct net_device *dev, 151 u32 filter_mask, 152 int nlflags); 153 int (*ndo_bridge_dellink)(struct net_device *dev, 154 struct nlmsghdr *nlh, 155 u16 flags); 156 int (*ndo_change_carrier)(struct net_device *dev, 157 bool new_carrier); 158 int (*ndo_get_phys_port_id)(struct net_device *dev, 159 struct netdev_phys_item_id *ppid); 160 int (*ndo_get_phys_port_name)(struct net_device *dev, 161 char *name, size_t len); 162 void (*ndo_udp_tunnel_add)(struct net_device *dev, 163 struct udp_tunnel_info *ti); 164 void (*ndo_udp_tunnel_del)(struct net_device *dev, 165 struct udp_tunnel_info *ti); 166 void* (*ndo_dfwd_add_station)(struct net_device *pdev, 167 struct net_device *dev); 168 void (*ndo_dfwd_del_station)(struct net_device *pdev, 169 void *priv); 170 171 netdev_tx_t (*ndo_dfwd_start_xmit) (struct sk_buff *skb, 172 struct net_device *dev, 173 void *priv); 174 int (*ndo_get_lock_subclass)(struct 
net_device *dev); 175 int (*ndo_set_tx_maxrate)(struct net_device *dev, 176 int queue_index, 177 u32 maxrate); 178 int (*ndo_get_iflink)(const struct net_device *dev); 179 int (*ndo_change_proto_down)(struct net_device *dev, 180 bool proto_down); 181 int (*ndo_fill_metadata_dst)(struct net_device *dev, 182 struct sk_buff *skb); 183 void (*ndo_set_rx_headroom)(struct net_device *dev, 184 int needed_headroom); 185 int (*ndo_xdp)(struct net_device *dev, 186 struct netdev_xdp *xdp); 187 };
上述net_device結構中的header_ops成員用來進行鏈路層頭部操作,鄰居子系統在發送數據包時會用到該結構的成員函數。以以太網的實現為例:通過cache函數將以太網頭部緩存到鄰居子系統的hh(struct hh_cache)中,之後發送數據包前直接拷貝緩存的以太網頭部即可,無需重新組裝:
1 struct header_ops { 2 int (*create) (struct sk_buff *skb, struct net_device *dev, 3 unsigned short type, const void *daddr, 4 const void *saddr, unsigned int len); 5 int (*parse)(const struct sk_buff *skb, unsigned char *haddr); 6 int (*cache)(const struct neighbour *neigh, struct hh_cache *hh, __be16 type); 7 void (*cache_update)(struct hh_cache *hh, 8 const struct net_device *dev, 9 const unsigned char *haddr); 10 bool (*validate)(const char *ll_header, unsigned int len); 11 };
