ixgbe驅動初始化


http://abcdxyzk.github.io/blog/2020/05/21/ixgbe-init/

 

 

首先模塊加載insmod ixgbe.ko

1
2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 
module_init(ixgbe_init_module);  module_init(ixgbe_init_module); {  int ret;  pr_info("%s - version %s\n", ixgbe_driver_string, ixgbe_driver_version);  pr_info("%s\n", ixgbe_copyright);   ixgbe_dbg_init();      ret = pci_register_driver(&ixgbe_driver);  if (ret) {  ixgbe_dbg_exit();  return ret;  }  #ifdef CONFIG_IXGBE_DCA  dca_register_notify(&dca_notifier); #endif   return 0; }

於是看pci設備的核心結構體

1
2 3 4 5 6 7 8 9 10 11 12 13 
static struct pci_driver ixgbe_driver = {  .name = ixgbe_driver_name,  .id_table = ixgbe_pci_tbl,  .probe = ixgbe_probe,  .remove = ixgbe_remove, #ifdef CONFIG_PM  .suspend = ixgbe_suspend,  .resume = ixgbe_resume, #endif  .shutdown = ixgbe_shutdown,  .sriov_configure = ixgbe_pci_sriov_configure,  .err_handler = &ixgbe_err_handler };

當PCI核心找到與id_table匹配的設備後,會執行ixgbe_probe函數

1
2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 
static int ixgbe_probe(struct pci_dev *pdev, const struct pci_device_id *ent) {  /*分配struct net_device *netdev 結構體*/  netdev = alloc_etherdev_mq(sizeof(struct ixgbe_adapter), indices);   if (!netdev) {  err = -ENOMEM;  goto err_alloc_etherdev;  }   SET_NETDEV_DEV(netdev, &pdev->dev);   /*分配struct ixgbe_adapter *adapter結構體*/  adapter = netdev_priv(netdev);   /*分配dev結構體的ops函數指針集合*/  netdev->netdev_ops = &ixgbe_netdev_ops;   err = ixgbe_sw_init(adapter);   err = ixgbe_init_interrupt_scheme(adapter);  /*設備注冊完畢*/<br>  err = register_netdev(netdev); }

重點看ixgbe_init_interrupt_scheme(adapter)函數,該函數里面會初始化adapter結構體以及napi相關的東西

1
2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 
int ixgbe_init_interrupt_scheme(struct ixgbe_adapter *adapter) {   err = ixgbe_alloc_q_vectors(adapter);  } static int ixgbe_alloc_q_vectors(struct ixgbe_adapter *adapter) {   if (q_vectors >= (rxr_remaining + txr_remaining)) {  for (; rxr_remaining; v_idx++) {  err = ixgbe_alloc_q_vector(adapter, q_vectors, v_idx,  0, 0, 1, rxr_idx);   if (err)  goto err_out;   /* update counts and index */  rxr_remaining--;  rxr_idx++;  }  } } static int ixgbe_alloc_q_vector(struct ixgbe_adapter *adapter,  int v_count, int v_idx,  int txr_count, int txr_idx,  int rxr_count, int rxr_idx) {  /* setup affinity mask and node */  if (cpu != -1)  cpumask_set_cpu(cpu, &q_vector->affinity_mask);  q_vector->numa_node = node;  #ifdef CONFIG_IXGBE_DCA  /* initialize CPU for DCA */  q_vector->cpu = -1;  #endif  /* initialize NAPI */  netif_napi_add(adapter->netdev, &q_vector->napi,  ixgbe_poll, 64);  napi_hash_add(&q_vector->napi); }

到此為止,網卡設置初始化完畢  

其中涉及到如下幾個結構體

ixgbe_adapter
1
2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 
/* board specific private data structure */ struct ixgbe_adapter {   //發送的rings  struct ixgbe_ring *tx_ring[MAX_TX_QUEUES] ____cacheline_aligned_in_smp;   //接收的rings  struct ixgbe_ring *rx_ring[MAX_RX_QUEUES];   //這個vector里面包含了napi結構  //應該是跟下面的entries一一對應起來做為是一個中斷向量的東西吧  struct ixgbe_q_vector *q_vector[MAX_Q_VECTORS];   //這個里面估計是MSIX的多個中斷對應的響應接口  struct msix_entry *msix_entries; }  struct ixgbe_q_vector {  struct ixgbe_adapter *adapter; ifdef CONFIG_IXGBE_DCA  int cpu; /* CPU for DCA */ #endif  u16 v_idx; /* index of q_vector within array, also used for  * finding the bit in EICR and friends that  * represents the vector for this ring */  u16 itr; /* Interrupt throttle rate written to EITR */  struct ixgbe_ring_container rx, tx;   struct napi_struct napi;/*napi結構體*/  cpumask_t affinity_mask;  int numa_node;  struct rcu_head rcu; /* to avoid race with update stats on free */  char name[IFNAMSIZ + 9];   /* for dynamic allocation of rings associated with this q_vector */  struct ixgbe_ring ring[0] ____cacheline_internodealigned_in_smp; };  struct napi_struct {  /* The poll_list must only be managed by the entity which  * changes the state of the NAPI_STATE_SCHED bit. This means  * whoever atomically sets that bit can add this napi_struct  * to the per-cpu poll_list, and whoever clears that bit  * can remove from the list right before clearing the bit.  */  struct list_head poll_list;   unsigned long state;  int weight;  unsigned int gro_count;  int (*poll)(struct napi_struct *, int);//poll的接口實現 #ifdef CONFIG_NETPOLL  spinlock_t poll_lock;  int poll_owner; #endif  struct net_device *dev;  struct sk_buff *gro_list;  struct sk_buff *skb;  struct list_head dev_list; };

然後當我們執行ifconfig dev up時,會執行netdev_ops中的open函數(ndo_open),在ixgbe驅動中即ixgbe_open

1
2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 
/*
 * ixgbe_open - abridged excerpt of the open path; declarations, the
 * err_setup_tx label and the return statement are not shown.
 *
 * The steps shown: set up tx resources, set up rx resources, then
 * request the interrupt(s) through ixgbe_request_irq().
 */
static int ixgbe_open(struct net_device *netdev)
{
	/* allocate transmit descriptors */
	err = ixgbe_setup_all_tx_resources(adapter);
	if (err)
		goto err_setup_tx;

	/* allocate receive descriptors */
	err = ixgbe_setup_all_rx_resources(adapter);
	/* register the interrupt handler(s) */
	err = ixgbe_request_irq(adapter);
}

/*
 * ixgbe_request_irq - pick the interrupt registration path from
 * adapter->flags: MSI-X goes through ixgbe_request_msix_irqs(); MSI and
 * the fallback case both register ixgbe_intr on adapter->pdev->irq, the
 * fallback with IRQF_SHARED.
 *
 * Logs an error when registration fails and returns the resulting code.
 */
static int ixgbe_request_irq(struct ixgbe_adapter *adapter)
{
	struct net_device *netdev = adapter->netdev;
	int err;

	if (adapter->flags & IXGBE_FLAG_MSIX_ENABLED)
		err = ixgbe_request_msix_irqs(adapter);
	else if (adapter->flags & IXGBE_FLAG_MSI_ENABLED)
		err = request_irq(adapter->pdev->irq, ixgbe_intr, 0,
				  netdev->name, adapter);
	else
		err = request_irq(adapter->pdev->irq, ixgbe_intr, IRQF_SHARED,
				  netdev->name, adapter);

	if (err)
		e_err(probe, "request_irq failed, Error %d\n", err);

	return err;
}

/*
 * ixgbe_request_msix_irqs - abridged excerpt; declarations, error
 * handling and the return statement are not shown.
 *
 * For every q_vector, registers ixgbe_msix_clean_rings on the matching
 * msix_entries[] vector, passing the q_vector itself as the handler's
 * data argument.
 */
static int ixgbe_request_msix_irqs(struct ixgbe_adapter *adapter)
{
	for (vector = 0; vector < adapter->num_q_vectors; vector++) {
		struct ixgbe_q_vector *q_vector = adapter->q_vector[vector];
		struct msix_entry *entry = &adapter->msix_entries[vector];

		err = request_irq(entry->vector, &ixgbe_msix_clean_rings, 0,
				  q_vector->name, q_vector);
	}
}

從上面的代碼流程可以看出,最終注冊的中斷處理函數為ixgbe_msix_clean_rings

1
2 3 4 5 6 7 8 9 10 11 
/*
 * ixgbe_msix_clean_rings - interrupt handler; does no packet work itself.
 * @irq:  interrupt number (unused here)
 * @data: interpreted as the struct ixgbe_q_vector for this vector
 *
 * If the q_vector has any rx or tx ring attached, schedules its NAPI
 * instance. Always returns IRQ_HANDLED.
 */
static irqreturn_t ixgbe_msix_clean_rings(int irq, void *data)
{
	struct ixgbe_q_vector *q_vector = data;

	/* EIAM disabled interrupts (on this vector) for us */

	if (q_vector->rx.ring || q_vector->tx.ring)
		napi_schedule(&q_vector->napi);

	return IRQ_HANDLED;
}

從上述代碼中可以看出,該中斷處理函數僅僅作為napi的調度者

當數據包到來時,首先觸發硬中斷執行ixgbe_msix_clean_rings函數,最終napi_schedule會調用 __raise_softirq_irqoff 去觸發一個軟中斷NET_RX_SOFTIRQ,然後由對應的軟中斷接口去實現往上的協議棧邏輯

然后看看napi 調度函數都做了些什么工作

1
2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 
static inline void napi_schedule(struct napi_struct *n) {  if (napi_schedule_prep(n))  __napi_schedule(n); } void __napi_schedule(struct napi_struct *n) {  unsigned long flags;   local_irq_save(flags);  ____napi_schedule(this_cpu_ptr(&softnet_data), n);  local_irq_restore(flags); }  最終可以看出napi調度函數把napi結構體掛到了per cpu的私有數據結構softnet_data上 struct softnet_data {  struct Qdisc *output_queue;  struct Qdisc **output_queue_tailp;  struct list_head poll_list;  struct sk_buff *completion_queue;  struct sk_buff_head process_queue;   /* stats */  unsigned int processed;  unsigned int time_squeeze;  unsigned int cpu_collision;  unsigned int received_rps;  #ifdef CONFIG_RPS  struct softnet_data *rps_ipi_list;   /* Elements below can be accessed between CPUs for RPS */  struct call_single_data csd ____cacheline_aligned_in_smp;  struct softnet_data *rps_ipi_next;  unsigned int cpu;  unsigned int input_queue_head;  unsigned int input_queue_tail; #endif  unsigned int dropped;  struct sk_buff_head input_pkt_queue;  struct napi_struct backlog;/*napi結構體里面的雙向鏈表中*/ };

NET_RX_SOFTIRQ是收到數據包的軟中斷信號,對應的接口是net_rx_action

NET_TX_SOFTIRQ是發送完數據包後的軟中斷信號,對應的接口是net_tx_action

1
2 3 4 5 6 7 8 9 10 11 12 13 14 
/*
 * net_rx_action - abridged excerpt of the NET_RX_SOFTIRQ handler.
 *
 * While this CPU's sd->poll_list is non-empty, takes the first
 * napi_struct on it and, if its NAPI_STATE_SCHED bit is set, calls its
 * poll callback. The declarations of work and weight are not part of
 * this excerpt.
 */
static void net_rx_action(struct softirq_action *h)
{
	/* fetch this CPU's softnet_data */
	struct softnet_data *sd = this_cpu_ptr(&softnet_data);
	while (!list_empty(&sd->poll_list)) {
		struct napi_struct *n;
		n = list_first_entry(&sd->poll_list, struct napi_struct, poll_list);

		if (test_bit(NAPI_STATE_SCHED, &n->state)) {
			work = n->poll(n, weight);
			trace_napi_poll(n);
		}
	}
}

於是就執行到初始化時注冊在napi結構體中的poll函數,在這裡為ixgbe_poll

1
2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 
int ixgbe_poll(struct napi_struct *napi, int budget) {  struct ixgbe_q_vector *q_vector =  container_of(napi, struct ixgbe_q_vector, napi);  struct ixgbe_adapter *adapter = q_vector->adapter;  struct ixgbe_ring *ring;  int per_ring_budget;  bool clean_complete = true;  #ifdef CONFIG_IXGBE_DCA  if (adapter->flags & IXGBE_FLAG_DCA_ENABLED)  ixgbe_update_dca(q_vector); #endif   ixgbe_for_each_ring(ring, q_vector->tx)  clean_complete &= !!ixgbe_clean_tx_irq(q_vector, ring);   if (!ixgbe_qv_lock_napi(q_vector))  return budget;   /* attempt to distribute budget to each queue fairly, but don't allow  * the budget to go below 1 because we'll exit polling */  if (q_vector->rx.count > 1)  per_ring_budget = max(budget/q_vector->rx.count, 1);  else  per_ring_budget = budget;   ixgbe_for_each_ring(ring, q_vector->rx)  clean_complete &= (ixgbe_clean_rx_irq(q_vector, ring,  per_ring_budget) < per_ring_budget);   ixgbe_qv_unlock_napi(q_vector);  /* If all work not completed, return budget and keep polling */  if (!clean_complete)  return budget;   /* all work done, exit the polling mode */  napi_complete(napi);  if (adapter->rx_itr_setting & 1)  ixgbe_set_itr(q_vector);  if (!test_bit(__IXGBE_DOWN, &adapter->state))  ixgbe_irq_enable_queues(adapter, ((u64)1 << q_vector->v_idx));   return 0; }  static int ixgbe_clean_rx_irq(struct ixgbe_q_vector *q_vector,  struct ixgbe_ring *rx_ring,  const int budget) {  ixgbe_rx_skb(q_vector, skb); }  static void ixgbe_rx_skb(struct ixgbe_q_vector *q_vector,  struct sk_buff *skb) {  if (ixgbe_qv_busy_polling(q_vector))  netif_receive_skb(skb);  else  napi_gro_receive(&q_vector->napi, skb); }  int netif_receive_skb(struct sk_buff *skb) {  int ret;   net_timestamp_check(netdev_tstamp_prequeue, skb);   if (skb_defer_rx_timestamp(skb))  return NET_RX_SUCCESS;   rcu_read_lock();  #ifdef CONFIG_RPS  if (static_key_false(&rps_needed)) {  struct rps_dev_flow voidflow, *rflow = &voidflow;  int cpu = get_rps_cpu(skb->dev, skb, &rflow);   if (cpu >= 0) {  
ret = enqueue_to_backlog(skb, cpu, &rflow->last_qtail);  rcu_read_unlock();  return ret;  }  } #endif  /*最終協議棧開始收報*/  ret = __netif_receive_skb(skb);  rcu_read_unlock();  return ret; }


免責聲明!

本站轉載的文章為個人學習借鑒使用,本站對版權不負任何法律責任。如果侵犯了您的隱私權益,請聯系本站郵箱yoyou2525@163.com刪除。



 
粵ICP備18138465號   © 2018-2025 CODEPRJ.COM