軟中斷
首先明確一個概念:這里說的軟中斷(softirq)不是軟件中斷(int n)。總的來說,軟中斷就是內核在啟動時為每一個CPU創建了一個特殊的內核線程(ksoftirqd),這個線程會不停地檢查是否有軟中斷需要執行,如果需要執行則調用注冊的接口函數。所以軟中斷可以運行在該內核線程的上下文中(后文會看到,它也可能在硬中斷退出時直接執行),而且可能並發執行在不同CPU上。所謂的軟中斷,就是內核利用內核線程配合抽象的數據結構,在合適的時間調用注冊接口的一套軟件管理機制。
先看管理軟中斷的數據結構,因為數據結構最能說明邏輯。內核對軟中斷抽象的數據結構主要有如下幾個部分。
中斷服務接口管理
在內核中的聲明位於 kernel/softirq.c 中,如下:
#ifndef __ARCH_IRQ_STAT irq_cpustat_t irq_stat[NR_CPUS] ____cacheline_aligned; EXPORT_SYMBOL(irq_stat); #endif static struct softirq_action softirq_vec[NR_SOFTIRQS] __cacheline_aligned_in_smp; struct softirq_action { void (*action)(struct softirq_action *); };
其中的NR_SOFTIRQS由軟中斷類型的枚舉對象提供如下定義:
/*
 * Softirq numbers. Each value is used as an index into softirq_vec[] and
 * as a bit position in the per-CPU pending bitmask; __do_softirq() scans
 * that mask from the lowest set bit upwards, so smaller values are
 * serviced first.
 */
enum {
	HI_SOFTIRQ = 0,
	TIMER_SOFTIRQ,
	NET_TX_SOFTIRQ,
	NET_RX_SOFTIRQ,
	BLOCK_SOFTIRQ,
	BLOCK_IOPOLL_SOFTIRQ,
	TASKLET_SOFTIRQ,
	SCHED_SOFTIRQ,
	HRTIMER_SOFTIRQ,
	RCU_SOFTIRQ,	/* Preferable RCU should always be the last softirq */

	NR_SOFTIRQS	/* number of softirq types; sizes softirq_vec[] */
};
綜上可以知道,內核維護了一個struct softirq_action類型的軟中斷接口數組,而軟中斷的狀態則是由前面的 irq_cpustat_t 類型的數組管理。由定義可以知道狀態是和CPU關聯的,表示某一個CPU上的軟中斷狀態。下面看看irq_cpustat_t 的定義,也非常簡單,主要就是其中的 __softirq_pending成員:這個成員的每一個bit表示一種類型的軟中斷是否懸起,並且低bit對應的軟中斷類型優先級高。
typedef struct { unsigned int __softirq_pending;//標記是否有軟中斷懸起 long idle_timestamp; /* 統計信息 */ /* Hard interrupt statistics. */ unsigned int irq_timer_count; unsigned int irq_syscall_count; unsigned int irq_resched_count; unsigned int irq_hv_flush_count; unsigned int irq_call_count; unsigned int irq_hv_msg_count; unsigned int irq_dev_intr_count; } ____cacheline_aligned irq_cpustat_t;
通過Tasklet接口的創建過程就可以知道,軟中斷的注冊(open_softirq)過程就是修改前面定義的softirq_vec數組,這樣就完成了軟中斷的注冊;驅動開發人員很少直接使用軟中斷。
struct softirq_action;

/*
 * open_softirq - register the handler for one softirq type
 * @nr:     softirq number, one of the softirq enum values (below NR_SOFTIRQS)
 * @action: softirq service routine to install for @nr
 */
void open_softirq(int nr, void (*action)(struct softirq_action *));
再看內核在啟動時為每個CPU創建的線程操作:
static struct notifier_block cpu_nfb = { .notifier_call = cpu_callback }; static struct smp_hotplug_thread softirq_threads = { .store = &ksoftirqd, .thread_should_run = ksoftirqd_should_run, .thread_fn = run_ksoftirqd, .thread_comm = "ksoftirqd/%u", }; static __init int spawn_ksoftirqd(void) { register_cpu_notifier(&cpu_nfb); BUG_ON(smpboot_register_percpu_thread(&softirq_threads)); return 0; } early_initcall(spawn_ksoftirqd);
重點是這個接口函數 smpboot_register_percpu_thread如下:
/** * smpboot_register_percpu_thread - Register a per_cpu thread related to hotplug * @plug_thread: Hotplug thread descriptor * * Creates and starts the threads on all online cpus. */ int smpboot_register_percpu_thread(struct smp_hotplug_thread *plug_thread) { unsigned int cpu; int ret = 0; get_online_cpus(); mutex_lock(&smpboot_threads_lock); for_each_online_cpu(cpu) { ret = __smpboot_create_thread(plug_thread, cpu); if (ret) { smpboot_destroy_threads(plug_thread); goto out; } smpboot_unpark_thread(plug_thread, cpu); } list_add(&plug_thread->list, &hotplug_threads); out: mutex_unlock(&smpboot_threads_lock); put_online_cpus(); return ret; }
傳進來的參數是 softirq_threads。先獲取在線(即已激活)的CPU,然后遍歷它們並調用__smpboot_create_thread,參數同樣是前面定義的softirq_threads。繼續向下看:
tatic int __smpboot_create_thread(struct smp_hotplug_thread *ht, unsigned int cpu) { struct task_struct *tsk = *per_cpu_ptr(ht->store, cpu); struct smpboot_thread_data *td; if (tsk) return 0; td = kzalloc_node(sizeof(*td), GFP_KERNEL, cpu_to_node(cpu)); if (!td) return -ENOMEM; td->cpu = cpu; td->ht = ht; tsk = kthread_create_on_cpu(smpboot_thread_fn, td, cpu, ht->thread_comm); if (IS_ERR(tsk)) { kfree(td); return PTR_ERR(tsk); } get_task_struct(tsk); *per_cpu_ptr(ht->store, cpu) = tsk; if (ht->create) { /* * Make sure that the task has actually scheduled out * into park position, before calling the create * callback. At least the migration thread callback * requires that the task is off the runqueue. */ if (!wait_task_inactive(tsk, TASK_PARKED)) WARN_ON(1); else ht->create(cpu); } return 0; }
可以看到,這里通過kthread_create_on_cpu(smpboot_thread_fn, td, cpu, ht->thread_comm)接口在特定CPU上創建了一個內核線程。不再往深入繼續看,這里只需要知道創建了一個綁定CPU的線程,線程函數是smpboot_thread_fn,這個比較重要,需要詳細看一下。傳入的data是一個struct smpboot_thread_data類型的數據,其中的ht成員保存了softirq_threads。線程開始運行時先關閉搶占,檢查是否需要停止當前線程,如果需要則立馬停止當前線程;這里肯定不需要停止,除非是關機(我的理解)。然后就是檢查是否要暫停(park):因為用戶的軟中斷接口可能調用阻塞接口,會阻塞當前內核線程,所以需要暫停當前線程,最后的恢復也是由用戶軟中斷服務函數完成(我的理解)。最后部分源碼注釋如下:
static int smpboot_thread_fn(void *data) { struct smpboot_thread_data *td = data; struct smp_hotplug_thread *ht = td->ht; while (1) { set_current_state(TASK_INTERRUPTIBLE);
//關閉內核搶占機制 preempt_disable();
//是否需要停止當前線程關機時才執行?? if (kthread_should_stop()) { __set_current_state(TASK_RUNNING); preempt_enable(); if (ht->cleanup) ht->cleanup(td->cpu, cpu_online(td->cpu)); kfree(td); return 0; } if (kthread_should_park()) { __set_current_state(TASK_RUNNING); preempt_enable(); if (ht->park && td->status == HP_THREAD_ACTIVE) { BUG_ON(td->cpu != smp_processor_id()); ht->park(td->cpu); td->status = HP_THREAD_PARKED; } kthread_parkme(); /* We might have been woken for stop */ continue; } BUG_ON(td->cpu != smp_processor_id()); /* Check for state change setup */ switch (td->status) { case HP_THREAD_NONE: __set_current_state(TASK_RUNNING); preempt_enable(); if (ht->setup) ht->setup(td->cpu); td->status = HP_THREAD_ACTIVE; continue; case HP_THREAD_PARKED: __set_current_state(TASK_RUNNING); preempt_enable(); if (ht->unpark) ht->unpark(td->cpu); td->status = HP_THREAD_ACTIVE; continue; } /* * 就是通過調用ksoftirqd_should_run 這是在一開始定義的softirq_threads中指定的,檢查當前CPU上維護的軟件中斷數組中是否有中斷 * 的置起了從而決定當前的軟件中斷線程是否需要執行,不需要執行則放棄時間片 */ if (!ht->thread_should_run(td->cpu)) {
/*
*沒有需要的軟件中斷需要執行,則放棄時間片
*/ preempt_enable_no_resched(); schedule(); } else { /* * 有中斷需要執行則直接調用 run_ksoftirqd 執行軟件中斷注冊的接口的調用 */ __set_current_state(TASK_RUNNING); preempt_enable(); //這個接口在上面初始化時綁定為run_ksoftirqd ht->thread_fn(td->cpu); } } }
可以看到run_ksoftirqd如下:
/*
 * run_ksoftirqd - thread function of the per-cpu ksoftirqd thread
 * @cpu: cpu the thread belongs to (not used in the body)
 *
 * With hardware interrupts disabled, checks whether any softirq is pending
 * on this cpu. If so, processes them with __do_softirq(), re-enables
 * interrupts and calls cond_resched_rcu_qs(); otherwise it only re-enables
 * interrupts.
 */
static void run_ksoftirqd(unsigned int cpu)
{
	local_irq_disable();

	if (!local_softirq_pending()) {
		local_irq_enable();
		return;
	}

	/*
	 * We can safely run softirq on inline stack, as we are not deep
	 * in the task stack here.
	 */
	__do_softirq();
	local_irq_enable();
	cond_resched_rcu_qs();
}
先關閉本CPU上的硬中斷,如果有懸起的軟中斷則執行__do_softirq()。這個是軟中斷的重點接口,如下(注釋了一部分):
asmlinkage __visible void __do_softirq(void) { unsigned long end = jiffies + MAX_SOFTIRQ_TIME; unsigned long old_flags = current->flags; int max_restart = MAX_SOFTIRQ_RESTART; struct softirq_action *h; bool in_hardirq; __u32 pending; int softirq_bit; /* * Mask out PF_MEMALLOC s current task context is borrowed for the * softirq. A softirq handled such as network RX might set PF_MEMALLOC * again if the socket is related to swap */ current->flags &= ~PF_MEMALLOC; //保存懸起的軟件中斷的位圖 pending = local_softirq_pending(); account_irq_enter_time(current); //標記進入軟件中斷上下文 __local_bh_disable_ip(_RET_IP_, SOFTIRQ_OFFSET); in_hardirq = lockdep_softirq_start(); restart: /* Reset the pending bitmask before enabling irqs */ //清除懸起的軟件中斷的位圖 set_softirq_pending(0); //開啟硬件中斷 local_irq_enable(); //取軟件中斷的全局中斷接口鏈表 h = softirq_vec; //判斷是否有懸起的軟件中斷bit,返回地最低置起的bit位置 1開始而不是0,軟中斷也是由優先級的低bit優先 while ((softirq_bit = ffs(pending))) { unsigned int vec_nr; int prev_count; //取出對應的中斷對象 h += softirq_bit - 1; //取出對應的中斷index vec_nr = h - softirq_vec; prev_count = preempt_count(); kstat_incr_softirqs_this_cpu(vec_nr); trace_softirq_entry(vec_nr); //執行軟件中斷注冊的接口函數 h->action(h); trace_softirq_exit(vec_nr); if (unlikely(prev_count != preempt_count())) { pr_err("huh, entered softirq %u %s %p with preempt_count %08x, exited with %08x?\n", vec_nr, softirq_to_name[vec_nr], h->action, prev_count, preempt_count()); preempt_count_set(prev_count); } //清除剛才處理過的中斷bit並右移動整個位圖,然后移動軟件中斷句柄 h++; pending >>= softirq_bit; //移動后繼續回去處理剩下置起的bit } //到這里說明本次進來時置起的bit全部處理完了 rcu_bh_qs(); local_irq_disable(); //再檢查在處理期間有無新置起的軟件中斷,如果有則需要繼續處理軟件中斷 pending = local_softirq_pending(); if (pending) { /* *又有新的軟件標志置起需要處理,則開始處理,這里有一個保護機制,因為軟件中斷的優先級是很高的相對於用戶進程如果軟件中斷 *源源不斷則需要進行保護避免其他進程無法運行而導致系統實時性差,這里有三個條件一個步滿足就會會停止本次的軟件中斷的執行 *而先去執行其他進程調度 *1、軟中斷處理時間不超過2jiffies,200Hz的系統對應10ms; *2、當前沒有有進程需要調度,即!need_resched(); *3、這種循環不超過MAX_SOFTIRQ_RESTART次 一般是10 */ if (time_before(jiffies, end) && !need_resched() && --max_restart) goto restart; 
//不滿足其中一個條件則重新喚醒ksoftirq內核線程來處理軟中斷,因為這個函數可能在中斷上下文執行所以需要進行限制 wakeup_softirqd(); } lockdep_softirq_end(in_hardirq); account_irq_exit_time(current); //使能中斷底半部 __local_bh_enable(SOFTIRQ_OFFSET); WARN_ON_ONCE(in_interrupt()); tsk_restore_flags(current, old_flags, PF_MEMALLOC); }
注意軟中斷的處理過程對軟中斷連續執行的時間進行了限制,其實是有原因的:上述軟中斷處理部分的代碼有可能在中斷上下文中執行,即在irq_exit()中。具體的調用鏈是irq_exit()->invoke_softirq()->__do_softirq()(當force_irqthreads置位時則是irq_exit()->invoke_softirq()->wakeup_softirqd()),如下(可參考硬中斷的分析過程):
void irq_exit(void) { #ifndef __ARCH_IRQ_EXIT_IRQS_DISABLED local_irq_disable(); #else WARN_ON_ONCE(!irqs_disabled()); #endif account_irq_exit_time(current); preempt_count_sub(HARDIRQ_OFFSET); if (!in_interrupt() && local_softirq_pending()) invoke_softirq(); tick_irq_exit(); rcu_irq_exit(); trace_hardirq_exit(); /* must be last! */ } static inline void invoke_softirq(void) { if (!force_irqthreads) { #ifdef CONFIG_HAVE_IRQ_EXIT_ON_IRQ_STACK /* * We can safely execute softirq on the current stack if * it is the irq stack, because it should be near empty * at this stage. */ __do_softirq(); #else /* * Otherwise, irq_exit() is called on the task stack that can * be potentially deep already. So call softirq in its own stack * to prevent from any overrun. */ do_softirq_own_stack(); #endif } else { wakeup_softirqd(); } }
asmlinkage __visible void __do_softirq(void) { unsigned long end = jiffies + MAX_SOFTIRQ_TIME; unsigned long old_flags = current->flags; int max_restart = MAX_SOFTIRQ_RESTART; struct softirq_action *h; bool in_hardirq; __u32 pending; int softirq_bit; /* * Mask out PF_MEMALLOC s current task context is borrowed for the * softirq. A softirq handled such as network RX might set PF_MEMALLOC * again if the socket is related to swap */ current->flags &= ~PF_MEMALLOC; pending = local_softirq_pending();------------------------------獲取當前CPU的軟中斷寄存器__softirq_pending值到局部變量pending。 account_irq_enter_time(current); __local_bh_disable_ip(_RET_IP_, SOFTIRQ_OFFSET);----------------增加preempt_count中的softirq域計數,表明當前在軟中斷上下文中。 in_hardirq = lockdep_softirq_start(); restart: /* Reset the pending bitmask before enabling irqs */ set_softirq_pending(0);-----------------------------------------清除軟中斷寄存器__softirq_pending。 local_irq_enable();---------------------------------------------打開本地中斷 h = softirq_vec;------------------------------------------------指向softirq_vec第一個元素,即軟中斷HI_SOFTIRQ對應的處理函數。 while ((softirq_bit = ffs(pending))) {--------------------------ffs()找到pending中第一個置位的比特位,返回值是第一個為1的位序號。這里的位是從低位開始,這也和優先級相吻合,低位優先得到執行。如果沒有則返回0,退出循環。 unsigned int vec_nr; int prev_count; h += softirq_bit - 1;---------------------------------------根據sofrirq_bit找到對應的軟中斷描述符,即軟中斷處理函數。 vec_nr = h - softirq_vec;-----------------------------------軟中斷序號 prev_count = preempt_count(); kstat_incr_softirqs_this_cpu(vec_nr); trace_softirq_entry(vec_nr); h->action(h);-----------------------------------------------執行對應軟中斷函數 trace_softirq_exit(vec_nr); if (unlikely(prev_count != preempt_count())) { pr_err("huh, entered softirq %u %s %p with preempt_count %08x, exited with %08x?\n", vec_nr, softirq_to_name[vec_nr], h->action, prev_count, preempt_count()); preempt_count_set(prev_count); } h++;-------------------------------------------------------h遞增,指向下一個軟中斷 pending >>= 
softirq_bit;-----------------------------------pending右移softirq_bit位 } rcu_bh_qs(); local_irq_disable();-------------------------------------------關閉本地中斷 pending = local_softirq_pending();-----------------------------再次檢查是否有軟中斷產生,在上一次檢查至此這段時間有新軟中斷產生。 if (pending) { if (time_before(jiffies, end) && !need_resched() && --max_restart)-----------------------------------------再次觸發軟中斷執行的三個條件:1.軟中斷處理時間不超過2jiffies,200Hz的系統對應10ms;2.當前沒有有進程需要調度,即!need_resched();3.這種循環不超過10次。 goto restart; wakeup_softirqd();-----------------------------------------如果上面的條件不滿足,則喚醒ksoftirq內核線程來處理軟中斷。 } lockdep_softirq_end(in_hardirq); account_irq_exit_time(current); __local_bh_enable(SOFTIRQ_OFFSET);----------------------------減少preempt_count的softirq域計數,和前面增加計數呼應。表示這段代碼處於軟中斷上下文。 WARN_ON_ONCE(in_interrupt()); tsk_restore_flags(current, old_flags, PF_MEMALLOC); }
wakeup_softirqd()首先獲取當前CPU的ksoftirqd線程的task_struct。
如果當前task不處於TASK_RUNNING,則去喚醒此進程。
/*
 * wakeup_softirqd - wake this cpu's ksoftirqd thread if it is not running
 *
 * Reads the per-cpu ksoftirqd task pointer and wakes the task unless the
 * pointer is NULL or the task is already in TASK_RUNNING state.
 */
static void wakeup_softirqd(void)
{
	/* Interrupts are disabled: no need to stop preemption */
	struct task_struct *worker = __this_cpu_read(ksoftirqd);

	if (!worker)
		return;

	if (worker->state == TASK_RUNNING)
		return;

	wake_up_process(worker);
}