文件:kthread.c
1 /** 2 * kthread_create - create a kthread. 3 * @threadfn: the function to run until signal_pending(current). 4 * @data: data ptr for @threadfn. 5 * @namefmt: printf-style name for the thread. 6 * 7 * Description: This helper function creates and names a kernel 8 * thread. The thread will be stopped: use wake_up_process() to start 9 * it. See also kthread_run(), kthread_create_on_cpu(). 10 *這里面給出了如何運行一個內核線程的流程。
參數說明:threadfn是指線程回調函數,data為回調函數的參數,namefmt為線程名字 11 * When woken, the thread will run @threadfn() with @data as its argument.
12 @threadfn() can either call do_exit() directly if it is a standalone thread for which noone will call kthread_stop(), or 14 * return when 'kthread_should_stop()' is true (which means kthread_stop() has been called).
The return value should be zero or a negative error number; it will be passed to kthread_stop(). 17 * 18 * Returns a task_struct or ERR_PTR(-ENOMEM). 19 */ 20 struct task_struct *kthread_create(int (*threadfn)(void *data), 21 void *data, 22 const char namefmt[], 23 ...) 24 { 25 struct kthread_create_info create; 26 //初始化線程創建描述符 27 create.threadfn = threadfn; 28 create.data = data;
/*kthread_create采用了完成量機制,可以查閱等待量機制的原理*/ 29 init_completion(&create.done); 30 31 spin_lock(&kthread_create_lock); 32 list_add_tail(&create.list, &kthread_create_list); //將線程隊列加入到全局線程創建隊列中
/*注意這個全局鏈表kthread_create_list, 所用通過kthread_create創建的內核線程都會掛在這*/ 33 spin_unlock(&kthread_create_lock);
34 /*這是最重要的地方,從代碼看是喚醒了kthreadd_task這個進程,如果對代碼比較熟悉的話,就會想到這是內核中 的1號進程kthreadd*/ 35 wake_up_process(kthreadd_task);
/*當前進程在完成量上睡眠等待*/ 36 wait_for_completion(&create.done); 37 38 if (!IS_ERR(create.result)) { 39 struct sched_param param = { .sched_priority = 0 }; 40 va_list args; 41 42 va_start(args, namefmt); 43 vsnprintf(create.result->comm, sizeof(create.result->comm), 44 namefmt, args); 45 va_end(args); 46 /*設置線程屬性,包括調度策略, 47 * root may have changed our (kthreadd's) priority or CPU mask. 48 * The kernel thread should not inherit these properties. 49 */ 50 sched_setscheduler_nocheck(create.result, SCHED_NORMAL, ¶m); 51 set_user_nice(create.result, KTHREAD_NICE_LEVEL);
//設置調度屬性,具體原理參考http://www.cnblogs.com/papam/archive/2009/08/27/1555353.html 52 set_cpus_allowed_ptr(create.result, cpu_all_mask);
在多核架構的內核中,對於每個cpu,都有一個struct rq* ptr靜態結構體指針變量對應,但是通過傳統方法無法得到該靜態指針變量的值,通過閱讀/kernel/sched.c函數,在導出函數set_cpus_allowed_ptr中使用過該指針變量,具體參考:http://wanderer-zjhit.blogbus.com/logs/186876356.html 53 } 54 return create.result; 55 } 56 EXPORT_SYMBOL(kthread_create);
首先分析一下重要的一個函數:
1 int wake_up_process(struct task_struct *p) 2 { 3 return try_to_wake_up(p, TASK_ALL, 0); 4 }
1 /*** 2 * try_to_wake_up - wake up a thread 3 * @p: the to-be-woken-up thread 4 * @state: the mask of task states that can be woken 5 * @sync: do a synchronous wakeup? 6 *看這里的解釋,讓喚醒的進程進入運行隊列, 7 * Put it on the run-queue if it's not already there. The "current" 8 * thread is always on the run-queue (except when the actual 9 * re-schedule is in progress), and as such you're allowed to do 10 * the simpler "current->state = TASK_RUNNING" to mark yourself 11 * runnable without the overhead of this. 12 * 13 * returns failure only if the task is already active. 14 */ 15 static int try_to_wake_up(struct task_struct *p, unsigned int state, int sync) 16 { 17 int cpu, orig_cpu, this_cpu, success = 0; 18 unsigned long flags; 19 long old_state; 20 struct rq *rq; 21 22 if (!sched_feat(SYNC_WAKEUPS)) 23 sync = 0; 24 25 #ifdef CONFIG_SMP 26 if (sched_feat(LB_WAKEUP_UPDATE) && !root_task_group_empty()) { 27 struct sched_domain *sd; 28 29 this_cpu = raw_smp_processor_id(); 30 cpu = task_cpu(p); 31 32 for_each_domain(this_cpu, sd) { 33 if (cpumask_test_cpu(cpu, sched_domain_span(sd))) { 34 update_shares(sd); 35 break; 36 } 37 } 38 } 39 #endif 40 41 smp_wmb();
1. 關閉本地中斷並給本地可執行隊列rq加鎖 42 rq = task_rq_lock(p, &flags); 43 update_rq_clock(rq);
2. 如果當前進程狀態p->state不在要喚醒的進程狀態集中,則不能喚醒該進程 44 old_state = p->state; 45 if (!(old_state & state)) 46 goto out; 47 3. 如果當前進程本身就在可執行隊列中,則無需喚醒本進程 48 if (p->se.on_rq) 49 goto out_running; 50 4. task_cpu(p)返回當前進程p所使用的CPU編號(p所歸屬的runqueue所在的CPU編號) 51 cpu = task_cpu(p); 52 orig_cpu = cpu; 53 this_cpu = smp_processor_id(); 54 55 #ifdef CONFIG_SMP 56 if (unlikely(task_running(rq, p))) 57 goto out_activate; 58 59 cpu = p->sched_class->select_task_rq(p, sync); 60 if (cpu != orig_cpu) { 61 set_task_cpu(p, cpu); 62 task_rq_unlock(rq, &flags); 63 /* might preempt at this point */ 64 rq = task_rq_lock(p, &flags); 65 old_state = p->state; 66 if (!(old_state & state)) 67 goto out; 68 if (p->se.on_rq) 69 goto out_running; 70 71 this_cpu = smp_processor_id(); 72 cpu = task_cpu(p); 73 } 74 75 #ifdef CONFIG_SCHEDSTATS 76 schedstat_inc(rq, ttwu_count); 77 if (cpu == this_cpu) 78 schedstat_inc(rq, ttwu_local); 79 else { 80 struct sched_domain *sd; 81 for_each_domain(this_cpu, sd) { 82 if (cpumask_test_cpu(cpu, sched_domain_span(sd))) { 83 schedstat_inc(sd, ttwu_wake_remote); 84 break; 85 } 86 } 87 } 88 #endif /* CONFIG_SCHEDSTATS */ 89 90 out_activate: 91 #endif /* CONFIG_SMP */ 92 schedstat_inc(p, se.nr_wakeups); 93 if (sync) 94 schedstat_inc(p, se.nr_wakeups_sync); 95 if (orig_cpu != cpu) 96 schedstat_inc(p, se.nr_wakeups_migrate); 97 if (cpu == this_cpu) 98 schedstat_inc(p, se.nr_wakeups_local); 99 else 100 schedstat_inc(p, se.nr_wakeups_remote);
更新喚醒進程p的平均睡眠時間sleep_avg和動態優先級prio;記錄該進程喚醒前的睡眠狀態;將該進程插入活躍優先級數組 101 activate_task(rq, p, 1); 102 success = 1; 103 104 /* 105 * Only attribute actual wakeups done by this task. 106 */ 107 if (!in_interrupt()) { 108 struct sched_entity *se = ¤t->se; 109 u64 sample = se->sum_exec_runtime; 110 111 if (se->last_wakeup) 112 sample -= se->last_wakeup; 113 else 114 sample -= se->start_runtime; 115 update_avg(&se->avg_wakeup, sample); 116 117 se->last_wakeup = se->sum_exec_runtime; 118 } 119 如果喚醒進程p的動態優先級prio比當前進程current的動態優先級高則當前進程的TIF_NEED_RESCHED就需要設置 120 out_running: 121 trace_sched_wakeup(rq, p, success); 122 check_preempt_curr(rq, p, sync); 123 124 p->state = TASK_RUNNING; 125 #ifdef CONFIG_SMP 126 if (p->sched_class->task_wake_up) 127 p->sched_class->task_wake_up(rq, p); 128 #endif 129 out: 130 task_rq_unlock(rq, &flags); 131 132 return success; 133 }
由於電池問題,暫時分析到這里,有幾個問題,目前需要弄清楚:
1)線程如何進行管理的?
2)kthreadd_task,kthread_create_list具體的作用是什么?
kthread_create_list這個隊列除了在kthread_create()中被添加節點之外,唯一被取出處理的地方,是在int kthreadd(void *unused)函數中。
1 int kthreadd(void *unused) 2 { 3 struct task_struct *tsk = current; 4 5 /* Setup a clean context for our children to inherit. */ 6 set_task_comm(tsk, "kthreadd"); 7 ignore_signals(tsk); 8 set_user_nice(tsk, KTHREAD_NICE_LEVEL); 9 set_cpus_allowed_ptr(tsk, cpu_all_mask); 10 set_mems_allowed(node_possible_map); 11 12 current->flags |= PF_NOFREEZE | PF_FREEZER_NOSIG; 13 14 for (;;) { 15 set_current_state(TASK_INTERRUPTIBLE); 16 if (list_empty(&kthread_create_list)) //如果隊列是空,進行調度 17 schedule(); 18 __set_current_state(TASK_RUNNING); //設置當前狀態為運行狀態 19 20 spin_lock(&kthread_create_lock); 21 while (!list_empty(&kthread_create_list)) { 22 struct kthread_create_info *create; 23 24 create = list_entry(kthread_create_list.next, 25 struct kthread_create_info, list); //從list中取出需要創建的線程描述符 26 list_del_init(&create->list); 27 spin_unlock(&kthread_create_lock); 28 29 create_kthread(create); 30 31 spin_lock(&kthread_create_lock); 32 } 33 spin_unlock(&kthread_create_lock); 34 } 35 36 return 0; 37 }
1 static void create_kthread(struct kthread_create_info *create) 2 { 3 int pid; 4 5 /* We want our own signal handler (we take no signals by default). */ 6 pid = kernel_thread(kthread, create, CLONE_FS | CLONE_FILES | SIGCHLD); //創建一個新的進程。 7 if (pid < 0) { 8 create->result = ERR_PTR(pid); 9 complete(&create->done); 10 } 11 }
而對kthreadd調用的地方為:
1 static noinline void __init_refok rest_init(void) 2 __releases(kernel_lock) 3 { 4 int pid; 5 6 kernel_thread(kernel_init, NULL, CLONE_FS | CLONE_SIGHAND); 7 numa_default_policy(); 8 pid = kernel_thread(kthreadd, NULL, CLONE_FS | CLONE_FILES); //線程添加函數,在系統初始化的時候開啟 9 kthreadd_task = find_task_by_pid_ns(pid, &init_pid_ns); 10 unlock_kernel();
從以上的分析可以看到,在系統啟動的時候,創建一個進程,運行kthreadd函數,而kthreadd函數循環判斷線程隊列是否為空,如果為空則調度出去,否則取出新添加的線程描述符,創建一個新的線程(進程);這個也就是kthread_create_list的作用。
再就是kthreadd_task這個變量。初始化在__init_refok rest_init(void)函數中,通過函數名find_task_by_pid_ns可以看出來,它的作用是通過pid找到任務。內核中調用kthreadd_task總共三處,一處是在初始化的時候,另外兩處是:static void reparent_to_kthreadd(void),kthread_create();
線程創建時通過wake_up_process(kthreadd_task);喚醒kthreadd_task,然后將線程描述符添加到進程管理中,而在reparent_to_kthreadd中則是對當前進程進行設置,
current->real_parent = current->parent = kthreadd_task;
線程創建的過程就是以上的過程,剩下的就是對進程管理進行分析的。