一、版本說明
嵌入式Linux 下面的reboot命令看似簡單,但出問題時定位起來發現別有洞天。
下面就按在shell下執行reboot命令之后程序的執行過程進行解析。
Busybox:1.23.2 ——制作根文件系統,/sbin/reboot程序的由來
Libc:2.6.1 ——標准C庫
Linux kernel:2.6.35 ——內核版本
二、流程簡介
如圖所示是reboot的簡要流程圖。
普通的reboot是通過busybox為入口,進入halt_main函數,然后給init進程發送SIGTERM信號,init進程接收到信號后給其他進程發送終止信號,最后調用C庫函數reboot,reboot通過系統調用sys_reboot進入內核,內核將整個系統重啟。其中在shell中執行reboot -f則通過halt_main直接調用C函數reboot,不經過init進程。
三、代碼詳解
1.reboot命令端
執行reboot命令,busybox檢查當前命令為reboot,進入函數halt_main,
reboot,halt和poweroff都會進入這個函數,不同的命令發送的信號和執行的操作不同。
現只分析reboot的情況。
代碼如下
/*
 * Common entry point of the busybox halt, poweroff and reboot applets.
 *
 * The applet is identified by the first letter of applet_name ('h', 'p' or
 * 'r'); that index selects both the reboot() magic and the signal to send.
 * Option bits returned by getopt32() for "d:nfwi", as tested below:
 * 2 = -n (skip sync), 4 = -f (force), 8 = -w (only write the wtmp record).
 *
 * Without -f the request is handed to linuxrc/init via a signal (or telinit
 * is exec'ed); with -f reboot() is called directly.
 * Returns rc when it is 0; a non-zero rc ends in bb_perror_nomsg_and_die().
 */
int halt_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE;
int halt_main(int argc UNUSED_PARAM, char **argv)
{
/* Both tables below are indexed by `which`: 0 = halt, 1 = poweroff, 2 = reboot */
static const int magic[] = {
RB_HALT_SYSTEM,
RB_POWER_OFF,
RB_AUTOBOOT
};
static const smallint signals[] = { SIGUSR1, SIGUSR2, SIGTERM };
int delay = 0;
int which, flags, rc;
/* Figure out which applet we're running */
for (which = 0; "hpr"[which] != applet_name[0]; which++)
continue;
/* Parse and handle arguments */
opt_complementary = "d+"; /* -d N */
/* We support -w even if !ENABLE_FEATURE_WTMP,
* in order to not break scripts.
* -i (shut down network interfaces) is ignored.
*/
flags = getopt32(argv, "d:nfwi", &delay);
/* delay stays 0 when -d was not given */
sleep(delay);
write_wtmp();
if (flags & 8) /* -w */
return EXIT_SUCCESS;
if (!(flags & 2)) /* no -n */
sync();
/* Perform action. */
/* rc stays non-zero until some kill()/reboot() call reports success */
rc = 1;
if (!(flags & 4)) { /* no -f */
//TODO: I tend to think that signalling linuxrc is wrong
// pity original author didn't comment on it...
if (ENABLE_FEATURE_INITRD) {
/* talk to linuxrc */
/* bbox init/linuxrc assumed */
pid_t *pidlist = find_pid_by_name("linuxrc");
if (pidlist[0] > 0)
rc = kill(pidlist[0], signals[which]);
if (ENABLE_FEATURE_CLEAN_UP)
free(pidlist);
}
/* linuxrc was not signalled (or kill failed): fall back to init */
if (rc) {
/* talk to init */
if (!ENABLE_FEATURE_CALL_TELINIT) {
/* bbox init assumed */
rc = kill(1, signals[which]);
} else {
/* SysV style init assumed */
/* runlevels:
* 0 == shutdown
* 6 == reboot */
execlp(CONFIG_TELINIT_PATH,
CONFIG_TELINIT_PATH,
which == 2 ? "6" : "0",
(char *)NULL
);
/* only reached when execlp() failed */
bb_perror_msg_and_die("can't execute '%s'",
CONFIG_TELINIT_PATH);
}
}
} else {
/* -f: bypass init and call reboot() directly */
rc = reboot(magic[which]);
}
if (rc)
bb_perror_nomsg_and_die();
return rc;
}
該函數判斷reboot是否帶了 -f 參數,如果帶了,直接調用C函數庫中的reboot函數
如果沒帶,則通過
kill(1, signals[which]);
給init進程發送SIGTERM信號。
2.init進程端
init進程初始化函數init_main將部分信號進行重定義
/*
 * Excerpt from init_main: install record_signo as the handler for every
 * signal whose bit is set in the mask below. The trailing comment on each
 * line names the action init associates with that signal.
 */
bb_signals_recursive_norestart(0
+ (1 << SIGINT) /* Ctrl-Alt-Del */
+ (1 << SIGQUIT) /* re-exec another init */
#ifdef SIGPWR
+ (1 << SIGPWR) /* halt */
#endif
+ (1 << SIGUSR1) /* halt */
+ (1 << SIGTERM) /* reboot */
+ (1 << SIGUSR2) /* poweroff */
#if ENABLE_FEATURE_USE_INITTAB
+ (1 << SIGHUP) /* reread /etc/inittab */
#endif
, record_signo);
/*
 * Signal handler: only remembers the number of the received signal in the
 * global bb_got_signal. A later signal overwrites an earlier one that has
 * not been consumed yet.
 */
void record_signo(int signo)
{
bb_got_signal = signo;
}
將SIGUSR1(halt) SIGUSR2(poweroff) SIGTERM(reboot)信號存入全局變量bb_got_signal中。
在init_main的最后進入一個while(1)循環,不斷檢查信號和等待子進程的退出
其中check_delayed_sigs就是用來檢查這個全局變量的,如下:
/*
 * Excerpt: the endless main loop at the end of init_main. Each pass handles
 * recorded signals, (re)starts respawn/askfirst actions, then reaps
 * terminated children.
 */
while (1) {
int maybe_WNOHANG;
/* non-zero as soon as any of the three checks in this pass saw a signal */
maybe_WNOHANG = check_delayed_sigs();
/* (Re)run the respawn/askfirst stuff */
run_actions(RESPAWN | ASKFIRST);
maybe_WNOHANG |= check_delayed_sigs();
/* Don't consume all CPU time - sleep a bit */
sleep(1);
maybe_WNOHANG |= check_delayed_sigs();
/* Wait for any child process(es) to exit.
*
* If check_delayed_sigs above reported that a signal
* was caught, wait will be nonblocking. This ensures
* that if SIGHUP has reloaded inittab, respawn and askfirst
* actions will not be delayed until next child death.
*/
if (maybe_WNOHANG)
maybe_WNOHANG = WNOHANG;
while (1) {
pid_t wpid;
struct init_action *a;
/* If signals happen _in_ the wait, they interrupt it,
* bb_signals_recursive_norestart set them up that way
*/
wpid = waitpid(-1, NULL, maybe_WNOHANG);
/* 0: nothing to reap (WNOHANG); < 0: error, which includes a
* wait interrupted by a signal. Either way leave the reaping
* loop so the outer loop can run check_delayed_sigs() again. */
if (wpid <= 0)
break;
a = mark_terminated(wpid);
if (a) {
message(L_LOG, "process '%s' (pid %d) exited. "
"Scheduling for restart.",
a->command, wpid);
}
/* See if anyone else is waiting to be reaped */
maybe_WNOHANG = WNOHANG;
}
} /* while (1) */
而里面的while(1)一般會阻塞在waitpid中,那么信號檢查是不是會有問題?
WNOHANG 如果沒有可用的子進程退出狀態,立即返回而不是阻塞
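但第一次進入內層循環時maybe_WNOHANG的值通常是0,不是WNOHANG(=1),所以waitpid確實會阻塞。不過這並不影響信號檢查:這些信號是通過bb_signals_recursive_norestart注冊的(被信號打斷的系統調用不會自動重啟),信號到來時阻塞中的waitpid會被打斷並返回-1,於是wpid <= 0成立,跳出內層循環,回到外層循環開頭由check_delayed_sigs處理信號(見上面代碼中waitpid前的注釋)。我原來是這樣理解的:因為所有的用戶進程都是init進程的子孫進程,我判斷前面執行reboot時也是一個子進程,halt_main發送完信號后就會退出,init接收到信號而且waitpid成功,然后跳出循環檢查信號;但實際上即使沒有子進程退出,信號本身也足以打斷waitpid。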
下面看一下信號的處理部分
/*
 * Consume the signal number recorded in bb_got_signal (repeatedly, until
 * none is pending) and run the action that belongs to it.
 *
 * Returns 1 if at least one signal was handled, 0 otherwise.
 */
static int check_delayed_sigs(void)
{
	/* Signals that lead to halt_reboot_pwoff() */
	const int shutdown_mask = 0
#ifdef SIGPWR
		+ (1 << SIGPWR)
#endif
		+ (1 << SIGUSR1)
		+ (1 << SIGUSR2)
		+ (1 << SIGTERM);
	int handled_any = 0;
	smallint signo;

	while ((signo = bb_got_signal) != 0) {
		/* Clear the slot first so a signal arriving while we
		 * work is picked up by the next iteration */
		bb_got_signal = 0;
		handled_any = 1;

#if ENABLE_FEATURE_USE_INITTAB
		if (signo == SIGHUP)
			reload_inittab();
#endif
		if (signo == SIGINT)
			run_actions(CTRLALTDEL);
		if (signo == SIGQUIT) {
			/* returns only if no restart action defined */
			exec_restart_action();
		}
		if ((1 << signo) & shutdown_mask)
			halt_reboot_pwoff(signo);
	}

	return handled_any;
}
判斷為SIGTERM進入halt_reboot_pwoff函數
/*
 * Shut the system down and then halt, power off or reboot it.
 *
 * sig selects the final action: SIGTERM -> reboot, SIGUSR2 -> poweroff,
 * any other value -> halt.
 */
static void halt_reboot_pwoff(int sig)
{
	const char *action_name;
	unsigned reboot_cmd;

	/* run() may be called below and it unmasks signals, including the
	 * one that brought us here. Without resetting the handlers first,
	 * something like
	 *	while true; do reboot; done
	 * would start several reboot scripts.
	 */
	reset_sighandlers_and_unblock_sigs();

	run_shutdown_and_kill_processes();

	switch (sig) {
	case SIGTERM:
		action_name = "reboot";
		reboot_cmd = RB_AUTOBOOT;
		break;
	case SIGUSR2:
		action_name = "poweroff";
		reboot_cmd = RB_POWER_OFF;
		break;
	default:
		action_name = "halt";
		reboot_cmd = RB_HALT_SYSTEM;
		break;
	}

	message(L_CONSOLE, "Requesting system %s", action_name);
	pause_and_low_level_reboot(reboot_cmd);
	/* not reached */
}
reset_sighandlers_and_unblock_sigs函數將信號重置回默認處理。
/*
 * Put the listed signals back to their default disposition (SIG_DFL) and
 * unblock all signals.
 */
static void reset_sighandlers_and_unblock_sigs(void)
{
	/* One bit per signal number */
	const int sigs_to_reset =
		  (1 << SIGUSR1)
		| (1 << SIGUSR2)
		| (1 << SIGTERM)
		| (1 << SIGQUIT)
		| (1 << SIGINT)
		| (1 << SIGHUP)
		| (1 << SIGTSTP)
		| (1 << SIGSTOP);

	bb_signals(sigs_to_reset, SIG_DFL);
	sigprocmask_allsigs(SIG_UNBLOCK);
}
run_shutdown_and_kill_processes函數先執行SHUTDOWN類型的動作(run_actions(SHUTDOWN)),然后給所有進程發送SIGTERM信號並執行sync(保存數據)
延時后再次發送SIGKILL信號,這里說明一下為什么要發送SIGKILL信號,一般的SIGINT和SIGTERM信號都可以屏蔽或轉作他用,SIGKILL信號是不可被屏蔽的,
這樣告訴其他進程必須終止。
/*
 * Orderly shutdown of user space: run the SHUTDOWN actions, send SIGTERM to
 * all processes, wait one second, then send SIGKILL. sync() is called after
 * each signal round. The order of the steps matters.
 */
static void run_shutdown_and_kill_processes(void)
{
/* Run everything to be run at "shutdown". This is done _prior_
* to killing everything, in case people wish to use scripts to
* shut things down gracefully... */
run_actions(SHUTDOWN);
message(L_CONSOLE | L_LOG, "The system is going down NOW!");
/* Send signals to every process _except_ pid 1 */
kill(-1, SIGTERM);
message(L_CONSOLE | L_LOG, "Sent SIG%s to all processes", "TERM");
sync();
/* give processes one second to act on SIGTERM */
sleep(1);
/* SIGKILL cannot be caught or ignored, so this ends whatever is left */
kill(-1, SIGKILL);
message(L_CONSOLE, "Sent SIG%s to all processes", "KILL");
sync();
/*sleep(1); - callers take care about making a pause */
}
最終進入函數pause_and_low_level_reboot,起一個輕量級進程執行reboot標准C函數
/*
 * Wait a moment, then issue reboot(magic) from a vfork()ed child while the
 * calling process idles forever. Never returns.
 */
static void pause_and_low_level_reboot(unsigned magic)
{
	pid_t child;

	/* Allow time for last message to reach serial console, etc */
	sleep(1);

	/* reboot() is issued from a child because the kernel calls
	 * do_exit(EXIT_SUCCESS) in linux/kernel/sys.c, and the machine
	 * can panic when the init process itself exits. */
	child = vfork();
	if (child != 0) {
		/* parent (or vfork failure): just idle */
		for (;;)
			sleep(1);
	}

	/* child */
	reboot(magic);
	_exit(EXIT_SUCCESS);
}
到這里busybox里面的內容全部處理完。
3.標准C函數reboot
前面執行reboot -f 就是直接執行的這個函數
reboot函數比較簡單,直接進行系統調用進入內核。(0xfee1dead 即“feel dead”,這個魔數還是比較有意思的)
其中參數howto為RB_AUTOBOOT=0x01234567
sysdeps/unix/sysv/linux/reboot.c
/*
 * C library wrapper for the reboot system call.
 *
 * howto is passed through as the third syscall argument; the first two
 * arguments are fixed magic values.
 * Returns whatever INLINE_SYSCALL yields.
 */
int
reboot (int howto)
{
  const int magic1 = (int) 0xfee1dead;
  const int magic2 = 672274793;

  return INLINE_SYSCALL (reboot, 3, magic1, magic2, howto);
}
4.內核系統調用
kernel/sys.c
/*
 * Kernel side of the reboot system call (excerpt: the lines consisting of
 * "。。。" mark code left out by the article, which presumably includes the
 * declaration of `ret` and the checks of magic1/magic2).
 * Dispatches on cmd while holding reboot_mutex.
 */
SYSCALL_DEFINE4(reboot, int, magic1, int, magic2, unsigned int, cmd,
void __user *, arg)
{
。。。
mutex_lock(&reboot_mutex);
switch (cmd) {
/* the case followed by the reboot walkthrough in this article */
case LINUX_REBOOT_CMD_RESTART:
kernel_restart(NULL);
break;
/* CAD_ON / CAD_OFF only toggle the C_A_D flag */
case LINUX_REBOOT_CMD_CAD_ON:
C_A_D = 1;
break;
case LINUX_REBOOT_CMD_CAD_OFF:
C_A_D = 0;
break;
case LINUX_REBOOT_CMD_HALT:
kernel_halt();
do_exit(0);
/* NOTE(review): presumably only reached if do_exit() returns */
panic("cannot halt");
case LINUX_REBOOT_CMD_POWER_OFF:
kernel_power_off();
do_exit(0);
break;
。。。
default:
ret = -EINVAL;
break;
}
mutex_unlock(&reboot_mutex);
return ret;
}
進入
case LINUX_REBOOT_CMD_RESTART:
kernel_restart(NULL);
break;
調用kernel_restart函數
——>machine_restart
/*
 * Restart the machine: shut it down, then hand over to the platform's
 * restart hook if one is registered in ppc_md.
 *
 * cmd is passed unchanged to the hook. This function does not return.
 */
void machine_restart(char *cmd)
{
	machine_shutdown();

	if (ppc_md.restart != NULL)
		ppc_md.restart(cmd);

	/* Reached only when no hook is set or the hook came back:
	 * stop the other CPUs, report, and spin with interrupts off. */
#ifdef CONFIG_SMP
	smp_send_stop();
#endif
	printk(KERN_EMERG "System Halted, OK to turn off power\n");
	local_irq_disable();
	for (;;)
		;
}
這個函數之后就與具體的架構有關系了。
下面是powerpc P1020芯片的復位
ppc_md.restart(cmd);實際調用的函數在/arch/powerpc/platforms/85xx下通過define_machine的.restart成員指定,這里是fsl_rstcr_restart
/*
 * Machine description for the "P2020RDB-PC" board: a table of
 * platform callbacks. The .restart member is the hook that
 * machine_restart() invokes through ppc_md.restart.
 */
define_machine(p2020_rdb_pc) {
.name = "P2020RDB-PC",
.probe = p2020_rdb_pc_probe,
.setup_arch = mpc85xx_rdb_setup_arch,
.init_IRQ = mpc85xx_rdb_pic_init,
#ifdef CONFIG_PCI
.pcibios_fixup_bus = fsl_pcibios_fixup_bus,
#endif
.get_irq = mpic_get_irq,
/* reset via the reset control register, see fsl_rstcr_restart() */
.restart = fsl_rstcr_restart,
.calibrate_decr = generic_calibrate_decr,
.progress = udbg_progress,
};
/*
 * Platform restart hook: with interrupts disabled, write 0x2 (HRESET_REQ)
 * to the reset control register if it is available, then spin forever.
 *
 * cmd is not used.
 */
void fsl_rstcr_restart(char *cmd)
{
	local_irq_disable();

	if (rstcr != NULL) {
		/* set reset control register */
		out_be32(rstcr, 0x2);	/* HRESET_REQ */
	}

	for (;;)
		;
}
最終cpu往寄存器Reset control register(0x000E_00B0)中寫2
也就是往管腳HRESET_REQ發出了一個信號,該信號應該與HRESET硬復位管腳相連
這樣就實現了CPU的復位