Linux reboot全過程

本文轉載自查看原文 2021-02-03 12:05 486 docker

一、版本說明

嵌入式Linux 下面的reboot命令看似簡單，但出問題時定位起來發現別有洞天。

下面就按在shell下執行reboot命令之后程序的執行過程進行解析。

Busybox：1.23.2                        ——制作根文件系統，/sbin/reboot程序的由來

Libc：2.6.1                                  ——標准C庫

Linux kernel：2.6.35                 ——內核版本

二、流程簡介

         如圖所示是reboot的簡要流程圖。

普通的reboot是通過busybox為入口，進入halt_main函數，然后給init進程發送SIGTERM信號，init進程接收到信號后給其他進程發送終止信號，最后調用C庫函數reboot，reboot通過系統調用sys_reboot進入內核，內核將整個系統重啟。其中在shell中執行reboot -f則通過halt_main直接調用C函數reboot，不經過init進程。
三、代碼詳解
1.reboot命令端

執行reboot命令，busybox檢查當前命令為reboot，進入函數halt_main，

reboot，halt和poweroff都會進入這個函數，不同的命令發送的信號和執行的操作不同。

現只分析reboot的情況。
代碼如下

    /*
     * Shared entry point for the halt, poweroff and reboot applets.
     *
     * The applet is identified by the first letter of applet_name
     * ('h', 'p' or 'r'); the resulting index selects both the signal
     * sent to init and the command passed to reboot().
     *
     * Options (bit values as tested below):
     *   -d N  sleep N seconds before doing anything
     *   -n    (2) do not call sync()
     *   -f    (4) call reboot() directly instead of signalling init
     *   -w    (8) only write the wtmp record, then return
     *   -i    accepted and ignored
     *
     * Returns EXIT_SUCCESS for -w, otherwise the result of the
     * kill()/reboot() call; a non-zero result ends in
     * bb_perror_nomsg_and_die().
     */
    int halt_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE;
    int halt_main(int argc UNUSED_PARAM, char **argv)
    {
       /* reboot() commands, in the same order as the "hpr" lookup below */
       static const int magic[] = {
           RB_HALT_SYSTEM,
           RB_POWER_OFF,
           RB_AUTOBOOT
       };
       /* Signals sent to init, in the same order as the "hpr" lookup below */
       static const smallint signals[] = { SIGUSR1, SIGUSR2, SIGTERM };

       int delay = 0;
       int which, flags, rc;

       /* Figure out which applet we're running */
       for (which = 0; "hpr"[which] != applet_name[0]; which++)
           continue;

       /* Parse and handle arguments */
       opt_complementary = "d+"; /* -d N */
       /* We support -w even if !ENABLE_FEATURE_WTMP,
       * in order to not break scripts.
       * -i (shut down network interfaces) is ignored.
       */
       flags = getopt32(argv, "d:nfwi", &delay);

       sleep(delay);

       write_wtmp();

       if (flags & 8) /* -w */
           return EXIT_SUCCESS;

       if (!(flags & 2)) /* no -n */
           sync();

       /* Perform action. */
       /* rc stays non-zero until some signal has been delivered successfully */
       rc = 1;
       if (!(flags & 4)) { /* no -f */
    //TODO: I tend to think that signalling linuxrc is wrong
    // pity original author didn't comment on it...
           if (ENABLE_FEATURE_INITRD) {
               /* talk to linuxrc */
               /* bbox init/linuxrc assumed */
               pid_t *pidlist = find_pid_by_name("linuxrc");
               if (pidlist[0] > 0)
                   rc = kill(pidlist[0], signals[which]);
               if (ENABLE_FEATURE_CLEAN_UP)
                   free(pidlist);
           }
           /* linuxrc was not signalled (or kill() failed): fall back to init */
           if (rc) {
               /* talk to init */
               if (!ENABLE_FEATURE_CALL_TELINIT) {
                   /* bbox init assumed */
                   rc = kill(1, signals[which]);
               } else {
                   /* SysV style init assumed */
                   /* runlevels:
                   * 0 == shutdown
                   * 6 == reboot */
                   execlp(CONFIG_TELINIT_PATH,
                           CONFIG_TELINIT_PATH,
                           which == 2 ? "6" : "0",
                           (char *)NULL
                   );
                   /* only reached when execlp() itself failed */
                   bb_perror_msg_and_die("can't execute '%s'",
                           CONFIG_TELINIT_PATH);
               }
           }
       } else {
           /* -f: bypass init and ask the C library/kernel directly */
           rc = reboot(magic[which]);
       }

       if (rc)
           bb_perror_nomsg_and_die();
       return rc;
    }

該函數判斷reboot是否帶了 -f 參數，如果帶了，直接調用C函數庫的reboot函數

如果沒帶，則通過

kill(1, signals[which]);

給init進程發送SIGTERM信號。

2.init進程端

init進程初始化函數init_main將部分信號進行重定義

           /* Install record_signo as the handler for every signal init
            * reacts to. The first argument is a bit mask with one bit per
            * signal number; SIGPWR and SIGHUP are only included when the
            * corresponding preprocessor condition holds. */
           bb_signals_recursive_norestart(0
               + (1 << SIGINT) /* Ctrl-Alt-Del */
               + (1 << SIGQUIT) /* re-exec another init */
    #ifdef SIGPWR
               + (1 << SIGPWR) /* halt */
    #endif
               + (1 << SIGUSR1) /* halt */
               + (1 << SIGTERM) /* reboot */
               + (1 << SIGUSR2) /* poweroff */
    #if ENABLE_FEATURE_USE_INITTAB
               + (1 << SIGHUP) /* reread /etc/inittab */
    #endif
               , record_signo);

    /*
     * Signal handler: only remembers the number of the signal that
     * arrived by storing it in the global bb_got_signal. No other work
     * is done here; the stored value is examined later by other code.
     */
    void record_signo(int signo)
    {
       bb_got_signal = signo;
    }


將SIGUSR1(halt) SIGUSR2(poweroff) SIGTERM(reboot)信號存入全局變量bb_got_signal中。
在init_main的最后進入一個while(1)循環，不斷檢查信號和等待子進程的退出

其中check_delayed_sigs就是用來檢查這個全局變量的，如下：

       /* Main loop of init: handle recorded signals, (re)start
        * respawn/askfirst actions, then reap exited children.
        * It never terminates on its own. */
       while (1) {
           int maybe_WNOHANG;

           /* Non-zero when at least one recorded signal was processed */
           maybe_WNOHANG = check_delayed_sigs();

           /* (Re)run the respawn/askfirst stuff */
           run_actions(RESPAWN | ASKFIRST);
           maybe_WNOHANG |= check_delayed_sigs();

           /* Don't consume all CPU time - sleep a bit */
           sleep(1);
           maybe_WNOHANG |= check_delayed_sigs();

           /* Wait for any child process(es) to exit.
           *
           * If check_delayed_sigs above reported that a signal
           * was caught, wait will be nonblocking. This ensures
           * that if SIGHUP has reloaded inittab, respawn and askfirst
           * actions will not be delayed until next child death.
           */
           if (maybe_WNOHANG)
               maybe_WNOHANG = WNOHANG;
           while (1) {
               pid_t wpid;
               struct init_action *a;

               /* If signals happen _in_ the wait, they interrupt it,
               * bb_signals_recursive_norestart set them up that way
               */
               wpid = waitpid(-1, NULL, maybe_WNOHANG);
               /* Nothing reaped (no child ready, error, or interrupted):
                * go back to the outer loop, which checks signals again */
               if (wpid <= 0)
                   break;

               a = mark_terminated(wpid);
               if (a) {
                   message(L_LOG, "process '%s' (pid %d) exited. "
                           "Scheduling for restart.",
                           a->command, wpid);
               }
               /* See if anyone else is waiting to be reaped */
               maybe_WNOHANG = WNOHANG;
           }
       } /* while (1) */

而里面的while(1)一般會阻塞在waitpid中，那么信號檢查是不是會有問題？

    WNOHANG        如果沒有可用的子進程退出狀態，立即返回而不是阻塞

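但maybe_WNOHANG的值應該是0，不是WNOHANG(=1)，所以waitpid仍然會阻塞。不過這並不影響信號檢查：正如上面代碼中的注釋所說，這些信號是通過bb_signals_recursive_norestart注冊的，信號到來時會打斷正在阻塞的waitpid，waitpid返回小於等於0的值，於是跳出內層循環，回到外層循環重新調用check_delayed_sigs。另外我是這樣理解的：因為所有的用戶進程都是init進程的子進程，我判斷前面執行reboot時也是一個子進程，halt_main發送完信號后就會退出，init接收到信號而且waitpid成功，然后跳出循環檢查信號。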

下面看一下信號的處理部分

    /*
     * Process signals that the handler stored in bb_got_signal.
     *
     * Keeps looping until bb_got_signal reads as zero, clearing it
     * before each signal is acted upon.
     *
     * Returns 1 if at least one signal was processed, 0 otherwise.
     */
    static int check_delayed_sigs(void)
    {
        /* Bit mask of the signals that lead to halt_reboot_pwoff() */
        const int shutdown_sigs = 0
    #ifdef SIGPWR
            + (1 << SIGPWR)
    #endif
            + (1 << SIGUSR1)
            + (1 << SIGUSR2)
            + (1 << SIGTERM);
        int handled = 0;
        smallint pending;

        for (;;) {
            pending = bb_got_signal;
            if (pending == 0)
                break;

            /* Clear the slot first so a later signal can be recorded */
            bb_got_signal = 0;
            handled = 1;

    #if ENABLE_FEATURE_USE_INITTAB
            if (pending == SIGHUP)
                reload_inittab();
    #endif
            if (pending == SIGINT)
                run_actions(CTRLALTDEL);
            if (pending == SIGQUIT) {
                /* returns only if no restart action defined */
                exec_restart_action();
            }
            if (shutdown_sigs & (1 << pending))
                halt_reboot_pwoff(pending);
        }

        return handled;
    }

判斷為SIGTERM進入halt_reboot_pwoff函數

    /*
     * Shut the system down in response to signal `sig`.
     *
     * SIGTERM requests a reboot, SIGUSR2 a power-off; any other signal
     * that reaches this function results in a halt.
     */
    static void halt_reboot_pwoff(int sig)
    {
        const char *action;
        unsigned how;

        /* Running shutdown actions may unmask signals, including the
         * one that brought us here. Restore default handlers first so
         * that e.g. "while true; do reboot; done" cannot start several
         * shutdown sequences. */
        reset_sighandlers_and_unblock_sigs();

        run_shutdown_and_kill_processes();

        switch (sig) {
        case SIGTERM:
            action = "reboot";
            how = RB_AUTOBOOT;
            break;
        case SIGUSR2:
            action = "poweroff";
            how = RB_POWER_OFF;
            break;
        default:
            action = "halt";
            how = RB_HALT_SYSTEM;
            break;
        }

        message(L_CONSOLE, "Requesting system %s", action);
        pause_and_low_level_reboot(how);
        /* not reached */
    }

reset_sighandlers_and_unblock_sigs函數將信號重置回默認處理。

    /*
     * Put the listed signals back to their default disposition and
     * unblock all signals.
     */
    static void reset_sighandlers_and_unblock_sigs(void)
    {
        /* One bit per signal number, as expected by bb_signals().
         * NOTE(review): SIGSTOP is listed although POSIX does not allow
         * changing its disposition - presumably harmless; confirm. */
        const int to_default =
              (1 << SIGUSR1)
            | (1 << SIGUSR2)
            | (1 << SIGTERM)
            | (1 << SIGQUIT)
            | (1 << SIGINT)
            | (1 << SIGHUP)
            | (1 << SIGTSTP)
            | (1 << SIGSTOP);

        bb_signals(to_default, SIG_DFL);
        sigprocmask_allsigs(SIG_UNBLOCK);
    }

run_shutdown_and_kill_processes函數給所有進程發送SIGTERM信號並執行sync(保存數據)

延時后再次發送SIGKILL信號，這里說明一下為什么要發送SIGKILL信號，一般的SIGINT和SIGTERM信號都可以屏蔽或轉作他用，SIGKILL信號是不可被屏蔽的，

這樣告訴其他進程必須終止。

    /*
     * Run the SHUTDOWN actions, then terminate every other process:
     * SIGTERM first, a one second grace period, then SIGKILL.
     * sync() is called after each round of signals.
     */
    static void run_shutdown_and_kill_processes(void)
    {
       /* Run everything to be run at "shutdown". This is done _prior_
       * to killing everything, in case people wish to use scripts to
       * shut things down gracefully... */
       run_actions(SHUTDOWN);

       message(L_CONSOLE | L_LOG, "The system is going down NOW!");

       /* Send signals to every process _except_ pid 1 */
       kill(-1, SIGTERM);
       message(L_CONSOLE | L_LOG, "Sent SIG%s to all processes", "TERM");
       sync();
       /* Grace period before the second, stronger signal */
       sleep(1);

       kill(-1, SIGKILL);
       /* Unlike the messages above, this one goes to the console only */
       message(L_CONSOLE, "Sent SIG%s to all processes", "KILL");
       sync();
       /*sleep(1); - callers take care about making a pause */
    }

最終進入函數pause_and_low_level_reboot，起一個輕量級進程執行reboot標准C函數

    /*
     * Wait one second, then issue reboot(magic) from a vfork()ed child
     * while the caller itself sleeps forever.
     *
     * magic: command value handed unchanged to reboot().
     */
    static void pause_and_low_level_reboot(unsigned magic)
    {
        /* Allow time for last message to reach serial console, etc */
        sleep(1);

        /* The reboot() call is made from a child because the kernel
         * calls do_exit(EXIT_SUCCESS) in linux/kernel/sys.c, which can
         * cause the machine to panic when the init process exits... */
        if (vfork() == 0) {
            /* child */
            reboot(magic);
            _exit(EXIT_SUCCESS);
        }

        /* Parent (or failed vfork): never return */
        for (;;)
            sleep(1);
    }

到這里busybox里面的內容全部處理完。

3.標准C函數reboot
前面執行reboot -f 就是直接執行的這個函數

reboot函數比較簡單，直接進行系統調用進入內核。（0xfee1dead，即"feel dead"，這個魔數還是比較有意思的）

其中參數howto為RB_AUTOBOOT=0x01234567

sysdeps/unix/sysv/linux/reboot.c

    /*
     * C library wrapper for the reboot system call.
     *
     * howto: command value forwarded as the third system call argument.
     *
     * The first two arguments are fixed magic numbers: 0xfee1dead and
     * 672274793 (0x28121969 in hexadecimal). Only three arguments are
     * passed to the system call here.
     *
     * Returns whatever INLINE_SYSCALL yields.
     */
    int
    reboot (int howto)
    {
      return INLINE_SYSCALL (reboot, 3, (int) 0xfee1dead, 672274793, howto);
    }

4.內核系統調用

kernel/sys.c

    /*
     * Kernel side of the reboot system call (excerpt; parts of the
     * original function are omitted where marked).
     *
     * magic1, magic2: magic values supplied by the caller (their
     *                 validation is not part of this excerpt).
     * cmd:            requested operation, dispatched in the switch below.
     * arg:            user pointer, unused in the cases shown here.
     *
     * Returns ret; -EINVAL for an unrecognised cmd.
     */
    SYSCALL_DEFINE4(reboot, int, magic1, int, magic2, unsigned int, cmd,
           void __user *, arg)
    {
    。。。

       /* The whole dispatch runs with reboot_mutex held */
       mutex_lock(&reboot_mutex);
       switch (cmd) {
       case LINUX_REBOOT_CMD_RESTART:
           kernel_restart(NULL);
           break;

       /* The two CAD commands only toggle the C_A_D flag */
       case LINUX_REBOOT_CMD_CAD_ON:
           C_A_D = 1;
           break;

       case LINUX_REBOOT_CMD_CAD_OFF:
           C_A_D = 0;
           break;

       case LINUX_REBOOT_CMD_HALT:
           kernel_halt();
           do_exit(0);
           /* no break here; presumably do_exit() does not return - confirm */
           panic("cannot halt");

       case LINUX_REBOOT_CMD_POWER_OFF:
           kernel_power_off();
           do_exit(0);
           break;

    。。。

       default:
           ret = -EINVAL;
           break;
       }
       mutex_unlock(&reboot_mutex);
       return ret;
    }

進入

case LINUX_REBOOT_CMD_RESTART:
kernel_restart(NULL);
break;
調用kernel_restart函數

——>machine_restart

    /*
     * Restart the machine.
     *
     * cmd: passed through to the platform restart hook.
     *
     * After machine_shutdown(), the platform hook ppc_md.restart is
     * called when one is set. If execution continues past it, a
     * "System Halted" message is printed, interrupts are disabled and
     * the CPU spins forever.
     */
    void machine_restart(char *cmd)
    {
        machine_shutdown();

        if (ppc_md.restart) {
            ppc_md.restart(cmd);
        }

    #ifdef CONFIG_SMP
        smp_send_stop();
    #endif

        printk(KERN_EMERG "System Halted, OK to turn off power\n");
        local_irq_disable();

        for (;;)
            ;
    }

這個函數之后就與具體的架構有關系了。

下面是powerpc P1020芯片的復位（注意：下面貼出的機器定義代碼是同在85xx目錄下的P2020RDB-PC，其.restart同樣指向fsl_rstcr_restart）

ppc_md.restart(cmd);的函數原型在/arch/powerpc/platforms/85xx中定義

    /*
     * Machine description for the "P2020RDB-PC" board.
     *
     * The .restart member is set to fsl_rstcr_restart.
     * NOTE(review): presumably this is the hook reached through
     * ppc_md.restart when this machine is the one in use - confirm.
     * The PCI bus fixup hook is only present when CONFIG_PCI is defined.
     */
    define_machine(p2020_rdb_pc) {
       .name           = "P2020RDB-PC",
       .probe           = p2020_rdb_pc_probe,
       .setup_arch       = mpc85xx_rdb_setup_arch,
       .init_IRQ       = mpc85xx_rdb_pic_init,
    #ifdef CONFIG_PCI
       .pcibios_fixup_bus   = fsl_pcibios_fixup_bus,
    #endif
       .get_irq       = mpic_get_irq,
       .restart       = fsl_rstcr_restart,
       .calibrate_decr       = generic_calibrate_decr,
       .progress       = udbg_progress,
    };

    /*
     * Platform restart hook: request a reset through the reset control
     * register, then spin.
     *
     * cmd: unused.
     *
     * Interrupts are disabled first. The register write is skipped when
     * rstcr is not set; in either case the function never returns.
     */
    void fsl_rstcr_restart(char *cmd)
    {
        local_irq_disable();

        if (rstcr) {
            /* set reset control register */
            out_be32(rstcr, 0x2); /* HRESET_REQ */
        }

        for (;;)
            ;
    }

最終cpu往寄存器Reset control register（0x000E_00B0）中寫2

也就是往管腳HRESET_REQ發出了一個信號，該信號應該與HRESET硬復位管腳相連

這樣就實現了CPU的復位

免責聲明！

本站轉載的文章為個人學習借鑒使用，本站對版權不負任何法律責任。如果侵犯了您的隱私權益，請聯系本站郵箱yoyou2525@163.com刪除。

猜您在找 Linux reboot全過程 Linux下安裝ffmpeg全過程 Linux:MPlayer安裝全過程 Linux部署項目全過程 linux 安裝及使用的全過程 Linux下Openssl的安裝全過程感冒全過程 Linux CentOS7.x安裝docker全過程 [oracle]linux oracle rac 搭建全過程 Linux 下編譯C程序的全過程