一個程序的前世今生(二)——可執行文件如何加載進內存


二、 加載--可執行文件放入內存
  通過前一章可以知道一個程序是如何從我們編寫的代碼變成一個可以執行的文件的。但是此時它仍是放在磁盤上的一個文件,並不是我們通常理解的程序--在內存上運行的一段代碼。  
  程序運行在內存上,所以首先我們需要了解虛擬內存的一些基本知識,然后我們以linux上在shell會話中執行./hello這一命令來跟蹤可執行文件是如何在內存中運行起來的。
  2.1 執行shell時發生了什么
    我們可以使用strace命令跟蹤程序執行的過程,在shell執行腳本時其實是根據文件開頭的腳本類型聲明調用對應的腳本解釋器,如果shell發現執行的是可執行程序則會調用execve系統調用。
    

     可以看出shell調用了一個叫做execve的系統調用來執行hello這個程序,系統調用如何執行到的下一篇再分析,先在這里留個坑。最終會執行內核的__do_execve_file這個函數,我們接下來分析它是如何執行ELF格式的文件的。

    2.2  execve

    函數位置: fs/exec.c,基本調用關系如下圖,前邊流程是准備過程,最后一步紅線標注的為調用可執行文件的過程,函數的簡化流程見下方
    

/*
 * Simplified excerpt of __do_execve_file() (fs/exec.c per the article).
 * It fills in a struct linux_binprm describing the file to execute and
 * passes it to exec_binprm(). The error checks after each call and the
 * final success/failure handling are elided in this excerpt.
 */
static int __do_execve_file(int fd, struct filename *filename, struct user_arg_ptr argv,
struct user_arg_ptr envp,
                int flags, struct file *file)
{
    char *pathbuf = NULL;
    struct linux_binprm *bprm;
    struct files_struct *displaced;
    int retval;
。。。
    retval = unshare_files(&displaced);    // per the article: stop using the files opened by the calling shell (TODO confirm against unshare_files())
    bprm = kzalloc(sizeof(*bprm), GFP_KERNEL);

    retval = prepare_bprm_creds(bprm);  // per the article: security-policy checks for the file (NOTE(review): name suggests credential setup — confirm)

    if (!file)
        file = do_open_execat(fd, filename, flags);    // no file was passed in on this path (per the article), so open the executable by filename


    sched_exec();        // per the article: lets the scheduling class (re)schedule the current process

    bprm->file = file;
    bprm->filename = filename->name;
    bprm->interp = bprm->filename;

    retval = bprm_mm_init(bprm);    // set up bprm's memory-management state; per the article this mainly initializes an mm_struct

    retval = prepare_arg_pages(bprm, argv, envp); // count the arguments and the environment variables

    retval = prepare_binprm(bprm);    // per the article: fills in uid/gid for permission handling and reads the first 128 bytes of the ELF file into bprm->buf

    retval = copy_strings_kernel(1, &bprm->filename, bprm);  // copy the file name into the newly allocated pages

    bprm->exec = bprm->p;    
    retval = copy_strings(bprm->envc, envp, bprm);    // copy the environment strings; per the article the stack grows down, so copying them first places them behind the arguments on the stack

    retval = copy_strings(bprm->argc, argv, bprm);    // copy the argument strings

    would_dump(bprm, bprm->file);

    retval = exec_binprm(bprm);        // execution of the binary starts here

    /* execve succeeded */
。。。
  // the handling after failure or success follows below; not analysed here

}

  程序中對文件的操作都使用到了這個結構體linux_binprm,在調用實際執行文件的函數時入參也是這個,結構體定義在binfmts.h中

 

/*
 * Per-exec state handed to the binary-format loaders (the argument of
 * load_binary()). Defined in binfmts.h per the article.
 */
struct linux_binprm{
    char buf[BINPRM_BUF_SIZE];  // holds the head of the executable file (first 128 bytes per the article)
#ifdef CONFIG_MMU
    struct vm_area_struct *vma;
    unsigned long vma_pages; // article: "highest address of the current memory page"; NOTE(review): the name suggests a page count — confirm
#else
# define MAX_ARG_PAGES    32
    struct page *page[MAX_ARG_PAGES];
#endif
    struct mm_struct *mm;
    unsigned long p; /* current top of mem */
    unsigned int
        cred_prepared:1,/* true if creds already prepared (multiple
                 * preps happen for interpreters) */
        cap_effective:1;/* true if has elevated effective capabilities,
                 * false if not; except for init which inherits
                 * its parent's caps anyway */
#ifdef __alpha__
    unsigned int taso:1;
#endif
    unsigned int recursion_depth;
    struct file * file;   // the file to execute
    struct cred *cred;    /* new credentials */
    int unsafe;        /* how unsafe this exec is (mask of LSM_UNSAFE_*) */
    unsigned int per_clear;    /* bits to clear in current->personality */
    int argc, envc;   // number of command-line arguments and of environment variables
    char * filename;    /* Name of binary as seen by procps */  // name of the file to be executed
    char * interp;        /* Name of the binary really executed. Most
                   of the time same as filename, but could be
                   different for binfmt_{misc,script} */  // real name of the executed file, usually the same as filename
    unsigned interp_flags;
    unsigned interp_data;
    unsigned long loader, exec;
};

   2.3 execve-->load_elf_binary

    上一節找到了"__do_execve_file" -> "exec_binprm" -> "search_binary_handler" -> "fmt->load_binary"這一條路徑,那怎么到了load_elf_binary這個函數呢?我們返回search_binary_handler函數查看一下

 

/*
 * Simplified excerpt: walk the registered binary formats and call each
 * format's load_binary() hook on bprm. Returns the retval of the last
 * load_binary() call made in this excerpt (early-exit logic is elided).
 */
int search_binary_handler(struct linux_binprm *bprm)
{
    bool need_retry = IS_ENABLED(CONFIG_MODULES);
    struct linux_binfmt *fmt;  // cursor over the registered binary-format handlers on the formats list
    int retval;
// some unrelated code omitted
 retry:
    read_lock(&binfmt_lock);
    list_for_each_entry(fmt, &formats, lh) {    // iterate over formats looking for a handler that accepts this file
        if (!try_module_get(fmt->module))        // skip this format when a reference on its module cannot be taken
            continue;
        read_unlock(&binfmt_lock);

        bprm->recursion_depth++;
        retval = fmt->load_binary(bprm);    // call the load_binary hook registered for this format; per the article a handler that does not recognise bprm returns an error and the search goes on with the remaining formats (this excerpt is simplified as if the matching ELF handler is found here)
        bprm->recursion_depth--;

        read_lock(&binfmt_lock);
    }
    read_unlock(&binfmt_lock);
    // unrelated code omitted

    return retval;
}

    可以看到,這里根據formats為頭的鏈表逐個遍歷,找到和bprm的fmt一致的已經注冊到內核的結構進行load操作,那么formats從哪兒來的呢?

    在上節的結構體linux_binprm的文件中可以找到來歷,其實在search_binary_handler中就可以看到,判斷文件類型是否相符的入參就是linux_binprm類型,所以formats的來歷從它找准沒錯,我們到binfmts.h中可以找到如下代碼:

 

/*
 * This structure defines the functions that are used to load the binary formats that
 * linux accepts.
 */
struct linux_binfmt {
    struct list_head lh;            /* linkage on the global formats list */
    struct module *module;          /* owning module; pinned with try_module_get() before load_binary is called */
    int (*load_binary)(struct linux_binprm *);  /* loader hook; must be non-NULL to register */
    int (*load_shlib)(struct file *);           /* presumably loads a shared library — not used in this article */
    int (*core_dump)(struct coredump_params *cprm);  /* presumably writes a core dump — not used in this article */
    unsigned long min_coredump;    /* minimal dump size */
} __randomize_layout;

extern void __register_binfmt(struct linux_binfmt *fmt, int insert);

/*
 * Registration of default binfmt handlers.
 * Passes insert == 0, so __register_binfmt() appends fmt at the tail of
 * the formats list.
 */
static inline void register_binfmt(struct linux_binfmt *fmt)
{
    __register_binfmt(fmt, 0);
}
/*
 * Same as register_binfmt(), but adds the new binfmt at the top of the
 * list (insert == 1 selects list_add() in __register_binfmt()).
 */
static inline void insert_binfmt(struct linux_binfmt *fmt)
{
    __register_binfmt(fmt, 1);
}

  __register_binfmt函數在exec.c中,所以從這里基本就可以看出來,例如處理ELF格式的模塊在模塊初始化時就把模塊名和加載方法通過注冊方式添加到formats的鏈表中,所以在執行文件的時候就可以根據遍歷formats來尋找系統可用的格式。

/* Head of the list of registered binary-format handlers. */
static LIST_HEAD(formats);
/* Reader/writer lock guarding the formats list. */
static DEFINE_RWLOCK(binfmt_lock);

/*
 * Link a binary-format handler into the global formats list.
 *
 * @fmt:    handler to register; must be non-NULL (BUG otherwise) and must
 *          provide load_binary (otherwise warn and do nothing).
 * @insert: non-zero puts the handler at the head of the list, zero
 *          appends it at the tail.
 *
 * The list is modified under the binfmt_lock write lock.
 */
void __register_binfmt(struct linux_binfmt * fmt, int insert)
{
    BUG_ON(!fmt);
    if (WARN_ON(!fmt->load_binary))
        return;

    write_lock(&binfmt_lock);
    if (insert)
        list_add(&fmt->lh, &formats);
    else
        list_add_tail(&fmt->lh, &formats);
    write_unlock(&binfmt_lock);
}

  那怎么注冊進來的呢? 在fs/binfmt_elf.c可以找到答案。這部分需要linux模塊加載的基本知識,不了解的可以去搜一下。簡單理解就是一個模塊加載進linux系統的時候會先執行一個module_init的程序初始化自己,elf注冊到formats的過程就在elf模塊的初始化函數處(下面的代碼中,ELF是通過core_initcall來注冊這個初始化函數的)。

/* Init hook: register the ELF handler (elf_format) on the formats list. Always returns 0. */
static int __init init_elf_binfmt(void)
{
    register_binfmt(&elf_format);
    return 0;
}

/* Exit hook: undo init_elf_binfmt() by unregistering elf_format. */
static void __exit exit_elf_binfmt(void)
{
    /* Remove the ELF loader (only elf_format is unregistered here). */
    unregister_binfmt(&elf_format);
}

/* Hook the ELF handler's init/exit functions into kernel start-up and module exit. */
core_initcall(init_elf_binfmt);
module_exit(exit_elf_binfmt);

   好了,講完來歷下面我們可以來看一下這個函數是如何加載和執行我們的輸入文件了。

2.4 load_elf_binary
  實在太長了,先加個注釋吧,有時間了再試着畫個圖梳理下。幾個相對復雜的調用下方做一些分析,可以先看下面涉及的函數輔助注釋進行理解。
/*
 * load_elf_binary() - load an ELF image described by bprm.
 *
 * Steps, following the numbering of the comments below:
 *   1-2. copy the ELF header out of bprm->buf and sanity-check it;
 *   3.   find the PT_INTERP segment, open the interpreter and read its header;
 *   4.   validate the interpreter and read its program headers;
 *   5.   map every PT_LOAD segment with elf_map();
 *   6.   pick the entry point (interpreter's or the image's own);
 *   7.   build the argument/auxv tables with create_elf_tables();
 *   8.   start_thread() at the chosen entry.
 *
 * Returns 0 on success or a negative errno value.
 */
static int load_elf_binary(struct linux_binprm *bprm)
{
    struct file *interpreter = NULL; /* to shut gcc up */



    unsigned long load_addr = 0, load_bias = 0;
    int load_addr_set = 0;
    unsigned long error;
    struct elf_phdr *elf_ppnt, *elf_phdata, *interp_elf_phdata = NULL;
    unsigned long elf_bss, elf_brk;
    int bss_prot = 0;
    int retval, i;
    unsigned long elf_entry;
    unsigned long interp_load_addr = 0;
    unsigned long start_code, end_code, start_data, end_data;
    unsigned long reloc_func_desc __maybe_unused = 0;
    int executable_stack = EXSTACK_DEFAULT;
    struct {
        struct elfhdr elf_ex;
        struct elfhdr interp_elf_ex;
    } *loc;
    struct arch_elf_state arch_state = INIT_ARCH_ELF_STATE;
    struct pt_regs *regs;

    loc = kmalloc(sizeof(*loc), GFP_KERNEL);
    if (!loc) {
        retval = -ENOMEM;
        goto out_ret;
    }

    /*
     * Fill in the ELF header. Per the article, the kernel has already
     * filled bprm->buf with the first 128 bytes of the image before
     * load_elf_binary() is called; that data is used here as the image's
     * file header.
     */
    loc->elf_ex = *((struct elfhdr *)bprm->buf);

    retval = -ENOEXEC;
    /*
     * First of all, some simple consistency checks: compare the leading
     * identification bytes against the ELF magic (ELFMAG, "\177ELF").
     */
    if (memcmp(loc->elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
        goto out;
    /*
     * Besides the magic, the image type must be ET_EXEC or ET_DYN; per
     * the article the former is an executable image, the latter a shared
     * object.
     */
    if (loc->elf_ex.e_type != ET_EXEC && loc->elf_ex.e_type != ET_DYN)
        goto out;
    /* Check the target-machine identification. */
    if (!elf_check_arch(&loc->elf_ex))
        goto out;
    if (elf_check_fdpic(&loc->elf_ex))
        goto out;
    if (!bprm->file->f_op->mmap)
        goto out;

    /*
     * Load the program header table. Per the article, load_elf_phdrs()
     * reads the whole program header table with kernel_read(), requires
     * at least one segment and caps the total table size at 64K
     * (TODO confirm against load_elf_phdrs()).
     */
    elf_phdata = load_elf_phdrs(&loc->elf_ex, bprm->file);
    if (!elf_phdata)
        goto out;

    /*
     * 3. Find and process the interpreter segment.
     * The loop looks for the PT_INTERP program header. Its contents
     * (p_filesz bytes at p_offset) are read into a buffer; they are just
     * a string, the interpreter's file name, e.g. "/lib/ld-linux.so.2".
     * That file is then opened with open_exec() and its header is read
     * with kernel_read().
     */
    elf_ppnt = elf_phdata;
    for (i = 0; i < loc->elf_ex.e_phnum; i++, elf_ppnt++) {    // walk the program headers one by one
        char *elf_interpreter;
        loff_t pos;

        /* 3.1 Only the PT_INTERP segment is of interest; skip the others. */
        if (elf_ppnt->p_type != PT_INTERP)
            continue;

        /*
         * This is the program interpreter used for shared libraries -
         * for now assume that this is an a.out format binary.
         */
        retval = -ENOEXEC;
        if (elf_ppnt->p_filesz > PATH_MAX || elf_ppnt->p_filesz < 2)
            goto out_free_ph;

        retval = -ENOMEM;
        /* Allocate a buffer for the interpreter (dynamic linker) path. */
        elf_interpreter = kmalloc(elf_ppnt->p_filesz, GFP_KERNEL);
        if (!elf_interpreter)
            goto out_free_ph;

        /* 3.2 Read the whole interpreter segment (p_filesz bytes at p_offset). */
        pos = elf_ppnt->p_offset;
        retval = kernel_read(bprm->file, elf_interpreter, elf_ppnt->p_filesz, &pos);
        if (retval != elf_ppnt->p_filesz) {
            if (retval >= 0)
                retval = -EIO;
            goto out_free_interp;
        }
        /* make sure path is NULL terminated */
        retval = -ENOEXEC;
        if (elf_interpreter[elf_ppnt->p_filesz - 1] != '\0')
            goto out_free_interp;

        /*
         * 3.3 Open the interpreter file with open_exec(). (The article
         * adds that the kernel places marker pairs on the new process
         * stack to direct the dynamic linker; see open_exec() —
         * TODO confirm.)
         */
        interpreter = open_exec(elf_interpreter);
        kfree(elf_interpreter);
        retval = PTR_ERR(interpreter);
        if (IS_ERR(interpreter))
            goto out_free_ph;

        /*
         * If the binary is not readable then enforce mm->dumpable = 0
         * regardless of the interpreter's permissions.
         */
        would_dump(bprm, interpreter);

        /* Get the exec headers */
        pos = 0;
        /* 3.4 Read the interpreter's ELF header (sizeof(loc->interp_elf_ex) bytes from offset 0). */
        retval = kernel_read(interpreter, &loc->interp_elf_ex, sizeof(loc->interp_elf_ex), &pos);
        if (retval != sizeof(loc->interp_elf_ex)) {
            if (retval >= 0)
                retval = -EIO;
            goto out_free_dentry;
        }

        break;

out_free_interp:
        kfree(elf_interpreter);
        goto out_free_ph;
    }

    elf_ppnt = elf_phdata;
    for (i = 0; i < loc->elf_ex.e_phnum; i++, elf_ppnt++)
        switch (elf_ppnt->p_type) {
        /* Record whether the stack should be executable in executable_stack. */
        case PT_GNU_STACK:
            if (elf_ppnt->p_flags & PF_X)
                executable_stack = EXSTACK_ENABLE_X;
            else
                executable_stack = EXSTACK_DISABLE_X;
            break;

        /* Segments of type PT_LOPROC..PT_HIPROC are handed to architecture-specific checks. */
        case PT_LOPROC ... PT_HIPROC:
            retval = arch_elf_pt_proc(&loc->elf_ex, elf_ppnt, bprm->file, false, &arch_state);
            if (retval)
                goto out_free_dentry;
            break;
        }

    /* 4. Check the interpreter and read its program headers. */
    /* Some simple consistency checks for the interpreter */
    /*
     * 4.1 Check the interpreter's header. This is only done when an
     * interpreter (dynamic linker) exists; the ELF file is executed
     * either way.
     */
    if (interpreter) {
        retval = -ELIBBAD;
        /* Not an ELF interpreter */
        if (memcmp(loc->interp_elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
            goto out_free_dentry;
        /* Verify the interpreter has a valid arch */
        if (!elf_check_arch(&loc->interp_elf_ex) || elf_check_fdpic(&loc->interp_elf_ex))
            goto out_free_dentry;

        /* Load the interpreter program headers (4.2 read the interpreter's program header table) */
        interp_elf_phdata = load_elf_phdrs(&loc->interp_elf_ex, interpreter);
        if (!interp_elf_phdata)
            goto out_free_dentry;

        /* Pass PT_LOPROC..PT_HIPROC headers to arch code */
        elf_ppnt = interp_elf_phdata;
        for (i = 0; i < loc->interp_elf_ex.e_phnum; i++, elf_ppnt++)
            switch (elf_ppnt->p_type) {
            case PT_LOPROC ... PT_HIPROC:
                retval = arch_elf_pt_proc(&loc->interp_elf_ex, elf_ppnt, interpreter, true, &arch_state);
                if (retval)
                    goto out_free_dentry;
                break;
            }
    }

    /*
     * Allow arch code to reject the ELF at this point, whilst it's
     * still possible to return an error to the code that invoked
     * the exec syscall.
     */
    retval = arch_check_elf(&loc->elf_ex, !!interpreter, &loc->interp_elf_ex, &arch_state);
    if (retval)
        goto out_free_dentry;

    /*
     * Flush all traces of the currently running executable (per the
     * article, everything inherited from the parent process image is
     * dropped here).
     */
    retval = flush_old_exec(bprm);
    if (retval)
        goto out_free_dentry;

    /* Do this immediately, since STACK_TOP as used in setup_arg_pages
       may depend on the personality. */
    /* Set the personality of the ELF executable. */
    SET_PERSONALITY2(loc->elf_ex, &arch_state);
    if (elf_read_implies_exec(loc->elf_ex, executable_stack))
        current->personality |= READ_IMPLIES_EXEC;

    if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space)
        current->flags |= PF_RANDOMIZE;

    setup_new_exec(bprm);
    install_exec_creds(bprm);

    /*
     * Do this so that we can load the interpreter, if need be. We will
     * change some of these later. (Article: obtains the pages needed for
     * running the dynamic linker below.)
     */
    retval = setup_arg_pages(bprm, randomize_stack_top(STACK_TOP), executable_stack);
    if (retval < 0)
        goto out_free_dentry;

    /* bss and brk end markers start at 0 */
    elf_bss = 0;
    elf_brk = 0;

    /* code segment bounds */
    start_code = ~0UL;
    end_code = 0;
    /* data segment bounds */
    start_data = 0;
    end_data = 0;

    /*
     * Now we do a little grungy work by mmapping the ELF image into
     * the correct location in memory.
     *
     * 5. Load the image's segments. The loop searches the program
     * headers for PT_LOAD entries; only those need to be loaded. Before
     * mapping, the load address is determined (page alignment and the
     * segment's p_vaddr), then elf_map() maps a contiguous range of the
     * file into the user address space and returns the address actually
     * mapped.
     */
    /*
     * Summary of the loop below:
     *  - ET_EXEC images are mapped at fixed addresses, so the
     *    fixed-mapping flag (elf_fixed) is added to elf_flags.
     *  - For ET_DYN with an interpreter, mapping starts from
     *    ELF_ET_DYN_BASE, plus arch_mmap_rnd() when PF_RANDOMIZE is set;
     *    the offset is kept in load_bias. total_size is the size of the
     *    whole mapping.
     *  - elf_map() then maps the file segment into virtual memory.
     *  - On the first mapping load_addr is recorded; for ET_DYN the
     *    load_bias is added to it.
     *  - After each mapping the bookkeeping is updated: start_code keeps
     *    the lowest p_vaddr seen and start_data the highest, while
     *    elf_bss, end_code, end_data and elf_brk only ever grow.
     *  - After the loop all of them are offset by load_bias and
     *    set_brk() sets up the range from elf_bss to elf_brk.
     */
    /* Walk the program header table read earlier and map every loadable segment. */
    for(i = 0, elf_ppnt = elf_phdata; i < loc->elf_ex.e_phnum; i++, elf_ppnt++) {
        int elf_prot, elf_flags, elf_fixed = MAP_FIXED_NOREPLACE;
        unsigned long k, vaddr;
        unsigned long total_size = 0;

        /* 5.1 Only PT_LOAD segments need to be loaded. */
        if (elf_ppnt->p_type != PT_LOAD)
            continue;

        if (unlikely (elf_brk > elf_bss)) {
            unsigned long nbyte;

            /* 5.2 Check address and page information. */
            /* There was a PT_LOAD segment with p_memsz > p_filesz
               before this one. Map anonymous pages, if needed,
               and clear the area.  */
            retval = set_brk(elf_bss + load_bias, elf_brk + load_bias, bss_prot);
            if (retval)
                goto out_free_dentry;
            nbyte = ELF_PAGEOFFSET(elf_bss);
            if (nbyte) {
                nbyte = ELF_MIN_ALIGN - nbyte;
                if (nbyte > elf_brk - elf_bss)
                    nbyte = elf_brk - elf_bss;
                if (clear_user((void __user *)elf_bss + load_bias, nbyte)) {
                    /*
                     * This bss-zeroing can fail if the ELF
                     * file specifies odd protections. So
                     * we don't check the return value
                     */
                }
            }

            /*
             * Some binaries have overlapping elf segments and then
             * we have to forcefully map over an existing mapping
             * e.g. over this newly established brk mapping.
             */
            elf_fixed = MAP_FIXED;
        }

        elf_prot = make_prot(elf_ppnt->p_flags);

        elf_flags = MAP_PRIVATE | MAP_DENYWRITE | MAP_EXECUTABLE;

        vaddr = elf_ppnt->p_vaddr;
        /*
         * If we are loading ET_EXEC or we have already performed
         * the ET_DYN load_addr calculations, proceed normally.
         */
        if (loc->elf_ex.e_type == ET_EXEC || load_addr_set) {
            elf_flags |= elf_fixed;
        } else if (loc->elf_ex.e_type == ET_DYN) {
            /*
             * This logic is run once for the first LOAD Program
             * Header for ET_DYN binaries to calculate the
             * randomization (load_bias) for all the LOAD
             * Program Headers, and to calculate the entire
             * size of the ELF mapping (total_size). (Note that
             * load_addr_set is set to true later once the
             * initial mapping is performed.)
             *
             * There are effectively two types of ET_DYN
             * binaries: programs (i.e. PIE: ET_DYN with INTERP)
             * and loaders (ET_DYN without INTERP, since they
             * _are_ the ELF interpreter). The loaders must
             * be loaded away from programs since the program
             * may otherwise collide with the loader (especially
             * for ET_EXEC which does not have a randomized
             * position). For example to handle invocations of
             * "./ld.so someprog" to test out a new version of
             * the loader, the subsequent program that the
             * loader loads must avoid the loader itself, so
             * they cannot share the same load range. Sufficient
             * room for the brk must be allocated with the
             * loader as well, since brk must be available with
             * the loader.
             *
             * Therefore, programs are loaded offset from
             * ELF_ET_DYN_BASE and loaders are loaded into the
             * independently randomized mmap region (0 load_bias
             * without MAP_FIXED).
             */
            if (interpreter) {
                load_bias = ELF_ET_DYN_BASE;
                if (current->flags & PF_RANDOMIZE)
                    load_bias += arch_mmap_rnd();
                elf_flags |= elf_fixed;
            } else
                load_bias = 0;

            /*
             * Since load_bias is used for all subsequent loading
             * calculations, we must lower it by the first vaddr
             * so that the remaining calculations based on the
             * ELF vaddrs will be correctly offset. The result
             * is then page aligned.
             */
            load_bias = ELF_PAGESTART(load_bias - vaddr);

            total_size = total_mapping_size(elf_phdata, loc->elf_ex.e_phnum);
            if (!total_size) {
                retval = -EINVAL;
                goto out_free_dentry;
            }
        }

        /*
         * 5.3 Map the segment. With the load address decided, elf_map()
         * maps a contiguous range of the image file into the user
         * address space; its return value is the start address actually
         * mapped.
         */
        error = elf_map(bprm->file, load_bias + vaddr, elf_ppnt, elf_prot, elf_flags, total_size);
        if (BAD_ADDR(error)) {
            retval = IS_ERR((void *)error) ? PTR_ERR((void*)error) : -EINVAL;
            goto out_free_dentry;
        }

        if (!load_addr_set) {
            load_addr_set = 1;
            load_addr = (elf_ppnt->p_vaddr - elf_ppnt->p_offset);
            if (loc->elf_ex.e_type == ET_DYN) {
                load_bias += error - ELF_PAGESTART(load_bias + vaddr);
                load_addr += load_bias;
                reloc_func_desc = load_bias;
            }
        }
        k = elf_ppnt->p_vaddr;
        if (k < start_code)
            start_code = k;
        if (start_data < k)
            start_data = k;

        /*
         * Check to see if the section's size will overflow the
         * allowed task size. Note that p_filesz must always be
         * <= p_memsz so it is only necessary to check p_memsz.
         */
        if (BAD_ADDR(k) || elf_ppnt->p_filesz > elf_ppnt->p_memsz || elf_ppnt->p_memsz > TASK_SIZE || TASK_SIZE - elf_ppnt->p_memsz < k) {
            /* set_brk can never work. Avoid overflows. */
            retval = -EINVAL;
            goto out_free_dentry;
        }

        k = elf_ppnt->p_vaddr + elf_ppnt->p_filesz;

        if (k > elf_bss)
            elf_bss = k;
        if ((elf_ppnt->p_flags & PF_X) && end_code < k)
            end_code = k;
        if (end_data < k)
            end_data = k;
        k = elf_ppnt->p_vaddr + elf_ppnt->p_memsz;
        if (k > elf_brk) {
            bss_prot = elf_prot;
            elf_brk = k;
        }
    }

    /* Offset the recorded addresses by load_bias now that the image is mapped. */
    loc->elf_ex.e_entry += load_bias;
    elf_bss += load_bias;
    elf_brk += load_bias;
    start_code += load_bias;
    end_code += load_bias;
    start_data += load_bias;
    end_data += load_bias;

    /* Calling set_brk effectively mmaps the pages that we need
     * for the bss and break sections.  We must do this before
     * mapping in the interpreter, to make sure it doesn't wind
     * up getting placed where the bss needs to go.
     */
    /* Use set_brk() to set up the bss range. */
    retval = set_brk(elf_bss, elf_brk, bss_prot);
    if (retval)
        goto out_free_dentry;
    if (likely(elf_bss != elf_brk) && unlikely(padzero(elf_bss))) {
        retval = -EFAULT; /* Nobody gets to see this, but.. */
        goto out_free_dentry;
    }

    /*
     * 6. Fill in the program entry address. If an interpreter has to be
     * loaded, its image is loaded with load_elf_interp() and the future
     * user-space entry becomes the interpreter's entry point (the
     * returned load address plus the interpreter's e_entry). Without an
     * interpreter the entry is the image's own e_entry.
     */
    if (interpreter) {
        /*
         * A dynamic linker exists. Per the article, control is passed
         * to it; it checks the program's shared-library dependencies
         * and loads them as needed. Loading the linker itself is done
         * by load_elf_interp().
         */
        unsigned long interp_map_addr = 0;

        elf_entry = load_elf_interp(&loc->interp_elf_ex, interpreter, &interp_map_addr, load_bias, interp_elf_phdata);
        if (!IS_ERR((void *)elf_entry)) {
            /*
             * load_elf_interp() returns relocation
             * adjustment
             */
            interp_load_addr = elf_entry;
            elf_entry += loc->interp_elf_ex.e_entry;
        }
        if (BAD_ADDR(elf_entry)) {
            retval = IS_ERR((void *)elf_entry) ? (int)elf_entry : -EINVAL;
            goto out_free_dentry;
        }
        reloc_func_desc = interp_load_addr;

        allow_write_access(interpreter);
        fput(interpreter);
    } else {
        elf_entry = loc->elf_ex.e_entry;
        if (BAD_ADDR(elf_entry)) {
            retval = -EINVAL;
            goto out_free_dentry;
        }
    }

    kfree(interp_elf_phdata);
    kfree(elf_phdata);

    set_binfmt(&elf_format);

#ifdef ARCH_HAS_SETUP_ADDITIONAL_PAGES
    retval = arch_setup_additional_pages(bprm, !!interpreter);
    if (retval < 0)
        goto out;
#endif /* ARCH_HAS_SETUP_ADDITIONAL_PAGES */

    /*
     * 7. create_elf_tables() fills in the arguments, environment and
     * other required information. Per the article: before the user-space
     * image starts running, information for the image and the
     * interpreter must be prepared — the usual argc, envc and so on,
     * plus the "auxiliary vector" — and copied to user space so that it
     * is on the user stack when the CPU enters the interpreter's or the
     * image's entry point.
     */
    retval = create_elf_tables(bprm, &loc->elf_ex, load_addr, interp_load_addr);
    if (retval < 0)
        goto out;
    current->mm->end_code = end_code;
    current->mm->start_code = start_code;
    current->mm->start_data = start_data;
    current->mm->end_data = end_data;
    current->mm->start_stack = bprm->p;

    if ((current->flags & PF_RANDOMIZE) && (randomize_va_space > 1)) {
        /*
         * For architectures with ELF randomization, when executing
         * a loader directly (i.e. no interpreter listed in ELF
         * headers), move the brk area out of the mmap region
         * (since it grows up, and may collide early with the stack
         * growing down), and into the unused ELF_ET_DYN_BASE region.
         */
        if (IS_ENABLED(CONFIG_ARCH_HAS_ELF_RANDOMIZE) && loc->elf_ex.e_type == ET_DYN && !interpreter)
            current->mm->brk = current->mm->start_brk = ELF_ET_DYN_BASE;

        current->mm->brk = current->mm->start_brk = arch_randomize_brk(current->mm);
#ifdef compat_brk_randomized
        current->brk_randomized = 1;
#endif
    }

    if (current->personality & MMAP_PAGE_ZERO) {
        /* Why this, you ask???  Well SVr4 maps page 0 as read-only,
           and some applications "depend" upon this behavior.
           Since we do not have the power to recompile these, we
           emulate the SVr4 behavior. Sigh. */
        error = vm_mmap(NULL, 0, PAGE_SIZE, PROT_READ | PROT_EXEC, MAP_FIXED | MAP_PRIVATE, 0);
    }

    /* Fetch the saved register set of the current task. */
    regs = current_pt_regs();
#ifdef ELF_PLAT_INIT
    /*
     * The ABI may specify that certain registers be set up in special
     * ways (on i386 %edx is the address of a DT_FINI function, for
     * example.  In addition, it may also specify (eg, PowerPC64 ELF)
     * that the e_entry field is the address of the function descriptor
     * for the startup routine, rather than the address of the startup
     * routine itself.  This macro performs whatever initialization to
     * the regs structure is required as well as any relocations to the
     * function descriptor entries when executing dynamically links apps.
     */
    ELF_PLAT_INIT(regs, reloc_func_desc);
#endif

    finalize_exec(bprm);
    /*
     * 8. Finally start_thread() points the saved program counter and
     * stack pointer (eip/esp in the article's x86 wording) at the new
     * addresses, so the CPU enters the new entry point when it returns
     * to user space: the interpreter's entry if an interpreter image
     * exists, otherwise the image's own entry.
     *
     * Per the article: a statically linked image needs no shared
     * libraries and therefore no interpreter; otherwise an interpreter
     * must exist. gcc links dynamically unless static linking is
     * requested explicitly, so normally an interpreter is needed. Hence
     * after the kernel loads the program and jumps to user space, the
     * interpreter runs first, loads the dynamic libraries the program
     * needs (libc etc.), and only then passes control to the program.
     */
    /* Start executing the new program (article: this is already the child process). */
    start_thread(regs, elf_entry, bprm->p);
    retval = 0;
out:
    kfree(loc);
out_ret:
    return retval;

    /* error cleanup */
out_free_dentry:
    kfree(interp_elf_phdata);
    allow_write_access(interpreter);
    if (interpreter)
        fput(interpreter);
out_free_ph:
    kfree(elf_phdata);
    goto out;
}
 ① load_elf_binary->flush_old_exec: 主要用來進行新進程地址空間的替換,並刪除同線程組中的其他線程
/*
 * Simplified excerpt: switch the current task over to the address space
 * prepared in bprm and reset per-task state left over from the old image.
 * Always returns 0 in this excerpt (error paths are elided).
 */
int flush_old_exec(struct linux_binprm * bprm)
{
    de_thread(current);                     /* per the article: removes the other threads of this thread group */
    set_mm_exe_file(bprm->mm, bprm->file);  /* per the article: records the executable in the mm's exe_file member */
    exec_mmap(bprm->mm);                    /* per the article: installs bprm->mm as the task's address space */

    bprm->mm = NULL;                        /* bprm no longer refers to the mm after exec_mmap() */
    set_fs(USER_DS);
    /* clear task flags that must not carry over into the new image */
    current->flags &= ~(PF_RANDOMIZE | PF_FORKNOEXEC | PF_KTHREAD |
                    PF_NOFREEZE | PF_NO_SETAFFINITY);
    flush_thread();                         /* per the article: initializes the TLS metadata in thread_struct */
    current->personality &= ~bprm->per_clear;  /* drop the personality bits listed in per_clear */
    return 0;
} 

   因為即將要替換新進程的地址空間,所以首先通過de_thread函數用來刪除同線程組中的其他線程。

  set_mm_exe_file函數設置新進程的路徑,即mm_struct中的exe_file成員變量。

  通過exec_mmap函數將新進程的地址空間設置為bprm中創建並設置好的地址空間。

  flush_thread函數主要用來初始化thread_struct中的TLS元數據信息。

  最后設置進程的標志位flags和personality,personality用來兼容linux的舊版本或者BSD等其他版本。

 ② load_elf_binary->setup_new_exec: 對新進程的mm_struct結構進行設置
/*
 * Simplified excerpt: finish configuring the current task for the new
 * image — mmap layout, dumpable state, task name, task size, signal
 * handlers and close-on-exec files.
 */
void setup_new_exec(struct linux_binprm * bprm)
{
    arch_pick_mmap_layout(current->mm);     /* per the article: sets the mmap base address and allocation function */
    current->sas_ss_sp = current->sas_ss_size = 0;  /* reset sas_ss_sp / sas_ss_size */

    /* dumpable as user only when effective and real uid/gid match */
    if (uid_eq(current_euid(), current_uid()) && gid_eq(current_egid(), current_gid()))
        set_dumpable(current->mm, SUID_DUMP_USER);
    else
        set_dumpable(current->mm, suid_dumpable);

    perf_event_exec();
    /* task name (comm) is the last path component of bprm->filename */
    __set_task_comm(current, kbasename(bprm->filename), true);

    current->mm->task_size = TASK_SIZE;
    /* new credentials differ from the current effective ids: clear pdeath_signal */
    if (!uid_eq(bprm->cred->uid, current_euid()) ||
        !gid_eq(bprm->cred->gid, current_egid())) {
        current->pdeath_signal = 0;
    } else {
        would_dump(bprm, bprm->file);
        if (bprm->interp_flags & BINPRM_FLAGS_ENFORCE_NONDUMP)
            set_dumpable(current->mm, suid_dumpable);
    }

    current->self_exec_id++;
    flush_signal_handlers(current, 0);      /* per the article: clears the signal handlers */
    do_close_on_exec(current->files);       /* per the article: closes the corresponding (close-on-exec) files */
}

  arch_pick_mmap_layout函數設置了mmap的起始地址和分配函數。
  然后更新mm的標志位,通過kbasename函數根據文件路徑bprm->filename獲得最后的文件名,再調用__set_task_comm函數設置進程名,最終設置到task_struct的comm變量中。
  flush_signal_handlers用於清空信號的處理函數。最后調用do_close_on_exec關閉對應的文件。

  ③ load_elf_binary->setup_arg_pages;

/*
 * Abridged excerpt: move the temporary argument stack (bprm->vma) so that
 * it ends at stack_top, then expand the stack. The elided part ("...")
 * adjusts flags per the article; the return statement is also not shown
 * in this excerpt.
 */
int setup_arg_pages(struct linux_binprm *bprm,
            unsigned long stack_top,
            int executable_stack)
{
    unsigned long ret;
    unsigned long stack_shift;
    struct mm_struct *mm = current->mm;
    struct vm_area_struct *vma = bprm->vma;
    struct vm_area_struct *prev = NULL;
    unsigned long vm_flags;
    unsigned long stack_base;
    unsigned long stack_size;
    unsigned long stack_expand;
    unsigned long rlim_stack;

    /* align the requested stack top (arch-specific, then to a page boundary) */
    stack_top = arch_align_stack(stack_top);
    stack_top = PAGE_ALIGN(stack_top);

    /* distance between the current end of the argument vma and the wanted stack top */
    stack_shift = vma->vm_end - stack_top;

    /* shift the stack pointer and the recorded exec position by that distance */
    bprm->p -= stack_shift;
    mm->arg_start = bprm->p;
    bprm->exec -= stack_shift;

    ...

    /* move the mapping itself when the shift is non-zero */
    if (stack_shift) {
        shift_arg_pages(vma, stack_shift);
    }

    stack_expand = 131072UL;    /* 128 KiB of extra stack */
    stack_size = vma->vm_end - vma->vm_start;

    /* never expand beyond the RLIMIT_STACK limit (rounded down to a page multiple) */
    rlim_stack = rlimit(RLIMIT_STACK) & PAGE_MASK;
    if (stack_size + stack_expand > rlim_stack)
        stack_base = vma->vm_end - rlim_stack;
    else
        stack_base = vma->vm_start - stack_expand;
    current->mm->start_stack = bprm->p;
    expand_stack(vma, stack_base);
}

  傳入的參數stack_top添加了隨機因子,首先對該stack_top進行頁對齊,然后計算位移stack_shift,再從棧的指針bprm->p(也即當前參數的存放地址mm->arg_start)中減去該位移。省略的部分是對標志位的修改,再往下既然修改了棧的指針,就要通過shift_arg_pages函數修改堆棧對應的虛擬內存了。最后需要通過expand_stack函數拓展棧的大小,默認為stack_expand即131072字節(128KB,按4KB頁大小計算為32個頁面)。

  ④ load_elf_binary->elf_map: 

/*
 * Map one ELF segment of @filep at (the page start of) @addr.
 *
 * @filep:      file the segment is read from
 * @addr:       requested virtual address; rounded down to a page start
 * @eppnt:      program header describing the segment
 * @prot:       protection bits passed to vm_mmap()
 * @type:       mapping flags passed to vm_mmap()
 * @total_size: when non-zero, size of the whole image; that much is
 *              mapped first and everything past this segment is unmapped
 *              again
 *
 * Returns the address vm_mmap() returned, or the page-aligned @addr when
 * the segment has no file-backed bytes to map.
 */
static unsigned long elf_map(struct file *filep, unsigned long addr,
        struct elf_phdr *eppnt, int prot, int type,
        unsigned long total_size)
{
    /* offset of the segment start within its page */
    unsigned long page_off = ELF_PAGEOFFSET(eppnt->p_vaddr);
    unsigned long size = ELF_PAGEALIGN(eppnt->p_filesz + page_off);
    unsigned long off = eppnt->p_offset - page_off;
    unsigned long map_addr;

    addr = ELF_PAGESTART(addr);

    /* nothing file-backed to map */
    if (size == 0)
        return addr;

    /* ordinary case: map just this segment */
    if (total_size == 0)
        return vm_mmap(filep, addr, size, prot, type, off);

    /* first segment of an image: map the full extent, then trim the tail */
    total_size = ELF_PAGEALIGN(total_size);
    map_addr = vm_mmap(filep, addr, total_size, prot, type, off);
    if (BAD_ADDR(map_addr))
        return map_addr;

    vm_munmap(map_addr + size, total_size - size);
    return map_addr;
}

  傳入的參數filep是文件指針,addr是即將映射的內存中的虛擬地址,size是文件映像的大小,off是映像在文件中的偏移。elf_map函數主要通過vm_mmap為文件申請虛擬空間並進行相應的映射,然后返回虛擬空間的起始地址map_addr。

  ⑤ load_elf_binary->start_thread

 

/*
 * start_thread(regs, pc, sp) - ARM variant (it writes regs->ARM_* fields).
 *
 * Prepares the saved register set so the task resumes at the new entry:
 *  - clears all of regs->uregs;
 *  - with CONFIG_BINFMT_ELF_FDPIC, r7-r9 are saved first and restored
 *    afterwards when the personality has FDPIC_FUNCPTRS, and r10 is set
 *    to mm->start_data; without an MMU r10 is set to mm->start_data too;
 *  - selects USR_MODE or USR26_MODE depending on ADDR_LIMIT_32BIT;
 *  - sets the Thumb bit when the CPU supports Thumb and bit 0 of pc is set;
 *  - stores pc with bit 0 cleared and sp into the saved registers.
 */
#define start_thread(regs,pc,sp)                    \
({                                    \
    unsigned long r7, r8, r9;                    \
                                    \
    if (IS_ENABLED(CONFIG_BINFMT_ELF_FDPIC)) {            \
        r7 = regs->ARM_r7;                    \
        r8 = regs->ARM_r8;                    \
        r9 = regs->ARM_r9;                    \
    }                                \
    memset(regs->uregs, 0, sizeof(regs->uregs));            \
    if (IS_ENABLED(CONFIG_BINFMT_ELF_FDPIC) &&            \
        current->personality & FDPIC_FUNCPTRS) {            \
        regs->ARM_r7 = r7;                    \
        regs->ARM_r8 = r8;                    \
        regs->ARM_r9 = r9;                    \
        regs->ARM_r10 = current->mm->start_data;        \
    } else if (!IS_ENABLED(CONFIG_MMU))                \
        regs->ARM_r10 = current->mm->start_data;        \
    if (current->personality & ADDR_LIMIT_32BIT)            \
        regs->ARM_cpsr = USR_MODE;                \
    else                                \
        regs->ARM_cpsr = USR26_MODE;                \
    if (elf_hwcap & HWCAP_THUMB && pc & 1)                \
        regs->ARM_cpsr |= PSR_T_BIT;                \
    regs->ARM_cpsr |= PSR_ENDSTATE;                    \
    regs->ARM_pc = pc & ~1;        /* pc */            \
    regs->ARM_sp = sp;        /* sp */            \
})

 

 

 

 

  傳入的參數regs為保存的寄存器,pc(x86實現中名為new_ip)為解釋器或者應用程序的起始代碼地址,sp(x86實現中名為new_sp)為用戶空間的堆棧指針。上面列出的是ARM架構的start_thread宏,它直接把這些值寫入regs;而在x86的實現中,設置完這些變量后,最后通過force_iret強制返回,跳到new_ip指向的地址處開始執行。對於glibc而言,最終就會跳轉到_start函數中。

 


免責聲明!

本站轉載的文章為個人學習借鑒使用,本站對版權不負任何法律責任。如果侵犯了您的隱私權益,請聯系本站郵箱yoyou2525@163.com刪除。



 
粵ICP備18138465號   © 2018-2025 CODEPRJ.COM