newlib 中的 crt0 流程分析


最近對 newlib 中的啟動代碼 crt0 產生了興趣,於是就分析了下其代碼。crt0 的源碼位於 libgloss/arm/crt0.S,為了兼容各種 ARM 架構,crt0.S 中有大量的條件判斷宏定義,對於只關心 ARMv7E-M 的我來說很是痛苦。剛好手上有個基於 STM32F412 的 mbed 工程用的是 crt0 的啟動方式,參考 crt0.o 的反匯編我可以提煉出 crt0.S 中和 ARMv7E-M 相關的部分代碼。

crt0.o 的反匯編如下:

08008220 <_mainCRTStartup>:
 8008220:    4b15          ldr    r3, [pc, #84]    ; (8008278 <_mainCRTStartup+0x58>)
 8008222:    2b00          cmp    r3, #0
 8008224:    bf08          it    eq
 8008226:    4b13          ldreq    r3, [pc, #76]    ; (8008274 <_mainCRTStartup+0x54>)
 8008228:    469d          mov    sp, r3
 800822a:    f5a3 3a80     sub.w    sl, r3, #65536    ; 0x10000
 800822e:    2100          movs    r1, #0
 8008230:    468b          mov    fp, r1
 8008232:    460f          mov    r7, r1
 8008234:    4813          ldr    r0, [pc, #76]    ; (8008284 <_mainCRTStartup+0x64>)
 8008236:    4a14          ldr    r2, [pc, #80]    ; (8008288 <_mainCRTStartup+0x68>)
 8008238:    1a12          subs    r2, r2, r0
 800823a:    f01c fcd7     bl    8024bec <memset>
 800823e:    4b0f          ldr    r3, [pc, #60]    ; (800827c <_mainCRTStartup+0x5c>)
 8008240:    2b00          cmp    r3, #0
 8008242:    d000          beq.n    8008246 <_mainCRTStartup+0x26>
 8008244:    4798          blx    r3
 8008246:    4b0e          ldr    r3, [pc, #56]    ; (8008280 <_mainCRTStartup+0x60>)
 8008248:    2b00          cmp    r3, #0
 800824a:    d000          beq.n    800824e <_mainCRTStartup+0x2e>
 800824c:    4798          blx    r3
 800824e:    2000          movs    r0, #0
 8008250:    2100          movs    r1, #0
 8008252:    0004          movs    r4, r0
 8008254:    000d          movs    r5, r1
 8008256:    480d          ldr    r0, [pc, #52]    ; (800828c <_mainCRTStartup+0x6c>)
 8008258:    2800          cmp    r0, #0
 800825a:    d002          beq.n    8008262 <_mainCRTStartup+0x42>
 800825c:    480c          ldr    r0, [pc, #48]    ; (8008290 <_mainCRTStartup+0x70>)
 800825e:    f00f f868     bl    8017332 <__wrap_atexit>
 8008262:    f01c f805     bl    8024270 <__libc_init_array>
 8008266:    0020          movs    r0, r4
 8008268:    0029          movs    r1, r5
 800826a:    f00f f821     bl    80172b0 <__wrap_main>
 800826e:    f00f f85d     bl    801732c <__wrap_exit>
 8008272:    bf00          nop
 8008274:    00080000     .word    0x00080000
 8008278:    20040000     .word    0x20040000
 800827c:    00000000     .word    0x00000000
 8008280:    080172a3     .word    0x080172a3
 8008284:    20000c00     .word    0x20000c00
 8008288:    2000ac58     .word    0x2000ac58
 800828c:    08017333     .word    0x08017333
 8008290:    00000000     .word    0x00000000

提煉後的 crt0.S 代碼如下:

    /* _mainCRTStartup / _start -- C runtime entry point (Thumb path only).

       Steps performed, in order:
         1. sp <- __stack, or the constant stored at .LC0 when __stack is 0.
         2. sl <- sp - 64K (default stack limit); fp and r7 <- 0.
         3. memset (__bss_start__, 0, __bss_end__ - __bss_start__).
         4. Call hardware_init_hook, then software_init_hook, each only
            when its address is non-zero.
         5. If the address of atexit is non-zero, call atexit (_fini).
         6. Call _init, then main (0, 0), then exit with r0 as left by main.

       r4/r5 hold argc/argv (both 0) across the atexit and _init calls.  */
    FUNC_START  _mainCRTStartup
    FUNC_START  _start

    /* Start by setting up a stack.  */

    /*  Set up the stack pointer to a fixed value */
    /*  Changes by toralf:
        - Allow linker script to provide stack via __stack symbol - see
          definition of .Lstack
        - Provide "hooks" that may be used by the application to add
          custom init code - see .Lhwinit and .Lswinit
        - Go through all execution modes and set up stack for each of them.
          Loosely based on init.s from ARM/Motorola example code.
          Note: Mode switch via CPSR is not allowed once in non-privileged
          mode, so we take care not to enter "User" to set up its sp,
          and also skip most operations if already in that mode.
          (No mode-switching code is present in this reduced excerpt.)  */

    ldr     r3, .Lstack         /* r3 = __stack (0 if not provided) */
    cmp     r3, #0
    it      eq
    ldreq   r3, .LC0            /* Fall back to the built-in default */

    /* Note: This 'mov' is essential when starting in User, and ensures we
       always get *some* sp value for the initial mode, even if we
       have somehow missed it below (in which case it gets the same
       value as FIQ - not ideal, but better than nothing.) */
    mov     sp, r3

.LC23:
    /* Setup a default stack-limit in-case the code has been
       compiled with "-mapcs-stack-check".  Hard-wiring this value
       is not ideal, since there is currently no support for
       checking that the heap and stack have not collided, or that
       this default 64k is enough for the program being executed.
       However, it ensures that this simple crt0 world will not
       immediately cause an overflow event:  */
    sub     sl, r3, #64 << 10   /* Still assumes 256bytes below sl */

    /* Zero the memory in the .bss section.  */
    movs    a2, #0              /* Second arg: fill value */
    mov     fp, a2              /* Null frame pointer */
    mov     r7, a2              /* Null frame pointer for Thumb */

    ldr     a1, .LC1            /* First arg: start of memory block */
    ldr     a3, .LC2            /* a3 = end of memory block */
    subs    a3, a3, a1          /* Third arg: length of block */

    bl      memset

    /* Changes by toralf: Taken from libgloss/m68k/crt0.S
     * initialize target specific stuff. Only execute these
     * functions if they exist.
     */
    ldr     r3, .Lhwinit
    cmp     r3, #0              /* Hook address 0: skip the call */
    beq     .LC24
    indirect_call r3
.LC24:
    ldr     r3, .Lswinit
    cmp     r3, #0              /* Hook address 0: skip the call */
    beq     .LC25
    indirect_call r3

.LC25:
    movs    r0, #0              /*  no arguments  */
    movs    r1, #0              /*  no argv either */

    /* Some arm/elf targets use the .init and .fini sections
       to create constructors and destructors, and for these
       targets we need to call the _init function and arrange
       for _fini to be called at program exit.  */

    /* Park argc/argv in r4/r5; r0/r1 are reused by the calls below and
       are restored from r4/r5 just before main.  */
    movs    r4, r0
    movs    r5, r1

    /* Make reference to atexit weak to avoid unconditionally pulling in
       support code.  Refer to comments in __atexit.c for more details.  */
    ldr     r0, .Latexit
    cmp     r0, #0              /* atexit address 0: skip registration */
    beq     .Lweak_atexit

    ldr     r0, .Lfini          /* Argument: address of _fini */
    bl      atexit
.Lweak_atexit:
    bl      _init
    movs    r0, r4              /* argc */
    movs    r1, r5              /* argv */

    bl      main

    bl      exit                /* Should not return.  */
    
    /* Literal pool read by the start-up code through PC-relative loads.
       For Thumb, constants must be after the code since only
       positive offsets are supported for PC relative addresses.  */
.LC0:
    .word   0x80000             /* Top of RAM on the PIE board; used as
                                   sp when __stack is 0.  */
.Lstack:
    .word   __stack             /* Initial sp from the linker/application */
.Lhwinit:
    .word   hardware_init_hook  /* Optional hook, called first */
.Lswinit:
    .word   software_init_hook  /* Optional hook, called second */

    /* Set up defaults for the above variables in the form of weak symbols
       - so that application will link correctly, and get value 0 in
       runtime (meaning "ignore setting") for the variables, when the user
       does not provide the symbols. (The linker uses a weak symbol if,
       and only if, a normal version of the same symbol isn't provided
       e.g. by a linker script or another object file.) */

    .weak   __stack
    .weak   hardware_init_hook
    .weak   software_init_hook

    /* Bounds of the region that the start-up code clears with memset.  */
.LC1:
    .word   __bss_start__
.LC2:
    .word   __bss_end__

    /* Weak reference: the start-up code tests this word against 0 and
       skips the atexit call when it is 0.  */
    .weak   atexit
.Latexit:
    .word   atexit

    /* Weak reference _fini in case of lite exit.  */
    .weak   _fini
.Lfini:
    .word   _fini

crt0 啟動流程如下:

  1. 設置 SP 為 __stack,若 __stack 未被用戶定義(其值為 0),則使用默認值 0x80000(即 .LC0 處存放的常量本身,而非地址 0x80000 處的內容)。
  2. 清空 .bss 段,起始地址為 __bss_start__,結束地址為 __bss_end__ 。
  3. 若用戶定義了 hardware_init_hook 和 software_init_hook ,則調用它們。
  4. 若用戶定義了 atexit,則調用它,並將 _fini 作為參數傳入(_fini 被宏定義為 __libc_fini_array)。
  5. 調用 _init(_init 被宏定義為 __libc_init_array)。
  6. 調用 main(argc 和 argv 都等於 0)。
  7. 調用 exit。

其中 __bss_start__ 和 __bss_end__ 必須被定義;__stack 是弱符號,未定義時會退回到默認值 0x80000,因此實際工程中一般也應在鏈接腳本中定義它。

hardware_init_hook 和 software_init_hook 可以實現一些需要在 main 之前的功能。

atexit,exit,_init 和 _fini 一般是和 C++ 的全局構造和析構有關,這個放在下一節來分析。


免責聲明!

本站轉載的文章為個人學習借鑒使用,本站對版權不負任何法律責任。如果侵犯了您的隱私權益,請聯系本站郵箱yoyou2525@163.com刪除。



 
粵ICP備18138465號   © 2018-2025 CODEPRJ.COM