Cortex-M HardFault問題定位以及CmBacktrace分析

本文轉載自查看原文 2020-04-06 13:49 750

使用Cortex-M系列MCU開發程序時不可避免的會遇到HardFault問題，常用的方法由HardFault_S.s和HardFault_C.c兩個文件組成，代碼分別如下：

    /* Assembly file for gcc */
    .text
    .syntax unified
    .thumb
    .type HardFault_Handler, %function
    .global HardFault_Handler
    .global HardFault_Handler_C

    /*
     * First-stage HardFault handler.
     *
     * Selects the stack pointer that was in use when the fault was taken
     * (bit 2 of the EXC_RETURN value held in LR) and tail-calls the C handler:
     *   r0 = pointer to the stacked exception frame
     *   r1 = LR at handler entry (EXC_RETURN)
     *
     * TODO: add a validity check of MSP here to avoid entering Lockup.
     */
HardFault_Handler:
    tst lr, #4                  /* Z set (eq) when bit 2 of LR is 0 */
    ite eq
    mrseq r0, msp               /* stacking was using MSP */
    mrsne r0, psp               /* stacking was using PSP */
    mov r1, lr                  /* second parameter */
    ldr r2,=HardFault_Handler_C /* symbol name must match the C definition exactly */
    bx r2
    .end

/**
 * Second part of the HardFault handler, written in C.
 *
 * Prints the stacked exception frame and the fault status / fault address
 * registers with printf, then spins forever so the state can be inspected.
 *
 * @param hardfault_args  Pointer to the stacked exception frame; words 0..7
 *                        are read as R0, R1, R2, R3, R12, LR, PC and PSR.
 * @param lr_value        LR value passed in by the assembly stub
 *                        (printed as LR/EXC_RETURN).
 *
 * This function does not return.
 */
void HardFault_Handler_C(unsigned long * hardfault_args, unsigned int lr_value)
{
    /* CFSR bits that say whether MMFAR / BFAR hold a valid fault address. */
    enum {
        CFSR_MMARVALID = 0x0080,
        CFSR_BFARVALID = 0x8000
    };

    /* Capture the address registers first, then CFSR whose valid bits
     * decide below whether those addresses are printed. */
    unsigned long bus_fault_address = (unsigned long) SCB->BFAR;
    unsigned long memmanage_fault_address = (unsigned long) SCB->MMFAR;
    unsigned long cfsr = (unsigned long) SCB->CFSR;

    /* Named copies of the stacked registers (handy in a debugger too). */
    unsigned long stacked_r0 = hardfault_args[0];
    unsigned long stacked_r1 = hardfault_args[1];
    unsigned long stacked_r2 = hardfault_args[2];
    unsigned long stacked_r3 = hardfault_args[3];
    unsigned long stacked_r12 = hardfault_args[4];
    unsigned long stacked_lr = hardfault_args[5];
    unsigned long stacked_pc = hardfault_args[6];
    unsigned long stacked_psr = hardfault_args[7];

    /* All values below are unsigned long, so the conversion must be %lx;
     * %x with an unsigned long argument is undefined behaviour. */
    printf ("[HardFault]\n");
    printf ("- Stack frame:\n");
    printf (" R0 = %lx\n", stacked_r0);
    printf (" R1 = %lx\n", stacked_r1);
    printf (" R2 = %lx\n", stacked_r2);
    printf (" R3 = %lx\n", stacked_r3);
    printf (" R12 = %lx\n", stacked_r12);
    printf (" LR = %lx\n", stacked_lr);
    printf (" PC = %lx\n", stacked_pc);
    printf (" PSR = %lx\n", stacked_psr);
    printf ("- FSR/FAR:\n");
    printf (" CFSR = %lx\n", cfsr);
    printf (" HFSR = %lx\n", (unsigned long) SCB->HFSR);
    printf (" DFSR = %lx\n", (unsigned long) SCB->DFSR);
    printf (" AFSR = %lx\n", (unsigned long) SCB->AFSR);
    if (cfsr & CFSR_MMARVALID) {
        printf (" MMFAR = %lx\n", memmanage_fault_address);
    }
    if (cfsr & CFSR_BFARVALID) {
        printf (" BFAR = %lx\n", bus_fault_address);
    }
    printf ("- Misc\n");
    printf (" LR/EXC_RETURN= %x\n", lr_value);

    while (1) {
        /* endless loop */
    }
}

匯編文件中的HardFault_Handler判斷出錯前使用的是MSP還是PSP，之后調用C語言編寫的HardFault_Handler_C處理函數，在其中打印輸出內核寄存器以及相關Fault Status寄存器，

之后根據打印出來的PC和LR在反匯編程序中定位在具體位置和具體原因。

當項目復雜度增加時，需要知道出錯時整個函數調用順序以加速定位問題，並且每次都手動去反匯編文件中查找效率也比較低。github上的CmBacktrace開源軟件可有效解決此問題，

CmBacktrace由RT-Thread的大佬開源，具體思路與上述思路差不多，但是增加了對函數調用棧的追溯，類似於GDB中bt的功能。

其實現追溯堆棧的功能的核心代碼如下：

    /*
     * Copy called-function addresses: walk the stack one word at a time from
     * sp up to stack_start_addr + stack_size and store in buffer[] every
     * word that passes the return-address checks below.
     */
    for (; sp < stack_start_addr + stack_size; sp += sizeof(size_t)) 
    {
        /* the *sp value may be a saved LR, so step back one word towards the calling instruction */
        pc = *((uint32_t *) sp) - sizeof(size_t);
        /* Cortex-M uses Thumb instructions, so a saved LR is odd; skip even values */
        if (pc % 2 == 0) 
        {
            continue;
        }

        /* NOTE(review): whether the call was BL or BLX should be determined here first */
        /* fix the PC address in thumb mode (clear the Thumb bit of the saved LR) */
        pc = *((uint32_t *) sp) - 1;
        if ((pc >= code_start_addr) && (pc <= code_start_addr + code_size) && (depth < CMB_CALL_STACK_MAX_DEPTH)
                /* check that the instruction before the PC address is 'BL' or 'BLX' */
                && disassembly_ins_is_bl_blx(pc - sizeof(size_t)) && (depth < size)) 
        {
            /* the depth-2 entry may already have been saved in buffer[1], so skip the duplicate */
            if ((depth == 2) && regs_saved_lr_is_valid && (pc == buffer[1])) 
            {
                continue;
            }
            buffer[depth++] = pc;
        }
    }

/*
 * Thumb encodings used to recognise a call instruction:
 *   BL  <label> : 32-bit, first halfword  11110 S imm10
 *                         second halfword 11 J1 1 J2 imm11
 *   BLX <Rm>    : 16-bit, 010001111 Rm 000
 * J1/J2 are only both 1 for near targets, so they must not be part of the
 * BL match. BX <Rm> (010001110 Rm 000) differs from BLX only in bit 7, so
 * bit 7 must be part of the BLX match.
 */
#define BL_INS_MASK         0xF800  /* first halfword: top five bits   */
#define BL_INS_LOW          0xF000  /* first halfword: 11110xxx...     */
#define BL_INS_HIGH_MASK    0xD000  /* second halfword: bits 15,14,12  */
#define BL_INS_HIGH         0xD000  /* second halfword: 11x1xxxx...    */
#define BLX_INX_MASK        0xFF80
#define BLX_INX             0x4780

/* Decide whether two consecutive halfwords end in a 'BL' or 'BLX <Rm>':
 * ins1/ins2 together form a BL, or ins2 alone is a BLX <Rm>. */
static bool ins_pair_is_bl_blx(uint16_t ins1, uint16_t ins2) {
    if ((ins1 & BL_INS_MASK) == BL_INS_LOW && (ins2 & BL_INS_HIGH_MASK) == BL_INS_HIGH) {
        return true;
    }
    return (ins2 & BLX_INX_MASK) == BLX_INX;
}

/**
 * Check whether the disassembly instruction is 'BL' or 'BLX'.
 *
 * @param addr  Address of the first of two consecutive halfwords to inspect;
 *              the halfwords at addr and addr + 2 are read.
 * @return true if the halfwords at addr/addr + 2 form a BL, or the halfword
 *         at addr + 2 is a BLX <Rm>; false otherwise.
 */
static bool disassembly_ins_is_bl_blx(uint32_t addr) {
    const uint16_t *code = (const uint16_t *)(uintptr_t)addr;

    return ins_pair_is_bl_blx(code[0], code[1]);
}

for循環解析整個棧空間的數據（從當前棧指針的位置一直到棧底），之后要完全理解循環體的代碼，需要了解Cortex-M內部的一些機制具體如下：

a.Thumb指令是16bit的，之后又擴展了32bit的指令

b.Cortex-M系列只支持Thumb狀態，不支持ARM狀態，這個特點決定LR寄存器在保存函數調用的返回地址時，地址的bit0必須是1

c.函數調用由BL或BLX指令實現，其指令編碼如下：BL是32bit指令，第一個半字為 11110 S imm10，第二個半字為 11 J1 1 J2 imm11；BLX Rm是16bit指令，編碼為 010001111 Rm 000。

有了以上這些知識可以去分析循環體部分的代碼了。

對堆棧中的數據首先判斷奇偶，是偶數直接跳過，返回地址必須是奇數

判斷是否在代碼段的地址空間內，鏈接時確定text段的放置位置

堆棧中記錄的返回地址指向跳轉指令的下一條指令；因為BL是32bit指令，而BLX是16bit指令，所以需要分別判斷。

根據PC值取出此處的指令，根據指令編碼判斷是否為BL或BLX指令。

將所有條件均滿足的數據存儲，以供后續輸出。

之后根據addr2line命令可以輕易的知道函數的調用順序。

基本原理就是這樣。

以上是自己對於HardFault處理方法和CmBacktrace的理解，可能有錯誤，如發現請及時指出，互相學習，共同進步。

0412：

之前理解PC-4時以為是三級流水線的原因，其實不是，發生函數調用時會flush指令流水線，所以正確的理解應該是返回地址的前一條指令。

2020/11/19

今天突然想到cmbacktrace有時候打印的函數調用關系不准確的原因：

1.調用棧空間沒有被破壞

2.函數返回退棧的過程中並不會清除已使用的內存，導致無差別掃描棧空間有概率誤解析

相關資源：

1.book:The Definitive Guide to Arm® Cortex®-M3 and Cortex®-M4 Processors-Newnes (2014)-Joseph Yiu

2.book:DDI0403E_c_armv7m_arm

3.https://github.com/armink/CmBacktrace

免責聲明！

本站轉載的文章為個人學習借鑒使用，本站對版權不負任何法律責任。如果侵犯了您的隱私權益，請聯系本站郵箱yoyou2525@163.com刪除。

猜您在找 Cortex-M 處理器 hardfault 定位方法和步驟（基於Keil mdk）一點理解之 CmBacktrace: ARM Cortex-M 系列 MCU 錯誤追蹤庫 no cortex-m sw device found問題 Cortex-M系列內核啟動文件分析 could not stop cortex-m device Cortex-M處理器架構 cortex-A cortex-R cortex-M處理器的性能比較 ARM Cortex-M內核中ROM table的使用如何使用MCUXpresso IDE創建一個Cortex-M工程 No Cortex-M SW Device Found 解決方法