A53的mmu配置說明


1 MMU簡介

1.1 為什么需要MMU

  • MMU的作用是將core發出的虛擬地址VA轉換為物理地址PA。在操作系統里,進程的VA可以很大且連續,進程不用考慮PA的實際情況,由MMU完成VA到PA的轉換
  • 裸跑程序里,一般不像OS這么復雜,MMU會做平坦映射,即VA=PA

1.2 裸跑不開MMU行不行

  • 行,但效率很低。
  • 現在的CPU,要想使用Cache,必須使能MMU,MMU頁表里有cache訪問屬性配置。
  • 在ARM里,如果不開MMU,不僅不能開啟cache,連內存屬性都不是normal,而是device,device屬性不允許硬件對AXI總線的信號進行合並、亂序等,效率較低。
  • 所以,一般的CPU啟動代碼,會較早開啟MMU和Cache,ATF的BL1(bootrom)就是這樣。

1.3 架構

 

 

如上圖

  • MMU集成在每個core里,每個core有1個MMU
  • MMU做VA到PA的轉換時,需要轉換規則,這個規則叫==頁表==
  • 頁表保存在memory里,通過寄存器告訴MMU單元頁表的存放位置
  • TLB是MMU里的小cache,用於保存已經讀取過的頁表,這樣可以大大提高效率
  • 如果MMU要讀取的頁表不在TLB里,MMU就要去memory里找頁表,這叫Table Walk,類似cache miss,效率較低
  • MMU在讀memory時,可以過cache,也可以不過。有寄存器可以配置

2 A53的MMU細節

2.1 TTBR0_EL1/2/3和TTBR1_EL1

  • TTBR(Translation Table Base Register)存頁表的地址,MMU據此找頁表
  • 為什么EL1有兩個,TTBR0_EL1和TTBR1_EL1

     

     



    在有OS的系統里,OS的地址映射幾乎是不變的,為了提高效率,專門把TTBR1_EL1給kernel用,user用TTBR0_EL1.
  • linux的kernel空間地址高位為FFFF,user空間地址高位為0000,why?
    VA的高bit用於選擇TTBR:kernel地址高位為FFFF,MMU會自動用TTBR1_EL1;反之用TTBR0_EL1

2.2 TCR_EL*

 

 

主要控制:

  • IPS, PA的位寬
  • T1SZ,T0SZ: VA位寬(VA位寬 = 64 - TxSZ)
  • TG0,TG1:最小顆粒度,A53支持4KB和64KB,不支持16KB
  • 還有MMU table walk時的cache屬性配置

2.3 MAIR_EL*

 

 


在ARMv8架構里,頁表不再包含具體的cache屬性配置,而只包含1個3bit的index,這個index就是指向MAIR_EL*的某一個Attr,每一個Attr可以配置成不同的cache屬性。

2.4 MMU頁表

 

 


以4KB顆粒度進行說明:

  • 當VA位寬不超過39bit時,Level0就可以省略,TTBR直接指向level1即可。以此類推
  • block指塊,到此結束,不會再有下級。例如l1的block為1G,則只用1個描述符就可以描述1G空間
  • Point to下一級時,為Table描述符,指向下一級的基址

3 配置實例

以某處理器裸跑為例:

  • VA = 32bit
  • l1: bit31 - bit30, 4 entries
  • l2: 29 - 21, 512 entries
  • l3: 20 - 12, 512 entries

 

 

  • level 1在memory的值:

  • level 2在memory的值:

  • level 3在memory的值,E8000000對應的80004200:

code:

//----------------------------------------------------------------
// setup tx511 translation table
//
//----------------------------------------------------------------
#include "v8_mmu.h"

    .text
    // Emit call-frame information into .debug_frame rather than .eh_frame.
    .cfi_sections .debug_frame

    // Routines declared global by this file.
    .global setup_ttb
    .global ZeroBlock

    // Translation-table symbols declared global here. setup_ttb loads
    // each of them as the base address of one table:
    //   __ttb0_l1          level 1 table
    //   __ttb0_l2_ram      level 2 table
    //   __ttb0_l3_ram_e8x  level 3 tables for E800 0000 .. E87F FFFF
    .global __ttb0_l1
    .global __ttb0_l2_ram
    .global __ttb0_l3_ram_e80
    .global __ttb0_l3_ram_e82
    .global __ttb0_l3_ram_e84
    .global __ttb0_l3_ram_e86

//----------------------------------------------------------------
// setup tx511 translation table
// level 1 table, 4 entries:
// 0000 0000 - 3FFF FFFF, 1GB block, DDR
// 4000 0000 - 7FFF FFFF, 1GB block, DDR
// 8000 0000 - BFFF FFFF, 1GB block, DDR
// C000 0000 - FFFF FFFF, point to level2 table
//
// level 2 table, 512 entries:
// C000 0000 - DFFF FFFF, 256 entries, 512MB DDR, 2MB block
// E000 0000 - E3FF FFFF, 32  entries, 64MB OSPI0 flash, 2MB block
// E400 0000 - E7FF FFFF, 32  entries, 64MB OSPI1 flash, 2MB block

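// E800 0000 - E81F FFFF, 1 entry, point to level 3_1
// E820 0000 - E83F FFFF, 1 entry, point to level 3_2
// E840 0000 - E85F FFFF, 1 entry, point to level 3_3
// E860 0000 - E87F FFFF, 1 entry, point to level 3_4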
//
//----------------------------------------------------------------


    //----------------------------------------------------------------
    // ttb_zero_table base, entries
    //
    // Zero one translation table.
    //   base    : register holding the address of the first descriptor
    //   entries : number of 8-byte descriptors; must be even and
    //             non-zero because two descriptors are cleared per store
    //
    // The table is cleared upward from its base with a post-indexed
    // store. The previous code used a pre-decrementing store starting at
    // the base, which cleared the bytes below the table and left the
    // table itself untouched.
    //
    // Clobbers: x0, x1, flags.
    //----------------------------------------------------------------
    .macro ttb_zero_table base, entries
    mov     x0, \base
    mov     x1, #(\entries << 3)
1:
    stp     xzr, xzr, [x0], #16
    subs    x1, x1, #16
    b.ne    1b
    .endm

    //----------------------------------------------------------------
    // setup_ttb
    //
    // Build the stage 1 translation tables (4KB granule, 32-bit VA,
    // identity mapping) in the memory named by the __ttb0_* symbols.
    //
    // In:   nothing
    // Out:  x21 = address of L1 table
    //       x22 = address of L2 table
    //       x23 = address of L3 table for E800 0000 - E81F FFFF
    //       x24 = address of L3 table for E820 0000 - E83F FFFF
    //       x25 = address of L3 table for E840 0000 - E85F FFFF
    //       x26 = address of L3 table for E860 0000 - E87F FFFF
    // Clobbers: x0-x6, flags.
    //
    // Leaf routine: it does not touch the stack or x30, so it cannot
    // call helpers such as ZeroBlock with bl. That is why the zeroing
    // is expanded inline.
    // NOTE(review): x21-x26 are callee-saved under AAPCS64 and are
    // overwritten here without being saved; callers must expect that.
    // NOTE(review): no barrier or TLB maintenance is issued here;
    // presumably the caller does that before enabling the MMU.
    //----------------------------------------------------------------
    .type setup_ttb, "function"
    .cfi_startproc
setup_ttb:

    //
    // Step 1: clear every table so that all entries not written below
    // are zero.
    //
    ldr     x21, =__ttb0_l1
    ttb_zero_table x21, 4

    ldr     x22, =__ttb0_l2_ram
    ttb_zero_table x22, 512

    ldr     x23, =__ttb0_l3_ram_e80
    ttb_zero_table x23, 512

    ldr     x24, =__ttb0_l3_ram_e82
    ttb_zero_table x24, 512

    ldr     x25, =__ttb0_l3_ram_e84
    ttb_zero_table x25, 512

    ldr     x26, =__ttb0_l3_ram_e86
    ttb_zero_table x26, 512

    //
    // Step 2: level 1 table, entries 0..2.
    // 0000 0000 - 3FFF FFFF, 1GB block, DDR
    // 4000 0000 - 7FFF FFFF, 1GB block, DDR
    // 8000 0000 - BFFF FFFF, 1GB block, DDR
    //
    mov     x1, #3                      // x1 = descriptors left to write
    ldr     x2, =0x40000000             // x2 = output address step (1GB)
    ldr     x3, =(0x00000000	   | \
              TT_S1_ATTR_BLOCK | \
             (1 << TT_S1_ATTR_MATTR_LSB) | \
              TT_S1_ATTR_NS | \
              TT_S1_ATTR_AP_RW_PL1 | \
              TT_S1_ATTR_SH_INNER | \
              TT_S1_ATTR_AF | \
              TT_S1_ATTR_nG)
    mov     x4, x21                     // x4 = next L1 slot

loop1:
    str     x3, [x4], #8
    add     x3, x3, x2
    subs    x1, x1, #1
    b.ne    loop1

    // Level 1 table, entry 3: C000 0000 - FFFF FFFF points to the L2
    // table. x4 is left on entry 3 by the loop above.
    // TT_S1_ATTR_TABLE is used here to match the L2 -> L3 pointers
    // below; the previous code used TT_S1_ATTR_PAGE.
    // NOTE(review): assumes both macros encode the same descriptor
    // type bits; confirm against v8_mmu.h.
    orr     x1, x22, #TT_S1_ATTR_TABLE
    str     x1, [x4]

    //
    // Step 3: level 2 table, 2MB blocks. x4 walks the table upward and
    // is carried from one loop into the next, so the three regions
    // occupy consecutive entries 0..319.
    //

    // C000 0000 - DFFF FFFF, 256 entries, 512MB DDR
    mov     x1, #256
    ldr     x2, =0x200000               // x2 = output address step (2MB)
    ldr     x3, =(0xC0000000	   | \
              TT_S1_ATTR_BLOCK | \
             (1 << TT_S1_ATTR_MATTR_LSB) | \
              TT_S1_ATTR_NS | \
              TT_S1_ATTR_AP_RW_PL1 | \
              TT_S1_ATTR_SH_INNER | \
              TT_S1_ATTR_AF | \
              TT_S1_ATTR_nG)
    mov     x4, x22                     // x4 = next L2 slot
loop2_ddr:
    str     x3, [x4], #8
    add     x3, x3, x2
    subs    x1, x1, #1
    b.ne    loop2_ddr

    // E000 0000 - E3FF FFFF, 32 entries, 64MB OSPI0 flash
    mov     x1, #32
    ldr     x2, =0x200000
    ldr     x3, =(0xE0000000	   | \
              TT_S1_ATTR_BLOCK | \
             (1 << TT_S1_ATTR_MATTR_LSB) | \
              TT_S1_ATTR_NS | \
              TT_S1_ATTR_AP_RW_PL1 | \
              TT_S1_ATTR_SH_INNER | \
              TT_S1_ATTR_AF | \
              TT_S1_ATTR_nG)
loop2_ospi0:
    str     x3, [x4], #8
    add     x3, x3, x2
    subs    x1, x1, #1
    b.ne    loop2_ospi0

    // E400 0000 - E7FF FFFF, 32 entries, 64MB OSPI1 flash
    mov     x1, #32
    ldr     x2, =0x200000
    ldr     x3, =(0xE4000000	   | \
              TT_S1_ATTR_BLOCK | \
             (1 << TT_S1_ATTR_MATTR_LSB) | \
              TT_S1_ATTR_NS | \
              TT_S1_ATTR_AP_RW_PL1 | \
              TT_S1_ATTR_SH_INNER | \
              TT_S1_ATTR_AF | \
              TT_S1_ATTR_nG)
loop2_ospi1:
    str     x3, [x4], #8
    add     x3, x3, x2
    subs    x1, x1, #1
    b.ne    loop2_ospi1

    //
    // Step 4: level 2 table, four table descriptors pointing at the L3
    // tables. The L2 index is VA bits [29:21].
    //

    // E800 0000 - E81F FFFF -> L3 table in x23
    orr     x1, x23, #TT_S1_ATTR_TABLE
    ldr     x2, =0xE8000000
    ubfx    x3, x2, #21, #9
    str     x1, [x22, x3, lsl #3]

    // E820 0000 - E83F FFFF -> L3 table in x24
    orr     x1, x24, #TT_S1_ATTR_TABLE
    ldr     x2, =0xE8200000
    ubfx    x3, x2, #21, #9
    str     x1, [x22, x3, lsl #3]

    // E840 0000 - E85F FFFF -> L3 table in x25
    orr     x1, x25, #TT_S1_ATTR_TABLE
    ldr     x2, =0xE8400000
    ubfx    x3, x2, #21, #9
    str     x1, [x22, x3, lsl #3]

    // E860 0000 - E87F FFFF -> L3 table in x26
    orr     x1, x26, #TT_S1_ATTR_TABLE
    ldr     x2, =0xE8600000
    ubfx    x3, x2, #21, #9
    str     x1, [x22, x3, lsl #3]

    //
    // Step 5: level 3 tables, 4KB pages. In every loop below:
    //   x1 = page size, x2 = pages left, x3 = current address,
    //   x4 = attribute bits, x5 = L3 index (VA bits [20:12]),
    //   x6 = descriptor to store.
    // Pages outside the listed valid range keep the zero written in
    // step 1.
    //

    // L3 table x23: E800 0000 - E803 FFFF, 256kB on-chip sram.
    // The page count is derived from the documented range (64 pages).
    // The previous code wrote only 4 pages (16kB), which did not match
    // the 256kB stated for this region.
    ldr     x1, =0x1000
    ldr     x2, =((0xE803FFFF + 1 - 0xE8000000) >> 12)
    ldr     x3, =0xE8000000
    ldr     x4, = (TT_S1_ATTR_PAGE | \
             (1 << TT_S1_ATTR_MATTR_LSB) | \
              TT_S1_ATTR_NS | \
              TT_S1_ATTR_AP_RW_PL1 | \
              TT_S1_ATTR_SH_INNER | \
              TT_S1_ATTR_AF | \
              TT_S1_ATTR_nG)
loop3_sram:
    ubfx    x5, x3, #12, #9
    orr     x6, x3, x4
    str     x6, [x23, x5, lsl #3]
    add     x3, x3, x1
    subs    x2, x2, #1
    b.ne    loop3_sram

    // L3 table x24: valid addr E820 0000 - E838 6FFF.
    // Uses memory-attribute index 2 instead of 1.
    ldr     x1, =0x1000
    ldr     x2, =((0xE8386FFF + 1 - 0xE8200000) >> 12)
    ldr     x3, =0xE8200000
    ldr     x4, = (TT_S1_ATTR_PAGE | \
             (2 << TT_S1_ATTR_MATTR_LSB) | \
              TT_S1_ATTR_NS | \
              TT_S1_ATTR_AP_RW_PL1 | \
              TT_S1_ATTR_SH_INNER | \
              TT_S1_ATTR_AF | \
              TT_S1_ATTR_nG)
loop3_Top_ahb:
    ubfx    x5, x3, #12, #9
    orr     x6, x3, x4
    str     x6, [x24, x5, lsl #3]
    add     x3, x3, x1
    subs    x2, x2, #1
    b.ne    loop3_Top_ahb

    // L3 table x25: valid addr E840 0000 - E850 FFFF.
    ldr     x1, =0x1000
    ldr     x2, =((0xE850FFFF + 1 - 0xE8400000) >> 12)
    ldr     x3, =0xE8400000
    ldr     x4, = (TT_S1_ATTR_PAGE | \
             (2 << TT_S1_ATTR_MATTR_LSB) | \
              TT_S1_ATTR_NS | \
              TT_S1_ATTR_AP_RW_PL1 | \
              TT_S1_ATTR_SH_INNER | \
              TT_S1_ATTR_AF | \
              TT_S1_ATTR_nG)
loop3_lp_hp_gic_ddr_ahb:
    ubfx    x5, x3, #12, #9
    orr     x6, x3, x4
    str     x6, [x25, x5, lsl #3]
    add     x3, x3, x1
    subs    x2, x2, #1
    b.ne    loop3_lp_hp_gic_ddr_ahb

    // L3 table x26: valid addr E860 0000 - E869 4FFF.
    ldr     x1, =0x1000
    ldr     x2, =((0xE8694FFF + 1 - 0xE8600000) >> 12)
    ldr     x3, =0xE8600000
    ldr     x4, = (TT_S1_ATTR_PAGE | \
             (2 << TT_S1_ATTR_MATTR_LSB) | \
              TT_S1_ATTR_NS | \
              TT_S1_ATTR_AP_RW_PL1 | \
              TT_S1_ATTR_SH_INNER | \
              TT_S1_ATTR_AF | \
              TT_S1_ATTR_nG)
loop3_vo_vi_ahb:
    ubfx    x5, x3, #12, #9
    orr     x6, x3, x4
    str     x6, [x26, x5, lsl #3]
    add     x3, x3, x1
    subs    x2, x2, #1
    b.ne    loop3_vo_vi_ahb

    ret
    .cfi_endproc


免責聲明!

本站轉載的文章為個人學習借鑒使用,本站對版權不負任何法律責任。如果侵犯了您的隱私權益,請聯系本站郵箱yoyou2525@163.com刪除。



 
粵ICP備18138465號   © 2018-2025 CODEPRJ.COM