linux thread_info 與thread_struct


 有個同事在看3.10內核代碼時,容易把這兩個結構混淆,所以我簡單答復了一下。

thread_info是和內核棧放一塊的,網上到處都是thread_info的資料,但thread_struct的資料比較少,在此記錄下,以備忘

/*
 * Low-level per-thread bookkeeping structure.
 * The first member is the pointer back to the owning task_struct, so
 * reading the first word of a thread_info yields the task pointer.
 */
struct thread_info {
    struct task_struct    *task;        /* main task structure */
    struct exec_domain    *exec_domain;    /* execution domain */
    __u32            flags;        /* low level flags */
    __u32            status;        /* thread synchronous flags */
    __u32            cpu;        /* current CPU */
    int            preempt_count;    /* 0 => preemptable,
                           <0 => BUG */
    mm_segment_t        addr_limit;
    struct restart_block    restart_block;
    void __user        *sysenter_return;
    /* The next two members exist only when CONFIG_X86_32 is defined. */
#ifdef CONFIG_X86_32
    unsigned long           previous_esp;   /* ESP of the previous stack in
                           case of nested (IRQ) stacks
                        */
    __u8            supervisor_stack[0];    /* zero-length array: occupies no storage itself */
#endif
    /* Two single-bit flags. */
    unsigned int        sig_on_uaccess_error:1;
    unsigned int        uaccess_err:1;    /* uaccess failed */
};

 thread_info 可以通過task_struct中的stack成員找到,它和內核棧放在同一個union里。它為啥能和內核棧成為union呢?按道理union里面的成員不會同時有效,也就是既然用作了A成員,就不能再當作B成員使用,但是明顯我們的

thread_info結構和內核棧是同時使用的。其實可以理解為thread_info 放在了這塊內存的最低地址處,而棧是從高地址向低地址增長的,所以正常情況下兩者不沖突。這也間接說明了,

其實內核棧沒有union那么大,要被thread_info占據一部分。放在一起還有個好處就是根據esp(64位下是rsp)能夠

快速地查找到task_struct的指針,因為thread_info的第一個成員就是task_struct指針:只要將esp的低若干位清零,就得到thread_info的地址。到底清零多少位,是與棧的大小相關的。

比如64位的x86,默認內核棧大小為:

#define THREAD_SIZE_ORDER    2
#define THREAD_SIZE  (PAGE_SIZE << THREAD_SIZE_ORDER)
#define get_current() (current_thread_info()->task)
#define current get_current()

static inline struct thread_info *current_thread_info(void)
{
    register unsigned long sp asm ("sp");
    return (struct thread_info *)(sp & ~(THREAD_SIZE - 1));
}

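所以經常可以看到代碼中使用 current 宏,就是通過sp指針來找到task_struct。(註:上面這種直接對sp取掩碼的寫法是arm等架構的實現;3.10的x86_64是通過per-cpu變量得到current的,但thread_info位於按THREAD_SIZE對齊的棧底這一點是相同的。)看下面的一個例子更能理解: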

crash> bt
PID: 178838  TASK: ffff88290f7ddee0  CPU: 9   COMMAND: "kthread_send/9"----------------當前task指針為ffff88290f7ddee0
 #0 [ffff882fbe843a70] machine_kexec at ffffffff8105d77b
 #1 [ffff882fbe843ad0] __crash_kexec at ffffffff8110aca2
 #2 [ffff882fbe843ba0] panic at ffffffff816ad52f
 #3 [ffff882fbe843c20] watchdog_timer_fn at ffffffff81135a51
 #4 [ffff882fbe843c58] __hrtimer_run_queues at ffffffff810b93a6
 #5 [ffff882fbe843cb0] hrtimer_interrupt at ffffffff810b993f
 #6 [ffff882fbe843cf8] local_apic_timer_interrupt at ffffffff8105467b
 #7 [ffff882fbe843d10] smp_apic_timer_interrupt at ffffffff816c9e83
 #8 [ffff882fbe843d28] apic_timer_interrupt at ffffffff816c6732
 #9 [ffff882fbe843dc8] queued_spin_lock_slowpath at ffffffff816adeee
#10 [ffff882fbe843dd8] _raw_spin_lock at ffffffff816bb080
#11 [ffff882fbe843de8] dev_watchdog at ffffffff815bca52
#12 [ffff882fbe843e28] call_timer_fn at ffffffff8109a9c8
#13 [ffff882fbe843e60] run_timer_softirq at ffffffff8109ceed
#14 [ffff882fbe843ed8] __do_softirq at ffffffff8109404d
#15 [ffff882fbe843f48] call_softirq at ffffffff816c8afc
#16 [ffff882fbe843f60] do_softirq at ffffffff8102d435
#17 [ffff882fbe843f80] irq_exit at ffffffff81094495
#18 [ffff882fbe843f98] smp_apic_timer_interrupt at ffffffff816c9e88
#19 [ffff882fbe843fb0] apic_timer_interrupt at ffffffff816c6732
--- <IRQ stack> ---
#20 [ffff882b680d3c28] apic_timer_interrupt at ffffffff816c6732
    [exception RIP: ixgbe_xmit_frame_ring+83]
    RIP: ffffffffc01299e3  RSP: ffff882b680d3cd0  RFLAGS: 00000212---------------------在中斷之前的rsp
    RAX: 0000000000000562  RBX: 0000000000000001  RCX: 000000000000403d
    RDX: ffff882fb9331c00  RSI: ffff8828d7b8fac0  RDI: 0000000000000001
    RBP: ffff882b680d3d48   R8: 0000000000000008   R9: 0000a0a5447b9d78
    R10: ffff8828c6e84f00  R11: 000000002b3000b8  R12: ffff8828c0291b00
    R13: 0000000022300000  R14: 0000000000000001  R15: ffff882b680d3cc0
    ORIG_RAX: ffffffffffffff10  CS: 0010  SS: 0018
#21 [ffff882b680d3d50] ixgbe_xmit_frame at ffffffffc012a918 [ixgbe]
#22 [ffff882b680d3d80] wit_send_tasklet at ffffffffc043b63c [witdriver]
#23 [ffff882b680d3e78] wit_kthread_xmit_fn at ffffffffc043ba95 [witdriver]
#24 [ffff882b680d3ec8] kthread at ffffffff810b5241
#25 [ffff882b680d3f50] ret_from_fork at ffffffff816c5577

根據task_struct 找stack:

crash> task_struct.stack ffff88290f7ddee0
  stack = 0xffff882b680d0000
crash> rd 0xffff882b680d0000
ffff882b680d0000:  ffff88290f7ddee0----------------------stack中的第一個成員就是指向task_struct的

再看看 rsp 的值 ffff882b680d3cd0 與 stack 的值 0xffff882b680d0000,兩者只有低14位不同,也就是說 rsp 落在以 stack 為起始、大小為16K(2^14)的對齊區域之內。

有時候我們會遇到內核堆棧越界的情況,越界就是棧變量向下擴展的時候,踩到了thread_info結構的成員。

這時會遇到:Thread overran stack, or stack corrupted 這樣的打印,判斷的標准就是thread_info的上面留了一個magic特征字:

/* Magic word stored just above thread_info; if it no longer holds this value the stack was overrun or corrupted. */
#define STACK_END_MAGIC        0x57AC6E9D

以下面例子來說明:

crash> struct thread_info
struct thread_info {
    struct task_struct *task;
    struct exec_domain *exec_domain;
    __u32 flags;
    __u32 status;
    __u32 cpu;
    int preempt_count;
    mm_segment_t addr_limit;
    struct restart_block restart_block;
    void *sysenter_return;
    unsigned int sig_on_uaccess_error : 1;
    unsigned int uaccess_err : 1;
}
SIZE: 104

crash> px 0xffff882b680d0000 + 104
$8 = 0xffff882b680d0068

crash> rd 0xffff882b680d0068
ffff882b680d0068: 0000000057ac6e9d .n.W.... -----------------對應的magic特征字

 

在一些服務器中,經常會使用 echo 1 > /proc/sys/kernel/stack_tracer_enabled 的方式來監控線程棧,開啟之後就可以查看到目前為止記錄到的最深的一次棧使用情況:

cat /sys/kernel/debug/tracing/stack_trace
        Depth    Size   Location    (41 entries)
        -----    ----   --------
  0)     4120      16   mempool_alloc_slab+0x15/0x20
  1)     4104     128   mempool_alloc+0x6e/0x170
  2)     3976      16   sg_pool_alloc+0x45/0x50
  3)     3960      88   __sg_alloc_table+0xd6/0x140
  4)     3872      40   sg_alloc_table_chained+0x3c/0x90
  5)     3832      40   scsi_init_sgtable+0x26/0x70
  6)     3792      72   scsi_init_io+0x4e/0x200
  7)     3720      80   sd_setup_read_write_cmnd+0x3d/0x950 [sd_mod]
  8)     3640      16   sd_init_command+0x2f/0xc0 [sd_mod]
  9)     3624      32   scsi_setup_cmnd+0x111/0x1c0
 10)     3592      56   scsi_prep_fn+0xdb/0x180
 11)     3536      40   blk_peek_request+0x16a/0x290
 12)     3496     104   scsi_request_fn+0x48/0x680
 13)     3392      24   __blk_run_queue+0x39/0x50
 14)     3368     192   cfq_insert_request+0x384/0x550
 15)     3176      56   __elv_add_request+0x1a2/0x2e0
 16)     3120      72   blk_queue_bio+0x35b/0x3a0
 17)     3048      88   generic_make_request+0x10b/0x320
 18)     2960      88   submit_bio+0x70/0x150
 19)     2872      48   _submit_bh+0x127/0x160
 20)     2824      16   submit_bh+0x10/0x20
 21)     2808      88   ext4_read_block_bitmap_nowait+0x48c/0x5f0 [ext4]
 22)     2720     152   ext4_mb_init_cache+0x181/0x6e0 [ext4]
 23)     2568      72   ext4_mb_load_buddy+0x2b6/0x340 [ext4]
 24)     2496     160   ext4_mb_regular_allocator+0x1d7/0x470 [ext4]
 25)     2336     176   ext4_mb_new_blocks+0x658/0xa20 [ext4]
 26)     2160     232   ext4_alloc_branch+0x3b9/0x430 [ext4]
 27)     1928     248   ext4_ind_map_blocks+0x34f/0x7b0 [ext4]
 28)     1680     136   ext4_map_blocks+0x2a5/0x6f0 [ext4]
 29)     1544     104   _ext4_get_block+0x1df/0x220 [ext4]
 30)     1440      16   ext4_get_block+0x16/0x20 [ext4]
 31)     1424     184   __block_write_begin+0x17d/0x4b0
 32)     1240     136   ext4_write_begin+0x18f/0x440 [ext4]
 33)     1104     200   generic_file_buffered_write+0x124/0x2c0
 34)      904     128   __generic_file_aio_write+0x1e2/0x400
 35)      776      64   generic_file_aio_write+0x59/0xa0
 36)      712     184   ext4_file_write+0xdb/0x470 [ext4]
 37)      528     216   do_sync_write+0x93/0xe0
 38)      312      64   vfs_write+0xc0/0x1f0
 39)      248      72   SyS_write+0x7f/0xe0
 40)      176     176   system_call_fastpath+0x1c/0x21

如果新增加了內核模塊,測試時最好能夠監控起來,保證不會棧越界。

如果說 thread_info 在進程運行時訪問很多,比如取當前task_struct指針,設置是否能夠搶占的 preempt_count ,是跟arch體系無關的一些參數,那么thread_struct 就是與體系強相關的

一個結構了,比如x86的架構如下,32位和64位用一些宏來控制。

/*
 * x86 per-thread, architecture-specific state.
 * Members guarded by CONFIG_X86_32 / CONFIG_X86_64 exist only in the
 * corresponding 32-bit or 64-bit build; the rest are common to both.
 */
struct thread_struct {
    /* Cached TLS descriptors: */
    struct desc_struct    tls_array[GDT_ENTRY_TLS_ENTRIES];
    unsigned long        sp0;
    unsigned long        sp;
#ifdef CONFIG_X86_32
    unsigned long        sysenter_cs;
#else
    unsigned long        usersp;    /* Copy from PDA */
    unsigned short        es;
    unsigned short        ds;
    unsigned short        fsindex;
    unsigned short        gsindex;
#endif
#ifdef CONFIG_X86_32
    unsigned long        ip;
#endif
#ifdef CONFIG_X86_64
    unsigned long        fs;
#endif
    unsigned long        gs;
    /* Save middle states of ptrace breakpoints */
    struct perf_event    *ptrace_bps[HBP_NUM];
    /* Debug status used for traps, single steps, etc... */
    unsigned long           debugreg6;
    /* Keep track of the exact dr7 value set by the user */
    unsigned long           ptrace_dr7;
    /* Fault info: */
    unsigned long        cr2;
    unsigned long        trap_nr;
    unsigned long        error_code;
    /* floating point and extended processor state */
    struct fpu        fpu;
#ifdef CONFIG_X86_32
    /* Virtual 86 mode info */
    struct vm86_struct __user *vm86_info;
    unsigned long        screen_bitmap;
    unsigned long        v86flags;
    unsigned long        v86mask;
    unsigned long        saved_sp0;
    unsigned int        saved_fs;
    unsigned int        saved_gs;
#endif
    /* IO permissions: */
    unsigned long        *io_bitmap_ptr;
    unsigned long        iopl;
    /* Max allowed port in the bitmap, in bytes: */
    unsigned        io_bitmap_max;
};

 arm32的長成這樣:

/*
 * arm32 per-thread, architecture-specific state: three words of fault
 * information followed by the debug state.
 */
struct thread_struct {
                            /* fault info      */
    unsigned long        address;
    unsigned long        trap_no;
    unsigned long        error_code;
                            /* debugging      */
    struct debug_info    debug;
};

arm64的長成這樣:

/*
 * arm64 register values kept per thread: x19 through x28 plus fp, sp and pc.
 * Embedded in the arm64 thread_struct as its cpu_context member.
 */
struct cpu_context {
    unsigned long x19;
    unsigned long x20;
    unsigned long x21;
    unsigned long x22;
    unsigned long x23;
    unsigned long x24;
    unsigned long x25;
    unsigned long x26;
    unsigned long x27;
    unsigned long x28;
    unsigned long fp;
    unsigned long sp;
    unsigned long pc;
};

/*
 * arm64 per-thread, architecture-specific state: the saved register set
 * (cpu_context), tp_value, FP/SIMD state, fault address and debug state.
 */
struct thread_struct {
    struct cpu_context    cpu_context;    /* cpu context */
    unsigned long        tp_value;
    struct fpsimd_state    fpsimd_state;
    unsigned long        fault_address;    /* fault info */
    struct debug_info    debug;        /* debugging */
};

因為不同的架構,寄存器明顯不一樣,所以cpu的上下文顯然也不一樣,thread_struct這個結構就是用來在進程切換的時候,保存特定於arch的進程上下文的。


免責聲明!

本站轉載的文章為個人學習借鑒使用,本站對版權不負任何法律責任。如果侵犯了您的隱私權益,請聯系本站郵箱yoyou2525@163.com刪除。



 
粵ICP備18138465號   © 2018-2025 CODEPRJ.COM