Linux Process Management (1): The Birth of a Process


Topic: the Linux process management series

Contents:

Linux Process Management (1): The Birth of a Process

Linux Process Management (2): The CFS Scheduler

Linux Process Management (3): SMP Load Balancing

Linux Process Management (4): The HMP Scheduler

Linux Process Management (5): The NUMA Scheduler

Linux Process Management (6): The EAS Energy-Aware Scheduler

Linux Process Management (7): Real-Time Scheduling

Linux Process Management (8): Latest Updates and Outlook

Linux Process Management (Extra): Kernel Threads

 

Keywords: swapper, init_task, fork

The Linux kernel usually refers to a process as a task; the process control block (PCB, Process Control Block) is represented by struct task_struct.

A thread is a lightweight process and the smallest scheduling unit of the operating system; a process can own multiple threads.

Threads are called lightweight because they share the resources of their process. Threads and processes use the same PCB data structure, struct task_struct.

The kernel creates threads with the clone method, which is similar to fork but specifies which resources are shared with the parent and which are private to the new thread.
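
Because each thread has its own task_struct, each thread also has its own kernel task ID, while all threads of one process share the thread group ID (TGID). A minimal user-space sketch that makes this visible (gettid is invoked through syscall() for portability; build with gcc -pthread):

#define _GNU_SOURCE
#include <stdio.h>
#include <unistd.h>
#include <pthread.h>
#include <sys/syscall.h>

/* getpid() returns the shared TGID; SYS_gettid returns the per-thread task id */
static void *worker(void *arg)
{
    printf("thread: tgid=%d tid=%ld\n", getpid(), (long)syscall(SYS_gettid));
    return NULL;
}

int main(void)
{
    pthread_t t;
    printf("main:   tgid=%d tid=%ld\n", getpid(), (long)syscall(SYS_gettid));
    pthread_create(&t, NULL, worker, NULL);
    pthread_join(t, NULL);
    return 0;
}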

1. The init_task process

The init_task process, also called the swapper or idle process, is the first process created when Linux boots (it becomes task 0, the idle task, and is distinct from the user-space init, PID 1).

The idle process is created statically during kernel startup (start_kernel()); all of its core data structures are statically initialized.

When no other process needs to run, the scheduler executes the idle process.

start_kernel
  ->rest_init
    ->cpu_startup_entry
      ->cpu_idle_loop
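
When nothing is runnable, the CPU ends up in cpu_idle_loop(), whose structure is roughly the following. This is a simplified sketch (RCU, tick and hotplug handling are omitted), not the verbatim kernel source:

/* simplified sketch of cpu_idle_loop(); not the verbatim kernel code */
static void cpu_idle_loop(void)
{
    while (1) {
        while (!need_resched()) {
            /* nothing to run: enter the architecture idle state,
             * e.g. WFI on ARM, until an interrupt arrives */
            arch_cpu_idle();
        }
        /* something became runnable: let the scheduler pick it */
        schedule_preempt_disabled();
    }
}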

  

1.1 init_task

The task_struct of init_task is initialized by the INIT_TASK macro.

/* Initial task structure */
struct task_struct init_task = INIT_TASK(init_task);
EXPORT_SYMBOL(init_task);

 

INIT_TASK fills in the init_task data structure.

#define INIT_TASK(tsk)    \
{                                    \
    .state        = 0,                        \
    .stack        = &init_thread_info,                \-------#define init_thread_info (init_thread_union.thread_info)
    .usage        = ATOMIC_INIT(2),                \
    .flags        = PF_KTHREAD,                    \----------indicates this is a kernel thread
    .prio        = MAX_PRIO-20,                    \----------MAX_PRIO is 140, so prio here is 120, which maps to nice 0 (see the separate note on the relationship between prio and nice)
    .static_prio    = MAX_PRIO-20,                    \
    .normal_prio    = MAX_PRIO-20,                    \
    .policy        = SCHED_NORMAL,                    \-------the scheduling policy is SCHED_NORMAL
    .cpus_allowed    = CPU_MASK_ALL,                    \
    .nr_cpus_allowed= NR_CPUS,                    \
    .mm        = NULL,                        \
    .active_mm    = &init_mm,                    \------------the memory-management structure used by the idle process
    .restart_block = {                        \
        .fn = do_no_restart_syscall,                \
    },                                \
    .se        = {                        \
        .group_node     = LIST_HEAD_INIT(tsk.se.group_node),    \
    },                                \
    .rt        = {                        \
        .run_list    = LIST_HEAD_INIT(tsk.rt.run_list),    \
        .time_slice    = RR_TIMESLICE,                \
    },                                \
    .tasks        = LIST_HEAD_INIT(tsk.tasks),            \
    INIT_PUSHABLE_TASKS(tsk)                    \
    INIT_CGROUP_SCHED(tsk)                        \
    .ptraced    = LIST_HEAD_INIT(tsk.ptraced),            \
    .ptrace_entry    = LIST_HEAD_INIT(tsk.ptrace_entry),        \
    .real_parent    = &tsk,                        \
    .parent        = &tsk,                        \
    .children    = LIST_HEAD_INIT(tsk.children),            \
    .sibling    = LIST_HEAD_INIT(tsk.sibling),            \
    .group_leader    = &tsk,                        \
    RCU_POINTER_INITIALIZER(real_cred, &init_cred),            \
    RCU_POINTER_INITIALIZER(cred, &init_cred),            \
    .comm        = INIT_TASK_COMM,                \
    .thread        = INIT_THREAD,                    \
    .fs        = &init_fs,                    \
    .files        = &init_files,                    \
    .signal        = &init_signals,                \
    .sighand    = &init_sighand,                \
    .nsproxy    = &init_nsproxy,                \
    .pending    = {                        \
        .list = LIST_HEAD_INIT(tsk.pending.list),        \
        .signal = {{0}}},                    \
    .blocked    = {{0}},                    \
    .alloc_lock    = __SPIN_LOCK_UNLOCKED(tsk.alloc_lock),        \
    .journal_info    = NULL,                        \
    .cpu_timers    = INIT_CPU_TIMERS(tsk.cpu_timers),        \
    .pi_lock    = __RAW_SPIN_LOCK_UNLOCKED(tsk.pi_lock),    \
    .timer_slack_ns = 50000, /* 50 usec default slack */        \
    .pids = {                            \
        [PIDTYPE_PID]  = INIT_PID_LINK(PIDTYPE_PID),        \
        [PIDTYPE_PGID] = INIT_PID_LINK(PIDTYPE_PGID),        \
        [PIDTYPE_SID]  = INIT_PID_LINK(PIDTYPE_SID),        \
    },                                \
    .thread_group    = LIST_HEAD_INIT(tsk.thread_group),        \
    .thread_node    = LIST_HEAD_INIT(init_signals.thread_head),    \
    INIT_IDS                            \
    INIT_PERF_EVENTS(tsk)                        \
    INIT_TRACE_IRQFLAGS                        \
    INIT_LOCKDEP                            \
    INIT_FTRACE_GRAPH                        \
    INIT_TRACE_RECURSION                        \
    INIT_TASK_RCU_PREEMPT(tsk)                    \
    INIT_TASK_RCU_TASKS(tsk)                    \
    INIT_CPUSET_SEQ(tsk)                        \
    INIT_RT_MUTEXES(tsk)                        \
    INIT_PREV_CPUTIME(tsk)                        \
    INIT_VTIME(tsk)                            \
    INIT_NUMA_BALANCING(tsk)                    \
    INIT_KASAN(tsk)                            \
}

 

1.2 The relationship among thread_info, thread_union and task_struct

thread_union contains thread_info and the kernel stack;

task_struct's stack member points at init_thread_union.thread_info.

 

 

Kernel stack layout:
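
A text sketch of this layout (higher addresses at the top; the whole union is THREAD_SIZE = 8KB):

    +--------------------------+  <- init_thread_union + THREAD_SIZE (8KB)
    |  8-byte hole             |
    +--------------------------+  <- initial SP (THREAD_START_SP)
    |  kernel stack            |
    |  (grows downwards)       |
    |                          |
    +--------------------------+
    |  struct thread_info      |  thread_info->task --> init_task
    +--------------------------+  <- init_thread_union (task_struct->stack points here)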

1.2.1 init_thread_info

init_thread_union is declared with __init_task_data, so it is pinned in the .data..init_task section (init_thread_info is simply its thread_info member).

/*
 * Initial thread structure. Alignment of this is handled by a special
 * linker map entry.
 */
union thread_union init_thread_union __init_task_data =
    { INIT_THREAD_INFO(init_task) };


#define __init_task_data __attribute__((__section__(".data..init_task")))

 

Now look at the .data..init_task section; its size and position are defined in the linker script vmlinux.lds.S.

A region of two pages (8KB) is reserved at the very start of _data to hold init_thread_union.

 

SECTIONS
{
...
    .data : AT(__data_loc) {
        _data = .;        /* address in memory */
        _sdata = .;

        /*
         * first, the init task union, aligned
         * to an 8192 byte boundary.
         */
        INIT_TASK_DATA(THREAD_SIZE)------------------------------placed at the start of _data; two pages in size, i.e. 8KB
...
        _edata = .;
    }
    _edata_loc = __data_loc + SIZEOF(.data);
...
}

#define INIT_TASK_DATA(align)                        \
    . = ALIGN(align);                        \
    *(.data..init_task)


#define THREAD_SIZE_ORDER    1
#define THREAD_SIZE        (PAGE_SIZE << THREAD_SIZE_ORDER)
#define THREAD_START_SP        (THREAD_SIZE - 8)

 

 

init_thread_union is a thread_union, fixed at 8KB (THREAD_SIZE) in size.

union thread_union {
    struct thread_info thread_info;
    unsigned long stack[THREAD_SIZE/sizeof(long)];
};

 

Its thread_info member (init_thread_info) is a struct thread_info, which is initialized by INIT_THREAD_INFO.

struct thread_info {
    unsigned long        flags;        /* low level flags */
    int            preempt_count;    /* 0 => preemptable, <0 => bug */
    mm_segment_t        addr_limit;    /* address limit */
    struct task_struct    *task;        /* main task structure */
    struct exec_domain    *exec_domain;    /* execution domain */
    __u32            cpu;        /* cpu */
    __u32            cpu_domain;    /* cpu domain */
    struct cpu_context_save    cpu_context;    /* cpu context */
    __u32            syscall;    /* syscall number */
    __u8            used_cp[16];    /* thread used copro */
    unsigned long        tp_value[2];    /* TLS registers */
#ifdef CONFIG_CRUNCH
    struct crunch_state    crunchstate;
#endif
    union fp_state        fpstate __attribute__((aligned(8)));
    union vfp_state        vfpstate;
#ifdef CONFIG_ARM_THUMBEE
    unsigned long        thumbee_state;    /* ThumbEE Handler Base register */
#endif
};

#define INIT_THREAD_INFO(tsk)                        \
{                                    \
    .task        = &tsk,                        \
    .exec_domain    = &default_exec_domain,                \
    .flags        = 0,                        \
    .preempt_count    = INIT_PREEMPT_COUNT,                \
    .addr_limit    = KERNEL_DS,                    \
    .cpu_domain    = domain_val(DOMAIN_USER, DOMAIN_MANAGER) |    \
              domain_val(DOMAIN_KERNEL, DOMAIN_MANAGER) |    \
              domain_val(DOMAIN_IO, DOMAIN_CLIENT),        \
}

 

1.2.2 The init_task kernel stack

On ARM32, before the assembly code jumps to the C entry point start_kernel(), the SP register is set to point at the top of the 8KB kernel stack, with an 8-byte hole reserved at the very top.

/*
 * The following fragment of code is executed with the MMU on in MMU mode,
 * and uses absolute addresses; this is not position independent.
 *
 *  r0  = cp#15 control register
 *  r1  = machine ID
 *  r2  = atags/dtb pointer
 *  r9  = processor ID
 */
    __INIT
__mmap_switched:
    adr    r3, __mmap_switched_data

    ldmia    r3!, {r4, r5, r6, r7}
...
 ARM(    ldmia    r3, {r4, r5, r6, r7, sp})
 THUMB(    ldmia    r3, {r4, r5, r6, r7}    )
 THUMB(    ldr    sp, [r3, #16]        )
...
    b    start_kernel------------------------------------------------jump to start_kernel
ENDPROC(__mmap_switched)

    .align    2
    .type    __mmap_switched_data, %object
__mmap_switched_data:
    .long    __data_loc            @ r4
    .long    _sdata                @ r5
    .long    __bss_start            @ r6
    .long    _end                @ r7
    .long    processor_id            @ r4
    .long    __machine_arch_type        @ r5
    .long    __atags_pointer            @ r6
#ifdef CONFIG_CPU_CP15
    .long    cr_alignment            @ r7
#else
    .long    0                @ r7
#endif
    .long    init_thread_union + THREAD_START_SP @ sp-----------------the value loaded into SP: the top of the 8KB stack area
    .size    __mmap_switched_data, . - __mmap_switched_data

 

1.2.3 Working backwards from sp to current

The kernel uses the current macro to obtain the task_struct of the currently running process. The path from sp to current is:

  1. Read the current kernel stack pointer from the SP register.
  2. Mask the stack pointer down to a THREAD_SIZE boundary to get the struct thread_info pointer.
  3. Follow thread_info->task to reach the task_struct.

This can be read together with the kernel stack layout above.

#define get_current() (current_thread_info()->task)
#define current get_current()

/*
 * how to get the current stack pointer in C
 */
register unsigned long current_stack_pointer asm ("sp");

/*
 * how to get the thread information struct from C
 */
static inline struct thread_info *current_thread_info(void) __attribute_const__;

static inline struct thread_info *current_thread_info(void)
{
    return (struct thread_info *)
        (current_stack_pointer & ~(THREAD_SIZE - 1));
}
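
As a usage sketch (a hypothetical minimal kernel module; the module name and messages are only illustrative), any code running in process context can dereference current directly:

#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/sched.h>

/* print the name and pid of whichever task runs the module init,
 * typically the insmod process */
static int __init whoami_init(void)
{
    pr_info("whoami: loaded by %s (pid %d)\n", current->comm, current->pid);
    return 0;
}

static void __exit whoami_exit(void)
{
    pr_info("whoami: unloaded by %s (pid %d)\n", current->comm, current->pid);
}

module_init(whoami_init);
module_exit(whoami_exit);
MODULE_LICENSE("GPL");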

 

2. fork

Linux creates processes and threads through the fork, vfork and clone system calls; inside the kernel all three are implemented by a single function, do_fork(), which is also used by kernel_thread() to create kernel threads.

do_fork() is defined in kernel/fork.c; the four wrappers below differ only in the arguments they pass.

/*
 * Create a kernel thread.
 */
pid_t kernel_thread(int (*fn)(void *), void *arg, unsigned long flags)
{
    return do_fork(flags|CLONE_VM|CLONE_UNTRACED, (unsigned long)fn,
        (unsigned long)arg, NULL, NULL);
}

SYSCALL_DEFINE0(fork)
{
    return do_fork(SIGCHLD, 0, 0, NULL, NULL);
}

SYSCALL_DEFINE0(vfork)
{
    return do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, 0,
            0, NULL, NULL);
}

SYSCALL_DEFINE5(clone, unsigned long, clone_flags, unsigned long, newsp,
         int __user *, parent_tidptr,
         int, tls_val,
         int __user *, child_tidptr)
{
    return do_fork(clone_flags, newsp, 0, parent_tidptr, child_tidptr);
}

 

fork passes only the SIGCHLD flag, so a SIGCHLD signal is sent to notify the parent when the child terminates. fork is the heavyweight path: it builds a complete copy of the parent for the child, and the child then runs on that copy.

Thanks to copy-on-write (COW), however, the child only copies the parent's page tables, not the page contents. Only when the child needs to write to a page does the copy-on-write mechanism create a private copy for it.

 

vfork passes two more flags than fork: CLONE_VFORK means the parent is suspended until the child releases the virtual memory resources; CLONE_VM means parent and child run in the same address space.

Since fork gained COW, vfork has little remaining value.

 

clone is used to create threads; its arguments are passed down from user space in registers, and a new stack address newsp is usually supplied. Through clone_flags, clone gives the caller much finer control: it can behave like fork or vfork, or share selected resources with the parent.
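
For reference, glibc's pthread_create() ultimately calls clone with a fixed combination of these flags, typically CLONE_VM | CLONE_FS | CLONE_FILES | CLONE_SIGHAND | CLONE_THREAD | CLONE_SYSVSEM | CLONE_SETTLS | CLONE_PARENT_SETTID | CLONE_CHILD_CLEARTID (the exact set can vary slightly between glibc versions); running any pthread program under strace -f shows the clone call with this flag set.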

 

kernel_thread() creates kernel threads. CLONE_VM means memory is shared with the parent; CLONE_UNTRACED means a tracing process cannot force CLONE_PTRACE on this thread.

 

In short: fork is heavyweight, vfork is largely obsolete, clone is lightweight, and kernel_thread is for kernel threads.

2.1 do_fork and its parameters

do_fork takes five parameters:

  • clone_flags: the set of flags controlling how the process is created
  • stack_start: start address of the user-space stack
  • stack_size: size of the user-space stack
  • parent_tidptr and child_tidptr: two user-space pointers where the new task's TID is stored for the parent and the child respectively

clone_flags is the parameter that most strongly shapes do_fork's behaviour:

/*
 * cloning flags:
 */
#define CSIGNAL        0x000000ff    /* signal mask to be sent at exit */
#define CLONE_VM    0x00000100    /* set if VM shared between processes */-------------------------parent and child run in the same virtual address space
#define CLONE_FS    0x00000200    /* set if fs info shared between processes */--------------------parent and child share filesystem information
#define CLONE_FILES    0x00000400    /* set if open files shared between processes */--------------parent and child share the file descriptor table
#define CLONE_SIGHAND    0x00000800    /* set if signal handlers and blocked signals shared */-----parent and child share the signal handler table
#define CLONE_PTRACE    0x00002000    /* set if we want to let tracing continue on the child too */---------if the parent is being ptraced, the child is traced as well
#define CLONE_VFORK    0x00004000    /* set if the parent wants the child to wake it up on mm_release */----a completion is set up when the child is created; wait_for_completion() puts the parent to sleep until the child calls execve() or exit() and releases the virtual memory
#define CLONE_PARENT    0x00008000    /* set if we want to have the same parent as the cloner */------------the new process becomes a sibling of the caller rather than its child
#define CLONE_THREAD    0x00010000    /* Same thread group? */
#define CLONE_NEWNS    0x00020000    /* New mount namespace group */------------the child gets a new mount namespace, not shared with the parent
#define CLONE_SYSVSEM    0x00040000    /* share system V SEM_UNDO semantics */
#define CLONE_SETTLS    0x00080000    /* create a new TLS for the child */
#define CLONE_PARENT_SETTID    0x00100000    /* set the TID in the parent */
#define CLONE_CHILD_CLEARTID    0x00200000    /* clear the TID in the child */
#define CLONE_DETACHED        0x00400000    /* Unused, ignored */
#define CLONE_UNTRACED        0x00800000    /* set if the tracing process can't force CLONE_PTRACE on this clone */
#define CLONE_CHILD_SETTID    0x01000000    /* set the TID in the child */
/* 0x02000000 was previously the unused CLONE_STOPPED (Start in stopped state)
   and is now available for re-use. */
#define CLONE_NEWUTS        0x04000000    /* New utsname namespace */
#define CLONE_NEWIPC        0x08000000    /* New ipc namespace */
#define CLONE_NEWUSER        0x10000000    /* New user namespace */----------create a new user namespace for the child
#define CLONE_NEWPID        0x20000000    /* New pid namespace */------------create a new PID namespace for the child
#define CLONE_NEWNET        0x40000000    /* New network namespace */
#define CLONE_IO        0x80000000    /* Clone io context */

The main function call path is as follows:

do_fork
  ->copy_process
    ->dup_task_struct
    ->sched_fork
    ->copy_files
    ->copy_fs
    ->copy_sighand
    ->copy_signal
    ->copy_mm
      ->dup_mm
    ->copy_namespaces
    ->copy_io
    ->copy_thread

 

 

do_fork() first makes a quick check involving CLONE_UNTRACED to decide which ptrace event, if any, should be reported, hands the real work over to copy_process(), and finally wakes up the newly created process.

/*
 *  Ok, this is the main fork-routine.
 *
 * It copies the process, and if successful kick-starts
 * it and waits for it to finish using the VM if required.
 */
long do_fork(unsigned long clone_flags,
          unsigned long stack_start,
          unsigned long stack_size,
          int __user *parent_tidptr,
          int __user *child_tidptr)
{
    struct task_struct *p;
    int trace = 0;
    long nr;

    /*
     * Determine whether and which event to report to ptracer.  When
     * called from kernel_thread or CLONE_UNTRACED is explicitly
     * requested, no event is reported; otherwise, report if the event
     * for the type of forking is enabled.
     */
    if (!(clone_flags & CLONE_UNTRACED)) {
        if (clone_flags & CLONE_VFORK)
            trace = PTRACE_EVENT_VFORK;
        else if ((clone_flags & CSIGNAL) != SIGCHLD)
            trace = PTRACE_EVENT_CLONE;
        else
            trace = PTRACE_EVENT_FORK;

        if (likely(!ptrace_event_enabled(current, trace)))
            trace = 0;
    }

    p = copy_process(clone_flags, stack_start, stack_size,
             child_tidptr, NULL, trace);
    /*
     * Do this prior waking up the new thread - the thread pointer
     * might get invalid after that point, if the thread exits quickly.
     */
    if (!IS_ERR(p)) {
        struct completion vfork;
        struct pid *pid;

        trace_sched_process_fork(current, p);

        pid = get_task_pid(p, PIDTYPE_PID);
        nr = pid_vnr(pid);

        if (clone_flags & CLONE_PARENT_SETTID)
            put_user(nr, parent_tidptr);

        if (clone_flags & CLONE_VFORK) {------------------for CLONE_VFORK, initialize the vfork completion
            p->vfork_done = &vfork;
            init_completion(&vfork);
            get_task_struct(p);
        }

        wake_up_new_task(p);------------------------------wake up the newly created process p, i.e. put it on a runqueue so the scheduler can run it
        /* forking complete and child started to run, tell ptracer */
        if (unlikely(trace))
            ptrace_event_pid(trace, pid);

        if (clone_flags & CLONE_VFORK) {
            if (!wait_for_vfork_done(p, &vfork))---------wait for the child to complete p->vfork_done
                ptrace_event_pid(PTRACE_EVENT_VFORK_DONE, pid);
        }

        put_pid(pid);
    } else {
        nr = PTR_ERR(p);
    }
    return nr;
}

 

 

2.2 copy_process

The per-process flags are defined in include/linux/sched.h:

/*
 * Per process flags
 */
#define PF_EXITING    0x00000004    /* getting shut down */
#define PF_EXITPIDONE    0x00000008    /* pi exit done on shut down */
#define PF_VCPU        0x00000010    /* I'm a virtual CPU */
#define PF_WQ_WORKER    0x00000020    /* I'm a workqueue worker */
#define PF_FORKNOEXEC    0x00000040    /* forked but didn't exec */
#define PF_MCE_PROCESS  0x00000080      /* process policy on mce errors */
#define PF_SUPERPRIV    0x00000100    /* used super-user privileges */
#define PF_DUMPCORE    0x00000200    /* dumped core */
#define PF_SIGNALED    0x00000400    /* killed by a signal */
#define PF_MEMALLOC    0x00000800    /* Allocating memory */
#define PF_NPROC_EXCEEDED 0x00001000    /* set_user noticed that RLIMIT_NPROC was exceeded */
#define PF_USED_MATH    0x00002000    /* if unset the fpu must be initialized before use */
#define PF_USED_ASYNC    0x00004000    /* used async_schedule*(), used by module init */
#define PF_NOFREEZE    0x00008000    /* this thread should not be frozen */
#define PF_FROZEN    0x00010000    /* frozen for system suspend */
#define PF_FSTRANS    0x00020000    /* inside a filesystem transaction */
#define PF_KSWAPD    0x00040000    /* I am kswapd */
#define PF_MEMALLOC_NOIO 0x00080000    /* Allocating memory without IO involved */
#define PF_LESS_THROTTLE 0x00100000    /* Throttle me less: I clean memory */
#define PF_KTHREAD    0x00200000    /* I am a kernel thread */
#define PF_RANDOMIZE    0x00400000    /* randomize virtual address space */
#define PF_SWAPWRITE    0x00800000    /* Allowed to write to swap */
#define PF_NO_SETAFFINITY 0x04000000    /* Userland is not allowed to meddle with cpus_allowed */
#define PF_MCE_EARLY    0x08000000      /* Early kill for mce process policy */
#define PF_MUTEX_TESTER    0x20000000    /* Thread belongs to the rt mutex tester */
#define PF_FREEZER_SKIP    0x40000000    /* Freezer should not count it as freezable */
#define PF_SUSPEND_TASK 0x80000000      /* this thread called freeze_processes and should not be frozen */

 

copy_process() uses current to get the task_struct of the calling process, allocates a new task_struct and copies the parent's contents into it, and then initializes the main parts of the new process: memory space, open files, filesystem information, I/O context and so on.

/*
 * This creates a new process as a copy of the old one,
 * but does not actually start it yet.
 *
 * It copies the registers, and all the appropriate
 * parts of the process environment (as per the clone
 * flags). The actual kick-off is left to the caller.
 */
static struct task_struct *copy_process(unsigned long clone_flags,
                    unsigned long stack_start,
                    unsigned long stack_size,
                    int __user *child_tidptr,
                    struct pid *pid,
                    int trace)
{
    int retval;
    struct task_struct *p;

    if ((clone_flags & (CLONE_NEWNS|CLONE_FS)) == (CLONE_NEWNS|CLONE_FS))
        return ERR_PTR(-EINVAL);

    if ((clone_flags & (CLONE_NEWUSER|CLONE_FS)) == (CLONE_NEWUSER|CLONE_FS))---------------CLONE_FS (parent and child share filesystem information) conflicts with CLONE_NEWNS/CLONE_NEWUSER (new mount/user namespace)
        return ERR_PTR(-EINVAL);

    /*
     * Thread groups must share signals as well, and detached threads
     * can only be started up within the thread group.
     */
    if ((clone_flags & CLONE_THREAD) && !(clone_flags & CLONE_SIGHAND))--------------------threads in a thread group must also share signal handlers
        return ERR_PTR(-EINVAL);

    /*
     * Shared signal handlers imply shared VM. By way of the above,
     * thread groups also imply shared VM. Blocking this case allows
     * for various simplifications in other code.
     */
    if ((clone_flags & CLONE_SIGHAND) && !(clone_flags & CLONE_VM))----------------------sharing signal handlers requires sharing the address space
        return ERR_PTR(-EINVAL);

    /*
     * Siblings of global init remain as zombies on exit since they are
     * not reaped by their parent (swapper). To solve this and to avoid
     * multi-rooted process trees, prevent global and container-inits
     * from creating siblings.
     */
    if ((clone_flags & CLONE_PARENT) &&
                current->signal->flags & SIGNAL_UNKILLABLE)-----------------------------init is the ancestor of all user-space processes; a sibling of init would never be reaped and would become a zombie
        return ERR_PTR(-EINVAL);

    /*
     * If the new process will be in a different pid or user namespace
     * do not allow it to share a thread group or signal handlers or
     * parent with the forking task.
     */
    if (clone_flags & CLONE_SIGHAND) {---------------------------------------------------a new pid or user namespace conflicts with sharing signal handlers or a thread group, because namespaces isolate access
        if ((clone_flags & (CLONE_NEWUSER | CLONE_NEWPID)) ||
            (task_active_pid_ns(current) !=
                current->nsproxy->pid_ns_for_children))
            return ERR_PTR(-EINVAL);
    }

    retval = security_task_create(clone_flags);
    if (retval)
        goto fork_out;

    retval = -ENOMEM;
    p = dup_task_struct(current);-------------------------------------------------------allocate a new task_struct, using the current process as the template
    if (!p)
        goto fork_out;

    ftrace_graph_init_task(p);

    rt_mutex_init_task(p);

#ifdef CONFIG_PROVE_LOCKING
    DEBUG_LOCKS_WARN_ON(!p->hardirqs_enabled);
    DEBUG_LOCKS_WARN_ON(!p->softirqs_enabled);
#endif
    retval = -EAGAIN;
    if (atomic_read(&p->real_cred->user->processes) >=
            task_rlimit(p, RLIMIT_NPROC)) {
        if (p->real_cred->user != INIT_USER &&
            !capable(CAP_SYS_RESOURCE) && !capable(CAP_SYS_ADMIN))
            goto bad_fork_free;
    }
    current->flags &= ~PF_NPROC_EXCEEDED;

    retval = copy_creds(p, clone_flags);
    if (retval < 0)
        goto bad_fork_free;

    /*
     * If multiple threads are within copy_process(), then this check
     * triggers too late. This doesn't hurt, the check is only there
     * to stop root fork bombs.
     */
    retval = -EAGAIN;
    if (nr_threads >= max_threads)----------------------------------------------max_threads is the maximum number of threads allowed system-wide; nr_threads is the current number of tasks
        goto bad_fork_cleanup_count;

    if (!try_module_get(task_thread_info(p)->exec_domain->module))
        goto bad_fork_cleanup_count;

    delayacct_tsk_init(p);    /* Must remain after dup_task_struct() */
    p->flags &= ~(PF_SUPERPRIV | PF_WQ_WORKER);---------------------------------clear the "used super-user privileges" and "workqueue worker" flags
    p->flags |= PF_FORKNOEXEC;--------------------------------------------------mark the task as forked but not yet exec'd
    INIT_LIST_HEAD(&p->children);-----------------------------------------------list head for the new process's children
    INIT_LIST_HEAD(&p->sibling);------------------------------------------------sibling list entry of the new process
    rcu_copy_process(p);
    p->vfork_done = NULL;
    spin_lock_init(&p->alloc_lock);

    init_sigpending(&p->pending);

    p->utime = p->stime = p->gtime = 0;
    p->utimescaled = p->stimescaled = 0;
#ifndef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
    p->prev_cputime.utime = p->prev_cputime.stime = 0;
#endif
#ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN
    seqlock_init(&p->vtime_seqlock);
    p->vtime_snap = 0;
    p->vtime_snap_whence = VTIME_SLEEPING;
#endif

#if defined(SPLIT_RSS_COUNTING)
    memset(&p->rss_stat, 0, sizeof(p->rss_stat));
#endif

    p->default_timer_slack_ns = current->timer_slack_ns;

    task_io_accounting_init(&p->ioac);
    acct_clear_integrals(p);

    posix_cpu_timers_init(p);

    p->start_time = ktime_get_ns();
    p->real_start_time = ktime_get_boot_ns();
    p->io_context = NULL;
    p->audit_context = NULL;
    if (clone_flags & CLONE_THREAD)
        threadgroup_change_begin(current);
    cgroup_fork(p);
#ifdef CONFIG_NUMA
    p->mempolicy = mpol_dup(p->mempolicy);
    if (IS_ERR(p->mempolicy)) {
        retval = PTR_ERR(p->mempolicy);
        p->mempolicy = NULL;
        goto bad_fork_cleanup_threadgroup_lock;
    }
#endif
...
#ifdef CONFIG_BCACHE
    p->sequential_io    = 0;
    p->sequential_io_avg    = 0;
#endif

    /* Perform scheduler related setup. Assign this task to a CPU. */
    retval = sched_fork(clone_flags, p);-----------------------------------------initialize the scheduler-related data structures and assign the task to a CPU
    if (retval)
        goto bad_fork_cleanup_policy;

    retval = perf_event_init_task(p);                                                                                         
    if (retval)
        goto bad_fork_cleanup_policy;
    retval = audit_alloc(p);
    if (retval)
        goto bad_fork_cleanup_perf;
    /* copy all the process information */
    shm_init_task(p);
    retval = copy_semundo(clone_flags, p);
    if (retval)
        goto bad_fork_cleanup_audit;
    retval = copy_files(clone_flags, p);-----------------------------------------copy the parent's open-file information
    if (retval)
        goto bad_fork_cleanup_semundo;
    retval = copy_fs(clone_flags, p);--------------------------------------------copy the parent's fs_struct information
    if (retval)
        goto bad_fork_cleanup_files;
    retval = copy_sighand(clone_flags, p);
    if (retval)
        goto bad_fork_cleanup_fs;
    retval = copy_signal(clone_flags, p);
    if (retval)
        goto bad_fork_cleanup_sighand;
    retval = copy_mm(clone_flags, p);--------------------------------------------copy the parent's memory-management information
    if (retval)
        goto bad_fork_cleanup_signal;
    retval = copy_namespaces(clone_flags, p);
    if (retval)
        goto bad_fork_cleanup_mm;
    retval = copy_io(clone_flags, p);--------------------------------------------copy the parent's io_context
    if (retval)
        goto bad_fork_cleanup_namespaces;
    retval = copy_thread(clone_flags, stack_start, stack_size, p);
    if (retval)
        goto bad_fork_cleanup_io;

    if (pid != &init_struct_pid) {
        retval = -ENOMEM;
        pid = alloc_pid(p->nsproxy->pid_ns_for_children);
        if (!pid)
            goto bad_fork_cleanup_io;
    }

    p->set_child_tid = (clone_flags & CLONE_CHILD_SETTID) ? child_tidptr : NULL;
    /*
     * Clear TID on mm_release()?
     */
    p->clear_child_tid = (clone_flags & CLONE_CHILD_CLEARTID) ? child_tidptr : NULL;
#ifdef CONFIG_BLOCK
    p->plug = NULL;
#endif
#ifdef CONFIG_FUTEX
    p->robust_list = NULL;
#ifdef CONFIG_COMPAT
    p->compat_robust_list = NULL;
#endif
    INIT_LIST_HEAD(&p->pi_state_list);
    p->pi_state_cache = NULL;
#endif
    /*
     * sigaltstack should be cleared when sharing the same VM
     */
    if ((clone_flags & (CLONE_VM|CLONE_VFORK)) == CLONE_VM)
        p->sas_ss_sp = p->sas_ss_size = 0;

    /*
     * Syscall tracing and stepping should be turned off in the
     * child regardless of CLONE_PTRACE.
     */
    user_disable_single_step(p);
    clear_tsk_thread_flag(p, TIF_SYSCALL_TRACE);
#ifdef TIF_SYSCALL_EMU
    clear_tsk_thread_flag(p, TIF_SYSCALL_EMU);
#endif
    clear_all_latency_tracing(p);

    /* ok, now we should be set up.. */
    p->pid = pid_nr(pid);-------------------------------------------------------fetch the new process's pid
    if (clone_flags & CLONE_THREAD) {
        p->exit_signal = -1;
        p->group_leader = current->group_leader;
        p->tgid = current->tgid;
    } else {
        if (clone_flags & CLONE_PARENT)
            p->exit_signal = current->group_leader->exit_signal;
        else
            p->exit_signal = (clone_flags & CSIGNAL);
        p->group_leader = p;
        p->tgid = p->pid;
    }

    p->nr_dirtied = 0;
    p->nr_dirtied_pause = 128 >> (PAGE_SHIFT - 10);
    p->dirty_paused_when = 0;

    p->pdeath_signal = 0;
    INIT_LIST_HEAD(&p->thread_group);
    p->task_works = NULL;

    /*
     * Make it visible to the rest of the system, but dont wake it up yet.
     * Need tasklist lock for parent etc handling!
     */
    write_lock_irq(&tasklist_lock);

    /* CLONE_PARENT re-uses the old parent */
    if (clone_flags & (CLONE_PARENT|CLONE_THREAD)) {
        p->real_parent = current->real_parent;
        p->parent_exec_id = current->parent_exec_id;
    } else {
        p->real_parent = current;
        p->parent_exec_id = current->self_exec_id;
    }

    spin_lock(&current->sighand->siglock);

    /*
     * Copy seccomp details explicitly here, in case they were changed
     * before holding sighand lock.
     */
    copy_seccomp(p);

    /*
     * Process group and session signals need to be delivered to just the
     * parent before the fork or both the parent and the child after the
     * fork. Restart if a signal comes in before we add the new process to
     * it's process group.
     * A fatal signal pending means that current will exit, so the new
     * thread can't slip out of an OOM kill (or normal SIGKILL).
    */
    recalc_sigpending();
    if (signal_pending(current)) {
        spin_unlock(&current->sighand->siglock);
        write_unlock_irq(&tasklist_lock);
        retval = -ERESTARTNOINTR;
        goto bad_fork_free_pid;
    }

    if (likely(p->pid)) {
        ptrace_init_task(p, (clone_flags & CLONE_PTRACE) || trace);

        init_task_pid(p, PIDTYPE_PID, pid);
        if (thread_group_leader(p)) {
            init_task_pid(p, PIDTYPE_PGID, task_pgrp(current));
            init_task_pid(p, PIDTYPE_SID, task_session(current));

            if (is_child_reaper(pid)) {
                ns_of_pid(pid)->child_reaper = p;
                p->signal->flags |= SIGNAL_UNKILLABLE;
            }

            p->signal->leader_pid = pid;
            p->signal->tty = tty_kref_get(current->signal->tty);
            list_add_tail(&p->sibling, &p->real_parent->children);
            list_add_tail_rcu(&p->tasks, &init_task.tasks);
            attach_pid(p, PIDTYPE_PGID);
            attach_pid(p, PIDTYPE_SID);
            __this_cpu_inc(process_counts);
        } else {
            current->signal->nr_threads++;
            atomic_inc(&current->signal->live);
            atomic_inc(&current->signal->sigcnt);
            list_add_tail_rcu(&p->thread_group,
                      &p->group_leader->thread_group);
            list_add_tail_rcu(&p->thread_node,
                      &p->signal->thread_head);
        }
        attach_pid(p, PIDTYPE_PID);
        nr_threads++;---------------------------------------------------------increment the system-wide task count
    }

    total_forks++;
    spin_unlock(&current->sighand->siglock);
    syscall_tracepoint_update(p);
    write_unlock_irq(&tasklist_lock);

    proc_fork_connector(p);
    cgroup_post_fork(p);
    if (clone_flags & CLONE_THREAD)
        threadgroup_change_end(current);
    perf_event_fork(p);

    trace_task_newtask(p, clone_flags);
    uprobe_copy_process(p, clone_flags);

    return p;----------------------------------------------------------------on success, return the new process's task_struct
...
    return ERR_PTR(retval);------------------------------------------------------the error-handling paths all return ERR_PTR(retval)
}

dup_task_struct() copies the task_struct and thread_info from the parent.

 

static struct task_struct *dup_task_struct(struct task_struct *orig)
{
    struct task_struct *tsk;
    struct thread_info *ti;
    int node = tsk_fork_get_node(orig);
    int err;

    tsk = alloc_task_struct_node(node);-------------------------------------------------allocate a task_struct
    if (!tsk)
        return NULL;

    ti = alloc_thread_info_node(tsk, node);---------------------------------------------allocate a thread_info (the kernel stack area)
    if (!ti)
        goto free_tsk;

    err = arch_dup_task_struct(tsk, orig);----------------------------------------------copy the parent's task_struct into the new task tsk
    if (err)
        goto free_ti;

    tsk->stack = ti;--------------------------------------------------------------------point the new task's stack at the freshly allocated thread_info
#ifdef CONFIG_SECCOMP
    /*
     * We must handle setting up seccomp filters once we're under
     * the sighand lock in case orig has changed between now and
     * then. Until then, filter must be NULL to avoid messing up
     * the usage counts on the error path calling free_task.
     */
    tsk->seccomp.filter = NULL;
#endif

    setup_thread_stack(tsk, orig);------------------------------------------------------copy the parent's thread_info into the child's and point the child's thread_info->task back at the child
    clear_user_return_notifier(tsk);
    clear_tsk_need_resched(tsk);
    set_task_stack_end_magic(tsk);
...
    return tsk;
...
}

The relevant task run states are:

#define TASK_RUNNING        0
#define TASK_INTERRUPTIBLE    1
#define TASK_UNINTERRUPTIBLE    2
#define __TASK_STOPPED        4
#define __TASK_TRACED        8
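
These states surface as single letters in /proc/<pid>/stat and in ps: roughly R for TASK_RUNNING, S for TASK_INTERRUPTIBLE, D for TASK_UNINTERRUPTIBLE and T for stopped/traced. A small user-space sketch that reads its own state (field layout as documented in proc(5)):

#include <stdio.h>

/* read the third field of /proc/self/stat: the one-letter task state */
int main(void)
{
    FILE *f = fopen("/proc/self/stat", "r");
    int pid;
    char comm[64], state;

    if (!f)
        return 1;
    if (fscanf(f, "%d %63s %c", &pid, comm, &state) == 3)
        printf("pid=%d comm=%s state=%c\n", pid, comm, state); /* expect 'R': we are running */
    fclose(f);
    return 0;
}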

 

sched_fork() delegates most of the work to __sched_fork(), then selects a sched_class according to the task's priority and invokes that class's task_fork hook.

Finally it sets the CPU the new task will run on; if that is not the current CPU, the task has to be migrated there.

/*
 * fork()/clone()-time setup:
 */
int sched_fork(unsigned long clone_flags, struct task_struct *p)
{
    unsigned long flags;
    int cpu = get_cpu();-------------------------------------------------------disable kernel preemption and get the current CPU id

    __sched_fork(clone_flags, p);----------------------------------------------fill in the sched_entity and initialize the scheduling-related fields
    /*
     * We mark the process as running here. This guarantees that
     * nobody will actually run it, and a signal or other external
     * event cannot wake it up and insert it on the runqueue either.
     */
    p->state = TASK_RUNNING;---------------------------------------------------mark the task runnable, even though it is not actually running yet
    /*
     * Make sure we do not leak PI boosting priority to the child.
     */
    p->prio = current->normal_prio;--------------------------------------------inherit the parent's normal_prio as the child's prio
    /*
     * Revert to default priority/policy on fork if requested.
     */
    if (unlikely(p->sched_reset_on_fork)) {
        if (task_has_dl_policy(p) || task_has_rt_policy(p)) {
            p->policy = SCHED_NORMAL;
            p->static_prio = NICE_TO_PRIO(0);
            p->rt_priority = 0;
        } else if (PRIO_TO_NICE(p->static_prio) < 0)
            p->static_prio = NICE_TO_PRIO(0);

        p->prio = p->normal_prio = __normal_prio(p);
        set_load_weight(p);

        /*
         * We don't need the reset flag anymore after the fork. It has
         * fulfilled its duty:
         */
        p->sched_reset_on_fork = 0;
    }

    if (dl_prio(p->prio)) {---------------------------------------------------a SCHED_DEADLINE priority is negative (less than 0)
        put_cpu();
        return -EAGAIN;
    } else if (rt_prio(p->prio)) {--------------------------------------------SCHED_RT priorities are 0-99
        p->sched_class = &rt_sched_class;
    } else {------------------------------------------------------------------SCHED_FAIR (CFS) priorities are 100-139
        p->sched_class = &fair_sched_class;
    }

    if (p->sched_class->task_fork)
        p->sched_class->task_fork(p);

    /*
     * The child is not yet in the pid-hash so no cgroup attach races,
     * and the cgroup is pinned to this child due to cgroup_fork()
     * is ran before sched_fork().
     *
     * Silence PROVE_RCU.
     */
    raw_spin_lock_irqsave(&p->pi_lock, flags);
    set_task_cpu(p, cpu);------------------------------------------------------the key point is to check whether the task's recorded CPU matches the current CPU; if not, the task is migrated using the previously chosen sched_class->migrate_task_rq
    raw_spin_unlock_irqrestore(&p->pi_lock, flags);

#if defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT)
    if (likely(sched_info_on()))
        memset(&p->sched_info, 0, sizeof(p->sched_info));
#endif
#if defined(CONFIG_SMP)
    p->on_cpu = 0;
#endif
    init_task_preempt_count(p);
#ifdef CONFIG_SMP
    plist_node_init(&p->pushable_tasks, MAX_PRIO);
    RB_CLEAR_NODE(&p->pushable_dl_tasks);
#endif

    put_cpu();-----------------------------------------------------------------re-enable kernel preemption
    return 0;
}

 

copy_mm() first sets up the MM-related fields, then uses dup_mm() to allocate an mm_struct and copy the parent's mm_struct into it.

Finally the new mm_struct is assigned to task_struct->mm.

 

static int copy_mm(unsigned long clone_flags, struct task_struct *tsk)
{
    struct mm_struct *mm, *oldmm;
    int retval;

    tsk->min_flt = tsk->maj_flt = 0;
    tsk->nvcsw = tsk->nivcsw = 0;
#ifdef CONFIG_DETECT_HUNG_TASK
    tsk->last_switch_count = tsk->nvcsw + tsk->nivcsw;
#endif

    tsk->mm = NULL;
    tsk->active_mm = NULL;

    /*
     * Are we cloning a kernel thread?
     *
     * We need to steal a active VM for that..
     */
    oldmm = current->mm;
    if (!oldmm)-----------------------------------------------if current->mm is NULL, the caller is a kernel thread
        return 0;

    /* initialize the new vmacache entries */
    vmacache_flush(tsk);

    if (clone_flags & CLONE_VM) {----------------------------CLONE_VM means parent and child share the address space, so there is no need to create a new one; reuse oldmm directly
        atomic_inc(&oldmm->mm_users);
        mm = oldmm;
        goto good_mm;
    }

    retval = -ENOMEM;
    mm = dup_mm(tsk);---------------------------------------create a separate new mm_struct for the child
    if (!mm)
        goto fail_nomem;

good_mm:
    tsk->mm = mm;-------------------------------------------install the address space in the new task
    tsk->active_mm = mm;
    return 0;

fail_nomem:
    return retval;
}

dup_mm() copies the mm_struct from the parent, performs the necessary initialization, and returns the finished mm_struct to copy_mm().

/*
 * Allocate a new mm structure and copy contents from the
 * mm structure of the passed in task structure.
 */
static struct mm_struct *dup_mm(struct task_struct *tsk)
{
    struct mm_struct *mm, *oldmm = current->mm;
    int err;

    mm = allocate_mm();-----------------------------------allocate an mm_struct
    if (!mm)
        goto fail_nomem;

    memcpy(mm, oldmm, sizeof(*mm));-----------------------copy the parent's mm_struct into the child's
    if (!mm_init(mm, tsk))--------------------------------re-initialize the members that must be private to the child, even though the raw data was just copied from the parent
        goto fail_nomem;

    dup_mm_exe_file(oldmm, mm);

    err = dup_mmap(mm, oldmm);----------------------------copy the PTE entries of every VMA in the parent into the corresponding PTEs of the child
    if (err)
        goto free_pt;

    mm->hiwater_rss = get_mm_rss(mm);
    mm->hiwater_vm = mm->total_vm;

    if (mm->binfmt && !try_module_get(mm->binfmt->module))
        goto free_pt;

    return mm;
...
}

On the ARM architecture, the top of the Linux kernel stack holds the ARM general-purpose registers as a struct pt_regs.

struct pt_regs {
    unsigned long uregs[18];
};

#define ARM_cpsr    uregs[16]
#define ARM_pc        uregs[15]
#define ARM_lr        uregs[14]
#define ARM_sp        uregs[13]
#define ARM_ip        uregs[12]
#define ARM_fp        uregs[11]
#define ARM_r10        uregs[10]
#define ARM_r9        uregs[9]
#define ARM_r8        uregs[8]
#define ARM_r7        uregs[7]
#define ARM_r6        uregs[6]
#define ARM_r5        uregs[5]
#define ARM_r4        uregs[4]
#define ARM_r3        uregs[3]
#define ARM_r2        uregs[2]
#define ARM_r1        uregs[1]
#define ARM_r0        uregs[0]
#define ARM_ORIG_r0    uregs[17]

 

As for where pt_regs sits on the kernel stack: task_stack_page(p) first finds the start of the kernel stack, i.e. its bottom.

THREAD_START_SP is then added, i.e. THREAD_SIZE (two pages, 8KB) minus the 8-byte hole.

Subtracting one struct pt_regs from that, childregs ends up pointing just below the top of the stack.

#define task_pt_regs(p) \
    ((struct pt_regs *)(THREAD_START_SP + task_stack_page(p)) - 1)
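
As a concrete check of the arithmetic (assuming 4KB pages, so THREAD_SIZE = 8192 and THREAD_START_SP = 8184, and an ARM32 pt_regs of 18 * 4 = 72 bytes), the user-space sketch below mirrors the macro with a mock stack and pt_regs:

#include <stdio.h>

/* user-space mock of the ARM32 layout, only to illustrate the offsets */
#define THREAD_SIZE     8192UL
#define THREAD_START_SP (THREAD_SIZE - 8)

struct pt_regs { unsigned long uregs[18]; };   /* 72 bytes on a 32-bit ABI */

int main(void)
{
    unsigned char stack[THREAD_SIZE];          /* stands in for task_stack_page(p) */
    struct pt_regs *childregs =
        (struct pt_regs *)(THREAD_START_SP + (unsigned long)stack) - 1;

    printf("stack base : %p\n", (void *)stack);
    printf("initial SP : %p\n", (void *)(stack + THREAD_START_SP));
    printf("childregs  : %p (offset %lu from the base)\n",
           (void *)childregs, (unsigned long)((unsigned char *)childregs - stack));
    /* on ARM32 the offset would be 8184 - 72 = 8112 */
    return 0;
}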

 

copy_thread() first obtains the pt_regs location at the top of the stack, then fills in the new task's context in thread_info->cpu_context.

asmlinkage void ret_from_fork(void) __asm__("ret_from_fork");

int
copy_thread(unsigned long clone_flags, unsigned long stack_start,
        unsigned long stk_sz, struct task_struct *p)
{
    struct thread_info *thread = task_thread_info(p);--------------------------get the new task's thread_info
    struct pt_regs *childregs = task_pt_regs(p);-------------------------------get the pt_regs at the top of the new task's kernel stack

    memset(&thread->cpu_context, 0, sizeof(struct cpu_context_save));----------cpu_context holds the general-purpose registers of the task's kernel context
    if (likely(!(p->flags & PF_KTHREAD))) {------------------------------------not a kernel thread: copy the caller's user-mode pt_regs for the child
        *childregs = *current_pt_regs();
        childregs->ARM_r0 = 0;
        if (stack_start)
            childregs->ARM_sp = stack_start;
    } else {-------------------------------------------------------------------kernel thread case: r4 holds stk_sz (the thread function's argument) and r5 holds stack_start (the thread function itself)
        memset(childregs, 0, sizeof(struct pt_regs));
        thread->cpu_context.r4 = stk_sz;
        thread->cpu_context.r5 = stack_start;
        childregs->ARM_cpsr = SVC_MODE;
    }
    thread->cpu_context.pc = (unsigned long)ret_from_fork;---------------------the saved pc points at ret_from_fork, where the new task starts executing
    thread->cpu_context.sp = (unsigned long)childregs;-------------------------the saved sp points at the new task's kernel stack, just below pt_regs

    clear_ptrace_hw_breakpoint(p);

    if (clone_flags & CLONE_SETTLS)
        thread->tp_value[0] = childregs->ARM_r3;
    thread->tp_value[1] = get_tpuser();

    thread_notify(THREAD_NOTIFY_COPY, thread);

    return 0;
}

 

 

 

3. Testing fork(), vfork() and clone()

3.1 Nested fork() printing

3.1.1 Code

#include <stdio.h>
#include <unistd.h>
#include <sys/wait.h>

int main(void)
{
  int i;

  for(i = 0; i < 2; i++) {
    fork();
    printf("_%d-%d-%d\n", getppid(), getpid(), i);
  }
  wait(NULL);
  wait(NULL);
  return 0;
}

 

3.1.2 Run the program and record a log

The output is as follows:

sudo trace-cmd record  -e all  ./fork
/sys/kernel/tracing/events/*/filter
Current:4293-i=0
Current:4293-i=1
Current:4294-i=0
Current:4294-i=1
Current:4295-i=1
Current:4296-i=1

The trace data is recorded in trace.dat.

3.1.3 Flow analysis

Open trace.dat with kernelshark and filter on sched_process_fork/sys_enter_write/sys_enter_wait4; the result is shown below.

sched_process_fork corresponds to fork, sys_enter_write to printf, sys_enter_wait4 to the start of wait, and sys_exit_wait4 to the end of wait.

The figure below shows the flow of the different processes:

The fork process-relationship diagram is as follows:
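
Derived from the code in 3.1.1 (two loop iterations, fork() called in each), the relationship is sketched below; four processes produce the six output lines above, and 4295/4296 are the two children created at i=1 (which of them belongs to which parent can be read from trace.dat):

P  (4293)        prints i=0, i=1
 |-- C1 (4294)   forked by P  at i=0, prints i=0, i=1
 |     |-- C3    forked by C1 at i=1, prints i=1
 |-- C2          forked by P  at i=1, prints i=1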

 

 

Reference: 《linux中fork()函數詳解(原創!!實例講解)》

 

3.2 Comparing fork(), vfork() and clone()

The differences among fork(), vfork() and clone() were introduced above; the examples below show them in practice.

3.2.1 fork() vs vfork()

#include "stdio.h"

int main() {
  int count = 1;
  int child;
  printf("Father, initial count = %d, pid = %d\n", count, getpid());
  if(!(child = fork())) {
    int i;
    for(i = 0; i < 2; i++) {
      printf("Son, count = %d pid = %d\n", ++count, getpid());
    }
    exit(1);
  } else {
sleep(1); printf(
"Father, count = %d pid = %d child = %d\n", count, getpid(), child); } } #include "stdio.h" int main() { int count = 1; int child; printf("Father, initial count = %d, pid = %d\n", count, getpid()); if(!(child = vfork())) { int i; for(i = 0; i < 2; i++) { printf("Son, count = %d pid = %d\n", ++count, getpid()); } exit(1); } else { printf("Father, count = %d pid = %d child = %d\n", count, getpid(), child); } }

 

 

fork output:

Father, initial count = 1, pid = 4721
Father, count = 1 pid = 4721 child = 4722
Son, count = 2 pid = 4722
Son, count = 3 pid = 4722

vfork output:

Father, initial count = 1, pid = 4726
Son, count = 2 pid = 4727
Son, count = 3 pid = 4727
Father, count = 3 pid = 4726 child = 4727

After adding a sleep(1) before the father's second printf in the fork version, the result becomes:

Father, initial count = 1, pid = 4858
Son, count = 2 pid = 4859
Son, count = 3 pid = 4859
Father, count = 1 pid = 4858 child = 4859

1. With vfork the parent waits for the child to finish before continuing.

2. With vfork parent and child share the address space, so the parent's count is modified by the child.

3. With fork, even after the parent's printf is delayed, the parent still prints count = 1, showing that parent and child have independent address spaces.

 

3.2.2 clone with different flags

The flags passed to clone determine its behaviour, for example whether the address space is shared and whether it waits like vfork.

#define _GNU_SOURCE

#include "stdio.h"
#include "stdlib.h"
#include "unistd.h"
#include "sched.h"
#include "signal.h"

#define FIBER_STACK 8192
int count;
void * stack;

int do_something(void *arg){
  int i;
  for(i = 0; i < 2; i++) {
    printf("Son, pid = %d, count = %d\n", getpid(), ++count);
  }
  free(stack); //the original author was unsure whether this memory is reclaimed automatically if it is not freed before the child exits
  exit(1);
}

int main() {
  count = 1;
  stack = malloc(FIBER_STACK);//allocate a stack for the child
  if(!stack) {
    printf("The stack failed\n");
    exit(0);
  }
  printf("Father, initial count = %d, pid = %d\n", count, getpid());
  clone(&do_something, (char *)stack + FIBER_STACK, CLONE_VM|CLONE_VFORK, 0);//create the child
  printf("Father, pid = %d count = %d\n", getpid(), count);
  exit(1);
}

 

Below are the results for different flag combinations:

1. CLONE_VM|CLONE_VFORK
Parent and child share the address space, and the parent waits for the child to finish.
So 4968 continues only after 4969 has exited, and it sees count = 3.

Father, initial count = 1, pid = 4968
Son, pid = 4969, count = 2
Son, pid = 4969, count = 3
Father, pid = 4968 count = 3


2. CLONE_VM
Parent and child share the address space, but the parent does not wait; it exits immediately and the child is terminated with it, so no Son lines appear.

Father, initial count = 1, pid = 5017
Father, pid = 5017 count = 1

If a sleep(1) is added before the parent's printf, the child runs first and the parent prints count = 3, confirming that the memory is shared:

Father, initial count = 1, pid = 5065
Son, pid = 5066, count = 2
Son, pid = 5066, count = 3
Father, pid = 5065 count = 3

 
        
3. CLONE_VFORK
The address space is not shared here, but the parent still waits for the child to finish.
So the parent prints after the child, and since nothing is shared its count stays at 1.

Father, initial count = 1, pid = 4998
Son, pid = 4999, count = 2
Son, pid = 4999, count = 3
Father, pid = 4998 count = 1

4. 0

Parent and child do not share memory; the parent does not wait, and the child keeps running (and printing) after the parent has exited.

From this run alone it is not obvious whether count is shared.

Father, initial count = 1, pid = 5174
Father, pid = 5174 count = 1
Son, pid = 5175, count = 2
Son, pid = 5175, count = 3

Adding a sleep(1) before the parent's printf gives the result below.

As expected, the parent keeps its own copy of count; it is not shared with the child.

Father, initial count = 1, pid = 5257
Son, pid = 5258, count = 2
Son, pid = 5258, count = 3
Father, pid = 5257 count = 1

 

Reference: linux系統調用fork, vfork, clone

 

