files_struct/fdtable解析


files_struct/fdtable解析

 

include/linux/fdtable.h

/*
 * Open file table structure
 *
 * As used by dup_fd(): fdt points at the fdtable currently in use, while
 * fdtab is an fdtable embedded in this structure whose bitmaps and fd array
 * are backed by the *_init members and fd_array at the end of the struct.
 */
struct files_struct {
  /*
   * read mostly part
   */
    /* dup_fd() initialises this to 1; presumably a reference count - confirm */
    atomic_t count;
    /* dup_fd() initialises this to false; presumably set while the table is resized - confirm */
    bool resize_in_progress;
    /* presumably where waiters sleep until a resize completes - confirm */
    wait_queue_head_t resize_wait;

    /* pointer to the fdtable in use; dup_fd() publishes it with rcu_assign_pointer() */
    struct fdtable __rcu *fdt;
    /* embedded fdtable, sized for NR_OPEN_DEFAULT descriptors by dup_fd() */
    struct fdtable fdtab;
  /*
   * written part on a separate cache line in SMP
   */
    /* dup_fd() holds this lock while reading the source table */
    spinlock_t file_lock ____cacheline_aligned_in_smp;
    /* dup_fd() initialises this to 0; presumably a hint for the next free fd - confirm */
    unsigned int next_fd;
    /* backing storage for fdtab.close_on_exec */
    unsigned long close_on_exec_init[1];
    /* backing storage for fdtab.open_fds */
    unsigned long open_fds_init[1];
    /* backing storage for fdtab.full_fds_bits */
    unsigned long full_fds_bits_init[1];
    /* backing storage for fdtab.fd */
    struct file __rcu * fd_array[NR_OPEN_DEFAULT];
};

 

上述files_struct中最關鍵的成員是fdt,它是一個指向struct fdtable的指針,指向當前正在使用的fd table

對於小進程,fork時如果父進程需要拷貝的fd數不超過NR_OPEN_DEFAULT,則子進程的fd table直接使用files_struct里內嵌的fdtab;如果超過NR_OPEN_DEFAULT,則不使用files_struct里內嵌的,而是調用alloc_fdtable()另行分配fd table。

然后將父進程的fd table拷貝到新fork的進程的fd table。

NR_OPEN_DEFAULT定義為BITS_PER_LONG,在64位系統上為64。對於較大的進程,fork子進程時父進程一般已經open了較多file(比如超過了64個),此時會調用alloc_fdtable()分配新的fd table,而不會使用files_struct里的default fd table;對於小進程,fork子進程時一般直接使用files_struct里的default fd table。

fs/file.c

/*
 * Allocate a new files structure and copy contents from the
 * passed in files structure.
 * errorp will be valid only when the returned files_struct is NULL.
 *
 * oldf:    source files_struct; its file_lock is taken and released here
 * max_fds: forwarded to sane_fdtable_size() when sizing the copy
 * errorp:  set to -ENOMEM or -EMFILE on the failure paths
 *
 * Returns the new files_struct with count set to 1, or NULL on failure.
 */
struct files_struct *dup_fd(struct files_struct *oldf, unsigned int max_fds, int *errorp)
{
    struct files_struct *newf;
    struct file **old_fds, **new_fds;
    unsigned int open_files, i;
    struct fdtable *old_fdt, *new_fdt;

    /* Default error; only overwritten with -EMFILE further down. */
    *errorp = -ENOMEM;
    newf = kmem_cache_alloc(files_cachep, GFP_KERNEL);
    if (!newf)
        goto out;

    atomic_set(&newf->count, 1);

    spin_lock_init(&newf->file_lock);
    newf->resize_in_progress = false;
    init_waitqueue_head(&newf->resize_wait);
    newf->next_fd = 0;
    /*
     * Start with the fdtable embedded in newf: its bitmaps and fd array
     * point at the *_init members and fd_array of newf itself, giving
     * room for NR_OPEN_DEFAULT descriptors without a separate allocation.
     */
    new_fdt = &newf->fdtab;
    new_fdt->max_fds = NR_OPEN_DEFAULT;
    new_fdt->close_on_exec = newf->close_on_exec_init;
    new_fdt->open_fds = newf->open_fds_init;
    new_fdt->full_fds_bits = newf->full_fds_bits_init;
    new_fdt->fd = &newf->fd_array[0];

    spin_lock(&oldf->file_lock);
    old_fdt = files_fdtable(oldf);
    open_files = sane_fdtable_size(old_fdt, max_fds);

    /*
     * Check whether we need to allocate a larger fd array and fd set.
     *
     * The lock is dropped around the allocation, so the required size is
     * recomputed after relocking and the loop repeats if it no longer fits.
     */
    while (unlikely(open_files > new_fdt->max_fds)) {
        spin_unlock(&oldf->file_lock);

        /* Drop a table allocated by a previous iteration; the embedded one is never freed. */
        if (new_fdt != &newf->fdtab)
            __free_fdtable(new_fdt);

        new_fdt = alloc_fdtable(open_files - 1);
        if (!new_fdt) {
            *errorp = -ENOMEM;
            goto out_release;
        }

        /* beyond sysctl_nr_open; nothing to do */
        if (unlikely(new_fdt->max_fds < open_files)) {
            __free_fdtable(new_fdt);
            *errorp = -EMFILE;
            goto out_release;
        }

        /*
         * Reacquire the oldf lock and a pointer to its fd table
         * who knows it may have a new bigger fd table. We need
         * the latest pointer.
         */
        spin_lock(&oldf->file_lock);
        old_fdt = files_fdtable(oldf);
        open_files = sane_fdtable_size(old_fdt, max_fds);
    }

    /* From here until the unlock below, oldf->file_lock is held. */
    copy_fd_bitmaps(new_fdt, old_fdt, open_files);

    old_fds = old_fdt->fd;
    new_fds = new_fdt->fd;

    /*
     * Copy the first open_files slots. i counts down, so the slot being
     * handled in each iteration has index open_files - i.
     */
    for (i = open_files; i != 0; i--) {
        struct file *f = *old_fds++;
        if (f) {
            get_file(f);
        } else {
            /*
             * The fd may be claimed in the fd bitmap but not yet
             * instantiated in the files array if a sibling thread
             * is partway through open().  So make sure that this
             * fd is available to the new process.
             */
            __clear_open_fd(open_files - i, new_fdt);
        }
        rcu_assign_pointer(*new_fds++, f);
    }
    spin_unlock(&oldf->file_lock);

    /* clear the remainder */
    /* new_fds now points just past the last copied slot. */
    memset(new_fds, 0, (new_fdt->max_fds - open_files) * sizeof(struct file *));

    /* Make the chosen table (embedded or allocated) the one newf uses. */
    rcu_assign_pointer(newf->fdt, new_fdt);

    return newf;

out_release:
    /* Any separately allocated fdtable was already freed (or never obtained) before jumping here. */
    kmem_cache_free(files_cachep, newf);
out:
    return NULL;
}

 

 

fd table里的內容

include/linux/fdtable.h

/*
 * Descriptor table: an array of struct file pointers plus the bitmaps that
 * accompany it. alloc_fdtable() allocates fd as one array and the three
 * bitmaps as a single buffer laid out as open_fds, close_on_exec,
 * full_fds_bits.
 */
struct fdtable {
    /* number of slots in fd; alloc_fdtable() sizes the fd array with this count */
    unsigned int max_fds;
    struct file __rcu **fd;      /* current fd array */
    /* bitmap of max_fds bits; presumably marks fds to close on exec - confirm */
    unsigned long *close_on_exec;
    /* bitmap of max_fds bits; dup_fd() clears a bit here for a slot with no file installed */
    unsigned long *open_fds;
    /* bitmap of BITBIT_SIZE(max_fds) bytes; presumably one bit per long of open_fds - confirm */
    unsigned long *full_fds_bits;
    /* presumably used to defer freeing of the table via RCU - confirm */
    struct rcu_head rcu;
};

 

max_fds,表示此fd table最多能容納多少個fd;

struct file的二重指針fd,這個是指向一個數組,這個數組里的元素是struct file *指針;

open_fds,long型指針,指向一個long型數組,數組中每個元素的每一個bit代表一個fd,如果這個bit為1,表示此fd已被占用(對應的文件已open);

close_on_exec,結構和open_fds一樣(同樣是每個bit對應一個fd的long型數組),但含義不同:bit為1表示該fd在exec時需要被關閉;

full_fds_bits,long型指針,指向一個long型數組,數組中的每一個bit對應open_fds里的一個元素(long型),表示該元素的所有bit是否都為1:如果都為1,這個bit置上;只要有一個不為1,這個bit將被clear;

根據alloc_fdtable(),可以看到open_fds/close_on_exec/full_fds_bits三個數組是作為一塊內存一起申請分配的,內存layout順序依次是open_fds/close_on_exec/full_fds_bits

alloc_fdtable()

fs/file.c

static struct fdtable * alloc_fdtable(unsigned int nr)
{
    struct fdtable *fdt;
    void *data;

    /*
     * Figure out how many fds we actually want to support in this fdtable.
     * Allocation steps are keyed to the size of the fdarray, since it
     * grows far faster than any of the other dynamic data. We try to fit
     * the fdarray into comfortable page-tuned chunks: starting at 1024B
     * and growing in powers of two from there on.
     */
    nr /= (1024 / sizeof(struct file *));
    nr = roundup_pow_of_two(nr + 1);
    nr *= (1024 / sizeof(struct file *));
    /*
     * Note that this can drive nr *below* what we had passed if sysctl_nr_open
     * had been set lower between the check in expand_files() and here.  Deal
     * with that in caller, it's cheaper that way.
     *
     * We make sure that nr remains a multiple of BITS_PER_LONG - otherwise
     * bitmaps handling below becomes unpleasant, to put it mildly...
     */
    if (unlikely(nr > sysctl_nr_open))
        nr = ((sysctl_nr_open - 1) | (BITS_PER_LONG - 1)) + 1;

    fdt = kmalloc(sizeof(struct fdtable), GFP_KERNEL_ACCOUNT);
    if (!fdt)
        goto out;
    fdt->max_fds = nr;
    data = kvmalloc_array(nr, sizeof(struct file *), GFP_KERNEL_ACCOUNT);
    if (!data)
        goto out_fdt;
    fdt->fd = data;

    data = kvmalloc(max_t(size_t,
                 2 * nr / BITS_PER_BYTE + BITBIT_SIZE(nr), L1_CACHE_BYTES),
                 GFP_KERNEL_ACCOUNT);
    if (!data)
        goto out_arr;
    fdt->open_fds = data;
    data += nr / BITS_PER_BYTE;
    fdt->close_on_exec = data;
    data += nr / BITS_PER_BYTE;
    fdt->full_fds_bits = data;

    return fdt;

out_arr:
    kvfree(fdt->fd);
out_fdt:
    kfree(fdt);
out:
    return NULL;
}

 

重點看下上述kvmalloc(),這個分配的大小是2 * nr / BITS_PER_BYTE + BITBIT_SIZE(nr),nr表示此fd table要容納多少個fd,nr/BITS_PER_BYTE,一個bit表示一個fd,所以這個表示容納nr個fd需要多少個byte;

*2是因為有open_fds和close_on_exec兩個大小一樣的數組;

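BITBIT_SIZE(nr),這個宏定義如下。假設nr為65*64,則BITS_TO_LONGS(65*64)為65(即open_fds需要65個long),BITS_TO_LONGS(65)為2,所以BITBIT_SIZE(65*64)的結果為2*sizeof(long),在64位系統上即2*8字節,也就是兩個long型;這兩個long型中的每個bit對應open_fds里的一個元素(long型),bit為1表示該元素的所有bit均為1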

/*
 * Number of longs needed for a bitmap that holds one bit per long of an
 * nr-bit bitmap (assuming BITS_TO_LONGS(n) yields the number of longs
 * needed to hold n bits).
 */
#define BITBIT_NR(nr)    BITS_TO_LONGS(BITS_TO_LONGS(nr))
/* Size in bytes of that second-level bitmap. */
#define BITBIT_SIZE(nr)    (BITBIT_NR(nr) * sizeof(long))

 


免責聲明!

本站轉載的文章為個人學習借鑒使用,本站對版權不負任何法律責任。如果侵犯了您的隱私權益,請聯系本站郵箱yoyou2525@163.com刪除。



 
粵ICP備18138465號   © 2018-2025 CODEPRJ.COM