The request is submitted to the I/O scheduling layer through generic_make_request(), which ultimately calls q->make_request_fn(q, bio); that call is therefore the entry point of the I/O scheduling layer.
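For context, the core of generic_make_request() in kernels of this generation boils down to roughly the loop below (a condensed sketch; the generic_make_request_checks() sanity checks are omitted). It shows how q->make_request_fn is reached, and how current->bio_list turns recursive submissions from stacking drivers into iteration:

void generic_make_request(struct bio *bio)
{
    struct bio_list bio_list_on_stack;

    /* A recursive submission (e.g. from a stacking driver) is just
     * queued on the on-stack list of the outermost caller. */
    if (current->bio_list) {
        bio_list_add(current->bio_list, bio);
        return;
    }

    bio_list_init(&bio_list_on_stack);
    current->bio_list = &bio_list_on_stack;
    do {
        struct request_queue *q = bdev_get_queue(bio->bi_bdev);

        q->make_request_fn(q, bio);    /* the I/O scheduling layer entry point */

        bio = bio_list_pop(current->bio_list);
    } while (bio);
    current->bio_list = NULL;
}

With that in mind, the first question is where make_request_fn gets assigned.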
void blk_queue_make_request(struct request_queue *q, make_request_fn *mfn)
{
    /*
     * set defaults
     */
    q->nr_requests = BLKDEV_MAX_RQ;        /* maximum number of requests: 128 */

    q->make_request_fn = mfn;              /* function that processes the request described by a bio */
    blk_queue_dma_alignment(q, 511);       /* tell the kernel the memory alignment constraint for DMA transfers on this device */
    blk_queue_congestion_threshold(q);     /* mainly flow control: set the congestion thresholds */
    q->nr_batching = BLK_BATCH_REQ;

    blk_set_default_limits(&q->limits);    /* I/O on a block device is constrained by its queue limits
                                            * (e.g. the maximum number of sectors per request); they can
                                            * be inspected under /sys/block/<dev>/queue/ and are given
                                            * default values here at initialization */

    /*
     * by default assume old behaviour and bounce for any highmem page
     */
    blk_queue_bounce_limit(q, BLK_BOUNCE_HIGH);    /* BLK_BOUNCE_HIGH: use bounce buffers for highmem pages;
                                                    * this tells the kernel the highest physical address
                                                    * usable for DMA on this device */
}
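Drivers rarely keep these defaults. After setting up the queue, a driver typically overrides the limits with helpers such as the following (a hedged sketch; the concrete values are illustrative, not taken from any particular driver):

    /* Illustrative values only: tune the queue limits that
     * blk_set_default_limits() initialized above. */
    blk_queue_logical_block_size(q, 512);      /* sector size the device speaks */
    blk_queue_max_hw_sectors(q, 1024);         /* largest request, in 512-byte sectors */
    blk_queue_bounce_limit(q, BLK_BOUNCE_ANY); /* device can DMA to any address,
                                                * so no bounce buffers are needed */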
As seen above, this function sets up the request queue's parameters: the maximum number of requests, DMA alignment, the I/O queue limits, and the request handling function. Next we peel back the layers to find which function actually processes our requests, and what algorithm is used to handle the request queue.
struct request_queue *
blk_init_allocated_queue(struct request_queue *q, request_fn_proc *rfn,
                         spinlock_t *lock)
{
    if (!q)
        return NULL;

    q->fq = blk_alloc_flush_queue(q, NUMA_NO_NODE, 0);    /* allocate the blk_flush_queue */
    if (!q->fq)
        return NULL;

    if (blk_init_rl(&q->root_rl, q, GFP_KERNEL))          /* initialize the request_list */
        goto fail;

    q->request_fn  = rfn;    /* request handler, invoked when the kernel wants the driver to act */
    q->prep_rq_fn  = NULL;
    q->unprep_rq_fn = NULL;
    q->queue_flags |= QUEUE_FLAG_DEFAULT;

    /* Override internal queue lock with supplied lock pointer */
    if (lock)
        q->queue_lock = lock;

    /*
     * This also sets hw/phys segments, boundary and size
     */
    blk_queue_make_request(q, blk_queue_bio);    /* set the function that processes bio-described requests */

    q->sg_reserved_size = INT_MAX;

    /* Protect q->elevator from elevator_change */
    mutex_lock(&q->sysfs_lock);

    /* init elevator */
    if (elevator_init(q, NULL)) {    /* initialize the scheduling algorithm */
        mutex_unlock(&q->sysfs_lock);
        goto fail;
    }

    mutex_unlock(&q->sysfs_lock);

    return q;

fail:
    blk_free_flush_queue(q->fq);
    return NULL;
}
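To see where blk_init_allocated_queue() sits in practice: a legacy single-queue driver usually reaches it through blk_init_queue(), which allocates a queue and then calls the function above. A minimal sketch under that assumption (the mydev_* names are hypothetical):

#include <linux/blkdev.h>
#include <linux/spinlock.h>

static DEFINE_SPINLOCK(mydev_lock);    /* becomes q->queue_lock via the 'lock' argument */

/* q->request_fn: the kernel calls this when it wants the driver
 * to start working on dispatched requests. */
static void mydev_request_fn(struct request_queue *q)
{
    struct request *req;

    while ((req = blk_fetch_request(q)) != NULL) {
        /* ... program the hardware from req here ... */
        __blk_end_request_all(req, 0);    /* complete the whole request, no error */
    }
}

static int mydev_init_queue(struct gendisk *disk)
{
    /* blk_init_queue() = allocate a queue + blk_init_allocated_queue() */
    struct request_queue *q = blk_init_queue(mydev_request_fn, &mydev_lock);

    if (!q)
        return -ENOMEM;
    disk->queue = q;
    return 0;
}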
If the device being accessed is a block device with a request queue, the kernel calls blk_queue_bio() to merge and schedule the bio.
static void blk_queue_bio(struct request_queue *q, struct bio *bio)
{
    const bool sync = !!(bio->bi_rw & REQ_SYNC);
    struct blk_plug *plug;
    int el_ret, rw_flags, where = ELEVATOR_INSERT_SORT;
    struct request *req;
    unsigned int request_count = 0;

    /*
     * low level driver can indicate that it wants pages above a
     * certain limit bounced to low memory (ie for highmem, or even
     * ISA dma in theory)
     */
    blk_queue_bounce(q, &bio);    /* set up a bounce buffer in case the pages are unsuitable for this I/O */

    if (bio_integrity_enabled(bio) && bio_integrity_prep(bio)) {    /* data integrity check */
        bio_endio(bio, -EIO);
        return;
    }

    if (bio->bi_rw & (REQ_FLUSH | REQ_FUA)) {
        spin_lock_irq(q->queue_lock);
        where = ELEVATOR_INSERT_FLUSH;
        goto get_rq;
    }

    /*
     * Check if we can merge with the plugged list before grabbing
     * any locks.
     */
    if (!blk_queue_nomerges(q) &&                          /* merging is allowed on this queue */
        blk_attempt_plug_merge(q, bio, &request_count))    /* try to merge the bio into the current plugged list */
        return;

    spin_lock_irq(q->queue_lock);

    el_ret = elv_merge(q, &req, bio);    /* core function: find a request the bio can be front- or back-merged into */
    if (el_ret == ELEVATOR_BACK_MERGE) {    /* perform a back merge */
        if (bio_attempt_back_merge(q, req, bio)) {
            elv_bio_merged(q, req, bio);
            if (!attempt_back_merge(q, req))
                elv_merged_request(q, req, el_ret);
            goto out_unlock;
        }
    } else if (el_ret == ELEVATOR_FRONT_MERGE) {    /* perform a front merge */
        if (bio_attempt_front_merge(q, req, bio)) {
            elv_bio_merged(q, req, bio);
            if (!attempt_front_merge(q, req))
                elv_merged_request(q, req, el_ret);
            goto out_unlock;
        }
    }

    /* no suitable request was found to merge with */
get_rq:
    /*
     * This sync check and mask will be re-done in init_request_from_bio(),
     * but we need to set it earlier to expose the sync flag to the
     * rq allocator and io schedulers.
     */
    rw_flags = bio_data_dir(bio);
    if (sync)
        rw_flags |= REQ_SYNC;

    /*
     * Grab a free request. This is might sleep but can not fail.
     * Returns with the queue unlocked.
     */
    req = get_request(q, rw_flags, bio, GFP_NOIO);    /* get an empty request */
    if (IS_ERR(req)) {
        bio_endio(bio, PTR_ERR(req));    /* @q is dead */
        goto out_unlock;
    }

    /*
     * After dropping the lock and possibly sleeping here, our request
     * may now be mergeable after it had proven unmergeable (above).
     * We don't worry about that case for efficiency. It won't happen
     * often, and the elevators are able to handle it.
     */
    init_request_from_bio(req, bio);    /* initialize the request from the bio */

    if (test_bit(QUEUE_FLAG_SAME_COMP, &q->queue_flags))
        req->cpu = raw_smp_processor_id();

    plug = current->plug;
    if (plug) {
        /*
         * If this is the first request added after a plug, fire
         * of a plug trace.
         */
        if (!request_count)
            trace_block_plug(q);
        else {
            if (request_count >= BLK_MAX_REQUEST_COUNT) {
                blk_flush_plug_list(plug, false);    /* plug list hit its limit: unplug it */
                trace_block_plug(q);
            }
        }
        list_add_tail(&req->queuelist, &plug->list);    /* add the request to the plug list */
        blk_account_io_start(req, true);
    } else {
        spin_lock_irq(q->queue_lock);
        add_acct_request(q, req, where);
        __blk_run_queue(q);
out_unlock:
        spin_unlock_irq(q->queue_lock);
    }
}
blk_queue_bio() does three main things:
1) attempt a back merge of the bio into an existing request
2) attempt a front merge of the bio into an existing request
3) if the bio cannot be merged, create a new request for it and schedule that request
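Step 3 also shows the plugging logic: whether the freshly created request is parked on plug->list or dispatched immediately depends on whether the submitting task holds an active plug. A minimal sketch of how a submitter sets one up (mydev_submit_batch is a hypothetical name; in kernels of this generation submit_bio() takes the read/write flag as a separate argument):

#include <linux/blkdev.h>
#include <linux/bio.h>

/* Sketch: batching submissions under a per-task plug, so that
 * blk_queue_bio() above sees current->plug != NULL. */
void mydev_submit_batch(struct bio *bio1, struct bio *bio2)
{
    struct blk_plug plug;

    blk_start_plug(&plug);     /* current->plug = &plug */
    submit_bio(WRITE, bio1);   /* requests accumulate on plug->list */
    submit_bio(WRITE, bio2);
    blk_finish_plug(&plug);    /* unplug: flush plug->list to the queue */
}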
During bio merging, the key function is elv_merge(). Its main job is to decide whether the bio can be back-merged or front-merged into an existing request.
int elv_merge(struct request_queue *q, struct request **req, struct bio *bio)
{
    struct elevator_queue *e = q->elevator;
    struct request *__rq;
    int ret;

    /*
     * Levels of merges:
     *     nomerges:  No merges at all attempted
     *     noxmerges: Only simple one-hit cache try
     *     merges:    All merge tries attempted
     */
    if (blk_queue_nomerges(q))    /* the queue forbids merging: report NO_MERGE */
        return ELEVATOR_NO_MERGE;

    /*
     * First try one-hit cache.
     */
    /* last_merge points to the request most recently and successfully merged into */
    if (q->last_merge && elv_rq_merge_ok(q->last_merge, bio)) {
        ret = blk_try_merge(q->last_merge, bio);
        if (ret != ELEVATOR_NO_MERGE) {
            *req = q->last_merge;
            return ret;
        }
    }

    if (blk_queue_noxmerges(q))
        return ELEVATOR_NO_MERGE;

    /*
     * See if our hash lookup can find a potential backmerge.
     */
    __rq = elv_rqhash_find(q, bio->bi_iter.bi_sector);    /* use the bio's start sector to look up, in the
                                                           * request hash, a request whose tail the bio can
                                                           * be appended to */
    if (__rq && elv_rq_merge_ok(__rq, bio)) {
        *req = __rq;
        return ELEVATOR_BACK_MERGE;
    }

    /* failing all of the above, ask the I/O scheduler's elevator_merge_fn to find a suitable request */
    if (e->type->ops.elevator_merge_fn)
        return e->type->ops.elevator_merge_fn(q, req, bio);

    return ELEVATOR_NO_MERGE;
}
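The sector arithmetic behind the back/front decision lives in blk_try_merge(); in the block layer of this era it essentially boils down to the following:

int blk_try_merge(struct request *rq, struct bio *bio)
{
    /* bio starts exactly where rq ends: append it (back merge) */
    if (blk_rq_pos(rq) + blk_rq_sectors(rq) == bio->bi_iter.bi_sector)
        return ELEVATOR_BACK_MERGE;
    /* bio ends exactly where rq starts: prepend it (front merge) */
    else if (blk_rq_pos(rq) - bio_sectors(bio) == bio->bi_iter.bi_sector)
        return ELEVATOR_FRONT_MERGE;
    return ELEVATOR_NO_MERGE;
}

For example, if rq covers sectors [100, 108), a bio starting at sector 108 is a back merge, while a bio covering [92, 100) is a front merge.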
elevator_merge_fn is specific to each I/O scheduler and involves the scheduling algorithm itself, which is left for the next chapter. Through elv_merge() we learn whether the bio is to be front-merged or back-merged and control flows into the corresponding handler; let us look at the two handlers in turn.
1. elv_bio_merged
void elv_bio_merged(struct request_queue *q, struct request *rq,
                    struct bio *bio)
{
    struct elevator_queue *e = q->elevator;

    if (e->type->ops.elevator_bio_merged_fn)
        e->type->ops.elevator_bio_merged_fn(q, rq, bio);    /* scheduler hook; only the CFQ scheduler provides it */
}
2. elv_merged_request
void elv_merged_request(struct request_queue *q, struct request *rq, int type)
{
    struct elevator_queue *e = q->elevator;

    if (e->type->ops.elevator_merged_fn)
        e->type->ops.elevator_merged_fn(q, rq, type);    /* call the scheduler's merged callback */

    if (type == ELEVATOR_BACK_MERGE)
        elv_rqhash_reposition(q, rq);

    q->last_merge = rq;
}
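The elv_rqhash_reposition() call is needed because the hash that elv_rqhash_find() searches is keyed on a request's end sector (rq_hash_key(rq) = blk_rq_pos(rq) + blk_rq_sectors(rq)); a back merge moves that end sector, so the request must be rehashed. The helper itself is simply:

void elv_rqhash_reposition(struct request_queue *q, struct request *rq)
{
    __elv_rqhash_del(rq);      /* remove under the old end-sector key */
    elv_rqhash_add(q, rq);     /* re-insert keyed on the new end sector */
}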
As we can see, both merging and dispatching rely on callbacks into the scheduling algorithm. The next chapter looks at which scheduling algorithms the kernel supports and the strengths and weaknesses of each.
