SPDK線程模型


(二)reactor框架: 上層APP協議,與reactor框架的交互
(1.1)SPDK的主線程
SPDK(APP)在啟動時可以指定綁定在哪些core上運行,這樣在每個core上會創建一個線程(它叫reactor)。這個線程不停地做polling操作;如果你要在這個線程上做事情,則需要注冊poller(可以理解為一個poller就是SPDK中一件事情的thread入口函數),這個線程就不停地調用poller的回調函數(即下文的 spdk_poller->fn)來執行你要執行的動作。


spdk_reactors_start 
   reactor_run
     //SPDK中斷模式:略
     reactor_interrupt_run 
     
     //SPDK輪詢模式:主循環
     _reactor_run
         //主循環內:Step-1
         event_queue_run_batch
              spdk_ring_dequeue
              spdk_event->fn
         //主循環內:Step-2   
         spdk_thread_poll
              thread_poll   
                  //thread_poll內:Step1
                  msg_queue_run_batch
                       spdk_ring_dequeue
                       spdk_msg->fn

                  //thread_poll內:Step2     
                  thread_execute_poller
                       spdk_poller->fn
                       // 分為 vhost_blk / vhost_scsi 兩種
                       vdev_worker
                          process_vq  
                             vhost_vq_avail_ring_get
                             // 分支A:vhost_blk
                             process_blk_task
                                  vhost_user_process_blk_request
                                     virtio_blk_process_request
                                         spdk_bdev_readv
                                         spdk_bdev_writev
                                         blk_request_finish
                             // 分支B:vhost_scsi
                             process_scsi_task
                               task_submit
                                 spdk_scsi_dev_queue_task
                                    scsi_lun_execute_task
                                       bdev_scsi_execute
                                          bdev_scsi_process_block
                                             bdev_scsi_readwrite

                  //thread_poll內:Step3                            
                  thread_execute_timed_poller
                       spdk_poller->fn
 
          //主循環內:Step-3   
          reactor_post_process_lw_thread  
            
                _reactor_schedule_thread
                    spdk_event_allocate
                    spdk_event_call

(三)spdk-bdev框架:與後端設備的交互

(3.1)spdk-bdev框架對上層服務提供的讀寫API接口

讀接口:spdk_bdev_read / spdk_bdev_readv 
寫接口:spdk_bdev_write / spdk_bdev_writev
 [bdev_io_submit]
    _bdev_io_submit
      bdev_io_do_submit
        spdk_bdev->fn_table->submit_request
        <A> bdev_nvme_submit_request
              bdev_nvme_readv
                spdk_nvme_ns_cmd_read_with_md
                  nvme_qpair_submit_request
                      nvme_transport_qpair_submit_request
                         spdk_nvme_transport->ops.qpair_submit_request
                          <A-1> nvme_pcie_qpair_submit_request
                          <A-2> nvme_rdma_qpair_submit_request
                          <A-3> nvme_tcp_qpair_submit_request
        <B> bdev_pmem_submit_request
        <C> bdev_virtio_submit_request
        <D> bdev_iscsi_submit_request
        <E> bdev_aio_submit_request

(3.2)spdk-bdev的初始化
bdev子系統的初始化函數:bdev_XXX_initialize

spdk_subsystem->init
bdev_subsystem_initialize
  spdk_bdev_initialize
    bdev_modules_init
      spdk_bdev_module->module_init
        bdev_nvme_library_init
        bdev_virtio_initialize
        bdev_iscsi_initialize
        bdev_aio_initialize
        bdev_pmem_initialize
        bdev_uring_init

(3.3)spdk-bdev的注冊函數
spdk_bdev_fn_table的注冊函數 XXX_fn_table

spdk_bdev_fn_table之nvme設備

/*
 * Operation table for the NVMe bdev module: binds the spdk_bdev_fn_table
 * slots shown here to their bdev_nvme_* implementations.
 */
static const struct spdk_bdev_fn_table nvmelib_fn_table = {
	.get_io_channel    = bdev_nvme_get_io_channel,
	.io_type_supported = bdev_nvme_io_type_supported,
	.submit_request    = bdev_nvme_submit_request,
	.write_config_json = bdev_nvme_write_config_json,
};

spdk_bdev_fn_table之pmem設備

/*
 * Operation table for the pmem bdev module: binds the spdk_bdev_fn_table
 * slots shown here to their bdev_pmem_* implementations.
 */
static const struct spdk_bdev_fn_table pmem_fn_table = {
	.get_io_channel    = bdev_pmem_get_io_channel,
	.io_type_supported = bdev_pmem_io_type_supported,
	.submit_request    = bdev_pmem_submit_request,
	.write_config_json = bdev_pmem_write_config_json,
};

spdk_bdev_fn_table之virtio設備

/*
 * Operation table for the virtio bdev module: binds the spdk_bdev_fn_table
 * slots shown here to their bdev_virtio_* implementations.
 */
static const struct spdk_bdev_fn_table virtio_fn_table = {
	.get_io_channel    = bdev_virtio_get_io_channel,
	.io_type_supported = bdev_virtio_io_type_supported,
	.submit_request    = bdev_virtio_submit_request,
	.write_config_json = bdev_virtio_write_config_json,
};

(三)bdev框架中-nvme設備的實現

(3.1)NVME的初始化:
(a)bdev-nvme的子系統初始化:

# spdk_bdev_module->module_init
bdev_nvme_library_init
  bdev_nvme_create_poll_group_cb
    spdk_nvme_poll_group_create
    SPDK_POLLER_REGISTER(bdev_nvme_poll) 
        # 方向:從nvme設備取出應答數據,回復給bdev
        spdk_nvme_poll_group_process_completions
          nvme_transport_poll_group_process_completions
            spdk_nvme_transport_poll_group->transport->ops.poll_group_process_completions
            <A> nvme_pcie_poll_group_process_completions
            <B> nvme_rdma_poll_group_process_completions
            <C> nvme_tcp_poll_group_process_completions

(a)bdev-nvme的attach

rpc_bdev_nvme_attach_controller
  bdev_nvme_create
     spdk_nvme_connect_async
         nvme_probe_internal
             nvme_transport_ctrlr_scan
             spdk_nvme_probe_ctx->attach_cb 
             connect_attach_cb
                 nvme_ctrlr_create
                    SPDK_POLLER_REGISTER(bdev_nvme_poll_adminq)
     SPDK_POLLER_REGISTER(bdev_nvme_async_poll)  

(b)bdev-nvme的probe

spdk_nvme_probe
> spdk_nvme_probe_async
>> nvme_probe_internal
>>> nvme_transport_ctrlr_scan
>>>> spdk_nvme_transport->ops.ctrlr_scan
>>>> nvme_pcie_ctrlr_scan
>>>> nvme_fabric_ctrlr_scan

NVMe-over-Fabrics(例如RDMA)掃描發現

nvme_fabric_ctrlr_scan
  nvme_ctrlr_cmd_identify
     nvme_ctrlr_submit_admin_request
        nvme_qpair_submit_request        
  nvme_fabric_ctrlr_discover
     nvme_fabric_discover_probe
        nvme_ctrlr_probe
           spdk_nvme_probe_ctx->attach_cb
           nvme_transport_ctrlr_construct

(c)然後是給這個NVMe盤創建一個io_qpair
也就是給這個controller創建一個IO qpair(admin qpair在創建controller的時候就已經創建了),
即創建submission-queue和completion-queue(一般是一個sq對應一個cq)

spdk_nvme_ctrlr_alloc_io_qpair
  nvme_ctrlr_create_io_qpair
    spdk_nvme_ctrlr_connect_io_qpair
      nvme_transport_ctrlr_connect_qpair
        nvme_poll_group_connect_qpair
          nvme_transport_poll_group_connect_qpair
            spdk_nvme_transport_poll_group->transport->ops.poll_group_connect_qpair
            <A> nvme_pcie_poll_group_connect_qpair
            <B> nvme_rdma_poll_group_connect_qpair
            <C> nvme_tcp_poll_group_connect_qpair

(3.2)bdev框架對NVME盤的具體實現

bdev_io_do_submit
  spdk_bdev->fn_table->submit_request
  bdev_nvme_submit_request
    bdev_nvme_readv
      spdk_nvme_ns_cmd_read_with_md
        nvme_qpair_submit_request
           nvme_transport_qpair_submit_request
              spdk_nvme_transport->ops.qpair_submit_request
              <A> nvme_pcie_qpair_submit_request
              <B> nvme_rdma_qpair_submit_request
              <C> nvme_tcp_qpair_submit_request

      bdev_nvme_readv_done
        bdev_nvme_io_complete_nvme_status
          spdk_bdev_io_complete_nvme_status    
            spdk_bdev_io_complete
              bdev_io_complete
                spdk_bdev_io->internal.cb
                // spdk_bdev_io_completion_cb

(3.3)讀寫請求-PCIe接口

nvme_transport_qpair_submit_request
   spdk_nvme_transport->ops.qpair_submit_request
   nvme_pcie_qpair_submit_request
       nvme_pcie_qpair_build_metadata
       nvme_pcie_qpair_submit_tracker
           // Copy the command from the submit-tracker to submission-queue
           nvme_pcie_copy_command
           nvme_pcie_qpair_ring_sq_doorbell

(3.4)讀寫請求-RDMA接口

nvme_transport_qpair_submit_request
   spdk_nvme_transport->ops.qpair_submit_request
   nvme_rdma_qpair_submit_request
       nvme_rdma_req_init
           nvme_rdma_build_sgl_request
       nvme_rdma_qpair_queue_send_wr
           spdk_rdma_qp_queue_send_wrs
               ibv_wr_send
               ibv_wr_send_inv
               ibv_wr_rdma_read
               ibv_wr_rdma_write
               ibv_wr_set_sge_list
           nvme_rdma_qpair_submit_sends
               spdk_rdma_qp_flush_send_wrs
                   ibv_wr_complete

(6)代碼解析:HotPlug

rpc_bdev_nvme_set_hotplug
  bdev_nvme_set_hotplug
    set_nvme_hotplug_period_cb 
      bdev_nvme_hotplug 
        spdk_nvme_probe_async
          nvme_probe_internal
            nvme_transport_ctrlr_scan
  rpc_bdev_nvme_set_hotplug_done

(四)bdev框架中-virtio設備的實現

(4.4)bdev框架下virtio的實現

rpc_bdev_virtio_attach_controller
   <A> bdev_virtio_pci_blk_dev_create
   <B> bdev_virtio_pci_scsi_dev_create

virtio_blk的實現

rpc_bdev_virtio_attach_controller
   bdev_virtio_pci_blk_dev_create
     bdev_virtio_pci_blk_dev_create_cb
       virtio_pci_blk_dev_create
         virtio_pci_dev_init
         virtio_blk_dev_init
           virtio_dev_start
           bdev_virtio_blk_ch_create_cb
              <注冊> bdev_virtio_poll

virtio_scsi的實現

rpc_bdev_virtio_attach_controller
   bdev_virtio_pci_scsi_dev_create
     bdev_virtio_pci_scsi_dev_create_cb
       virtio_pci_scsi_dev_create
          virtio_pci_dev_init
          virtio_scsi_dev_init
             virtio_dev_start
             bdev_virtio_scsi_ch_create_cb
               <注冊>bdev_virtio_poll

(3)讀寫請求
一個讀寫請求,會被發到qpair的submission queue中。這個submission queue是一個環形隊列,放入請求後這個環形隊列的tail指針++,然後host再把這個tail位置通過門鈴寄存器(每個qpair有自己的門鈴寄存器)告訴下面的盤(controller)。此時controller根據門鈴寄存器記錄的環形隊列的尾巴,知道數據最多可以取到哪裡。環形隊列的head指針controller也是可以知道的。
controller每取走處理一個請求,這個head指針++。這個head位置是由controller來維護的,host則可以通過completion queue的entry中記錄的值得知這個位置。
每處理完一個請求,controller會產生一個完成請求entry,寫入completion queue的tail位置,然後通過中斷告訴host可以取完成請求了(在SPDK的輪詢模式下不依賴中斷,而是由poller主動檢查completion queue)。
主機host通過head位置訪問completion queue的那個entry,取走之後再更新completion queue的head位置。

(6)代碼解析:NVMe-oF Target專題
nvmf的上層模型

spdk_thread_poll
  thread_execute_poller
    nvmf_poll_group_poll
      nvmf_transport_poll_group_poll
        spdk_nvmf_transport_poll_group->transport->ops->poll_group_poll
          <A> nvmf_rdma_poll_group_poll
          <B> nvmf_tcp_poll_group_poll
          <C> nvmf_vfio_user_poll_group_poll

nvmf的rdma實現

nvmf_transport_poll_group_poll
  spdk_nvmf_transport_poll_group->transport->ops->poll_group_poll
  nvmf_rdma_poll_group_poll
    nvmf_rdma_poller_poll
      nvmf_rdma_qpair_process_pending
        nvmf_rdma_request_process
          spdk_nvmf_request_exec

nvmf的tcp實現

nvmf_transport_poll_group_poll
  spdk_nvmf_transport_poll_group->transport->ops->poll_group_poll
  nvmf_tcp_poll_group_poll
    spdk_sock_group_poll
      spdk_sock_group_poll_count
        sock_group_impl_poll_count
           spdk_sock_group_impl->net_impl->group_impl_poll
           uring_sock_group_impl_poll
              io_uring_submit

nvmf的vfio-user實現

nvmf_transport_poll_group_poll
  spdk_nvmf_transport_poll_group->transport->ops->poll_group_poll
  nvmf_vfio_user_poll_group_poll
    handle_sq_tdbl_write
      consume_cmd
        handle_cmd_req
          spdk_nvmf_request_exec
            nvmf_ctrlr_process_io_cmd
              nvmf_bdev_ctrlr_read_cmd
                 spdk_bdev_readv_blocks
                   bdev_readv_blocks_with_md
                     bdev_io_submit
              nvmf_bdev_ctrlr_write_cmd
                 spdk_bdev_writev_blocks
                   bdev_writev_blocks_with_md
                     bdev_io_submit
                    

(6)代碼解析:blob專題
blob的運行環境

blobfs => blob => bdev
rocksdb => blob => bdev

blob向上提供的讀接口:

spdk_file_read
  spdk_file->fs->send_request
    spdk_blob_io_read
      blob_request_submit_op
        blob_request_submit_op_single
          bs_batch_read_dev
            blob_bdev->bs_dev.read
              bdev_blob_read
                 spdk_bdev_read_blocks

blob向上提供的寫接口:

spdk_file_write
  spdk_file->fs->send_request
    spdk_blob_io_write 
      blob_request_submit_op
        blob_request_submit_op_single
          bs_batch_write_dev
            blob_bdev->bs_dev.write
            bdev_blob_write
              spdk_bdev_write_blocks


免責聲明!

本站轉載的文章為個人學習借鑒使用,本站對版權不負任何法律責任。如果侵犯了您的隱私權益,請聯系本站郵箱yoyou2525@163.com刪除。



 
粵ICP備18138465號   © 2018-2025 CODEPRJ.COM