select函數的原理


首先再來提一下I/O多路轉接的基本思想:先構造一張有關描述符的表,然后調用一個函數,它要到這些描述符中的一個已准備好進行 I/O時才返回。在返回時,它告訴進程哪一個描述符已准備好可以進行 I/O。

select函數的參數將告訴內核:

(1) 我們所關心的描述符。

(2) 對於每個描述符我們所關心的條件(是否讀一個給定的描述符?是否想寫一個給定的描述符?是否關心一個描述符的異常條件?)。

(3) 希望等待多長時間(可以永遠等待,等待一個固定量時間,或完全不等待)

select從內核返回后內核會告訴我們:

(1) 已准備好的描述符的數量。

(2) 哪一個描述符已准備好讀、寫或異常條件。

 

select 用於查詢設備的狀態,以便用戶程序獲知是否能對設備進行非阻塞的訪問,需要設備驅動程序中的poll 函數支持。 驅動程序中 poll 函數中最主要用到的一個 API 是 poll_wait,其原型如下:

void poll_wait(struct file *filp, wait_queue_head_t *queue, poll_table *wait);

poll_wait 函數所做的工作是把當前進程添加到 wait 參數指定的等待列表(poll_table)中。

需要說明的是,poll_wait 函數並不阻塞,程序中 poll_wait(filp, &outq, wait) 這句話的意思並不是說一直等待到 outq 這個等待隊列被喚醒為止,它只是把當前進程登記到該等待隊列上;真正的阻塞動作是在上層的 select/poll 函數中完成的。select/poll 會在一個循環中對每個需要監聽的設備調用它們自己的 poll 支持函數,以使得當前進程被加入各個設備的等待隊列。若當前沒有任何被監聽的設備就緒,則內核進行調度(調用 schedule)讓出 CPU 進入阻塞狀態,schedule 返回時將再次循環檢測是否有操作可以進行,如此反復;否則,只要有任意一個設備就緒,select/poll 就立即返回。

 

應用程序調用select() 函數,系統調用陷入內核,進入到:

SYSCALL_DEFINE5(select, ...)(即 sys_select)----> core_sys_select() ----> do_select()

 

/*
 * select() system-call entry point.
 *
 * n    - forwarded unchanged to core_sys_select(); do_select() scans
 *        descriptors 0 .. n-1, so this is an upper bound on descriptor
 *        numbers rather than a single descriptor.
 * inp, outp, exp - user-space fd_set pointers for the read / write /
 *        exceptional-condition sets, forwarded to core_sys_select().
 * tvp  - optional user-space timeout; NULL means no timeout is handed
 *        down (a NULL timespec pointer is passed to core_sys_select()).
 *
 * Returns -EFAULT if the timeval cannot be copied from user space,
 * -EINVAL if poll_select_set_timeout() rejects it, otherwise whatever
 * poll_select_copy_remaining() returns for core_sys_select()'s result.
 */
SYSCALL_DEFINE5(select, int, n, fd_set __user *, inp, fd_set __user *, outp,
                fd_set __user *, exp, struct timeval __user *, tvp)
{
        struct timespec end_time;
        struct timespec *deadline = NULL;
        struct timeval user_tv;
        int ret;

        if (tvp) {
                if (copy_from_user(&user_tv, tvp, sizeof(user_tv)))
                        return -EFAULT;

                deadline = &end_time;

                /*
                 * Fold whole seconds contained in tv_usec into the seconds
                 * argument and convert the sub-second remainder to
                 * nanoseconds before building the timeout.
                 */
                if (poll_select_set_timeout(deadline,
                                user_tv.tv_sec + (user_tv.tv_usec / USEC_PER_SEC),
                                (user_tv.tv_usec % USEC_PER_SEC) * NSEC_PER_USEC))
                        return -EINVAL;
        }

        ret = core_sys_select(n, inp, outp, exp, deadline);

        /*
         * NOTE(review): presumably writes the unused part of the timeout
         * back to *tvp and may adjust the return code - helper not shown
         * here, confirm against fs/select.c.
         */
        return poll_select_copy_remaining(&end_time, tvp, 1, ret);
}

 

在core_sys_select() 函數中調用了do_select:

 (覺得用代碼格式反而不好看)

/*
 * do_select - polling loop behind select().
 *
 * @n:        upper bound on the descriptors to scan; it is first narrowed
 *            to the value returned by max_select_fd().
 * @fds:      request bitmaps (in / out / ex) and result bitmaps
 *            (res_in / res_out / res_ex), one bit per descriptor.
 * @end_time: NULL to wait without a timeout; a value with tv_sec == 0 and
 *            tv_nsec == 0 requests a single non-blocking pass; any other
 *            value is converted to ktime_t and handed to
 *            poll_schedule_timeout().
 *
 * Every requested descriptor is polled through its file_operations->poll
 * method. If nothing is ready, the task waits in poll_schedule_timeout()
 * and the scan is repeated afterwards.
 *
 * Returns the number of (descriptor, condition) hits recorded in the result
 * bitmaps, 0 if the loop ended on timeout or a pending signal with nothing
 * ready, or a negative value taken from max_select_fd() or table.error.
 */
int do_select(int n, fd_set_bits *fds, struct timespec *end_time)
{
        ktime_t expire, *to = NULL;
        struct poll_wqueues table;
        poll_table *wait;
        int retval, i, timed_out = 0;
        unsigned long slack = 0;

        rcu_read_lock();
        retval = max_select_fd(n, fds);
        rcu_read_unlock();

        if (retval < 0)
                return retval;
        n = retval;

        /*
         * Set up the poll table. NOTE(review): the article states this
         * installs __pollwait as the callback used by poll_wait(); that is
         * not visible in this function - confirm in fs/select.c.
         */
        poll_initwait(&table);
        wait = &table.pt;

        /* Zero timeout: scan once, pass no poll table to the drivers. */
        if (end_time && !end_time->tv_sec && !end_time->tv_nsec) {
                wait = NULL;
                timed_out = 1;
        }

        if (end_time && !timed_out)
                slack = estimate_accuracy(end_time);

        retval = 0;
        for (;;) {
                unsigned long *rinp, *routp, *rexp, *inp, *outp, *exp;

                inp = fds->in; outp = fds->out; exp = fds->ex;
                rinp = fds->res_in; routp = fds->res_out; rexp = fds->res_ex;

                /* Outer loop: one bitmap word (__NFDBITS descriptors) per step. */
                for (i = 0; i < n; ++rinp, ++routp, ++rexp) {
                        unsigned long in, out, ex, all_bits, bit = 1, mask, j;
                        unsigned long res_in = 0, res_out = 0, res_ex = 0;
                        const struct file_operations *f_op = NULL;
                        struct file *file = NULL;

                        in = *inp++; out = *outp++; ex = *exp++;
                        all_bits = in | out | ex;

                        /* Nothing requested in this word: skip it entirely. */
                        if (all_bits == 0) {
                                i += __NFDBITS;
                                continue;
                        }

                        /* Inner loop: one descriptor (bit) per step. */
                        for (j = 0; j < __NFDBITS; ++j, ++i, bit <<= 1) {
                                int fput_needed;

                                if (i >= n)
                                        break;
                                if (!(bit & all_bits))
                                        continue;

                                file = fget_light(i, &fput_needed);
                                if (file) {
                                        f_op = file->f_op;
                                        mask = DEFAULT_POLLMASK;
                                        if (f_op && f_op->poll) {
                                                wait_key_set(wait, in, out, bit);
                                                /*
                                                 * Call the driver's poll method. Per the
                                                 * article, the driver calls poll_wait()
                                                 * here, which records the driver's wait
                                                 * queue head in a poll_wqueues entry and
                                                 * queues the current task on it (one entry
                                                 * per wait queue head).
                                                 */
                                                mask = (*f_op->poll)(file, wait);
                                        }
                                        fput_light(file, fput_needed);

                                        /*
                                         * Record each requested condition that is
                                         * ready. Once anything is ready, wait is
                                         * cleared so later poll calls receive NULL.
                                         */
                                        if ((mask & POLLIN_SET) && (in & bit)) {
                                                res_in |= bit;
                                                retval++;
                                                wait = NULL;
                                        }
                                        if ((mask & POLLOUT_SET) && (out & bit)) {
                                                res_out |= bit;
                                                retval++;
                                                wait = NULL;
                                        }
                                        if ((mask & POLLEX_SET) && (ex & bit)) {
                                                res_ex |= bit;
                                                retval++;
                                                wait = NULL;
                                        }
                                }
                        }

                        /* Write back only the result words that have hits. */
                        if (res_in)
                                *rinp = res_in;
                        if (res_out)
                                *routp = res_out;
                        if (res_ex)
                                *rexp = res_ex;

                        /*
                         * Scheduling point between bitmap words. The blocking
                         * wait itself is the poll_schedule_timeout() call below.
                         */
                        cond_resched();
                }

                /* After one full pass, later passes hand NULL to the poll methods. */
                wait = NULL;

                /* Leave the loop once something is ready, on timeout, or on a signal. */
                if (retval || timed_out || signal_pending(current))
                        break;
                if (table.error) {
                        retval = table.error;
                        break;
                }

                /*
                 * If this is the first loop and we have a timeout
                 * given, then we convert to ktime_t and set the to
                 * pointer to the expiry value.
                 */
                if (end_time && !to) {
                        expire = timespec_to_ktime(*end_time);
                        to = &expire;
                }

                /* A zero return is treated as timeout; one final scan follows. */
                if (!poll_schedule_timeout(&table, TASK_INTERRUPTIBLE, to, slack))
                        timed_out = 1;
        }

        /*
         * Tear down the poll table. Per the article, this removes the wait
         * queue entries added via poll_wait() and frees their resources.
         */
        poll_freewait(&table);

        return retval;
}

do_select 大概的思想就是:當應用程序調用 select() 函數時,內核會對每個被監聽的描述符調用其驅動的 poll 函數,驅動在其中調用 poll_wait(),把當前進程添加到相應設備的等待隊列上。若此時沒有任何設備就緒,內核才將該進程設置為睡眠狀態;直到某個設備上的數據可以獲取(或超時、收到信號),驅動調用 wake_up 喚醒該進程,do_select 再重新掃描一遍所有描述符並返回就緒的數量。

注:分析內核代碼離不開sourceInsight,只不過建議用英文版,我的中文版改不了字體,看起來很不方便。可以到http://kernel.org/下載源碼來放到sourceInsight的工程中。然后就是使用Linux Cross Reference 進行查詢。


免責聲明!

本站轉載的文章為個人學習借鑒使用,本站對版權不負任何法律責任。如果侵犯了您的隱私權益,請聯系本站郵箱yoyou2525@163.com刪除。



 
粵ICP備18138465號   © 2018-2025 CODEPRJ.COM