Linux下SIGSTOP的特殊特征和實現

本文轉載自查看原文 2019-03-06 20:33 606 163博客部分遷移

一、問題的引出

在多線程用戶態程序中，為了更加準確、詳細地從一個線程觀察另一個線程的行為，有時需要讓目標線程暫時安靜下來，以便於觀測和監控。要做到這一點，首先想到的當然是向該線程發送一個SIGSTOP信號（注意，不是發給進程，而是通過內核的tkill系統調用，或者說pthread_kill，發給單個線程），讓線程處於STOP狀態，之後再通過SIGCONT讓線程繼續運行，這是最為簡單而環保的方法。但是實際測試時會發現，這個信號即使只發給內核中的單個線程，也會造成整個線程組中的所有線程被停止，這就是一個比較奇怪的現象了。

二、內核對tkill的處理

linux-2.6.37.1\kernel\signal.c

SYSCALL_DEFINE2(tkill, pid_t, pid, int, sig)
{
/* This is only valid for single tasks */
if (pid <= 0)
return -EINVAL;

return do_tkill(0, pid, sig);
}

-->>>do_tkill--->>>do_send_sig_info(sig, info, p, false)---->>>send_signal--->>>__send_signal

pending = group ? &t->signal->shared_pending : &t->pending;

從代碼上看，tkill發出的信號被放入了線程私有的pending信號隊列，所以到這里為止，看起來仍然應該只有目標線程會接收這個信號。

三、停止線程組代碼實現

do_signal--->>>get_signal_to_deliver

  signr = tracehook_get_signal(current, regs, info, return_ka);
  if (unlikely(signr < 0))
   goto relock;
  if (unlikely(signr != 0))
   ka = return_ka;
  else {
   if (unlikely(signal->group_stop_count > 0) &&
       do_signal_stop(0))結合後面的說明，如果說線程正處在一個線程組停止狀態並且還有未處於stop狀態的線程，則執行do_signal_stop自行停止調度。
    goto relock;

signr = dequeue_signal(current, &current->blocked,
info);

if (!signr)
break; /* will return 0 */

   if (signr != SIGKILL) {
    signr = ptrace_signal(signr, info,
            regs, cookie);
    if (!signr)
     continue;
   }
………………

內核定義的停止信號

#define SIG_KERNEL_STOP_MASK (\
rt_sigmask(SIGSTOP) | rt_sigmask(SIGTSTP) | \
rt_sigmask(SIGTTIN) | rt_sigmask(SIGTTOU) )

if (sig_kernel_stop(signr)) {所以SIGSTOP信號將會走入該流程。
   /*
    * The default action is to stop all threads in
    * the thread group. The job control signals
    * do nothing in an orphaned pgrp, but SIGSTOP
    * always works. Note that siglock needs to be
    * dropped during the call to is_orphaned_pgrp()
    * because of lock ordering with tasklist_lock.
    * This allows an intervening SIGCONT to be posted.
    * We need to check for that and bail out if necessary.
    */
   if (signr != SIGSTOP) {這里也說明了很多TTY操作，如SIGTTIN等也會對線程組產生影響。
    spin_unlock_irq(&sighand->siglock);

/* signals can be posted during this window */

if (is_current_pgrp_orphaned())
goto relock;

spin_lock_irq(&sighand->siglock);
}

   if (likely(do_signal_stop(info->si_signo))) {
    /* It released the siglock. */
    goto relock;
   }

   /*
    * We didn't actually stop, due to a race
    * with SIGCONT or something like that.
    */
   continue;
  }

下面是do_signal_stop的代碼，這個是對於這個特征的核心代碼
/*
* This performs the stopping for SIGSTOP and other stop signals.
* We have to stop all threads in the thread group.
* Returns nonzero if we've actually stopped and released the siglock.
* Returns zero if we didn't stop and still hold the siglock.
*/
static int do_signal_stop(int signr)
{
struct signal_struct *sig = current->signal;
int notify;

if (!sig->group_stop_count) {如果說group_stop_count為零，則說明線程組STOP還沒有啟動，所以在下面的指令中要把這個值設置為需要被STOP的線程的數目。
struct task_struct *t;

  if (!likely(sig->flags & SIGNAL_STOP_DEQUEUED) ||
      unlikely(signal_group_exit(sig)))
   return 0;
  /*
   * There is no group stop already in progress.
   * We must initiate one now.
   */
  sig->group_exit_code = signr;

  sig->group_stop_count = 1;
  for (t = next_thread(current); t != current; t = next_thread(t))
   /*
    * Setting state to TASK_STOPPED for a group
    * stop is always done with the siglock held,
    * so this check has no races.
    */
   if (!(t->flags & PF_EXITING) &&
       !task_is_stopped_or_traced(t)) {
    sig->group_stop_count++;遍歷線程組中所有線程，對每一個尚未被處理的線程在group_stop_count中加一。
    signal_wake_up(t, 0);
   }
}
/*
* If there are no other threads in the group, or if there is
* a group stop in progress and we are the last to stop, report
* to the parent. When ptraced, every thread reports itself.
*/
notify = sig->group_stop_count == 1 ? CLD_STOPPED : 0;
notify = tracehook_notify_jctl(notify, CLD_STOPPED);
/*
* tracehook_notify_jctl() can drop and reacquire siglock, so
* we keep ->group_stop_count != 0 before the call. If SIGCONT
* or SIGKILL comes in between ->group_stop_count == 0.
*/
if (sig->group_stop_count) {
  if (!--sig->group_stop_count)這個線程組全部完成了STOP。
   sig->flags = SIGNAL_STOP_STOPPED;
  current->exit_code = sig->group_exit_code;
  __set_current_state(TASK_STOPPED);所有執行這個函數的線程都被設置為了TASK_STOPPED狀態，接下來執行schedule函數之後該線程將會被從運行隊列中移除，執行的次數由該函數開頭的if中設置的group_stop_count值決定。
}
spin_unlock_irq(&current->sighand->siglock);

if (notify) {
  read_lock(&tasklist_lock);
  do_notify_parent_cldstop(current, notify);
  read_unlock(&tasklist_lock);
}

/* Now we don't run again until woken by SIGCONT or SIGKILL */
do {從這里該線程將會切換出去，讓出調度權。

schedule();
} while (try_to_freeze());

tracehook_finish_jctl();
current->exit_code = 0;

return 1;
}

四、調試器如何做到對單個線程的SIGSTOP
由於調試器和被調試任務之間有一種內核可以感知到的調試關系，也就是被調試任務的PT_PTRACED標志位被置位，所以當一個線程收到信號並且要處理的時候，內核會首先給調試器一個機會：
get_signal_to_deliver---->>ptrace_stop--->>do_notify_parent_cldstop
    if ((current->ptrace & PT_PTRACED) && signr != SIGKILL) {
            ptrace_signal_deliver(regs, cookie);

            /* Let the debugger run. */
            ptrace_stop(signr, signr, info);如果調試器在這個函數之後取消掉信號，也就是讓exit_code清零，則下面的continue將會忽略這個信號。

            /* We're back. Did the debugger cancel the sig? */
            signr = current->exit_code;
            if (signr == 0)
                continue;
所以在沒有啟動線程組暫停之前，調試器優先獲得控制權，因此調試器可以判斷出這是自己發送的信號，在進行必要的操作之後，通過ptrace系統調用再取消這個信號，從而讓線程組暫停夭折。
linux-2.6.21\arch\i386\kernel\ptrace.c
long arch_ptrace(struct task_struct *child, long request, long addr, long data)

case PTRACE_SYSEMU: /* continue and stop at next syscall, which will not be executed */
    case PTRACE_SYSCALL:    /* continue and stop at next (return from) syscall */
    case PTRACE_CONT:    /* restart after signal. */
        ret = -EIO;
        if (!valid_signal(data))
            break;
        if (request == PTRACE_SYSEMU) {
            set_tsk_thread_flag(child, TIF_SYSCALL_EMU);
            clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE);
        } else if (request == PTRACE_SYSCALL) {
            set_tsk_thread_flag(child, TIF_SYSCALL_TRACE);
            clear_tsk_thread_flag(child, TIF_SYSCALL_EMU);
        } else {
            clear_tsk_thread_flag(child, TIF_SYSCALL_EMU);
            clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE);
        }
        child->exit_code = data;調試器可以通過PTRACE_CONT清除這個信號。
        /* make sure the single step bit is not set. */
        clear_singlestep(child);
        wake_up_process(child);
        ret = 0;
        break;

免責聲明！

本站轉載的文章為個人學習借鑒使用，本站對版權不負任何法律責任。如果侵犯了您的隱私權益，請聯系本站郵箱yoyou2525@163.com刪除。

猜您在找 特征工程系列：特征篩選的原理與實現（下） Linux基礎之特殊權限 Linux文件特殊權限 HOG特征(代碼實現） Linux 下的兩個特殊的文件 -- /dev/null 和 /dev/zero 簡介及對比 Linux 下如何處理包含空格和特殊字符的文件名 Linux下C實現RPC Linux下system()函數的實現 Linux 特殊符號詳解 Linux權限管理---特殊權限