ulimit功能以及如何在內核中生效


 關鍵詞:ulimit、getrlimit、setrlimit、RLIMIT_CPU、RLIMIT_CORE等等。

 內核資源限制通過ulimit進行讀取和設置;本文先介紹ulimit如何設置資源,之後簡單分析內核中是如何依據這些限制約束系統行為的。

 

1. 了解ulimit(busybox)

以busybox中的ulimit為例,主要通過調用getrlimit()/setrlimit()設置系統的各種資源。

ulimit設置和獲取的資源主要有如下幾種:

#define RLIMIT_CPU        0    /* CPU time in sec */
#define RLIMIT_FSIZE        1    /* Maximum filesize */
#define RLIMIT_DATA        2    /* max data size */
#define RLIMIT_STACK        3    /* max stack size */
#define RLIMIT_CORE        4    /* max core file size */
#define RLIMIT_RSS        5    /* max resident set size */
#define RLIMIT_NPROC 6 /* max number of processes */ #define RLIMIT_NOFILE 7 /* max number of open files */ #define RLIMIT_MEMLOCK 8 /* max locked-in-memory address space */ #define RLIMIT_AS 9 /* address space limit */ #define RLIMIT_LOCKS 10 /* maximum file locks held */ #define RLIMIT_SIGPENDING 11 /* max number of pending signals */ #define RLIMIT_MSGQUEUE 12 /* maximum bytes in POSIX mqueues */ #define RLIMIT_NICE 13 /* max nice prio allowed to raise to 0-39 for nice level 19 .. -20 */ #define RLIMIT_RTPRIO 14 /* maximum realtime priority */ #define RLIMIT_RTTIME 15 /* timeout for RT tasks in us */ #define RLIM_NLIMITS 16

用戶空間對內核資源的限制通過getrlimit()/setrlimit()兩個函數進行。

其中resource就是如上的宏定義,struct rlimit是用戶輸入的閾值。

struct rlimit {
    rlim_t rlim_cur;  /* Soft limit */ rlim_t rlim_max; /* Hard limit (ceiling for rlim_cur) */ }; 
#include <sys/time.h> #include <sys/resource.h> int getrlimit(int resource, struct rlimit *rlim); int setrlimit(int resource, const struct rlimit *rlim); int prlimit(pid_t pid, int resource, const struct rlimit *new_limit, struct rlimit *old_limit);

busybox通過limits_tbl[]將內核中的resource類型和ulimit命令的選項關聯起來。

static const struct limits limits_tbl[] = {
    { RLIMIT_FSIZE,        9,    'f',    "file size (blocks)" }, { RLIMIT_CPU, 0, 't', "cpu time (seconds)" }, { RLIMIT_DATA, 10, 'd', "data seg size (kb)" }, { RLIMIT_STACK, 10, 's', "stack size (kb)" }, { RLIMIT_CORE, 9, 'c', "core file size (blocks)" }, { RLIMIT_RSS, 10, 'm', "resident set size (kb)" }, { RLIMIT_MEMLOCK, 10, 'l', "locked memory (kb)" }, { RLIMIT_NPROC, 0, 'p', "processes" }, { RLIMIT_NOFILE, 0, 'n', "file descriptors" }, { RLIMIT_AS, 10, 'v', "address space (kb)" }, { RLIMIT_LOCKS, 0, 'w', "locks" }, { RLIMIT_NICE, 0, 'e', "scheduling priority" }, { RLIMIT_RTPRIO, 0, 'r', "real-time priority" }, };

下面看看ulimit工具如何通過getrlimit()/setrlimit()對內核資源進行限制。

int FAST_FUNC
shell_builtin_ulimit(char **argv) { unsigned opts; unsigned argc; ... argc = string_array_len(argv); opts = 0; while (1) { struct rlimit limit; const struct limits *l; int opt_char = getopt(argc, argv, ulimit_opt_string); if (opt_char == -1) break; if (opt_char == 'H') { opts |= OPT_hard; continue; } if (opt_char == 'S') { opts |= OPT_soft; continue; } if (opt_char == 'a') { for (l = limits_tbl; l != &limits_tbl[ARRAY_SIZE(limits_tbl)]; l++) { getrlimit(l->cmd, &limit); printf("-%c: %-30s ", l->option, l->name); printlim(opts, &limit, l); } continue; } if (opt_char == 1) opt_char = 'f'; for (l = limits_tbl; l != &limits_tbl[ARRAY_SIZE(limits_tbl)]; l++) {----------------------------limits_tbl[]中是struct limits結構體的數組,對應每一個resource資源。 if (opt_char == l->option) {-----------------------------------------------------------------選擇和當前opt_char一致的limits_tbl[]成員,然后進行顯示或者設置。 char *val_str; getrlimit(l->cmd, &limit);---------------------------------------------------------------首先獲取當前類型的resource。 val_str = optarg; if (!val_str && argv[optind] && argv[optind][0] != '-') val_str = argv[optind++]; /* ++ skips NN in "-c NN" case */ if (val_str) {---------------------------------------------------------------------------后面跟上參數的表示是設置,否則就是讀取。 rlim_t val; if (strcmp(val_str, "unlimited") == 0) val = RLIM_INFINITY;-------------------------------------------------------------參數是unlimited類型。 else { if (sizeof(val) == sizeof(int)) val = bb_strtou(val_str, NULL, 10); else if (sizeof(val) == sizeof(long)) val = bb_strtoul(val_str, NULL, 10); else val = bb_strtoull(val_str, NULL, 10); if (errno) { bb_error_msg("invalid number '%s'", val_str); return EXIT_FAILURE; } val <<= l->factor_shift;---------------------------------------------------------將參數轉換成內核識別的值,這里面注意不同參數有factor_shift的區別,這是工具和內核之間的一個轉換。 } //bb_error_msg("opt %c val_str:'%s' val:%lld", opt_char, val_str, (long long)val); /* from man bash: "If neither -H nor -S * is specified, both the soft and hard * limits are set. 
*/ if (!opts)---------------------------------------------------------------------------不指定-H/-S則兩個都設置,否則單獨設置。 opts = OPT_hard + OPT_soft; if (opts & OPT_hard) limit.rlim_max = val; if (opts & OPT_soft) limit.rlim_cur = val; //bb_error_msg("setrlimit(%d, %lld, %lld)", l->cmd, (long long)limit.rlim_cur, (long long)limit.rlim_max); if (setrlimit(l->cmd, &limit) < 0) {-------------------------------------------------將指定類型的resource閾值設置到內核中。 bb_perror_msg("error setting limit"); return EXIT_FAILURE; } } else { printlim(opts, &limit, l); } break; } } /* for (every possible opt) */ if (l == &limits_tbl[ARRAY_SIZE(limits_tbl)]) { /* bad option. getopt already complained. */ break; } } /* while (there are options) */ return 0; } static void printlim(unsigned opts, const struct rlimit *limit, const struct limits *l) { rlim_t val; val = limit->rlim_max; if (!(opts & OPT_hard)) val = limit->rlim_cur; if (val == RLIM_INFINITY) puts("unlimited"); else { val >>= l->factor_shift; printf("%llu\n", (long long) val); } }

至此可以了解到ulimit是如何對內核resource產生影響的。 

2. getrlimit()/setrlimit()內核調用

getrlimit()/setrlimit()在內核中對應同名的系統調用,實現如下,二者的核心都是do_prlimit()。

SYSCALL_DEFINE2(getrlimit, unsigned int, resource, struct rlimit __user *, rlim)
{
    struct rlimit value; int ret; ret = do_prlimit(current, resource, NULL, &value); if (!ret) ret = copy_to_user(rlim, &value, sizeof(*rlim)) ? -EFAULT : 0; return ret; } SYSCALL_DEFINE2(setrlimit, unsigned int, resource, struct rlimit __user *, rlim) { struct rlimit new_rlim; if (copy_from_user(&new_rlim, rlim, sizeof(*rlim))) return -EFAULT; return do_prlimit(current, resource, &new_rlim, NULL); } int do_prlimit(struct task_struct *tsk, unsigned int resource, struct rlimit *new_rlim, struct rlimit *old_rlim) { struct rlimit *rlim; int retval = 0; if (resource >= RLIM_NLIMITS) return -EINVAL; if (new_rlim) { if (new_rlim->rlim_cur > new_rlim->rlim_max) return -EINVAL; if (resource == RLIMIT_NOFILE && new_rlim->rlim_max > sysctl_nr_open)----------------------RLIMIT_NOFILE不能超過sysctl_nr_open數目。 return -EPERM; }  read_lock(&tasklist_lock); if (!tsk->sighand) { retval = -ESRCH; goto out; } rlim = tsk->signal->rlim + resource; task_lock(tsk->group_leader); if (new_rlim) { if (new_rlim->rlim_max > rlim->rlim_max && !capable(CAP_SYS_RESOURCE)) retval = -EPERM; if (!retval) retval = security_task_setrlimit(tsk->group_leader, resource, new_rlim); if (resource == RLIMIT_CPU && new_rlim->rlim_cur == 0) {  new_rlim->rlim_cur = 1; } } ... task_unlock(tsk->group_leader); if (!retval && new_rlim && resource == RLIMIT_CPU && new_rlim->rlim_cur != RLIM_INFINITY) update_rlimit_cpu(tsk, new_rlim->rlim_cur);--------------------------設置RLIMIT_CPU需要更新CPU相關信息。 out: read_unlock(&tasklist_lock); return retval; }

rlimit的設置比較簡單,使用則分散在內核各處。

獲取當前系統resource限制接口有:

static inline unsigned long task_rlimit(const struct task_struct *tsk,
        unsigned int limit) { return READ_ONCE(tsk->signal->rlim[limit].rlim_cur); } static inline unsigned long task_rlimit_max(const struct task_struct *tsk, unsigned int limit) { return READ_ONCE(tsk->signal->rlim[limit].rlim_max); } static inline unsigned long rlimit(unsigned int limit) { return task_rlimit(current, limit); } static inline unsigned long rlimit_max(unsigned int limit) { return task_rlimit_max(current, limit); }

新創建進程/線程的rlimit繼承自父進程的rlimit。

3. 資源分類

下面對各種類型資源在內核中是如何進行限制簡單分析。

3.1 RLIMIT_CPU 0 /* CPU time in sec */

RLIMIT_CPU表示進程CPU運行時間的最大值,單位是秒。

RLIMIT_CPU規定了進程所使用的最大CPU時間,超過soft發送SIGXCPU信號,超過hard發送SIGKILL信號。

static void check_process_timers(struct task_struct *tsk,
				 struct list_head *firing)
{
...
	soft = READ_ONCE(sig->rlim[RLIMIT_CPU].rlim_cur);----------------------------------獲取系統資源soft值。
	if (soft != RLIM_INFINITY) {
		unsigned long psecs = cputime_to_secs(ptime);------------------------------表示當前進程所占用的CPU時間。
		unsigned long hard =
			READ_ONCE(sig->rlim[RLIMIT_CPU].rlim_max);-------------------------獲取系統資源hard值。
		cputime_t x;
		if (psecs >= hard) {
			/*
			 * At the hard limit, we just die.
			 * No need to calculate anything else now.
			 */
			__group_send_sig_info(SIGKILL, SEND_SIG_PRIV, tsk);----------------如果進程CPU時間超過hard,則向進程發送SIGKILL信號,殺死進程。
			return;
		}
		if (psecs >= soft) {
			/*
			 * At the soft limit, send a SIGXCPU every second.
			 */
			__group_send_sig_info(SIGXCPU, SEND_SIG_PRIV, tsk);----------------如果進程CPU時間超過soft,則向進程發送SIGXCPU信號。
			if (soft < hard) {
				soft++;
				sig->rlim[RLIMIT_CPU].rlim_cur = soft;
			}
		}
		x = secs_to_cputime(soft);
		if (!prof_expires || x < prof_expires) {
			prof_expires = x;
		}
	}
...
}

3.2 RLIMIT_FSIZE 1 /* Maximum filesize */

RLIMIT_FSIZE表示創建文件大小的最大值,超過此大小則發送SIGXFSZ。

int inode_newsize_ok(const struct inode *inode, loff_t offset)
{
    if (inode->i_size < offset) {
        unsigned long limit;

        limit = rlimit(RLIMIT_FSIZE);---------------------------------獲取系統RLIMIT_FSIZE大小。
        if (limit != RLIM_INFINITY && offset > limit)
            goto out_sig;
        if (offset > inode->i_sb->s_maxbytes)
            goto out_big;
    } else {
        /*
         * truncation of in-use swapfiles is disallowed - it would
         * cause subsequent swapout to scribble on the now-freed
         * blocks.
         */
        if (IS_SWAPFILE(inode))
            return -ETXTBSY;
    }

    return 0;
out_sig:
    send_sig(SIGXFSZ, current, 0);------------------------------------發送SIGXFSZ信號。
out_big:
    return -EFBIG;
}

3.3 RLIMIT_DATA 2 /* max data size */

RLIMIT_DATA用於限制數據段大小的最大值。

may_expand_vm()用於判斷是否允許進程擴大自己的vm空間,返回true表示允許,false表示禁止。

bool may_expand_vm(struct mm_struct *mm, vm_flags_t flags, unsigned long npages)
{
    if (mm->total_vm + npages > rlimit(RLIMIT_AS) >> PAGE_SHIFT)------------------------首先檢查進程的total_vm+npages是否大於RLIMIT_AS,如果超過則返回false,表示不允許擴大vm空間。
        return false;

    if (is_data_mapping(flags) &&
        mm->data_vm + npages > rlimit(RLIMIT_DATA) >> PAGE_SHIFT) {
        /* Workaround for Valgrind */
        if (rlimit(RLIMIT_DATA) == 0 &&
            mm->data_vm + npages <= rlimit_max(RLIMIT_DATA) >> PAGE_SHIFT)
            return true;
        if (!ignore_rlimit_data) {
            pr_warn_once("%s (%d): VmData %lu exceed data ulimit %lu. Update limits or use boot option ignore_rlimit_data.\n",
                     current->comm, current->pid,
                     (mm->data_vm + npages) << PAGE_SHIFT,
                     rlimit(RLIMIT_DATA));
            return false;---------------------------------------------------------------如果區域大於RLIMIT_DATA,並且沒有ignore_rlimit_data,返回false。
        }
    }

    return true;
}

3.4 RLIMIT_STACK 3 /* max stack size */

RLIMIT_STACK表示一個線程/進程棧的最大尺寸。

expand_stack()會對增加后的尺寸進行檢查,確保符合RLIMIT_STACK等一系列限制。

int expand_stack(struct vm_area_struct *vma, unsigned long address)
{
    return expand_downwards(vma, address);
}

int expand_downwards(struct vm_area_struct *vma,
                   unsigned long address)
{
...
    /* Somebody else might have raced and expanded it already */
    if (address < vma->vm_start) {
        unsigned long size, grow;

        size = vma->vm_end - address;
        grow = (vma->vm_start - address) >> PAGE_SHIFT;

        error = -ENOMEM;
        if (grow <= vma->vm_pgoff) {
            error = acct_stack_growth(vma, size, grow);
            if (!error) {
...
            }
        }
    }
    anon_vma_unlock_write(vma->anon_vma);
    khugepaged_enter_vma_merge(vma, vma->vm_flags);
    validate_mm(mm);
    return error;
}

static int acct_stack_growth(struct vm_area_struct *vma,
                 unsigned long size, unsigned long grow)
{
    struct mm_struct *mm = vma->vm_mm;
    struct rlimit *rlim = current->signal->rlim;
    unsigned long new_start;

    /* address space limit tests */
    if (!may_expand_vm(mm, vma->vm_flags, grow))-------------首先檢查內存空間是否夠用。
        return -ENOMEM;

    /* Stack limit test */
    if (size > READ_ONCE(rlim[RLIMIT_STACK].rlim_cur))
        return -ENOMEM;--------------------------------------檢查申請棧size是否超過棧空間限制。

    /* mlock limit tests */
    if (vma->vm_flags & VM_LOCKED) {
        unsigned long locked;
        unsigned long limit;
        locked = mm->locked_vm + grow;
        limit = READ_ONCE(rlim[RLIMIT_MEMLOCK].rlim_cur);
        limit >>= PAGE_SHIFT;
        if (locked > limit && !capable(CAP_IPC_LOCK))
            return -ENOMEM;
    }

    /* Check to ensure the stack will not grow into a hugetlb-only region */
    new_start = (vma->vm_flags & VM_GROWSUP) ? vma->vm_start :
            vma->vm_end - size;
    if (is_hugepage_only_range(vma->vm_mm, new_start, size))
        return -EFAULT;

    /*
     * Overcommit..  This must be the final test, as it will
     * update security statistics.
     */
    if (security_vm_enough_memory_mm(mm, grow))
        return -ENOMEM;

    return 0;
}

3.5 RLIMIT_CORE 4 /* max core file size */

RLIMIT_CORE限制了coredump產生文件尺寸的最大值,如果為0說明不允許創建core文件。

void do_coredump(const siginfo_t *siginfo)
{
...
    struct coredump_params cprm = {
        .siginfo = siginfo,
        .regs = signal_pt_regs(),
        .limit = rlimit(RLIMIT_CORE),--------------------------------cprm中包含了對coredump文件大小的限制,在具體格式進行coredump過程中會檢查coredump文件是否超過此值。
        .mm_flags = mm->flags,
    };
...
}

3.6 RLIMIT_RSS 5 /* max resident set size */

RLIMIT_RSS限制了進程最大實際內存使用量,未起作用。

3.7 RLIMIT_NPROC 6 /* max number of processes */

RLIMIT_NPROC規定了每個real user id可擁有的進程(線程)數量的最大值。

do_execveat_common()執行新程序的時候,如果當前進程帶有PF_NPROC_EXCEEDED標志,並且current_user()->processes超過RLIMIT_NPROC,則返回EAGAIN,表示資源不夠使用。

copy_process()創建新進程/線程的時候同樣會進行檢查。

static int do_execveat_common(int fd, struct filename *filename,
                  struct user_arg_ptr argv,
                  struct user_arg_ptr envp,
                  int flags)
{
...
    if ((current->flags & PF_NPROC_EXCEEDED) &&
        atomic_read(&current_user()->processes) > rlimit(RLIMIT_NPROC)) {
        retval = -EAGAIN;
        goto out_ret;
    }
...
}

static __latent_entropy struct task_struct *copy_process(
                    unsigned long clone_flags,
                    unsigned long stack_start,
                    unsigned long stack_size,
                    int __user *child_tidptr,
                    struct pid *pid,
                    int trace,
                    unsigned long tls,
                    int node)
{
...
    if (atomic_read(&p->real_cred->user->processes) >=
            task_rlimit(p, RLIMIT_NPROC)) {
        if (p->real_cred->user != INIT_USER &&
            !capable(CAP_SYS_RESOURCE) && !capable(CAP_SYS_ADMIN)) {
goto bad_fork_free;
        }
    }
...
}

max_threads如何計算?

max_threads的大小是由set_max_threads()計算出來的。

在內核啟動的時候fork_init()設置max_threads,之後也可以通過sysctl_max_threads()進行設置。

可以通過/proc/sys/kernel/threads-max獲取當前系統的max_threads。

void __init fork_init(void)
{
...
set_max_threads(MAX_THREADS); ... } int sysctl_max_threads(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { struct ctl_table t; int ret; int threads = max_threads; int min = MIN_THREADS; int max = MAX_THREADS; t = *table; t.data = &threads; t.extra1 = &min; t.extra2 = &max; ret = proc_dointvec_minmax(&t, write, buffer, lenp, ppos); if (ret || !write) return ret; set_max_threads(threads); return 0; } static void set_max_threads(unsigned int max_threads_suggested) { u64 threads; /* * The number of threads shall be limited such that the thread * structures may only consume a small part of the available memory. */ if (fls64(totalram_pages) + fls64(PAGE_SIZE) > 64) threads = MAX_THREADS; else threads = div64_u64((u64) totalram_pages * (u64) PAGE_SIZE, (u64) THREAD_SIZE * 8UL); if (threads > max_threads_suggested) threads = max_threads_suggested; max_threads = clamp_t(u64, threads, MIN_THREADS, MAX_THREADS); }

 

THREAD_SIZE為2個頁面:

#define THREAD_SIZE    (PAGE_SIZE * 2)

  #define MIN_THREADS 20

  #define FUTEX_TID_MASK 0x3fffffff

  #define MAX_THREADS FUTEX_TID_MASK

 

所以max_threads數量為max_threads=totalram_pages*PAGE_SIZE/(THREAD_SIZE*8)。

在totalram_pages為100556的情況下,max_threads=100556/16=6284(整數除法,6284.75向下取整),實際的RLIMIT_NPROC=max_threads/2,即為3142。

可以通過ulimit -p驗證。

3.8 RLIMIT_NOFILE 7 /* max number of open files */

RLIMIT_NOFILE限制進程打開文件數量最大值。

比如alloc_fd()申請文件句柄號,傳給__alloc_fd()的end參數對應的就是RLIMIT_NOFILE。

static int alloc_fd(unsigned start, unsigned flags)
{
    return __alloc_fd(current->files, start, rlimit(RLIMIT_NOFILE), flags);-----current->files是當前進程的打開文件列表。
}

int get_unused_fd_flags(unsigned flags)
{
    return __alloc_fd(current->files, 0, rlimit(RLIMIT_NOFILE), flags);
}

int __alloc_fd(struct files_struct *files,
           unsigned start, unsigned end, unsigned flags)
{
...
    error = -EMFILE;
    if (fd >= end)---------------------------------------------------------------如果找到的fd達到或超過RLIMIT_NOFILE則返回錯誤。
        goto out;
...
}

3.9 RLIMIT_MEMLOCK 8 /* max locked-in-memory address space */

RLIMIT_MEMLOCK用於限制使用mlock()鎖定的locked_vm內存最大使用量。

static __must_check int do_mlock(unsigned long start, size_t len, vm_flags_t flags)
{
    unsigned long locked;
    unsigned long lock_limit;
    int error = -ENOMEM;

    if (!can_do_mlock())
        return -EPERM;

    lru_add_drain_all();    /* flush pagevec */

    len = PAGE_ALIGN(len + (offset_in_page(start)));
    start &= PAGE_MASK;

    lock_limit = rlimit(RLIMIT_MEMLOCK);-----------------------------------------系統對RLIMIT_MEMLOCK的閾值。
    lock_limit >>= PAGE_SHIFT;
    locked = len >> PAGE_SHIFT;--------------------------------------------------本次mlock內存大小。

    if (down_write_killable(&current->mm->mmap_sem))
        return -EINTR;

    locked += current->mm->locked_vm;--------------------------------------------進程中已經mlock內存大小。
    if ((locked > lock_limit) && (!capable(CAP_IPC_LOCK))) {
        locked -= count_mm_mlocked_page_nr(current->mm,
                start, len);
    }

    if ((locked <= lock_limit) || capable(CAP_IPC_LOCK))-------------------------進行mlock內存檢查,如有錯誤返回錯誤類型。
        error = apply_vma_lock_flags(start, len, flags);

    up_write(&current->mm->mmap_sem);
    if (error)
        return error;

    error = __mm_populate(start, len, 0);
    if (error)
        return __mlock_posix_error_return(error);
    return 0;
}

3.10 RLIMIT_AS 9 /* address space limit */

 RLIMIT_AS表示進程可使用的最大虛擬內存大小,超過后則不允許繼續申請內存。

bool may_expand_vm(struct mm_struct *mm, vm_flags_t flags, unsigned long npages)
{
    if (mm->total_vm + npages > rlimit(RLIMIT_AS) >> PAGE_SHIFT)----------------------total_vm加上將要新增內存,如果超過RLIMIT_AS則返回錯誤。
        return false;
...
    return true;
}

3.11 RLIMIT_LOCKS 10 /* maximum file locks held */

 RLIMIT_LOCKS表示進程可建立的文件鎖數量最大值,未使用。

3.12 RLIMIT_SIGPENDING 11 /* max number of pending signals */

 RLIMIT_SIGPENDING表示用戶可排隊的pending信號數量的最大值,初始值等於RLIMIT_NPROC。

void __init fork_init(void)
{
...
    set_max_threads(MAX_THREADS);

    init_task.signal->rlim[RLIMIT_NPROC].rlim_cur = max_threads/2;
    init_task.signal->rlim[RLIMIT_NPROC].rlim_max = max_threads/2;
    init_task.signal->rlim[RLIMIT_SIGPENDING] =
        init_task.signal->rlim[RLIMIT_NPROC];
...
}

__sigqueue_alloc()中會檢查override_rlimit以及RLIMIT_SIGPENDING,滿足條件才會為pending信號分配sigqueue內存,否則信號被丟棄。

static struct sigqueue *
__sigqueue_alloc(int sig, struct task_struct *t, gfp_t flags, int override_rlimit)
{
...
    if (override_rlimit ||
        atomic_read(&user->sigpending) <=
            task_rlimit(t, RLIMIT_SIGPENDING)) {-----------------------------------指定了override_rlimit,或者當前用戶sigpending不超過RLIMIT_SIGPENDING時,才可以申請sigqueue。
        q = kmem_cache_alloc(sigqueue_cachep, flags);
    } else {
        print_dropped_signal(sig);-------------------------------------------------否則信號將被丟棄。
    }

    if (unlikely(q == NULL)) {
        atomic_dec(&user->sigpending);
        free_uid(user);
    } else {
        INIT_LIST_HEAD(&q->list);
        q->flags = 0;
        q->user = user;
    }

    return q;
}

3.13 RLIMIT_MSGQUEUE 12 /* maximum bytes in POSIX mqueues */

 RLIMIT_MSGQUEUE限制了用戶可為POSIX消息隊列分配的最大字節數,超過限制后返回EMFILE錯誤。

static struct inode *mqueue_get_inode(struct super_block *sb,
        struct ipc_namespace *ipc_ns, umode_t mode,
        struct mq_attr *attr)
{
...
    if (S_ISREG(mode)) {
...
        if (u->mq_bytes + mq_bytes < u->mq_bytes ||
            u->mq_bytes + mq_bytes > rlimit(RLIMIT_MSGQUEUE)) {
            spin_unlock(&mq_lock);
            /* mqueue_evict_inode() releases info->messages */
            ret = -EMFILE;
            goto out_inode;
        }
...
    } else if (S_ISDIR(mode)) {
...
    }

    return inode;
out_inode:
    iput(inode);
err:
    return ERR_PTR(ret);
}

3.14 RLIMIT_NICE 13 /* max nice prio allowed to raise to 0-39 for nice level 19 .. -20 */

 RLIMIT_NICE限制了進程可通過setpriority()或者nice()調用提升到的最高優先級,即可設置的最小nice值(20 - rlim_cur)。

static void binder_set_nice(long nice)
{
    long min_nice;

    if (can_nice(current, nice)) {
        set_user_nice(current, nice);
        return;
    }
    min_nice = rlimit_to_nice(current->signal->rlim[RLIMIT_NICE].rlim_cur);
    binder_debug(BINDER_DEBUG_PRIORITY_CAP,
             "%d: nice value %ld not allowed use %ld instead\n",
              current->pid, nice, min_nice);
    set_user_nice(current, min_nice);
    if (min_nice <= MAX_NICE)
        return;
    binder_user_error("%d RLIMIT_NICE not set\n", current->pid);
}

int can_nice(const struct task_struct *p, const int nice)
{
    /* convert nice value [19,-20] to rlimit style value [1,40] */
    int nice_rlim = nice_to_rlimit(nice);

    return (nice_rlim <= task_rlimit(p, RLIMIT_NICE) ||
        capable(CAP_SYS_NICE));----------------------------------------------申請的nice值換算成rlimit形式後不超過RLIMIT_NICE,或者進程具備CAP_SYS_NICE,才會被允許修改nice值。
}

3.15 RLIMIT_RTPRIO 14 /* maximum realtime priority */

 RLIMIT_RTPRIO限制進程通過sched_setscheduler()和sched_setparam()可設置的最大實時優先級。

對於RT線程,超過RLIMIT_RTPRIO則返回EPERM錯誤。

static int __sched_setscheduler(struct task_struct *p,
                const struct sched_attr *attr,
                bool user, bool pi)
{
...
    if (user && !capable(CAP_SYS_NICE)) {
        if (fair_policy(policy)) {
            if (attr->sched_nice < task_nice(p) &&
                !can_nice(p, attr->sched_nice))
                return -EPERM;
        }

        if (rt_policy(policy)) {
            unsigned long rlim_rtprio =
                    task_rlimit(p, RLIMIT_RTPRIO);

            /* can't set/change the rt policy */
            if (policy != p->policy && !rlim_rtprio)
                return -EPERM;

            /* can't increase priority */
            if (attr->sched_priority > p->rt_priority &&
                attr->sched_priority > rlim_rtprio)
                return -EPERM;
        }
...
    }
...
}

3.16 RLIMIT_RTTIME 15 /* timeout for RT tasks in us */

RLIMIT_RTTIME限制了實時進程在不發生阻塞的情況下可連續占用CPU的最長時間,單位是微秒。

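check_thread_timers()中會將實時任務的rt.timeout與RLIMIT_RTTIME換算出的tick數進行比較;watchdog()則在每個tick累加rt.timeout,並在其超過min(soft, hard)對應的tick數時更新p->cputime_expires.sched_exp。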

static void check_thread_timers(struct task_struct *tsk,
                struct list_head *firing)
{
...
    soft = READ_ONCE(sig->rlim[RLIMIT_RTTIME].rlim_cur);
    if (soft != RLIM_INFINITY) {
        unsigned long hard =
            READ_ONCE(sig->rlim[RLIMIT_RTTIME].rlim_max);

        if (hard != RLIM_INFINITY &&
            tsk->rt.timeout > DIV_ROUND_UP(hard, USEC_PER_SEC/HZ)) {
            /*
             * At the hard limit, we just die.
             * No need to calculate anything else now.
             */
            __group_send_sig_info(SIGKILL, SEND_SIG_PRIV, tsk);--------------------------如果實時線程的超時值,超過RLIMIT_RTTIME的rlim_max之后發送SIGKILL信號。
            return;
        }
        if (tsk->rt.timeout > DIV_ROUND_UP(soft, USEC_PER_SEC/HZ)) {
            /*
             * At the soft limit, send a SIGXCPU every second.
             */
            if (soft < hard) {
                soft += USEC_PER_SEC;
                sig->rlim[RLIMIT_RTTIME].rlim_cur = soft;
            }
            printk(KERN_INFO
                "RT Watchdog Timeout: %s[%d]\n",
                tsk->comm, task_pid_nr(tsk));
            __group_send_sig_info(SIGXCPU, SEND_SIG_PRIV, tsk);-------------------------如果實時線程的超時值,超過RLIMIT_RTTIME的rlim_cur之后發送SIGXCPU信號。
        }
    }
    if (task_cputime_zero(tsk_expires))
        tick_dep_clear_task(tsk, TICK_DEP_BIT_POSIX_TIMER);
}

static void watchdog(struct rq *rq, struct task_struct *p)
{
    unsigned long soft, hard;

    /* max may change after cur was read, this will be fixed next tick */
    soft = task_rlimit(p, RLIMIT_RTTIME);
    hard = task_rlimit_max(p, RLIMIT_RTTIME);

    if (soft != RLIM_INFINITY) {
        unsigned long next;

        if (p->rt.watchdog_stamp != jiffies) {
            p->rt.timeout++;
            p->rt.watchdog_stamp = jiffies;
        }

        next = DIV_ROUND_UP(min(soft, hard), USEC_PER_SEC/HZ);
        if (p->rt.timeout > next)
            p->cputime_expires.sched_exp = p->se.sum_exec_runtime;
    }
}


免責聲明!

本站轉載的文章為個人學習借鑒使用,本站對版權不負任何法律責任。如果侵犯了您的隱私權益,請聯系本站郵箱yoyou2525@163.com刪除。



 
粵ICP備18138465號   © 2018-2025 CODEPRJ.COM