一、測試環境
運行環境:server: arm64 linux-4.14(開發板)
host: x86_64 ubuntu-20.04(主機)
glibc 版本:2.30
二、流程分析:
接上篇的疑問繼續,前提條件,筆者的運行環境中沒有包含任何 libnss 庫,
測試程序如下:
1 #include <sys/types.h>
2 #include <stdio.h>
3 #include <pwd.h>
4
5 int main() 6 { 7 struct passwd *info = NULL; 8
9 info = getpwuid(getuid()); 10 //printf("Name: %s\n", info->pw_name);
11 puts("exec finished."); 12
13 return 0; 14 }
交叉靜態編譯傳到開發板運行: aarch64-linux-gnu-gcc -static -g -o getname test.c
用 strace 跟蹤了下得到以下的結果:
/ # /mnt/usr/bin/strace ./getname execve("./getname", ["./getname"], [/* 7 vars */]) = 0 uname({sysname="Linux", nodename="(none)", ...}) = 0 brk(NULL) = 0x301fc000 brk(0x301fcf60) = 0x301fcf60 readlinkat(AT_FDCWD, "/proc/self/exe", "/getname", 4096) = 8 brk(0x3021df60) = 0x3021df60 brk(0x3021e000) = 0x3021e000 getuid() = 0 socket(PF_LOCAL, SOCK_STREAM|SOCK_CLOEXEC|SOCK_NONBLOCK, 0) = 3 connect(3, {sa_family=AF_LOCAL, sun_path="/var/run/nscd/socket"}, 110) = -1 ENOENT (No such file or directory) close(3) = 0 socket(PF_LOCAL, SOCK_STREAM|SOCK_CLOEXEC|SOCK_NONBLOCK, 0) = 3 connect(3, {sa_family=AF_LOCAL, sun_path="/var/run/nscd/socket"}, 110) = -1 ENOENT (No such file or directory) close(3) = 0 openat(AT_FDCWD, "/etc/nsswitch.conf", O_RDONLY|O_CLOEXEC) = -1 ENOENT (No such file or directory) openat(AT_FDCWD, "/etc/ld.so.cache", O_RDONLY|O_CLOEXEC) = -1 ENOENT (No such file or directory) openat(AT_FDCWD, "/lib/tls//libnss_compat.so.2", O_RDONLY|O_CLOEXEC) = -1 ENOENT (No such file or directory) newfstatat(AT_FDCWD, "/lib/tls/", 0xffffc56ce720, 0) = -1 ENOENT (No such file or directory) openat(AT_FDCWD, "/lib/tls/libnss_compat.so.2", O_RDONLY|O_CLOEXEC) = -1 ENOENT (No such file or directory) newfstatat(AT_FDCWD, "/lib/tls", 0xffffc56ce720, 0) = -1 ENOENT (No such file or directory) openat(AT_FDCWD, "/lib//libnss_compat.so.2", O_RDONLY|O_CLOEXEC) = -1 ENOENT (No such file or directory) newfstatat(AT_FDCWD, "/lib/", {st_mode=S_IFDIR|0755, st_size=4096, ...}, 0) = 0 openat(AT_FDCWD, "/lib/libnss_compat.so.2", O_RDONLY|O_CLOEXEC) = -1 ENOENT (No such file or directory) newfstatat(AT_FDCWD, "/lib", {st_mode=S_IFDIR|0755, st_size=4096, ...}, 0) = 0 openat(AT_FDCWD, "/usr/lib/tls//libnss_compat.so.2", O_RDONLY|O_CLOEXEC) = -1 ENOENT (No such file or directory) newfstatat(AT_FDCWD, "/usr/lib/tls/", 0xffffc56ce720, 0) = -1 ENOENT (No such file or directory) openat(AT_FDCWD, "/usr/lib/tls/libnss_compat.so.2", O_RDONLY|O_CLOEXEC) = -1 ENOENT (No such file or directory) 
newfstatat(AT_FDCWD, "/usr/lib/tls", 0xffffc56ce720, 0) = -1 ENOENT (No such file or directory) openat(AT_FDCWD, "/usr/lib//libnss_compat.so.2", O_RDONLY|O_CLOEXEC) = -1 ENOENT (No such file or directory) newfstatat(AT_FDCWD, "/usr/lib/", {st_mode=S_IFDIR|0755, st_size=4096, ...}, 0) = 0 openat(AT_FDCWD, "/usr/lib/libnss_compat.so.2", O_RDONLY|O_CLOEXEC) = -1 ENOENT (No such file or directory) newfstatat(AT_FDCWD, "/usr/lib", {st_mode=S_IFDIR|0755, st_size=4096, ...}, 0) = 0 openat(AT_FDCWD, "/lib//libnss_files.so.2", O_RDONLY|O_CLOEXEC) = -1 ENOENT (No such file or directory) openat(AT_FDCWD, "/lib/libnss_files.so.2", O_RDONLY|O_CLOEXEC) = -1 ENOENT (No such file or directory) openat(AT_FDCWD, "/usr/lib//libnss_files.so.2", O_RDONLY|O_CLOEXEC) = -1 ENOENT (No such file or directory) openat(AT_FDCWD, "/usr/lib/libnss_files.so.2", O_RDONLY|O_CLOEXEC) = -1 ENOENT (No such file or directory) fstat(1, {st_mode=S_IFCHR|0660, st_rdev=makedev(5, 1), ...}) = 0 ioctl(1, TCGETS, {B115200 opost isig icanon echo ...}) = 0 write(1, "exec finished.\n", 15exec finished. ) = 15 exit_group(0) = ?
+++ exited with 0 +++
這足以說明,即使是靜態鏈接,程序也還是會用到相關動態庫的;雖然進程沒有找到 libnss_xxx 庫,但也並沒有導致它崩潰掉。
要想搞清楚它是怎么引用的動態庫,就必須追根溯源深入底層,那就來分析一下 glibc 吧,探下究竟。
下面將函數的調用過程依次記錄下來,簡單的流程不加文字說明:
【 getpwuid.c 】
1 #define LOOKUP_TYPE struct passwd
2 #define FUNCTION_NAME getpwuid
3 #define DATABASE_NAME passwd
4 #define ADD_PARAMS uid_t uid
5 #define ADD_VARIABLES uid
6 #define BUFLEN NSS_BUFLEN_PASSWD
7
8 #include "../nss/getXXbyYY.c"
【 getXXbyYY.c 】
1 #define REENTRANT_NAME APPEND_R (FUNCTION_NAME)
2 #define APPEND_R(name) APPEND_R1 (name)
3 #define APPEND_R1(name) name##_r
4
5 LOOKUP_TYPE *
6 FUNCTION_NAME (ADD_PARAMS) 7 { 8 ... 9 INTERNAL (REENTRANT_NAME) (ADD_VARIABLES, &resbuf, buffer, 10 buffer_size, &result H_ERRNO_VAR) 11 ... 12 }
在這個文件的最下方有個 nss_interface_function (FUNCTION_NAME) 吸引了我們的注意,進去看看它是做什么的,有什么作用,
嚯,原來竟是它!沒有它我們也不會查的這么深入,害人不淺啊。
1 /* Warning for linking functions calling dlopen into static binaries. */
2 #ifdef SHARED 3 #define static_link_warning(name)
4 #else
5 #define static_link_warning(name) static_link_warning1(name)
6 #define static_link_warning1(name) \
7 link_warning(name, "Using '" #name "' in statically linked applications \
8 requires at runtime the shared libraries from the glibc version used \ 9 for linking")
10 #endif
猜想是這個 msg 會被放入指定的 section 內,編譯或鏈接時會將它顯示出來,先不管它我們繼續分析流程,
【 getXXbyYY_r.c 】
1 int INTERNAL (REENTRANT_NAME) (ADD_PARAMS, LOOKUP_TYPE *resbuf, char *buffer, 2 size_t buflen, LOOKUP_TYPE **result H_ERRNO_PARM 3 EXTRA_PARAMS) 4 { 5 ... 6 DB_LOOKUP_FCT (&nip, REENTRANT_NAME_STRING, 7 REENTRANT2_NAME_STRING, &fct.ptr); 8 ... 9 DL_CALL_FCT (fct.l, (ADD_VARIABLES, resbuf, buffer, buflen, 10 &errno H_ERRNO_VAR EXTRA_VARIABLES)); 11
12 ... 13
14 }
【 XXX-lookup.c 】
1 int DB_LOOKUP_FCT (service_user **ni, const char *fct_name, const char *fct2_name, 2 void **fctp) 3 { 4 if (DATABASE_NAME_SYMBOL == NULL 5 && __nss_database_lookup2 (DATABASE_NAME_STRING, ALTERNATE_NAME_STRING, /* 解析 /etc/nsswitch.conf 文件,提取其中的 service */ 6 DEFAULT_CONFIG, &DATABASE_NAME_SYMBOL) < 0) 7 return -1; 8
9 *ni = DATABASE_NAME_SYMBOL; 10
11 return __nss_lookup (ni, fct_name, fct2_name, fctp); /* 凶手!就是它引用了 libnss 相關庫 */ 12 }
【 nsswitch.c 】
1 int
2 __nss_lookup (service_user **ni, const char *fct_name, const char *fct2_name, 3 void **fctp) 4 { 5 *fctp = __nss_lookup_function (*ni, fct_name); 6 if (*fctp == NULL && fct2_name != NULL) 7 *fctp = __nss_lookup_function (*ni, fct2_name); 8
9 while (*fctp == NULL 10 && nss_next_action (*ni, NSS_STATUS_UNAVAIL) == NSS_ACTION_CONTINUE 11 && (*ni)->next != NULL) 12 { 13 *ni = (*ni)->next; 14
15 *fctp = __nss_lookup_function (*ni, fct_name); 16 if (*fctp == NULL && fct2_name != NULL) 17 *fctp = __nss_lookup_function (*ni, fct2_name); 18 } 19
20 return *fctp != NULL ? 0 : (*ni)->next == NULL ? 1 : -1; 21 }
【 nsswitch.c 】
1 static int nss_load_library (service_user *ni) 2 { 3 ... 4 if (ni->library->lib_handle == NULL) 5 { 6 /* Load the shared library. */
7 size_t shlen = (7 + strlen (ni->name) + 3
8 + strlen (__nss_shlib_revision) + 1); 9 int saved_errno = errno; 10 char shlib_name[shlen]; 11
12 /* Construct shared object name. */
13 __stpcpy (__stpcpy (__stpcpy (__stpcpy (shlib_name, 14 "libnss_"), 15 ni->name), 16 ".so"), 17 __nss_shlib_revision); 18
19 ni->library->lib_handle = __libc_dlopen (shlib_name); 20 ... 21 } 22
23 void *__nss_lookup_function (service_user *ni, const char *fct_name) 24 { 25 ... 26 #if !defined DO_STATIC_NSS || defined SHARED
27 /* Load the appropriate library. */
28 if (nss_load_library (ni) != 0) 29 /* This only happens when out of memory. */
30 goto remove_from_tree; 31
32 if (ni->library->lib_handle == (void *) -1l) 33 /* Library not found => function not found. */
34 result = NULL; 35 else
36 { 37 /* Get the desired function. */
38 size_t namlen = (5 + strlen (ni->name) + 1
39 + strlen (fct_name) + 1); 40 char name[namlen]; 41
42 /* Construct the function name. */
43 __stpcpy (__stpcpy (__stpcpy (__stpcpy (name, "_nss_"), 44 ni->name), 45 "_"), 46 fct_name); 47
48 /* Look up the symbol. */
49 result = __libc_dlsym (ni->library->lib_handle, name); 50 } 51 #else
52 /* We can't get function address dynamically in static linking. */
53 { 54 # define DEFINE_ENT(h,nm) \ 55 { #h"_get"#nm"ent_r", _nss_##h##_get##nm##ent_r }, \ 56 { #h"_end"#nm"ent", _nss_##h##_end##nm##ent }, \ 57 { #h"_set"#nm"ent", _nss_##h##_set##nm##ent }, 58 # define DEFINE_GET(h,nm) \ 59 { #h"_get"#nm"_r", _nss_##h##_get##nm##_r }, 60 # define DEFINE_GETBY(h,nm,ky) \ 61 { #h"_get"#nm"by"#ky"_r", _nss_##h##_get##nm##by##ky##_r }, 62 static struct fct_tbl { const char *fname; void *fp; } *tp, tbl[] =
63 { 64 # include "function.def"
65 { NULL, NULL } 66 }; 67 size_t namlen = (5 + strlen (ni->name) + 1
68 + strlen (fct_name) + 1); 69 char name[namlen]; 70
71 /* Construct the function name. */
72 __stpcpy (__stpcpy (__stpcpy (name, ni->name), 73 "_"), 74 fct_name); 75
76 result = NULL; 77 for (tp = &tbl[0]; tp->fname; tp++) 78 if (strcmp (tp->fname, name) == 0) 79 { 80 result = tp->fp; 81 break; 82 } 83 } 84 #endif
85
86 /* Remember function pointer for later calls. Even if null, we 87 record it so a second try needn't search the library again. */
88 known->fct_ptr = result; 89 #ifdef PTR_MANGLE 90 PTR_MANGLE (known->fct_ptr); 91 #endif
92 } 93 ... 94 }
上面代碼中的條件編譯宏(DO_STATIC_NSS)決定了 libnss 庫是否已經被靜態鏈接進來,是與不是會分別走到兩個不同的流程。筆者找到一個消除警告的解決辦法,應該是重新編譯 glibc,搞出來一個靜態的 libnss,正如筆者找到的相關回復所說:
I get the same warning and to fix it I recompiled glibc. Turn on switch --enable-static-nss when configuring to get it to work. (version >= 2.20)
請注意源碼中的字符串拼接操作,通過拼接找到指定的動態庫,也通過拼接進而找到指定的函數符號。
通過拼接所得到的函數名應該為:_nss_files_getpwuid_r,那么該函數是在哪里定義的,我們繼續探索發現了 DB_LOOKUP 定義的一個相關結構,
【 files-pwd.c 】
1 DB_LOOKUP (pwuid, '=', 20, ("%lu", (unsigned long int) uid), 2 { 3 if (result->pw_uid == uid && result->pw_name[0] != '+'
4 && result->pw_name[0] != '-') 5 break; 6 }, uid_t uid)
那么 DB_LOOKUP 是在哪里定義的?通過搜索找到了這里,它展開后就是我們要找的 _nss_files_getpwuid_r。這時我們可以得知,無論是動態鏈接還是靜態鏈接,調用 getpwuid 后最終走到的函數就是這里沒錯了:
【 files-XXX.c 】
1 #define DB_LOOKUP(name, db_char, keysize, keypattern, break_if_match, proto...)\
2 enum nss_status \ 3 _nss_files_get##name##_r (proto, \ 4 struct STRUCTURE *result, char *buffer, \ 5 size_t buflen, int *errnop H_ERRNO_PROTO) \ 6 { \ 7 enum nss_status status; \ 8 FILE *stream = NULL; \ 9 \ 10 /* Open file. */ \ 11 status = internal_setent (&stream); \ 12 \ 13 if (status == NSS_STATUS_SUCCESS) \ 14 { \ 15 while ((status = internal_getent (stream, result, buffer, buflen, errnop \ 16 H_ERRNO_ARG EXTRA_ARGS_VALUE)) \ 17 == NSS_STATUS_SUCCESS) \ 18 { break_if_match } \ 19 \ 20 internal_endent (&stream); \ 21 } \ 22 \ 23 return status; \ 24 }
核心函數的大致流程就是打開 /etc/passwd 文件,解析文件內容得到登錄用戶名、ID 之類的信息,最終關掉文件句柄;細節沒有詳細看,因為這不是本文的目的。
三、分析總結
哇,終於簡單的分析完了調用流程,說實話 glibc 屬實不太好分析,宏太多,但不得不承認人家寫的牛B。
文末扣題,在調用 libnss 中實現的相關 api 時,只要你的 nss 庫是動態鏈接的,那么無論你的可執行程序是靜態鏈接還是動態鏈接,你的運行環境中都要包含 nss 動態庫;
否則你需要在編譯 glibc 時 enable static nss 實現 nss 庫的靜態鏈接,這樣的話,可執行程序就可以實現完全靜態了,以達到在任何環境中無依賴運行的目的!