内核启动时会依次调用网络的初始化函数,下面主要分析网络初始化的三个函数:sock_init、proto_init 和 inet_init
code所在目录:
linux\linux-4.9.73\net
linux\linux-4.9.73\include\net
1 函数proto_init
主要作用是将一个网络协议模块添加到每一个网络命名空间中,然后再执行其ops->init函数进行初始化。一般其ops->init会在对应的proc目录下生成该网络协议模块对应的proc文件或proc目录,并执行一些协议初始化相关的函数。
定义位于linux-4.9.73\net\core\sock.c
1 static int __init proto_init(void) 2 { 3 return register_pernet_subsys(&proto_net_ops);ops定义如下 4 } 5 static __net_initdata struct pernet_operations proto_net_ops = { 6 .init = proto_init_net, 7 .exit = proto_exit_net, 8 }; 9 int register_pernet_subsys(struct pernet_operations *ops) 10 { 11 int error; 12 mutex_lock(&net_mutex); 13 error = register_pernet_operations(first_device, ops); 14 mutex_unlock(&net_mutex); 15 return error; 16 }
1.1 函数register_pernet_subsys
分析函数register_pernet_subsys之前先看其实参的定义,定义位于linux-4.9.73\net\core\net_namespace.c
/* Global list head; register_pernet_operations() is handed this list via first_device. */
static LIST_HEAD(pernet_list);
/* Initialised to point at the global pernet_list. */
static struct list_head *first_device = &pernet_list;
再来分析register_pernet_subsys内部调用的函数register_pernet_operations:
/*
 * Register @ops on @list, first reserving a generic id for it when
 * @ops->id is set.
 *
 * Returns the result of __register_pernet_operations(), or the negative
 * error from the id allocation if that fails.
 */
static int register_pernet_operations(struct list_head *list,
				      struct pernet_operations *ops)
{
	int error;

	/* Checks and assignments on ops: reserve an id if it asks for one. */
	if (ops->id) {
again:
		error = ida_get_new_above(&net_generic_ids, 1, ops->id);
		if (error < 0) {
			/* On -EAGAIN call ida_pre_get() and retry the allocation. */
			if (error == -EAGAIN) {
				ida_pre_get(&net_generic_ids, GFP_KERNEL);
				goto again;
			}
			return error;
		}
		/* Keep max_gen_ptrs at least as large as the id just obtained. */
		max_gen_ptrs = max_t(unsigned int, max_gen_ptrs, *ops->id);
	}
	error = __register_pernet_operations(list, ops);
	if (error) {
		/* Registration failed: give the reserved id back. */
		rcu_barrier();
		if (ops->id)
			ida_remove(&net_generic_ids, *ops->id);
	}

	return error;
}
结构体pernet_operations的定义位于:include\net\net_namespace.h
/* Set of per-network-namespace callbacks registered via register_pernet_subsys(). */
struct pernet_operations {
	struct list_head list;		/* list linkage; added to the pernet list on registration */
	int (*init)(struct net *net);	/* init callback, invoked per namespace by ops_init() */
	void (*exit)(struct net *net);	/* exit callback (paired with init; not called in the code shown here) */
	void (*exit_batch)(struct list_head *net_exit_list);	/* presumably a batched exit over a list of namespaces; confirm */
	int *id;			/* when set, receives the generic id reserved at registration */
	size_t size;			/* with id set: bytes of per-namespace data ops_init() allocates */
};
1.2 函数__register_pernet_operations
/*
 * Append @ops to @list and run ops_init() on every network namespace
 * visited by for_each_net().  If any ops_init() call fails, @ops is
 * unlinked again and the namespaces that were already initialised are
 * cleaned up.
 *
 * Returns 0 on success or the error from the failing ops_init().
 */
static int __register_pernet_operations(struct list_head *list,
					struct pernet_operations *ops)
{
	struct net *net;
	int error;
	LIST_HEAD(net_exit_list);

	/* Add ops to list, i.e. the global pernet_list from section 1.1. */
	list_add_tail(&ops->list, list);
	if (ops->init || (ops->id && ops->size)) {
		for_each_net(net) {
			error = ops_init(ops, net);	/* initialise ops for this namespace */
			if (error)
				goto out_undo;
			/* Remember this namespace so it can be undone on a later failure. */
			list_add_tail(&net->exit_list, &net_exit_list);
		}
	}
	return 0;

out_undo:
	/* If I have an error cleanup all namespaces I initialized */
	list_del(&ops->list);
	ops_exit_list(ops, &net_exit_list);
	ops_free_list(ops, &net_exit_list);
	return error;
}
1.3 函数ops_init
/*
 * Initialise @ops for one network namespace @net.
 *
 * When @ops has both an id and a size, a zeroed buffer of @ops->size
 * bytes is allocated and attached to @net under that id.  Then the
 * @ops->init callback, if any, is invoked.
 *
 * Returns 0 on success, -ENOMEM if the allocation fails, or the error
 * returned by net_assign_generic() / @ops->init.  On those later
 * failures the buffer is freed again.
 */
static int ops_init(const struct pernet_operations *ops, struct net *net)
{
	int err = -ENOMEM;
	void *data = NULL;

	if (ops->id && ops->size) {
		data = kzalloc(ops->size, GFP_KERNEL);
		if (!data)
			goto out;

		/* Store data in the namespace's net_generic array under *ops->id. */
		err = net_assign_generic(net, *ops->id, data);
		if (err)
			goto cleanup;
	}
	err = 0;
	if (ops->init)
		err = ops->init(net);	/* for proto_net_ops (section 1) this is proto_init_net */
	if (!err)
		return 0;

cleanup:
	/* data is still NULL when nothing was allocated above. */
	kfree(data);

out:
	return err;
}
1.4 函数proto_init_net
/*
 * Per-namespace init callback of proto_net_ops: creates the "protocols"
 * entry under the namespace's proc_net directory.
 *
 * Returns 0 on success, -ENOMEM if proc_create() fails.
 */
static __net_init int proto_init_net(struct net *net)
{
	/* Create the proc file that corresponds to this protocol module. */
	if (!proc_create("protocols", S_IRUGO, net->proc_net, &proto_seq_fops))
		return -ENOMEM;

	return 0;
}
1.5 函数net_assign_generic
/*
 * Store @data in slot @id - 1 of @net's generic pointer array.
 *
 * If the current array (net->gen) has fewer than @id entries, a new
 * array is allocated, the old pointers are copied over, net->gen is
 * switched to the new array and the old one is freed via kfree_rcu().
 *
 * The caller must hold net_mutex and @id must be non-zero (both are
 * enforced with BUG_ON).  Returns 0 on success, -ENOMEM if the new
 * array cannot be allocated.
 */
static int net_assign_generic(struct net *net, int id, void *data)
{
	struct net_generic *ng, *old_ng;

	BUG_ON(!mutex_is_locked(&net_mutex));
	BUG_ON(id == 0);

	old_ng = rcu_dereference_protected(net->gen,
					   lockdep_is_held(&net_mutex));
	ng = old_ng;
	/* The existing array already has a slot for this id: reuse it. */
	if (old_ng->len >= id)
		goto assign;

	ng = net_alloc_generic();	/* allocate the replacement array */
	if (ng == NULL)
		return -ENOMEM;

	/*
	 * Some synchronisation notes:
	 *
	 * The net_generic explores the net->gen array inside rcu
	 * read section. Besides once set the net->gen->ptr[x]
	 * pointer never changes (see rules in netns/generic.h).
	 *
	 * That said, we simply duplicate this array and schedule
	 * the old copy for kfree after a grace period.
	 */

	memcpy(&ng->ptr, &old_ng->ptr, old_ng->len * sizeof(void*));

	rcu_assign_pointer(net->gen, ng);
	kfree_rcu(old_ng, rcu);
assign:
	/* ids start at 1, so id N lives in slot N - 1. */
	ng->ptr[id - 1] = data;
	return 0;
}
2 sock_init
主要作用:
创建sysctl文件;初始化skb高速缓存,它会在slab中创建两个节点skbuff_head_cache和skbuff_fclone_cache;注册并挂载socket文件系统。
定义位于:linux-4.9.73\net\socket.c
/*
 * Boot-time initialisation of the socket layer: sets up the network
 * sysctl infrastructure, the skbuff and socket-inode caches, registers
 * and mounts the socket filesystem, and (when configured) initialises
 * netfilter.
 *
 * Returns 0 on success or the first error encountered.
 */
static int __init sock_init(void)
{
	int err;
	/*
	 *      Initialize the network sysctl infrastructure.
	 */
	err = net_sysctl_init();	/* creates the sysctl entries used to view and tune network parameters */
	if (err)
		goto out;

	/*
	 *      Initialize skbuff SLAB cache
	 */
	skb_init();	/* creates the skbuff_head_cache and skbuff_fclone_cache slab caches */

	/*
	 *      Initialize the protocols module.
	 */

	/* NOTE(review): the return value of init_inodecache() is not checked here. */
	init_inodecache();

	err = register_filesystem(&sock_fs_type);	/* register the socket filesystem */
	if (err)
		goto out_fs;
	sock_mnt = kern_mount(&sock_fs_type);
	if (IS_ERR(sock_mnt)) {
		err = PTR_ERR(sock_mnt);
		goto out_mount;
	}

	/* The real protocol initialization is performed in later initcalls.
	 */

#ifdef CONFIG_NETFILTER
	err = netfilter_init();
	if (err)
		goto out;
#endif

	ptp_classifier_init();

out:
	return err;

out_mount:
	/* Mounting failed: undo the filesystem registration. */
	unregister_filesystem(&sock_fs_type);
out_fs:
	goto out;
}
2.1 函数net_sysctl_init
定义位于:net\sysctl_net.c
/* Header returned by register_sysctl() for the "net" directory. */
static struct ctl_table_header *net_header;

/*
 * Set up the network sysctl infrastructure: registers an empty "net"
 * sysctl directory, then the sysctl per-namespace operations, then the
 * network sysctl root.
 *
 * Returns 0 on success, -ENOMEM if the "net" directory cannot be
 * registered, or the error from register_pernet_subsys().
 */
__init int net_sysctl_init(void)
{
	static struct ctl_table empty[1];
	int ret = -ENOMEM;
	/* Avoid limitations in the sysctl implementation by
	 * registering "/proc/sys/net" as an empty directory not in a
	 * network namespace.
	 */
	net_header = register_sysctl("net", empty);	/* TODO: not analysed in this article */
	if (!net_header)
		goto out;
	ret = register_pernet_subsys(&sysctl_pernet_ops);	/* same path as section 1 */
	if (ret)
		goto out1;
	register_sysctl_root(&net_sysctl_root);	/* TODO: not analysed in this article */
out:
	return ret;
out1:
	/* Per-namespace registration failed: drop the "net" directory again. */
	unregister_sysctl_table(net_header);
	net_header = NULL;
	goto out;
}
2.2 函数skb_init
定义位于:net/core/skbuff.c
/*
 * Create the two slab caches used for socket buffers.  Both are created
 * with SLAB_PANIC, and the results are stored without a NULL check.
 */
void __init skb_init(void)
{
	/* Cache of struct sk_buff objects. */
	skbuff_head_cache = kmem_cache_create("skbuff_head_cache",
					      sizeof(struct sk_buff),
					      0,
					      SLAB_HWCACHE_ALIGN|SLAB_PANIC,
					      NULL);
	/* Cache of struct sk_buff_fclones objects. */
	skbuff_fclone_cache = kmem_cache_create("skbuff_fclone_cache",
						sizeof(struct sk_buff_fclones),
						0,
						SLAB_HWCACHE_ALIGN|SLAB_PANIC,
						NULL);
}
2.3 函数init_inodecache
init_inodecache为创建socket文件系统做好内存准备。需要注意的是,Linux内核中存在多个init_inodecache的定义,但都是静态(static)函数,即只能由所在.c文件中的函数调用;socket.c中就定义了这样一个函数。
/*
 * Create the "sock_inode_cache" slab cache of struct socket_alloc
 * objects, with init_once as the object constructor.
 *
 * Returns 0 on success, -ENOMEM if the cache cannot be created.
 */
static int init_inodecache(void)
{
	sock_inode_cachep = kmem_cache_create("sock_inode_cache",
					      sizeof(struct socket_alloc),
					      0,
					      (SLAB_HWCACHE_ALIGN |
					       SLAB_RECLAIM_ACCOUNT |
					       SLAB_MEM_SPREAD | SLAB_ACCOUNT),
					      init_once);
	if (sock_inode_cachep == NULL)
		return -ENOMEM;
	return 0;
}
2.4 socket文件系统
在 linux 系统中,socket属于文件系统的一部分,网络通信可以被看作对文件的读取。这种特殊的文件系统叫sockfs。
2.4.1 函数register_filesystem的实参sock_fs_type
/*
 * Filesystem type descriptor for "sockfs"; sock_init() passes it to
 * register_filesystem() and kern_mount().
 */
static struct file_system_type sock_fs_type = {
	.name =		"sockfs",
	.mount =	sockfs_mount,
	.kill_sb =	kill_anon_super,
};
2.4.2 函数register_filesystem
定义位于:\linux-4.9.73\fs\filesystems.c
/*
 * register_filesystem - add a filesystem type to the global list
 * @fs: filesystem type to register
 *
 * The name must not contain '.' (enforced with BUG_ON).
 *
 * Returns 0 on success, or -EBUSY if @fs->next is already set or a
 * filesystem with the same name is already on the list.
 */
int register_filesystem(struct file_system_type * fs)
{
	int res = 0;
	struct file_system_type ** p;

	BUG_ON(strchr(fs->name, '.'));
	if (fs->next)
		return -EBUSY;
	write_lock(&file_systems_lock);
	/* Find the slot pointer where this filesystem should be stored. */
	p = find_filesystem(fs->name, strlen(fs->name));
	if (*p)
		res = -EBUSY;	/* slot is occupied: the name is already registered */
	else
		*p = fs;	/* place the filesystem being registered in that slot */
	write_unlock(&file_systems_lock);
	return res;
}
函数find_filesystem
/*
 * Walk the global file_systems list looking for an entry whose name is
 * exactly the first @len bytes of @name.
 *
 * Returns the address of the link that points to the matching entry, or
 * the address of the terminating NULL link when there is no match, so
 * the caller can test *p and, if it is NULL, append there.
 */
static struct file_system_type **find_filesystem(const char *name, unsigned len)
{
	struct file_system_type **p;
	/* Search the global filesystem list for a slot pointer. */
	for (p = &file_systems; *p; p = &(*p)->next)
		/* Match on the first len bytes and require the entry's name to end there. */
		if (strncmp((*p)->name, name, len) == 0 &&
		    !(*p)->name[len])
			break;
	return p;
}
file_systems为全局的文件系统链表头:
/* Head of the list of registered filesystem types, linked through ->next. */
static struct file_system_type *file_systems;
2.4.3 函数kern_mount
定义位于:include\linux\fs.h和fs\namespace.c
/* kern_mount() is kern_mount_data() with no mount data. */
#define kern_mount(type) kern_mount_data(type, NULL)

/*
 * kern_mount_data - mount a filesystem type with MS_KERNMOUNT
 * @type: filesystem type to mount
 * @data: mount data passed through to vfs_kern_mount()
 *
 * Returns the vfsmount from vfs_kern_mount(); the caller checks it
 * with IS_ERR().
 */
struct vfsmount *kern_mount_data(struct file_system_type *type, void *data)
{
	struct vfsmount *mnt;
	/* Mount as a kernel-internal filesystem, using the type name as the device name. */
	mnt = vfs_kern_mount(type, MS_KERNMOUNT, type->name, data);
	if (!IS_ERR(mnt)) {
		/*
		 * it is a longterm mount, don't release mnt until
		 * we unmount before file sys is unregistered
		*/
		real_mount(mnt)->mnt_ns = MNT_NS_INTERNAL;
	}
	return mnt;
}
3 函数kmem_cache_create
kernel里分配小块内存用到的是slab分配器,而slab分配器初始化有两个重要的工作:第一,创建用于申请struct kmem_cache和struct kmem_cache_node的kmem_cache;第二,创建用于常规kmalloc的kmem_cache。创建kmem_cache用到的接口函数即为kmem_cache_create。
/*
 * kmem_cache_create - create a slab cache
 * @name:  cache name, used in the failure messages below
 * @size:  size passed on to the cache as the object size
 * @align: requested alignment
 * @flags: SLAB_* flags; anything outside SLAB_FLAGS_PERMITTED is rejected
 * @ctor:  object constructor, may be NULL
 *
 * Returns the cache on success.  On failure it panics if SLAB_PANIC is
 * set in @flags; otherwise it warns, dumps the stack and returns NULL.
 * An existing cache may be returned instead of a new one when
 * __kmem_cache_alias() finds one.
 */
struct kmem_cache *
kmem_cache_create(const char *name, size_t size, size_t align,
		  unsigned long flags, void (*ctor)(void *))
{
	struct kmem_cache *s = NULL;
	const char *cache_name;
	int err;

	get_online_cpus();
	get_online_mems();
	memcg_get_cache_ids();

	mutex_lock(&slab_mutex);

	err = kmem_cache_sanity_check(name, size);
	if (err) {
		goto out_unlock;
	}

	/* Refuse requests with allocator specific flags */
	if (flags & ~SLAB_FLAGS_PERMITTED) {
		err = -EINVAL;
		goto out_unlock;
	}

	/*
	 * Some allocators will constraint the set of valid flags to a subset
	 * of all flags. We expect them to define CACHE_CREATE_MASK in this
	 * case, and we'll just provide them with a sanitized version of the
	 * passed flags.
	 */
	flags &= CACHE_CREATE_MASK;

	/* If an alias is returned, use it instead of creating a new cache. */
	s = __kmem_cache_alias(name, size, align, flags, ctor);
	if (s)
		goto out_unlock;

	/* A copy of the name is handed to create_cache() and released again if that fails. */
	cache_name = kstrdup_const(name, GFP_KERNEL);
	if (!cache_name) {
		err = -ENOMEM;
		goto out_unlock;
	}

	s = create_cache(cache_name, size, size,
			 calculate_alignment(flags, align, size),
			 flags, ctor, NULL, NULL);
	if (IS_ERR(s)) {
		err = PTR_ERR(s);
		kfree_const(cache_name);
	}

out_unlock:
	mutex_unlock(&slab_mutex);

	/* Release in the reverse order of the acquisitions at the top. */
	memcg_put_cache_ids();
	put_online_mems();
	put_online_cpus();

	if (err) {
		if (flags & SLAB_PANIC)
			panic("kmem_cache_create: Failed to create slab '%s'. Error %d\n",
				name, err);
		else {
			pr_warn("kmem_cache_create(%s) failed with error %d\n",
				name, err);
			dump_stack();
		}
		return NULL;
	}
	return s;
}
EXPORT_SYMBOL(kmem_cache_create);
kmem_cache_create()只是创建一个对象大小为size的缓存(struct kmem_cache),此时并不会调用对象的构造函数,也不会分配slab;之后调用kmem_cache_alloc()分配对象时,如果发现没有空闲对象,才会调用cache_grow()分配一个slab并构造其中的对象,然后再分配对象。