一、ftrace的宏
1. struct tracepoint 結構
使用 struct tracepoint 變量來描述一個 trace point。
//include/linux/tracepoint-defs.h struct tracepoint { const char *name; //trace point的名字,內核中通過hash表管理所有的trace point,找到對應的hash slot后,需要通過name來識別具體的trace point。 struct static_key key; //trace point狀態,0表示disable,1表示enable,static_key_false(&key)判斷的其實就只是key的真假。 struct static_call_key *static_call_key; void *static_call_tramp; void *iterator; int (*regfunc)(void); //添加樁函數的函數 void (*unregfunc)(void); //卸載樁函數的函數 struct tracepoint_func __rcu *funcs; //trace point中所有的樁函數鏈表. 是個數組 }; struct tracepoint_func { void *func; void *data; int prio; };
static key使用見:https://www.cnblogs.com/hellokitty2/p/15026568.html
2. DEFINE_TRACE_FN 展開后是
/* * include/linux/tracepoint.h * 就是定義一個名為 __tracepoint_##_name 的 struct tracepoint 結構, * 然后定義一個名為 __traceiter_##_name 的函數,它對 struct tracepoint::funcs[] 成員數組中的每個函數都進行調用,數組尾部要以NULL結尾。 */ #define DEFINE_TRACE_FN(_name, _reg, _unreg, proto, args) \ static const char __tpstrtab_##_name[] \ __section("__tracepoints_strings") = #_name; \ extern struct static_call_key __SCK__tp_func_##_name; \ int __traceiter_##_name(void *__data, proto); \ struct tracepoint __tracepoint_##_name __used __section("__tracepoints") = { \ .name = __tpstrtab_##_name, \ .key = STATIC_KEY_INIT_FALSE, \ .static_call_key = &__SCK__tp_func_##_name, \ .static_call_tramp = NULL, \ .iterator = &__traceiter_##_name, \ .regfunc = _reg, \ .unregfunc = _unreg, \ .funcs = NULL \ }; \ __TRACEPOINT_ENTRY(_name); \ int __nocfi __traceiter_##_name(void *__data, proto) \ { \ struct tracepoint_func *it_func_ptr; \ void *it_func; \ it_func_ptr = rcu_dereference_raw((&__tracepoint_##_name)->funcs); \ if (it_func_ptr) { \ do { \ it_func = (it_func_ptr)->func; \ __data = (it_func_ptr)->data; \ ((void(*)(void *, proto))(it_func))(__data, args); \ } while ((++it_func_ptr)->func); \ } \ return 0; \ } \ extern struct static_call_key __SCK__tp_func_##_name; \ extern typeof(__traceiter_##_name) __SCT__tp_func_##_name; \ struct static_call_key __SCK__tp_func_##_name = { \ .func = __traceiter_##_name, \ }
3. __DECLARE_TRACE 宏展開后就是:
/*
 * include/linux/tracepoint.h
 *
 * This macro defines the per-tracepoint function set. The commonly used ones
 * are register_trace_##name() and trace_##name##_enabled(). There is a
 * dedicated trace_##name##_rcuidle() variant, and register_trace_prio_##name()
 * registers a probe with an explicit priority.
 *
 * Each "do { ... } while (0)" below is the expanded tracing body and is
 * followed by ';' so that the statement after it parses.
 */
#define __DECLARE_TRACE(name, proto, args, cond, data_proto, data_args) \
	extern int __traceiter_##name(data_proto); \
	extern struct static_call_key __SCK__tp_func_##name; \
	extern typeof(__traceiter_##name) __SCT__tp_func_##name; \
	extern struct tracepoint __tracepoint_##name; \
	static inline void __nocfi trace_##name(proto) \
	{ \
		/* probes run only while the tracepoint's key is true */ \
		if (static_key_false(&__tracepoint_##name.key)) \
			do { \
				struct tracepoint_func *it_func_ptr; \
				int __maybe_unused __idx = 0; \
				void *__data; \
				\
				if (!(cond)) \
					return; \
				/* keep srcu and sched-rcu usage consistent */ \
				preempt_disable_notrace(); \
				it_func_ptr = rcu_dereference_raw((&__tracepoint_##name)->funcs); \
				if (it_func_ptr) { \
					__data = (it_func_ptr)->data; \
					__traceiter_##name(data_args); \
				} \
				preempt_enable_notrace(); \
			} while (0); \
		if (IS_ENABLED(CONFIG_LOCKDEP) && (cond)) { \
			rcu_read_lock_sched_notrace(); \
			rcu_dereference_sched(__tracepoint_##name.funcs); \
			rcu_read_unlock_sched_notrace(); \
		} \
	} \
	static inline void trace_##name##_rcuidle(proto) \
	{ \
		if (static_key_false(&__tracepoint_##name.key)) \
			do { \
				struct tracepoint_func *it_func_ptr; \
				int __maybe_unused __idx = 0; \
				void *__data; \
				\
				if (!(cond)) \
					return; \
				\
				/* srcu can't be used from NMI */ \
				WARN_ON_ONCE(in_nmi()); \
				\
				/* keep srcu and sched-rcu usage consistent */ \
				preempt_disable_notrace(); \
				\
				/* \
				 * For rcuidle callers, use srcu since sched-rcu \
				 * doesn't work from the idle path. \
				 */ \
				__idx = srcu_read_lock_notrace(&tracepoint_srcu); \
				rcu_irq_enter_irqson(); \
				\
				it_func_ptr = rcu_dereference_raw((&__tracepoint_##name)->funcs); \
				if (it_func_ptr) { \
					__data = (it_func_ptr)->data; \
					__traceiter_##name(data_args); \
				} \
				\
				rcu_irq_exit_irqson(); \
				srcu_read_unlock_notrace(&tracepoint_srcu, __idx); \
				\
				preempt_enable_notrace(); \
			} while (0); \
	} \
	static inline int register_trace_##name(void (*probe)(data_proto), void *data) \
	{ \
		return tracepoint_probe_register(&__tracepoint_##name, (void *)probe, data); \
	} \
	static inline int register_trace_prio_##name(void (*probe)(data_proto), void *data, int prio) \
	{ \
		return tracepoint_probe_register_prio(&__tracepoint_##name, (void *)probe, data, prio); \
	} \
	static inline int unregister_trace_##name(void (*probe)(data_proto), void *data) \
	{ \
		return tracepoint_probe_unregister(&__tracepoint_##name, (void *)probe, data); \
	} \
	/* empty body: exists only so the compiler type-checks a callback */ \
	static inline void check_trace_callback_type_##name(void (*cb)(data_proto)) \
	{ \
	} \
	static inline bool trace_##name##_enabled(void) \
	{ \
		return static_key_false(&__tracepoint_##name.key); \
	}
trace_##name(proto) 中判斷 __tracepoint_##name.key 的值為真才會調用執行各個鈎子函數,在下面路徑中會將這個key設置為真。
register_trace_##name() //具體tracepoint的define位置 tracepoint_probe_register //tracepoint.c tracepoint_probe_register_prio //tracepoint.c tracepoint_add_func //tracepoint.c static_key_enable(&tp->key);
也就是說注冊了 hook 之后 key 才會為真,否則為假。
4. 使用 DECLARE_TRACE 的宏
#define DEFINE_TRACE(name, proto, args) \
	DEFINE_TRACE_FN(name, NULL, NULL, PARAMS(proto), PARAMS(args));

/* expands to nothing */
#define TRACE_EVENT_FLAGS(event, flag)
/* expands to nothing */
#define TRACE_EVENT_PERF_PERM(event, expr...)

/*
 * include/linux/tracepoint-defs.h
 * Direct use is discouraged; this header is included at the very head of the
 * include chain.
 */
#define DECLARE_TRACEPOINT(tp) extern struct tracepoint __tracepoint_##tp

/*
 * Recommended. It does the same as trace_##name##_enabled(void) but is safe
 * to use in header files, whereas trace_##name##_enabled(void) is not --
 * presumably because a function must not be defined more than once.
 */
#define tracepoint_enabled(tp) static_key_false(&(__tracepoint_##tp).key)

/*
 * include/linux/tracepoint.h
 * Expands to the function set generated by __DECLARE_TRACE, including
 * register_trace_##name, trace_##name##_enabled, etc.
 */
#define DECLARE_TRACE(name, proto, args) \
	__DECLARE_TRACE(name, PARAMS(proto), PARAMS(args), cpu_online(raw_smp_processor_id()), PARAMS(void *__data, proto), PARAMS(__data, args))

/*
 * The only difference from DECLARE_TRACE is that argument 4 is additionally
 * AND-ed with the cond parameter. It is used by trace_##name and
 * trace_##name##_rcuidle: when cond evaluates to false they return at once.
 */
#define DECLARE_TRACE_CONDITION(name, proto, args, cond) \
	__DECLARE_TRACE(name, PARAMS(proto), PARAMS(args), cpu_online(raw_smp_processor_id()) && (PARAMS(cond)), PARAMS(void *__data, proto), PARAMS(__data, args))

/* include/linux/tracepoint.h */
#define DECLARE_EVENT_CLASS(name, proto, args, tstruct, assign, print)
#define DEFINE_EVENT(template, name, proto, args) \
	DECLARE_TRACE(name, PARAMS(proto), PARAMS(args))
#define DEFINE_EVENT_FN(template, name, proto, args, reg, unreg) \
	DECLARE_TRACE(name, PARAMS(proto), PARAMS(args))
#define DEFINE_EVENT_PRINT(template, name, proto, args, print) \
	DECLARE_TRACE(name, PARAMS(proto), PARAMS(args))
#define DEFINE_EVENT_CONDITION(template, name, proto, args, cond) \
	DECLARE_TRACE_CONDITION(name, PARAMS(proto), PARAMS(args), PARAMS(cond))

#define TRACE_EVENT(name, proto, args, struct, assign, print) \
	DECLARE_TRACE(name, PARAMS(proto), PARAMS(args))
#define TRACE_EVENT_FN(name, proto, args, struct, assign, print, reg, unreg) \
	DECLARE_TRACE(name, PARAMS(proto), PARAMS(args))
#define TRACE_EVENT_FN_COND(name, proto, args, cond, struct, assign, print, reg, unreg) \
	DECLARE_TRACE_CONDITION(name, PARAMS(proto), PARAMS(args), PARAMS(cond))
#define TRACE_EVENT_CONDITION(name, proto, args, cond, struct, assign, print) \
	DECLARE_TRACE_CONDITION(name, PARAMS(proto), PARAMS(args), PARAMS(cond))

/* same empty definitions as above, repeated as in the excerpt */
#define TRACE_EVENT_FLAGS(event, flag)
#define TRACE_EVENT_PERF_PERM(event, expr...)

/* No-op variant: trace_##name() does nothing and is never enabled. */
#define DECLARE_EVENT_NOP(name, proto, args) \
	static inline void trace_##name(proto) \
	{ } \
	static inline bool trace_##name##_enabled(void) \
	{ \
		return false; \
	}

#define TRACE_EVENT_NOP(name, proto, args, struct, assign, print) \
	DECLARE_EVENT_NOP(name, PARAMS(proto), PARAMS(args))
#define DECLARE_EVENT_CLASS_NOP(name, proto, args, tstruct, assign, print)
#define DEFINE_EVENT_NOP(template, name, proto, args) \
	DECLARE_EVENT_NOP(name, PARAMS(proto), PARAMS(args))
tracepoint.h 中的定義可能不是最終的,因為有的文件中會先執行 #undef XXX,然后重新進行 define。觀察可以發現,這些宏主要使用的是 DECLARE_TRACE,對照展開后的函數,顯然是不完整的,因為 DEFINE_TRACE 相關的部分沒有。因此每個trace應該還存在對 DEFINE_TRACE 進行使用的一部分。兩者都存在,一個trace才圓滿。
5. 使用 DEFINE_TRACE 的部分
/* include/trace/define_trace.h */ #undef TRACE_EVENT #define TRACE_EVENT(name, proto, args, tstruct, assign, print) DEFINE_TRACE(name, PARAMS(proto), PARAMS(args)) #undef TRACE_EVENT_CONDITION #define TRACE_EVENT_CONDITION(name, proto, args, cond, tstruct, assign, print) \ TRACE_EVENT(name, PARAMS(proto), PARAMS(args), PARAMS(tstruct), PARAMS(assign), PARAMS(print)) #undef TRACE_EVENT_FN #define TRACE_EVENT_FN(name, proto, args, tstruct, assign, print, reg, unreg) \ DEFINE_TRACE_FN(name, reg, unreg, PARAMS(proto), PARAMS(args)) #undef TRACE_EVENT_FN_COND #define TRACE_EVENT_FN_COND(name, proto, args, cond, tstruct, assign, print, reg, unreg) \ DEFINE_TRACE_FN(name, reg, unreg, PARAMS(proto), PARAMS(args)) #undef TRACE_EVENT_NOP #define TRACE_EVENT_NOP(name, proto, args, struct, assign, print) #undef DEFINE_EVENT_NOP #define DEFINE_EVENT_NOP(template, name, proto, args) #undef DEFINE_EVENT #define DEFINE_EVENT(template, name, proto, args) DEFINE_TRACE(name, PARAMS(proto), PARAMS(args)) #undef DEFINE_EVENT_FN #define DEFINE_EVENT_FN(template, name, proto, args, reg, unreg) \ DEFINE_TRACE_FN(name, reg, unreg, PARAMS(proto), PARAMS(args)) #undef DEFINE_EVENT_PRINT #define DEFINE_EVENT_PRINT(template, name, proto, args, print) \ DEFINE_TRACE(name, PARAMS(proto), PARAMS(args)) #undef DEFINE_EVENT_CONDITION #define DEFINE_EVENT_CONDITION(template, name, proto, args, cond) \ DEFINE_EVENT(template, name, PARAMS(proto), PARAMS(args)) #undef DECLARE_TRACE #define DECLARE_TRACE(name, proto, args) DEFINE_TRACE(name, PARAMS(proto), PARAMS(args))
6. EXPORT_TRACEPOINT_SYMBOL_GPL 和 EXPORT_TRACEPOINT_SYMBOL
導出這些trace符號后,才能在模塊中使用。
/*
 * include/linux/tracepoint.h
 *
 * After expansion each macro exports the three per-tracepoint symbols:
 * __tracepoint_<name>, __traceiter_<name> and __SCK__tp_func_<name>.
 * Neither macro ends with ';' -- the user writes it after the invocation,
 * e.g. EXPORT_TRACEPOINT_SYMBOL_GPL(foo);
 */
#define EXPORT_TRACEPOINT_SYMBOL_GPL(name) \
	EXPORT_SYMBOL_GPL(__tracepoint_##name); \
	EXPORT_SYMBOL_GPL(__traceiter_##name); \
	EXPORT_SYMBOL_GPL(__SCK__tp_func_##name)

#define EXPORT_TRACEPOINT_SYMBOL(name) \
	EXPORT_SYMBOL(__tracepoint_##name); \
	EXPORT_SYMBOL(__traceiter_##name); \
	EXPORT_SYMBOL(__SCK__tp_func_##name)
7. 定義一個trace,TRACE_EVENT 各個成員使用的宏
/*
 * include/linux/tracepoint.h
 * Pass-through helpers: each expands to exactly the arguments it was given.
 * They exist so that a comma-separated list can travel through another macro
 * as one argument.
 */
#define PARAMS(args...) args
#define TP_PROTO(args...) args
#define TP_ARGS(args...) args
#define TP_CONDITION(args...) args

/* include/trace/trace_events.h */
#define TP_STRUCT__entry(args...) args
#define TP_fast_assign(args...) args
/* Builds the quoted format string followed by the stringified argument list. */
#define TP_printk(fmt, args...) "\"" fmt "\", " __stringify(args)
include/trace/events/sched.h 文件中定義了大量的CPU調度相關的trace,但是它只include了 linux/tracepoint.h 文件,說明其使用的宏全部都是來自linux/tracepoint.h 文件的,但是 tracepoint.h 中又包含了其它頭文件,不排除其它頭文件中又包含了其它頭文件,比如 include/trace/trace_events.h 。
8. 以 sched_migrate_task 為例來看 TRACE_EVENT
//include/trace/events/sched.h TRACE_EVENT(sched_migrate_task, TP_PROTO(struct task_struct *p, int dest_cpu), TP_ARGS(p, dest_cpu), TP_STRUCT__entry( __array( char, comm, TASK_COMM_LEN ) __field( pid_t, pid ) __field( int, prio ) __field( int, orig_cpu ) __field( int, dest_cpu ) __field( int, running ) ), TP_fast_assign( memcpy(__entry->comm, p->comm, TASK_COMM_LEN); __entry->pid = p->pid; __entry->prio = p->prio; /* XXX SCHED_DEADLINE */ __entry->orig_cpu = task_cpu(p); __entry->dest_cpu = dest_cpu; __entry->running = (p->state == TASK_RUNNING); ), TP_printk("comm=%s pid=%d prio=%d orig_cpu=%d dest_cpu=%d running=%d", __entry->comm, __entry->pid, __entry->prio, __entry->orig_cpu, __entry->dest_cpu, __entry->running) );
include/linux/tracepoint.h 中有注釋:__field(pid_t, prev_pid) 等於 pid_t prev_pid; __array(char, prev_comm, TASK_COMM_LEN) 等於 char prev_comm[TASK_COMM_LEN];
聲明的 'local variable' 叫做 '__entry',可以在 TP_fast_assign 中使用 __entry->XX 來引用。TP_STRUCT__entry 指定環形緩沖區中的存儲格式,也是 /sys/kernel/debug/tracing/events/<*>/format 導出到用戶空間的格式。
按照如下宏定義進行展開:
/*
 * (1) Declaration side, as defined in include/linux/tracepoint.h.
 */
#define TRACE_EVENT(name, proto, args, struct, assign, print) \
	DECLARE_TRACE(name, PARAMS(proto), PARAMS(args))
#define DECLARE_TRACE(name, proto, args) \
	__DECLARE_TRACE(name, PARAMS(proto), PARAMS(args), cpu_online(raw_smp_processor_id()), PARAMS(void *__data, proto), PARAMS(__data, args))

/* Mapped directly, that is: */
#undef TRACE_EVENT
#define TRACE_EVENT(name, proto, args, struct, assign, print) \
	__DECLARE_TRACE(name, PARAMS(proto), PARAMS(args), cpu_online(raw_smp_processor_id()), PARAMS(void *__data, proto), PARAMS(__data, args))

/*
 * (2) Definition side, as redefined in include/trace/define_trace.h after
 * "#undef TRACE_EVENT".
 */
#undef TRACE_EVENT
#define TRACE_EVENT(name, proto, args, struct, assign, print) \
	DEFINE_TRACE(name, PARAMS(proto), PARAMS(args))
#define DEFINE_TRACE(name, proto, args) \
	DEFINE_TRACE_FN(name, NULL, NULL, PARAMS(proto), PARAMS(args));

/* Mapped directly, that is: */
#undef TRACE_EVENT
#define TRACE_EVENT(name, proto, args, struct, assign, print) \
	DEFINE_TRACE_FN(name, NULL, NULL, PARAMS(proto), PARAMS(args));
全部展開后為:
/*
 * Hand expansion of __DECLARE_TRACE for sched_migrate_task. The "#define"
 * header line is kept only to label which macro the expansion comes from; the
 * body already has name/proto/args/cond substituted.
 */
#define __DECLARE_TRACE(name, proto, args, cond, data_proto, data_args) \
	extern int __traceiter_sched_migrate_task(void *__data, struct task_struct *p, int dest_cpu); \
	extern struct static_call_key __SCK__tp_func_sched_migrate_task; \
	extern typeof(__traceiter_sched_migrate_task) __SCT__tp_func_sched_migrate_task; \
	extern struct tracepoint __tracepoint_sched_migrate_task; \
	static inline void __nocfi trace_sched_migrate_task(struct task_struct *p, int dest_cpu) \
	{ \
		if (static_key_false(&__tracepoint_sched_migrate_task.key)) \
			do { \
				struct tracepoint_func *it_func_ptr; \
				int __maybe_unused __idx = 0; \
				void *__data; \
				\
				if (!cpu_online(raw_smp_processor_id())) \
					return; \
				/* keep srcu and sched-rcu usage consistent */ \
				preempt_disable_notrace(); \
				it_func_ptr = rcu_dereference_raw((&__tracepoint_sched_migrate_task)->funcs); \
				if (it_func_ptr) { \
					__data = (it_func_ptr)->data; \
					__traceiter_sched_migrate_task(__data, p, dest_cpu); \
				} \
				preempt_enable_notrace(); \
			} while (0); \
		if (IS_ENABLED(CONFIG_LOCKDEP) && cpu_online(raw_smp_processor_id())) { \
			rcu_read_lock_sched_notrace(); \
			rcu_dereference_sched(__tracepoint_sched_migrate_task.funcs); \
			rcu_read_unlock_sched_notrace(); \
		} \
	} \
	static inline void trace_sched_migrate_task_rcuidle(struct task_struct *p, int dest_cpu) \
	{ \
		if (static_key_false(&__tracepoint_sched_migrate_task.key)) \
			do { \
				struct tracepoint_func *it_func_ptr; \
				int __maybe_unused __idx = 0; \
				void *__data; \
				\
				if (!cpu_online(raw_smp_processor_id())) \
					return; \
				\
				/* srcu can't be used from NMI */ \
				WARN_ON_ONCE(in_nmi()); \
				\
				/* keep srcu and sched-rcu usage consistent */ \
				preempt_disable_notrace(); \
				\
				/* \
				 * For rcuidle callers, use srcu since sched-rcu \
				 * doesn't work from the idle path. \
				 */ \
				__idx = srcu_read_lock_notrace(&tracepoint_srcu); \
				rcu_irq_enter_irqson(); \
				\
				it_func_ptr = rcu_dereference_raw((&__tracepoint_sched_migrate_task)->funcs); \
				if (it_func_ptr) { \
					__data = (it_func_ptr)->data; \
					__traceiter_sched_migrate_task(__data, p, dest_cpu); \
				} \
				\
				rcu_irq_exit_irqson(); \
				srcu_read_unlock_notrace(&tracepoint_srcu, __idx); \
				\
				preempt_enable_notrace(); \
			} while (0); \
	} \
	static inline int register_trace_sched_migrate_task(void (*probe)(void *__data, struct task_struct *p, int dest_cpu), void *data) \
	{ \
		return tracepoint_probe_register(&__tracepoint_sched_migrate_task, (void *)probe, data); \
	} \
	static inline int register_trace_prio_sched_migrate_task(void (*probe)(void *__data, struct task_struct *p, int dest_cpu), void *data, int prio) \
	{ \
		return tracepoint_probe_register_prio(&__tracepoint_sched_migrate_task, (void *)probe, data, prio); \
	} \
	static inline int unregister_trace_sched_migrate_task(void (*probe)(void *__data, struct task_struct *p, int dest_cpu), void *data) \
	{ \
		return tracepoint_probe_unregister(&__tracepoint_sched_migrate_task, (void *)probe, data); \
	} \
	static inline void check_trace_callback_type_sched_migrate_task(void (*cb)(void *__data, struct task_struct *p, int dest_cpu)) \
	{ \
	} \
	static inline bool trace_sched_migrate_task_enabled(void) \
	{ \
		return static_key_false(&__tracepoint_sched_migrate_task.key); \
	}

/*
 * Hand expansion of DEFINE_TRACE_FN for sched_migrate_task (with _reg and
 * _unreg both NULL). It defines the tracepoint object and the iterator that
 * calls every registered probe.
 */
#define DEFINE_TRACE_FN(_name, _reg, _unreg, proto, args) \
	static const char __tpstrtab_sched_migrate_task[] \
	__section("__tracepoints_strings") = "sched_migrate_task"; \
	extern struct static_call_key __SCK__tp_func_sched_migrate_task; \
	int __traceiter_sched_migrate_task(void *__data, struct task_struct *p, int dest_cpu); \
	struct tracepoint __tracepoint_sched_migrate_task __used __section("__tracepoints") = { \
		.name = __tpstrtab_sched_migrate_task, \
		.key = STATIC_KEY_INIT_FALSE, \
		.static_call_key = &__SCK__tp_func_sched_migrate_task, \
		.static_call_tramp = NULL, \
		.iterator = &__traceiter_sched_migrate_task, \
		.regfunc = NULL, \
		.unregfunc = NULL, \
		.funcs = NULL \
	}; \
	__TRACEPOINT_ENTRY(sched_migrate_task); \
	int __nocfi __traceiter_sched_migrate_task(void *__data, struct task_struct *p, int dest_cpu) \
	{ \
		struct tracepoint_func *it_func_ptr; \
		void *it_func; \
		it_func_ptr = rcu_dereference_raw((&__tracepoint_sched_migrate_task)->funcs); \
		if (it_func_ptr) { \
			/* walk funcs[] until the entry whose func is NULL */ \
			do { \
				it_func = (it_func_ptr)->func; \
				__data = (it_func_ptr)->data; \
				((void(*)(void *, struct task_struct *p, int dest_cpu))(it_func))(__data, p, dest_cpu); \
			} while ((++it_func_ptr)->func); \
		} \
		return 0; \
	} \
	extern struct static_call_key __SCK__tp_func_sched_migrate_task; \
	extern typeof(__traceiter_sched_migrate_task) __SCT__tp_func_sched_migrate_task; \
	struct static_call_key __SCK__tp_func_sched_migrate_task = { \
		.func = __traceiter_sched_migrate_task, \
	}
TODO: 其它部分是怎么起作用的?
從展開后的內容可以看到,當調用 trace_sched_migrate_task() 進行trace的時候,會調用 __traceiter_sched_migrate_task() 來遍歷 struct tracepoint::funcs 數組中的每一個函數進行trace,也就是說一個trace上可以注冊多個hook函數。
若使用 EXPORT_TRACEPOINT_SYMBOL_GPL(sched_migrate_task) 導出,上面展開內容中的 __tracepoint_sched_migrate_task、__traceiter_sched_migrate_task、__SCK__tp_func_sched_migrate_task 三個符號會被導出來。
9. 一個trace上注冊多個hook
既然一個trace上可以注冊多個hook,那么一定會涉及到這些hook函數的調用次序的問題,見 tracepoint_probe_register 實現可知,有一個默認優先級 TRACEPOINT_DEFAULT_PRIO=10,注冊函數中會傳遞給 struct tracepoint_func::prio,在插入到 struct tracepoint::funcs 數組時會判斷優先級,優先級數值越大,越插在靠前的位置,相同優先級的話,后注冊的插在后面。 比如此例子中,注冊默認優先級的使用函數 register_trace_sched_migrate_task,自己指定優先級使用函數 register_trace_prio_sched_migrate_task。
/*
 * Register @probe (with its private @data) on tracepoint @tp using the
 * default priority TRACEPOINT_DEFAULT_PRIO. Returns whatever
 * tracepoint_probe_register_prio() returns.
 */
int tracepoint_probe_register(struct tracepoint *tp, void *probe, void *data)
{
	return tracepoint_probe_register_prio(tp, probe, data, TRACEPOINT_DEFAULT_PRIO);
}
EXPORT_SYMBOL_GPL(tracepoint_probe_register);
二、Google搞的vendor hook
1. hook 的 DEFINE_HOOK_FN 解析后是
/*
 * include/trace/hooks/vendor_hooks.h
 *
 * Vendor-hook counterpart of DEFINE_TRACE_FN: defines the tracepoint object
 * __tracepoint_##_name and the iterator __traceiter_##_name. The two
 * differences from the ftrace version are marked inside the iterator.
 */
#define DEFINE_HOOK_FN(_name, _reg, _unreg, proto, args) \
	static const char __tpstrtab_##_name[] \
	__section("__tracepoints_strings") = #_name; \
	extern struct static_call_key __SCK__tp_func_##_name; \
	int __traceiter_##_name(void *__data, proto); \
	struct tracepoint __tracepoint_##_name __used __section("__tracepoints") = { \
		.name = __tpstrtab_##_name, \
		.key = STATIC_KEY_INIT_FALSE, \
		.static_call_key = &__SCK__tp_func_##_name, \
		.static_call_tramp = NULL, \
		.iterator = &__traceiter_##_name, \
		.regfunc = _reg, \
		.unregfunc = _unreg, \
		.funcs = NULL }; \
	__TRACEPOINT_ENTRY(_name); \
	int __nocfi __traceiter_##_name(void *__data, proto) \
	{ \
		struct tracepoint_func *it_func_ptr; \
		void *it_func; \
		\
		/* Difference: funcs is read directly; ftrace uses rcu_dereference_raw(). */ \
		it_func_ptr = (&__tracepoint_##_name)->funcs; \
		/* Difference: the first func is fetched up front; ftrace tests it_func_ptr first. */ \
		it_func = (it_func_ptr)->func; \
		do { \
			__data = (it_func_ptr)->data; \
			((void(*)(void *, proto))(it_func))(__data, args); \
			it_func = READ_ONCE((++it_func_ptr)->func); \
		} while (it_func); \
		return 0; \
	} \
	extern struct static_call_key __SCK__tp_func_##_name; \
	extern typeof(__traceiter_##_name) __SCT__tp_func_##_name; \
	struct static_call_key __SCK__tp_func_##_name = { \
		.func = __traceiter_##_name, \
	}
注意備注上的一些和ftrace之間的不同點。
2. hook 的 __DECLARE_HOOK 解析后是:
/*
 * include/trace/hooks/vendor_hooks.h
 *
 * Vendor-hook counterpart of __DECLARE_TRACE. It generates only
 * trace_##name(), trace_##name##_enabled() and register_trace_##name();
 * the latter registers through android_rvh_probe_register().
 */
#define __DECLARE_HOOK(name, proto, args, cond, data_proto, data_args) \
	extern int __traceiter_##name(data_proto); \
	extern struct static_call_key __SCK__tp_func_##name; \
	extern typeof(__traceiter_##name) __SCT__tp_func_##name; \
	extern struct tracepoint __tracepoint_##name; \
	\
	static inline void __nocfi trace_##name(proto) \
	{ \
		if (static_key_false(&__tracepoint_##name.key)) \
			do { \
				struct tracepoint_func *it_func_ptr; \
				void *__data; \
				\
				if (!(cond)) \
					return; \
				\
				it_func_ptr = (&__tracepoint_##name)->funcs; \
				if (it_func_ptr) { \
					__data = (it_func_ptr)->data; \
					__traceiter_##name(data_args); \
				} \
			} while (0); \
	} \
	static inline bool trace_##name##_enabled(void) \
	{ \
		return static_key_false(&__tracepoint_##name.key); \
	} \
	static inline int register_trace_##name(void (*probe)(data_proto), void *data) \
	{ \
		return android_rvh_probe_register(&__tracepoint_##name, (void *)probe, data); \
	} \
	/* vendor hooks cannot be unregistered */
相比於ftrace,hook的trace 刪除了 trace_##name##_rcuidle()、register_trace_prio_##name()、unregister_trace_##name()、check_trace_callback_type_##name()
3. 其它宏
/*
 * include/trace/hooks/vendor_hooks.h
 *
 * DECLARE_RESTRICTED_HOOK has two alternative definitions; only one is active
 * for a given translation unit.
 * NOTE(review): the selecting condition is restored here from the Android
 * common kernel header as CREATE_TRACE_POINTS -- confirm against the exact
 * kernel tree in use.
 */
#ifdef CREATE_TRACE_POINTS

/* Definition side: emits the tracepoint object and its iterator. */
#undef DECLARE_RESTRICTED_HOOK
#define DECLARE_RESTRICTED_HOOK(name, proto, args, cond) \
	DEFINE_HOOK_FN(name, NULL, NULL, PARAMS(proto), PARAMS(args))

#else /* !CREATE_TRACE_POINTS */

/* Declaration side: emits trace_##name(), register_trace_##name(), ... */
#undef DECLARE_RESTRICTED_HOOK
#define DECLARE_RESTRICTED_HOOK(name, proto, args, cond) \
	__DECLARE_HOOK(name, PARAMS(proto), PARAMS(args), cond, PARAMS(void *__data, proto), PARAMS(__data, args))

#endif /* CREATE_TRACE_POINTS */
4. 總結
Google的vendor hook在ftrace的基礎上做了改動,由於Google的Hook宏刪除了ftrace中的 register_trace_prio_##name(),因此不能注冊帶有優先級的鈎子函數了。
三、實驗
1. 對5.10內核中的 util_est_update 中的trace添加hook
/* kernel/sched/fair.c (excerpt; elided code is marked with "...") */
static inline void util_est_update(struct cfs_rq *cfs_rq, struct task_struct *p, bool task_sleep)
{
	/* written by the vendor hook; non-zero makes the function return early */
	int ret = 0;

	/* ... */

	/* Google's vendor hook */
	trace_android_rvh_util_est_update(cfs_rq, p, task_sleep, &ret);
	if (ret)
		return;

	/* ... */

	/* ordinary ftrace tracepoint */
	trace_sched_util_est_se_tp(&p->se);
}
這兩個trace符號Google已經導出來了:
EXPORT_TRACEPOINT_SYMBOL_GPL(android_rvh_util_est_update);	/* vendor_hooks.c */
EXPORT_TRACEPOINT_SYMBOL_GPL(sched_util_est_se_tp);		/* core.c */
2. 實驗代碼
/* 1. Headers */
#include <trace/events/sched.h>	/* declares the sched_util_est_se_tp tracepoint */
/* NOTE(review): the vendor hook is expected to be declared here -- confirm for your tree. */
#include <trace/hooks/sched.h>

/*
 * 2. Handler (probe) functions.
 * A handler takes "void *data" first, followed by the same parameters as the
 * matching trace_##name() call. This is the data_proto that
 * register_trace_##name() expects.
 */

/* Hook point: util_est_update() in fair.c. Registered first. */
void android_rvh_util_est_update_handler(void *data, struct cfs_rq *cfs_rq, struct task_struct *p, bool task_sleep, int *ret_o)
{
	struct util_est *se_ue = &p->se.avg.util_est;
	struct util_est *rq_ue = &cfs_rq->avg.util_est;

	trace_printk("start: first_register: se_ue->enqueued=%d, se_ue->ewma=%d, rq_ue->enqueued=%d, rq_ue->ewma=%d, task_sleep=%d\n",
		     se_ue->enqueued, se_ue->ewma, rq_ue->enqueued, rq_ue->ewma, task_sleep);

	/* 0 lets util_est_update() carry on with its normal path */
	*ret_o = 0;
}

/* Same hook point, registered second. */
void android_rvh_util_est_update_handler_second(void *data, struct cfs_rq *cfs_rq, struct task_struct *p, bool task_sleep, int *ret_o)
{
	struct util_est *se_ue = &p->se.avg.util_est;
	struct util_est *rq_ue = &cfs_rq->avg.util_est;

	trace_printk("start: second_register: se_ue->enqueued=%d, se_ue->ewma=%d, rq_ue->enqueued=%d, rq_ue->ewma=%d, task_sleep=%d\n",
		     se_ue->enqueued, se_ue->ewma, rq_ue->enqueued, rq_ue->ewma, task_sleep);

	*ret_o = 0;
}

/*
 * ftrace tracepoint handler registered with the default priority (10).
 * The two sched_util_est_se_tp handlers differ only in the printed prio.
 */
void sched_util_est_se_tp_handler(void *data, struct sched_entity *se)
{
	static int count = 0;
	int prio = 10;	/* only used in the debug output */

	if (entity_is_task(se)) {
		struct task_struct *p = container_of(se, struct task_struct, se);
		struct rq *rq = cpu_rq(task_cpu(p));
		struct cfs_rq *cfs_rq = &rq->cfs;
		struct util_est *se_ue = &p->se.avg.util_est;
		struct util_est *rq_ue = &cfs_rq->avg.util_est;

		trace_printk("end: count=%d, prio=%d, se_ue->enqueued=%d, se_ue->ewma=%d, rq_ue->enqueued=%d, rq_ue->ewma=%d\n",
			     count++, prio, se_ue->enqueued, se_ue->ewma, rq_ue->enqueued, rq_ue->ewma);
	} else {
		trace_printk("end: se is not task\n");
	}
}

/* ftrace tracepoint handler registered with priority 12. */
void sched_util_est_se_tp_handler_prio_12(void *data, struct sched_entity *se)
{
	static int count = 0;
	int prio = 12;	/* only used in the debug output */

	if (entity_is_task(se)) {
		struct task_struct *p = container_of(se, struct task_struct, se);
		struct rq *rq = cpu_rq(task_cpu(p));
		struct cfs_rq *cfs_rq = &rq->cfs;
		struct util_est *se_ue = &p->se.avg.util_est;
		struct util_est *rq_ue = &cfs_rq->avg.util_est;

		trace_printk("end: count=%d, prio=%d, se_ue->enqueued=%d, se_ue->ewma=%d, rq_ue->enqueued=%d, rq_ue->ewma=%d\n",
			     count++, prio, se_ue->enqueued, se_ue->ewma, rq_ue->enqueued, rq_ue->ewma);
	} else {
		trace_printk("end: se is not task\n");
	}
}

/*
 * 3. Register the handlers; call this from the module's init path.
 * The calls keep the order used in the experiment. Returns 0 on success or
 * the first non-zero value returned by a registration call.
 */
static int register_util_est_hooks(void)
{
	int ret;

	/* common registration (no explicit priority) */
	ret = register_trace_android_rvh_util_est_update(android_rvh_util_est_update_handler, NULL);
	if (ret)
		return ret;

	ret = register_trace_sched_util_est_se_tp(sched_util_est_se_tp_handler, NULL);
	if (ret)
		return ret;

	/* Google vendor hooks cannot take a priority: no register_trace_prio_*() is defined */
	ret = register_trace_android_rvh_util_est_update(android_rvh_util_est_update_handler_second, NULL);
	if (ret)
		return ret;

	/* ftrace tracepoint registered with an explicit priority */
	return register_trace_prio_sched_util_est_se_tp(sched_util_est_se_tp_handler_prio_12, NULL, 12);
}
3. 實驗結果,打印的前后關系:
# echo 1 > tracing_on # cat trace_pipe <...>-338 [005] d..3 32.158404: sched_util_est_se_tp_handler_prio_12: end: count=28494, prio=12, se_ue->enqueued=39, se_ue->ewma=48, rq_ue->enqueued=87, rq_ue->ewma=0 <...>-338 [005] d..3 32.158404: sched_util_est_se_tp_handler: end: count=28493, prio=10, se_ue->enqueued=39, se_ue->ewma=48, rq_ue->enqueued=87, rq_ue->ewma=0 <...>-338 [005] d..2 32.158410: android_rvh_util_est_update_handler: start: first_register: se_ue->enqueued=39, se_ue->ewma=48, rq_ue->enqueued=87, rq_ue->ewma=0, task_sleep=1 <...>-338 [005] d..2 32.158410: android_rvh_util_est_update_handler_second: start: second_register: se_ue->enqueued=39, se_ue->ewma=48, rq_ue->enqueued=87, rq_ue->ewma=0, task_sleep=1
普通ftrace,注冊時指定的優先級數值越大,越先調用。vendor hook 沒有帶有優先級注冊的鈎子函數,先注冊的鈎子函數調用在前,后注冊的鈎子函數調用在后。
看代碼實現,就算是不執行 “echo 1 > tracing_on” 這些鈎子函數應該也會被調用執行,只不過不會打印出來。
4. 另一種注冊trace hook的方法
struct tracepoints_table { const char *name; void *func; struct tracepoint *tp; bool registered; }; static struct tracepoints_table g_tracepoints_table[] = { {.name = "android_rvh_util_est_update", .func = android_rvh_util_est_update_handler}, {.name = "sched_util_est_se_tp", .func = sched_util_est_se_tp_handler}, }; static void lookup_tracepoints(struct tracepoint *tp, void *ignore) { int i; for (i = 0; i < ARRAY_SIZE(g_tracepoints_table); i++) { if (!strcmp(g_tracepoints_table[i].name, tp->name)) g_tracepoints_table[i].tp = tp; } } static void register_tracepoints_table(void) { int i, ret; struct tracepoints_table *tt; for_each_kernel_tracepoint(lookup_tracepoints, NULL); //找到匹配的tracepoint結構 for (i = 0; i < ARRAY_SIZE(g_tracepoints_table); i++) { tt = &g_tracepoints_table[i]; if (tt->tp) { ret = tracepoint_probe_register(tt->tp, tt->func, NULL); if (ret) { pr_info("couldn't activate tracepoint %pf\n", tt->func); tracepoint_cleanup(i); } tt->registered = true; } } } void tracepoint_cleanup(int index) { int i; struct tracepoints_table *tt; for (i = 0; i < index; i++) { tracepoints_table *tt = &g_tracepoints_table[i]; if (tt->registered) { tracepoint_probe_unregister(tt->tp, tt->func, NULL); tt->registered = false; } } }
可見這種注冊需要遍歷 tracepoint 區域對name進行對比,效率比較低,優點是涉及的文件比較少。
