参考:http://lwn.net/Articles/322666/
http://blog.csdn.net/lcw_202/article/details/7290775
http://m.blog.chinaunix.net/uid-14528823-id-4567325.html
1、静态探测点:在内核代码中直接调用ftrace提供的相应接口来实现。之所以称为“静态”,是因为这些探测点写死在内核源码中,随内核一起静态编译,内核编译完成后无法再动态修改。开启ftrace相关的内核配置选项后,内核已在一些关键位置设置了静态探测点,需要时即可查看相应的信息。
2、动态探测点,基本原理为:利用mcount机制,在内核编译时于每个函数入口保留数个字节;使用ftrace时,再将保留的字节替换为所需的指令,比如跳转到执行探测操作的代码。
ftrace利用了gcc的profile特性:gcc的 -pg 选项会在每个函数的入口处插入对mcount的调用。
如果ftrace编写了自己的mcount stub函数,则可借此实现trace功能。
但是,在每个内核函数入口加入trace代码,必然影响内核的性能,为了减小对内核性能的影响,ftrace支持动态trace功能。
当CONFIG_DYNAMIC_FTRACE被选中后,内核编译时会调用recordmcount.pl脚本,将每个函数的地址写入一个特殊的段:__mcount_loc。
1、scripts/Makefile.build:
ifdef CONFIG_FTRACE_MCOUNT_RECORD ifdef BUILD_C_RECORDMCOUNT ifeq ("$(origin RECORDMCOUNT_WARN)", "command line") RECORDMCOUNT_FLAGS = -w endif # Due to recursion, we must skip empty.o. # The empty.o file is created in the make process in order to determine # the target endianness and word size. It is made before all other C # files, including recordmcount. sub_cmd_record_mcount = \ if [ $(@) != "scripts/mod/empty.o" ]; then \ $(objtree)/scripts/recordmcount $(RECORDMCOUNT_FLAGS) "$(@)"; \ fi; recordmcount_source := $(srctree)/scripts/recordmcount.c \ $(srctree)/scripts/recordmcount.h else sub_cmd_record_mcount = set -e ; perl $(srctree)/scripts/recordmcount.pl "$(ARCH)" \ "$(if $(CONFIG_CPU_BIG_ENDIAN),big,little)" \ "$(if $(CONFIG_64BIT),64,32)" \ "$(OBJDUMP)" "$(OBJCOPY)" "$(CC) $(KBUILD_CFLAGS)" \ "$(LD)" "$(NM)" "$(RM)" "$(MV)" \ "$(if $(part-of-module),1,0)" "$(@)"; recordmcount_source := $(srctree)/scripts/recordmcount.pl endif cmd_record_mcount = \ if [ "$(findstring -pg,$(_c_flags))" = "-pg" ]; then \ $(sub_cmd_record_mcount) \ fi; endif
define rule_cc_o_c $(call echo-cmd,checksrc) $(cmd_checksrc) \ $(call echo-cmd,cc_o_c) $(cmd_cc_o_c); \ $(cmd_modversions) \ $(call echo-cmd,record_mcount) \ $(cmd_record_mcount) \ scripts/basic/fixdep $(depfile) $@ '$(call make-cmd,cc_o_c)' > \ $(dot-target).tmp; \ rm -f $(depfile); \ mv -f $(dot-target).tmp $(dot-target).cmd endef
# Built-in and composite module parts $(obj)/%.o: $(src)/%.c $(recordmcount_source) FORCE $(call cmd,force_checksrc) $(call if_changed_rule,cc_o_c)
2、include/asm-generic/vmlinux.lds.h
#ifdef CONFIG_FTRACE_MCOUNT_RECORD #define MCOUNT_REC() . = ALIGN(8); \ VMLINUX_SYMBOL(__start_mcount_loc) = .; \ *(__mcount_loc) \ VMLINUX_SYMBOL(__stop_mcount_loc) = .; #else #define MCOUNT_REC() #endif
ftrace_init初始化:
start_kernel
  |-->ftrace_init();
  |-->rest_init();
在function_trace_call函数内,ftrace记录函数调用信息(被跟踪函数的地址ip及其调用者的地址parent_ip),并将结果写入ring buffer。用户可以通过debugfs的trace文件读取该ring buffer中的内容。
function_trace_call
  |-->trace_function(tr, ip, parent_ip, flags, pc);
        |-->struct ftrace_event_call *call = &event_function;
        |   struct ring_buffer *buffer = tr->trace_buffer.buffer;
        |   struct ring_buffer_event *event;
        |   struct ftrace_entry *entry;
        |-->event = trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry), flags, pc);
        |     |-->struct task_struct *tsk = current;
        |     |   entry->preempt_count = pc & 0xff;
        |     |   entry->pid = (tsk) ? tsk->pid : 0;
        |-->entry = ring_buffer_event_data(event);
        |-->entry->ip = ip;
        |-->entry->parent_ip = parent_ip;
irqsoff tracer的实现
irqsoff tracer的实现依赖于IRQ-Flags。在中断关闭时,记录下当时的时间戳,此后,中断被打开时,再计算时间差,由此便可得到中断禁止时间。
#ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT #define local_irq_enable() \ do { trace_hardirqs_on(); raw_local_irq_enable(); } while (0)