Skip to content

Latest commit

 

History

History
208 lines (172 loc) · 7.36 KB

File metadata and controls

208 lines (172 loc) · 7.36 KB

第 4 周

kernel 学习

kprobe

kprobe 可以用于动态地在内核中添加探测点,并在被探测指令执行前、执行后调用回调函数,被广泛应用在 SystemTap 等技术中。

  • 将内联函数注册为探测点时,可能无法保证对该函数的所有实例均注册为探测点,要注意 GCC 自动内联;
  • 回调函数可以修改被探测函数运行的上下文,因此可以用来修复 Bug 等;
  • 黑名单中的函数不可被探测,如试图探测它自身可能引发无限循环;
  • 回调函数中的函数调用不会触发断点,如探测 printk 时在回调函数中使用 printk 不会再次进入探测点;
  • kprobe 回调函数运行时会关内核抢占,因此不能调用信号量等放弃 CPU 的函数;
  • 如果一个函数调用后可能永不返回,如 do_exit,可能无法正常使用 kprobe;
  • 在 x86_64 中,kprobe 把指令拷贝后,将原始指令入口替换为 int3(内核开启 CONFIG_OPTPROBES 选项后会用更优的跳转指令来代替);CPU 执行到 int3 时会陷入 trap,在 trap 的处理函数中会设置 kprobe 的调用状态并调用注册的 pre_handler 回调函数;随后 kprobe 单步执行被拷贝的指令(x86_64 通过设置单步调试 flag 实现);执行完成后执行 post_handler 回调函数;最后回到正常的执行流程。(如果在 pre_handler、post_handler 或者单步执行过程中出现异常,则执行 fault_handler。)

include/linux/kprobes.h:

/*
 * Descriptor of a single probe: where it is placed (addr, or symbol_name
 * plus offset), which handlers run around the probed instruction, and the
 * saved original instruction that the breakpoint replaced.
 */
struct kprobe {
	struct hlist_node hlist;

	/* list of kprobes for multi-handler support */
	struct list_head list;

	/* count the number of times this probe was temporarily disarmed */
	unsigned long nmissed;

	/* location of the probe point */
	kprobe_opcode_t *addr;

	/* Allow user to indicate symbol name of the probe point */
	const char *symbol_name;

	/* Offset into the symbol */
	unsigned int offset;

	/* Called before addr is executed. */
	kprobe_pre_handler_t pre_handler;

	/* Called after addr is executed, unless... */
	kprobe_post_handler_t post_handler;

	/*
	 * ... called if executing addr causes a fault (e.g. page fault).
	 * Return 1 if it handled fault, otherwise kernel will see it.
	 */
	kprobe_fault_handler_t fault_handler;

	/* Saved opcode (which has been replaced with breakpoint) */
	kprobe_opcode_t opcode;

	/* copy of the original instruction */
	struct arch_specific_insn ainsn;

	/*
	 * Indicates various status flags.
	 * Protected by kprobe_mutex after this kprobe is registered.
	 */
	u32 flags;
};

范例:samples/kprobes/kprobe_example.c

/*
 * NOTE: This example works on x86 and powerpc.
 * Here's a sample kernel module showing the use of kprobes to dump a
 * stack trace and selected registers when _do_fork() is called.
 *
 * For more information on theory of operation of kprobes, see
 * Documentation/kprobes.txt
 *
 * You will see the trace data in /var/log/messages and on the console
 * whenever _do_fork() is invoked to create a new process.
 */

#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/kprobes.h>

/* Size of the buffer that holds the name of the symbol to probe. */
#define MAX_SYMBOL_LEN	64
/*
 * Name of the symbol to probe, "_do_fork" by default; exposed as the
 * "symbol" module parameter through module_param_string().
 */
static char symbol[MAX_SYMBOL_LEN] = "_do_fork";
module_param_string(symbol, symbol, sizeof(symbol), 0644);

/* For each probe you need to allocate a kprobe structure */
static struct kprobe kp = {
	.symbol_name	= symbol,// name of the function to probe
};
/*
 * kprobe pre_handler: called just before the probed instruction is executed.
 *
 * @p:    the kprobe that was hit
 * @regs: register values at the probe point (pt_regs holds the registers)
 *
 * Logs the probe address together with the architecture-specific program
 * counter and status register. Every branch uses the same
 * "name = 0x<hex>" layout so that the output is uniform across
 * architectures. Always returns 0.
 */
static int handler_pre(struct kprobe *p, struct pt_regs *regs)
{
#ifdef CONFIG_X86
	/* pr_info() is equivalent to printk(KERN_INFO ...) */
	pr_info("<%s> pre_handler: p->addr = 0x%p, ip = 0x%lx, flags = 0x%lx\n",
		p->symbol_name, p->addr, regs->ip, regs->flags);
#endif
#ifdef CONFIG_PPC
	pr_info("<%s> pre_handler: p->addr = 0x%p, nip = 0x%lx, msr = 0x%lx\n",
		p->symbol_name, p->addr, regs->nip, regs->msr);
#endif
#ifdef CONFIG_MIPS
	pr_info("<%s> pre_handler: p->addr = 0x%p, epc = 0x%lx, status = 0x%lx\n",
		p->symbol_name, p->addr, regs->cp0_epc, regs->cp0_status);
#endif
#ifdef CONFIG_ARM64
	pr_info("<%s> pre_handler: p->addr = 0x%p, pc = 0x%lx,"
			" pstate = 0x%lx\n",
		p->symbol_name, p->addr, (long)regs->pc, (long)regs->pstate);
#endif
#ifdef CONFIG_S390
	pr_info("<%s> pre_handler: p->addr = 0x%p, ip = 0x%lx, flags = 0x%lx\n",
		p->symbol_name, p->addr, regs->psw.addr, regs->flags);
#endif

	/* A dump_stack() here will give a stack backtrace */
	return 0;
}

/*
 * kprobe post_handler: called after the probed instruction is executed.
 *
 * @p:     the kprobe that was hit
 * @regs:  register values after the probed instruction ran
 * @flags: not used by this handler
 *
 * Logs the probe address and the architecture-specific status register.
 * Every branch is labelled "post_handler" so the output can be told apart
 * from the pre-handler's.
 */
static void handler_post(struct kprobe *p, struct pt_regs *regs,
				unsigned long flags)
{
#ifdef CONFIG_X86
	pr_info("<%s> post_handler: p->addr = 0x%p, flags = 0x%lx\n",
		p->symbol_name, p->addr, regs->flags);
#endif
#ifdef CONFIG_PPC
	pr_info("<%s> post_handler: p->addr = 0x%p, msr = 0x%lx\n",
		p->symbol_name, p->addr, regs->msr);
#endif
#ifdef CONFIG_MIPS
	pr_info("<%s> post_handler: p->addr = 0x%p, status = 0x%lx\n",
		p->symbol_name, p->addr, regs->cp0_status);
#endif
#ifdef CONFIG_ARM64
	pr_info("<%s> post_handler: p->addr = 0x%p, pstate = 0x%lx\n",
		p->symbol_name, p->addr, (long)regs->pstate);
#endif
#ifdef CONFIG_S390
	pr_info("<%s> post_handler: p->addr = 0x%p, flags = 0x%lx\n",
		p->symbol_name, p->addr, regs->flags);
#endif
}

/*
 * fault_handler: this is called if an exception is generated for any
 * instruction within the pre- or post-handler, or when Kprobes
 * single-steps the probed instruction.
 *
 * @p:      the kprobe being processed when the fault occurred
 * @regs:   register values at the time of the fault (not used here)
 * @trapnr: number of the trap that was raised
 *
 * Only logs the event; returns 0 so the fault is left to the kernel.
 */
static int handler_fault(struct kprobe *p, struct pt_regs *regs, int trapnr)
{
	/* The message must end in "\n"; a bare "n" would leave the line open. */
	pr_info("fault_handler: p->addr = 0x%p, trap #%d\n", p->addr, trapnr);
	/* Return 0 because we don't handle the fault. */
	return 0;
}

/*
 * Module entry point: attach the three handlers to kp and register it.
 *
 * Returns 0 once the probe is planted, or the negative value returned by
 * register_kprobe() when registration fails.
 */
static int __init kprobe_init(void)
{
	int err;

	/* Wire up the callbacks before the probe is registered. */
	kp.fault_handler = handler_fault;
	kp.post_handler = handler_post;
	kp.pre_handler = handler_pre;

	/* Register the kprobe. */
	err = register_kprobe(&kp);
	if (err >= 0) {
		pr_info("Planted kprobe at %p\n", kp.addr);
		return 0;
	}

	pr_err("register_kprobe failed, returned %d\n", err);
	return err;
}

/* Module exit point: unregister kp and log the address it was planted at. */
static void __exit kprobe_exit(void)
{
	// unregister the kprobe
	unregister_kprobe(&kp);
	pr_info("kprobe at %p unregistered\n", kp.addr);
}

/* Register the module's load and unload entry points. */
module_init(kprobe_init)
module_exit(kprobe_exit)
// Without the GPL license declaration, insmod cannot link against the
// symbols in /proc/kallsyms that require a GPL license.
MODULE_LICENSE("GPL");

task_struct

Linux 下的进程控制块的类型为 struct task_struct,代码位于 include/linux/sched.h,由于代码过长,这里不放出。其中包含的数据成员有利于我们读取进程的信息。

上下文

进程上下文、中断上下文:内核可以处于进程上下文和中断上下文这两种上下文。用户程序发起系统调用时,内核代表该用户程序运行于进程上下文;而中断处理程序运行于中断上下文。中断上下文和进程上下文不可能同时发生。

中断上下文中 current 指向被中断的进程的进程控制块。

由于中断上下文不可抢占,中断上下文不允许进入睡眠状态或主动放弃 CPU,亦不应该执行耗时任务。

内核模块开发

编译错误

错误:已禁用 SSE 却在 SSE 寄存器中返回。原因:模块代码中使用了浮点运算。我的解决方法:不用浮点数。

为虚拟机编译内核模块

编译外置模块需要调整 Makefile 中 KDIR 的值,指向编译过的内核源码目录。