Linux内核死锁调试
使用内核的kernel hacking功能
打开以下配置
CONFIG_PROVE_LOCKING=y CONFIG_LOCK_STAT=y CONFIG_DEBUG_LOCKDEP=y
重新编译内核后，
/proc目录下会出现lockdep、lockdep_stats、lockdep_chains等文件，说明lockdep模块已经生效
测试代码
#include #include #include #include static DEFINE_SPINLOCK(hack_spinA); static DEFINE_SPINLOCK(hack_spinB); #ifdef MEMORY_TEST static char* buf; static void create_slue_err(void){ buf = kmalloc(32,GFP_KERNEL); if(buf) { /*memset(buf,0x00,33);*/ /* kfree(buf); printk("%sn","free buf" );*/ /*kfree(buf);*/ memset(buf,0x00,33); } return; } #endif void hack_spinBA(void) { printk("%sn","hack_spin:B=>An" ); spin_lock(&hack_spinA); spin_lock(&hack_spinB); } void hack_spinAB(void) { printk("%sn","hack_spin:A=>Bn" ); spin_lock(&hack_spinB); } static int __init my_test_init(void) { printk("init %sn", "my_test_init 1"); #ifdef MEMORY_TEST create_slue_err(); #endif hack_spinBA(); hack_spinAB(); printk("init %sn", "my_test_init 2"); return 0; } static void __exit my_test_exit(void) { printk("%sn","my_test_exit" ); return ; } MODULE_LICENSE("GPL"); module_init(my_test_init); module_exit(my_test_exit);
测试打印:
[ 188.596504@1] init my_test_init 1
[ 188.596548@1] hack_spin:B=>A
[ 188.596548@1]
[ 188.598679@1] hack_spin:A=>B
[ 188.598679@1]
[ 189.920389@1] BUG: spinlock lockup suspected on CPU#1, insmod/6999
[ 189.920943@1] lock: hack_spinB+0x0/0xfffffffffffffef4 [slub_test], .magic: dead4ead, .owner: insmod/6999, .owner_cpu: 1
[ 189.931758@1] CPU: 1 PID: 6999 Comm: insmod Tainted: G O 3.14.29 #6
[ 189.938784@1] Call trace:
[ 189.941406@1] [] dump_backtrace+0x0/0x144
[ 189.946896@1] [] show_stack+0x1c/0x28
[ 189.952120@1] [] dump_stack+0x74/0xb8
[ 189.957247@1] [] spin_dump+0x78/0x98
[ 189.962369@1] [] do_raw_spin_lock+0x170/0x1a8
[ 189.968199@1] [] _raw_spin_lock+0x68/0x88
[ 189.973764@1] [] hack_spinAB+0x30/0x3c [slub_test]
[ 189.980015@1] [] $x+0x28/0x4c [slub_test]
[ 189.985566@1] [] do_one_initcall+0xd4/0x13c
[ 189.991257@1] [] load_module+0x16d8/0x1e08
[ 189.996834@1] [] SyS_finit_module+0x80/0x90
[ 211.641019@1] INFO: rcu_sched detected stalls on CPUs/tasks: { 1} (detected by 2, t=2104 jiffies, g=18446744073709551337, c=18446744073709551336, q=4)
[ 211.648804@2] Task dump for CPU 1:
[ 211.652167@2] insmod R running task 0 6999 5821 0x0000000a
[ 211.659325@2] Call trace:
[ 211.661920@2] [] __switch_to+0x74/0x8c
从上面的打印可以很明显地看出死锁的发生路径与调用栈：
hack_spinAB在申请spin_lock(&hack_spinB)时死锁了。日志中hack_spinB的.owner正是insmod/6999自身，即hack_spinBA已经持有hack_spinB且没有释放，同一任务再次申请这把自旋锁只能一直自旋等待。
总结
以上为个人经验,希望能给大家一个参考,也希望大家多多支持IT俱乐部。