1. 程式人生 > >記錄linux tty的一次軟鎖排查2

記錄linux tty的一次軟鎖排查2

死鎖 ice inf type pts kref int arch ioc

在復現tty的死鎖問題的時候,文洋兄使用了如下的方式:

#include   <fcntl.h>
#include   <unistd.h>
#include   <stdio.h>

#define TIOCVHANGUP 0x5437
int   main(int argc,char* argv[])
{
        int   fd;
        if(argc < 2)
        {
          printf("error,you should input tty as a parameter\r\n");
          
return 1; } fd = open(argv[1], O_WRONLY | O_NOCTTY);

if(fd<0)
{
return 1;
}

        write(fd,   "test tty\n ", 20);
        ioctl(fd, TIOCVHANGUP, 0);
        //sleep(1);
        close(fd);
        return   0;
}

編譯成gcc -g -o main.o main.c ,然後使用腳本呼叫:

#!/bin/bash
while [ 1 ]
do
  ./main.o /dev/tty4
done

之所以使用腳本而不是在c中while處理,是因為在進程exit的時候,會有些tty的處理,我們希望盡可能地覆蓋測試,所以甚至都沒有加sleep來延時。

結果復現出來下面的軟鎖故障,堆棧如下:

[517571.855382] INFO: task systemd:1 blocked for more than 120 seconds.
[517571.856127] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
[
517571.856846] systemd D ffff881fffc347c0 0 1 0 0x00000000 [517571.856852] ffff881fd35c7b50 0000000000000086 ffff881fd35c7fd8 ffff881fd35c7fd8 [517571.856859] ffff881fd35c7fd8 00000000000147c0 ffff881fd313c500 ffff883f5ee2ac80 [517571.856863] ffff883f5ee2ac84 ffff883fd1630000 00000000ffffffff ffff883f5ee2ac88 [517571.856867] Call Trace: [517571.856880] [<ffffffff8163f959>] schedule_preempt_disabled+0x29/0x70 [517571.856883] [<ffffffff8163d415>] __mutex_lock_slowpath+0xc5/0x1c0 [517571.856888] [<ffffffff8163c87f>] mutex_lock+0x1f/0x2f [517571.856890] [<ffffffff81640df8>] tty_lock_nested.isra.0+0x38/0x90 [517571.856892] [<ffffffff81640e5e>] tty_lock+0xe/0x10 [517571.856899] [<ffffffff813b204c>] tty_open+0xcc/0x620 [517571.856906] [<ffffffff811e5721>] chrdev_open+0xa1/0x1e0 [517571.856912] [<ffffffff811de657>] do_dentry_open+0x1a7/0x2e0 [517571.856916] [<ffffffff811e5680>] ? cdev_put+0x30/0x30 [517571.856918] [<ffffffff811de889>] vfs_open+0x39/0x70 [517571.856922] [<ffffffff811ede7d>] do_last+0x1ed/0x1270 [517571.856925] [<ffffffff811f0be2>] path_openat+0xc2/0x490 [517571.856930] [<ffffffff810afb68>] ? __wake_up_common+0x58/0x90 [517571.856935] [<ffffffff811f23ab>] do_filp_open+0x4b/0xb0 [517571.856941] [<ffffffff811fef47>] ? __alloc_fd+0xa7/0x130 [517571.856945] [<ffffffff811dfd53>] do_sys_open+0xf3/0x1f0 [517571.856949] [<ffffffff811dfe6e>] SyS_open+0x1e/0x20 [517571.856955] [<ffffffff81649909>] system_call_fastpath+0x16/0x1b

從堆棧看,顯然又是在等鎖超時了。反匯編找到這把鎖是關鍵。

void __lockfunc tty_lock(struct tty_struct *tty)
{
    return tty_lock_nested(tty, TTY_MUTEX_NORMAL);
}
static void __lockfunc tty_lock_nested(struct tty_struct *tty,
                       unsigned int subclass)
{
    if (tty->magic != TTY_MAGIC) {
        pr_err("L Bad %p\n", tty);
        WARN_ON(1);
        return;
    }
    tty_kref_get(tty);
    mutex_lock_nested(&tty->legacy_mutex, subclass);--------------傳入鎖的指針
}

由於CONFIG_DEBUG_LOCK_ALLOC並沒有配置,所以mutex_lock_nested就是mutex_lock。和堆棧是匹配的。

# define mutex_lock_nested(lock, subclass) mutex_lock(lock)

crash> dis -l tty_lock_nested
/usr/src/debug/kernel-3.10.0-327.22.2.el7/linux-3.10.0-327.22.2.el7.x86_64/drivers/tty/tty_mutex.c: 18
0xffffffff81640dc0 <tty_lock_nested>:   nopl   0x0(%rax,%rax,1) [FTRACE NOP]
0xffffffff81640dc5 <tty_lock_nested+5>: push   %rbp
0xffffffff81640dc6 <tty_lock_nested+6>: mov    %rsp,%rbp
0xffffffff81640dc9 <tty_lock_nested+9>: push   %rbx
/usr/src/debug/kernel-3.10.0-327.22.2.el7/linux-3.10.0-327.22.2.el7.x86_64/drivers/tty/tty_mutex.c: 21
0xffffffff81640dca <tty_lock_nested+10>:        cmpl   $0x5401,(%rdi)
/usr/src/debug/kernel-3.10.0-327.22.2.el7/linux-3.10.0-327.22.2.el7.x86_64/drivers/tty/tty_mutex.c: 18
0xffffffff81640dd0 <tty_lock_nested+16>:        mov    %rdi,%rbx
/usr/src/debug/kernel-3.10.0-327.22.2.el7/linux-3.10.0-327.22.2.el7.x86_64/drivers/tty/tty_mutex.c: 21
0xffffffff81640dd3 <tty_lock_nested+19>:        jne    0xffffffff81640dfb <tty_lock_nested+59>
/usr/src/debug/kernel-3.10.0-327.22.2.el7/linux-3.10.0-327.22.2.el7.x86_64/include/linux/tty.h: 388
0xffffffff81640dd5 <tty_lock_nested+21>:        test   %rdi,%rdi
0xffffffff81640dd8 <tty_lock_nested+24>:        je     0xffffffff81640dec <tty_lock_nested+44>
/usr/src/debug/kernel-3.10.0-327.22.2.el7/linux-3.10.0-327.22.2.el7.x86_64/arch/x86/include/asm/atomic.h: 176
0xffffffff81640dda <tty_lock_nested+26>:        mov    $0x1,%eax
0xffffffff81640ddf <tty_lock_nested+31>:        lock xadd %eax,0x4(%rdi)
0xffffffff81640de4 <tty_lock_nested+36>:        add    $0x1,%eax
/usr/src/debug/kernel-3.10.0-327.22.2.el7/linux-3.10.0-327.22.2.el7.x86_64/include/linux/kref.h: 47
0xffffffff81640de7 <tty_lock_nested+39>:        cmp    $0x1,%eax
0xffffffff81640dea <tty_lock_nested+42>:        jle    0xffffffff81640e1f <tty_lock_nested+95>
/usr/src/debug/kernel-3.10.0-327.22.2.el7/linux-3.10.0-327.22.2.el7.x86_64/drivers/tty/tty_mutex.c: 27
0xffffffff81640dec <tty_lock_nested+44>:        lea    0x80(%rbx),%rdi------------------傳入的參數是一把鎖的地址,即&tty->legacy_mutex,rbx就是tty的指針了。
0xffffffff81640df3 <tty_lock_nested+51>: callq 0xffffffff8163c860 <mutex_lock>--------------------調用mutex_lock

crash> dis -l mutex_lock
/usr/src/debug/kernel-3.10.0-327.22.2.el7/linux-3.10.0-327.22.2.el7.x86_64/kernel/mutex.c: 103
0xffffffff8163c860 <mutex_lock>:        nopl   0x0(%rax,%rax,1) [FTRACE NOP]
0xffffffff8163c865 <mutex_lock+5>:      push   %rbp
0xffffffff8163c866 <mutex_lock+6>:      mov    %rsp,%rbp
0xffffffff8163c869 <mutex_lock+9>:      push   %rbx--------------------------------------------------rbx壓棧,所以rbp後面就是rbx的值

所以我們能夠通過堆棧分析出tty的指針來,rbx的壓棧的位置是在rbp之後。

    ffff881fd35c7bc0: ffff881fd35c7bd8 ffffffff8163c87f 
 #3 [ffff881fd35c7bc8] mutex_lock at ffffffff8163c87f
    ffff881fd35c7bd0: ffff883f5ee2ac00 ffff881fd35c7bf0 -----------------------ffff883f5ee2ac00就是rbx的值,也就是tty指針
    ffff881fd35c7be0: ffffffff81640df8 
 #4 [ffff881fd35c7be0] tty_lock_nested at ffffffff81640df8
    ffff881fd35c7be8: ffff88211f6a3200 ffff881fd35c7c00 
    ffff881fd35c7bf8: ffffffff81640e5e 

現在,需要找到持有這把鎖的owner是誰。

crash> struct tty_struct.legacy_mutex ffff883f5ee2ac00
  legacy_mutex = {
    count = {
      counter = -1
    }, 
    wait_lock = {
      {
        rlock = {
          raw_lock = {
            {
              head_tail = 524296, 
              tickets = {
                head = 8, 
                tail = 8
              }
            }
          }
        }
      }
    }, 
    wait_list = {
      next = 0xffff881fd35c7b70, 
      prev = 0xffff881fd35c7b70
    }, 
    owner = 0xffff880190f5c500, -----------------持有鎖

查看對應的task:

crash> task 0xffff880190f5c500
PID: 5628   TASK: ffff880190f5c500  CPU: 47  COMMAND: "main.o"------------就是我們編譯的測試命令

確認下是不是我們的tty4.

crash> struct tty_strt.name ffff883f5ee2ac00
  name = "tty4\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000"

確定無誤後,看看進程打開的文件列表:

crash> files 5628
PID: 5628   TASK: ffff880190f5c500  CPU: 47  COMMAND: "main.o"
ROOT: /    CWD: /home/caq
 FD       FILE            DENTRY           INODE       TYPE PATH
  0 ffff881f0e31a600 ffff880dd37f8000 ffff8801713fcea0 CHR  /dev/pts/45
  1 ffff881f0e31a600 ffff880dd37f8000 ffff8801713fcea0 CHR  /dev/pts/45
  2 ffff881f0e31a600 ffff880dd37f8000 ffff8801713fcea0 CHR  /dev/pts/45
  3 ffff881a00324400 ffff883fd1010fc0 ffff883fd0b73820 CHR  /dev/tty4

查看對應的tty的屬性:

crash> struct  file.private_data  ffff881a00324400
  private_data = 0xffff883f6101e840
crash> struct tty_file_private.tty 0xffff883f6101e840
  tty = 0xffff883f5ee2ac00
crash> struct tty_struct.disc_data 0xffff883f5ee2ac00----------------這個 0xffff883f5ee2ac00 也就是在前面反匯編找到的tty指針
 disc_data = 0xffff883f9a1d8c00 
crash
> struct n_tty_data.icanon 0xffff883f9a1d8c00 icanon = 1 \001

當然也可以使用tty來直接查看。

最後殊途同歸,還是同一個問題,屬性導致的。

static void __tty_hangup(struct tty_struct *tty, int exit_session)
{
    struct file *cons_filp = NULL;
    struct file *filp, *f = NULL;
    struct tty_file_private *priv;
    int    closecount = 0, n;
    int refs;

    if (!tty)
        return;


    spin_lock(&redirect_lock);
    if (redirect && file_tty(redirect) == tty) {
        f = redirect;
        redirect = NULL;
    }
    spin_unlock(&redirect_lock);

    tty_lock(tty);-----------------------------------------加鎖

    /* some functions below drop BTM, so we need this bit */
    set_bit(TTY_HUPPING, &tty->flags);

    /* inuse_filps is protected by the single tty lock,
       this really needs to change if we want to flush the
       workqueue with the lock held */
    check_tty_count(tty, "tty_hangup");

    spin_lock(&tty_files_lock);
    /* This breaks for file handles being sent over AF_UNIX sockets ? */
    list_for_each_entry(priv, &tty->tty_files, list) {
        filp = priv->file;
        if (filp->f_op->write == redirected_tty_write)
            cons_filp = filp;
        if (filp->f_op->write != tty_write)
            continue;
        closecount++;
        __tty_fasync(-1, filp, 0);    /* can‘t block */
        filp->f_op = &hung_up_tty_fops;
    }
    spin_unlock(&tty_files_lock);

    refs = tty_signal_session_leader(tty, exit_session);
    /* Account for the p->signal references we killed */
    while (refs--)
        tty_kref_put(tty);

    /*
     * it drops BTM and thus races with reopen
     * we protect the race by TTY_HUPPING
     */
    tty_ldisc_hangup(tty);-----------------------阻塞,阻塞的原因上面已經描述了。

    spin_lock_irq(&tty->ctrl_lock);
    clear_bit(TTY_THROTTLED, &tty->flags);
    clear_bit(TTY_PUSH, &tty->flags);
    clear_bit(TTY_DO_WRITE_WAKEUP, &tty->flags);
    put_pid(tty->session);
    put_pid(tty->pgrp);
    tty->session = NULL;
    tty->pgrp = NULL;
    tty->ctrl_status = 0;
    spin_unlock_irq(&tty->ctrl_lock);

    /*
     * If one of the devices matches a console pointer, we
     * cannot just call hangup() because that will cause
     * tty->count and state->count to go out of sync.
     * So we just call close() the right number of times.
     */
    if (cons_filp) {
        if (tty->ops->close)
            for (n = 0; n < closecount; n++)
                tty->ops->close(tty, cons_filp);
    } else if (tty->ops->hangup)
        (tty->ops->hangup)(tty);
    /*
     * We don‘t want to have driver/ldisc interactions beyond
     * the ones we did here. The driver layer expects no
     * calls after ->hangup() from the ldisc side. However we
     * can‘t yet guarantee all that.
     */
    set_bit(TTY_HUPPED, &tty->flags);
    clear_bit(TTY_HUPPING, &tty->flags);

    tty_unlock(tty);-------------------------------導致沒有走到這放鎖。

    if (f)
        fput(f);
}

那麽,很顯然,這個測試腳本,可以測試《記錄linux tty的一次軟鎖排查》中的修改是否已經ok。

修改腳本如下:

#!/bin/bash
while [ 1 ]
do
  for i in {1..64}
  do
    ./main.o /dev/tty$i
  done
done

記錄linux tty的一次軟鎖排查2