Linux核心中斷的處理分析--MIPS體系結構

阿新 • • 發佈：2021-07-05

Linux核心中斷處理流程分析--MIPS體系結構

作者：weirdo-xo
時間：2021-7-5
說明：本次部落格的目標在於通過程式碼分析MIPS中具有中斷向量入口的例外，可以參考我另一篇關於MIPS中斷體系結構的文章。這裡將會講解4個常見的例外。
參考：《用芯探核--基於龍芯的Linux核心探索解析》

1. 硬復位、軟復位、NMI

發生硬復位、軟復位、NMI後，處理器的PC會被設定為0xBFC00000這個地址，這也是處理器啟動時預設的定址地址。這三種異常對於處理器來說都是嚴重的錯誤，是解決不了的，最終的結果基本都是導致CPU重啟或者宕機。當PC被設定為0xBFC00000後，可以通過Status暫存器的如下位元判斷具體的異常型別：

硬復位：BEV=1,SR=0,NMI=0
軟復位：BEV=1,SR=1,NMI=0
NMI異常：BEV=1,SR=0,NMI=1

當發生復位相關的異常時，Bootloader（BIOS）會重新執行開機的流程；NMI異常發生時，會有處理器的參與，下面來看一下相關的程式碼：

/*
* NMI debug exception handler for MIPS reference boards.
* The NMI debug exception entry point is 0xbfc00000, which
* normally is in the boot PROM, so the boot PROM must do an
* unconditional jump to this vector.
*
* The vector holds no logic of its own: it only transfers
* control to nmi_handler.
*/
NESTED(except_vec_nmi, 0, sp)
  /* Hand off to the real NMI entry code. */
  j   nmi_handler

  END(except_vec_nmi)

  __FINIT

/*
 * NMI entry code, reached through the jump in except_vec_nmi.
 * It rewrites CP0 Status, saves registers with SAVE_ALL and then calls
 * nmi_exception_handler with the stack pointer as its first argument.
 */
NESTED(nmi_handler, PT_SIZE, sp)
  .cfi_signal_frame
  .set    push
  .set    noat
  /*
   * Set EXL
   * Clear ERL - restore segment mapping
   * Clear BEV - required for page fault exception handler to work
   */
  /* k0 = current Status value */
  mfc0    k0, CP0_STATUS
  /* OR in the EXL bit */
  ori k0, k0, ST0_EXL
  /* k1 = mask with the BEV and ERL bits cleared; apply it to k0 */
  li  k1, ~(ST0_BEV | ST0_ERL)
  and k0, k0, k1
  /* Write the modified value back to Status */
  mtc0    k0, CP0_STATUS
  /* NOTE(review): _ehb is presumably a hazard barrier after the CP0 write - confirm */
  _ehb
  /* NOTE(review): SAVE_ALL is defined elsewhere; presumably stores the register frame at sp */
  SAVE_ALL
  /* First argument of the C handler is the current stack pointer */
  move    a0, sp
  jal nmi_exception_handler
  /* nmi_exception_handler never returns */
  .set    pop
  END(nmi_handler)

這裡主要對Status暫存器進行一些操作，然後跳轉到nmi_exception_handler函式進行處理。nmi_exception_handler函式的核心是呼叫通知鏈執行關機或重啟操作，下面是處理程式碼：
arch/mips/kernel/traps.c

/*
 * nmi_exception_handler - C-level NMI handler called from nmi_handler.
 * @regs: register frame pointer passed in by the assembly entry code.
 *
 * Runs the nmi_chain notifiers, builds a message containing the CPU id and
 * the saved CP0_EPC, then replaces regs->cp0_epc with the ErrorEPC value
 * and passes both to die(). Declared __noreturn.
 */
void __noreturn nmi_exception_handler(struct pt_regs *regs)
{
  char str[100];

  nmi_enter();
  raw_notifier_call_chain(&nmi_chain, 0, regs);
  bust_spinlocks(1);
  /*
   * Derive the bound from the buffer itself so the size cannot drift
   * from the declaration. The message is formatted before cp0_epc is
   * overwritten below, so it reports the originally saved EPC.
   */
  snprintf(str, sizeof(str), "CPU%d NMI taken, CP0_EPC=%lx\n",
       smp_processor_id(), regs->cp0_epc);
  regs->cp0_epc = read_c0_errorepc();
  die(str, regs);
  nmi_exit();
}

這裡的函式標識為__noreturn可以看出函式執行後不會返回。

2. Cache錯誤異常

Cache如果真的發生了異常，軟體一般是處理不了的，最終的命運是核心報告Panic，進入宕機狀態。下面看一下程式碼中是如何處理cache錯誤異常的：

/*
* Game over.  Go to the button.  Press gently.  Swear where allowed by
* legislation.
*
* Cache error vector: rewrites the cache-mode field of CP0 Config to
* CONF_CM_UNCACHED and then jumps to cache_parity_error.
*/ 
  LEAF(except_vec2_generic)
  .set    noreorder
  .set    noat
  .set    mips0
  /*
   * This is a very bad place to be.  Our cache error
   * detection has triggered.  If we have write-back data
   * in the cache, we may not be able to recover.  As a
   * first-order desperate measure, turn off KSEG0 cacheing.
   */
  /* k0 = Config; clear the bits selected by CONF_CM_CMASK */
  mfc0    k0,CP0_CONFIG
  li  k1,~CONF_CM_CMASK
  and k0,k0,k1
  /* Select the uncached mode and write Config back */
  ori k0,k0,CONF_CM_UNCACHED
  mtc0    k0,CP0_CONFIG
  /* Give it a few cycles to sink in... */
  nop
  nop
  nop
  
  /* Continue in the C reporting routine; the nop after the jump fills its delay slot (noreorder is in effect) */
  j   cache_parity_error
  nop
  END(except_vec2_generic)

從註釋我們也可以看出，如果程式碼執行到這裡的話就Game Over，這裡主要的操作是Uncache記憶體的Kseg0段。然後跳轉到cache_parity_error處理函式中，該函式會向用戶報告錯誤並進入panic()。下面看一下cache_parity_error函式的處理。

/*
 * cache_parity_error - report a cache error exception and panic.
 *
 * Prints ErrorEPC and the raw CacheErr register, decodes the individual
 * CacheErr bits into text, prints the DErrAddr0/DErrAddr1 registers when
 * the matching E0/E1 bit is set, and finally calls panic().
 *
 * All bit masks are built from unsigned constants: "1 << 31" on a signed
 * int is undefined behaviour in ISO C, while "1U << 31" is well defined.
 */
asmlinkage void cache_parity_error(void)
{
  /* Number of hex digits needed to print an unsigned long. */
  const int field = 2 * sizeof(unsigned long);
  unsigned int reg_val;

  /* For the moment, report the problem and hang. */
  printk("Cache error exception:\n");
  printk("cp0_errorepc == %0*lx\n", field, read_c0_errorepc());
  reg_val = read_c0_cacheerr();
  printk("c0_cacheerr == %08x\n", reg_val);

  /* Bit 30 selects secondary/primary, bit 31 selects data/insn. */
  printk("Decoded c0_cacheerr: %s cache fault in %s reference.\n",
         reg_val & (1U << 30) ? "secondary" : "primary",
         reg_val & (1U << 31) ? "data" : "insn");

  pr_err("Error bits: %s%s%s%s%s%s%s\n",
          reg_val & (1U << 29) ? "ED " : "",
          reg_val & (1U << 28) ? "ET " : "",
          reg_val & (1U << 26) ? "EE " : "",
          reg_val & (1U << 25) ? "EB " : "",
          reg_val & (1U << 24) ? "EI " : "",
          reg_val & (1U << 23) ? "E1 " : "",
          reg_val & (1U << 22) ? "E0 " : "");

  /* The low 22 bits are reported as the index. */
  printk("IDX: 0x%08x\n", reg_val & ((1U << 22) - 1));

  if (reg_val & (1U << 22))
      printk("DErrAddr0: 0x%0*lx\n", field, read_c0_derraddr0());

  if (reg_val & (1U << 23))
      printk("DErrAddr1: 0x%0*lx\n", field, read_c0_derraddr1());

  panic("Can't handle the cache error!");
}

的確，該函式主要是向用戶報告當前遇到的錯誤。

3. TLB/XTLB錯誤異常

TLB/XTLB中只有重填具有特殊的入口向量，並且為了平臺相容，程式碼並沒有採用靜態程式碼的方式生成，而是採用動態程式碼--核心微彙編器--的方式產生。至於核心微彙編器的原理這裡就不再講解。下面先給出相關的程式碼：
arch/mips/mm/tlbex.c

/*
 * Build the Loongson-3 TLB refill handler at run time.
 *
 * The handler is assembled into tlb_handler through the uasm_* calls,
 * its label references are resolved, and the result is copied to
 * ebase + 0x80, followed by an icache flush of that range and a dump
 * of the installed code.
 */
static void build_loongson3_tlb_refill_handler(void)
{           
    u32 *p = tlb_handler;             /* write cursor into the code buffer */
    struct uasm_label *l = labels;    /* cursor for recorded labels */
    struct uasm_reloc *r = relocs;    /* cursor for recorded relocations */

    /* Start from clean label, relocation and code buffers. */
    memset(labels, 0, sizeof(labels));
    memset(relocs, 0, sizeof(relocs));
    memset(tlb_handler, 0, sizeof(tlb_handler));

    if (check_for_high_segbits) {
        /*
         * K0 = BadVAddr, K1 = K0 shifted right. If K1 is zero, branch to
         * label_vmalloc (bound directly below, i.e. continue normally);
         * otherwise, if K0 >= 0, branch to label_large_segbits_fault.
         */
        uasm_i_dmfc0(&p, K0, C0_BADVADDR);
        uasm_i_dsrl_safe(&p, K1, K0, PGDIR_SHIFT + PGD_ORDER + PAGE_SHIFT - 3);
        uasm_il_beqz(&p, &r, K1, label_vmalloc);
        uasm_i_nop(&p);

        uasm_il_bgez(&p, &r, K0, label_large_segbits_fault);
        uasm_i_nop(&p);
        uasm_l_vmalloc(&l, p);
    }

    /* K1 = C0_PGD, the starting point of the page-table walk below. */
    uasm_i_dmfc0(&p, K1, C0_PGD);

    uasm_i_lddir(&p, K0, K1, 3);  /* global page dir */
#ifndef __PAGETABLE_PMD_FOLDED
    uasm_i_lddir(&p, K1, K0, 1);  /* middle page dir */
#endif 
    uasm_i_ldpte(&p, K1, 0);      /* even */
    uasm_i_ldpte(&p, K1, 1);      /* odd */
    uasm_i_tlbwr(&p);
 
    /*
     * restore page mask
     *
     * Three encodings depending on the constant: lui + ori when the upper
     * 16 bits are non-zero, a single ori when only the lower bits are set,
     * and a direct write of register 0 when the mask is zero.
     */
    if (PM_DEFAULT_MASK >> 16) {
        uasm_i_lui(&p, K0, PM_DEFAULT_MASK >> 16);
        uasm_i_ori(&p, K0, K0, PM_DEFAULT_MASK & 0xffff);
        uasm_i_mtc0(&p, K0, C0_PAGEMASK);
    } else if (PM_DEFAULT_MASK) {
        uasm_i_ori(&p, K0, 0, PM_DEFAULT_MASK);
        uasm_i_mtc0(&p, K0, C0_PAGEMASK);
    } else {
        uasm_i_mtc0(&p, 0, C0_PAGEMASK);
    }

    /* End of the normal path: return from the exception. */
    uasm_i_eret(&p);

    if (check_for_high_segbits) {
        /* Out-of-line target of the bgez above: jump to tlb_do_page_fault_0. */
        uasm_l_large_segbits_fault(&l, p);
        UASM_i_LA(&p, K1, (unsigned long)tlb_do_page_fault_0);
        uasm_i_jr(&p, K1);
        uasm_i_nop(&p);
    }

    uasm_resolve_relocs(relocs, labels);
    /*
     * Install a fixed 0x80 bytes at ebase + 0x80.
     * NOTE(review): no check is visible here that the generated code
     * actually fits in 0x80 bytes - confirm against the full source.
     */
    memcpy((void *)(ebase + 0x80), tlb_handler, 0x80);
    local_flush_icache_range(ebase + 0x80, ebase + 0x100);
    dump_handler("loongson3_tlb_refill",
             (u32 *)(ebase + 0x80), (u32 *)(ebase + 0x100));
}

關於微彙編器各函式的含義，我們可以通過uasm_i_xxx中的xxx（即對應的指令助記符）來進行初步的判斷，具體的含義可以檢視原始碼中的定義。

4. 其他通用異常處理

其他通用異常處理可能是離我們最近的一個具有向量地址入口的例外了，在該系列異常中包括了我們通常使用的中斷。該函式中一共會處理32個異常事件（實際使用的沒有32個，部分保留），所以處理方法是在記憶體中申請了32個地址的記憶體exception_handlers,我們把異常事件處理的函式存放在這個陣列中。當CPU執行到這裡的程式碼之後，從Cause暫存器的ExCode中取出索引值，通過索引值跳轉到指定的事件處理函式中，下面看一下程式碼的實現：

/*
 * General exception vector for all other CPUs.
 *
 * Be careful when changing this, it has to be at most 128 bytes
 * to fit into space reserved for the exception handler.
 *
 * Dispatches through the exception_handlers table using an offset
 * derived from the Cause register.
 */ 
NESTED(except_vec3_generic, 0, sp)
    .set    push
    .set    noat

    /* k1 = Cause; the 0x7c mask keeps bits 6..2, i.e. (code << 2) */
    mfc0    k1, CP0_CAUSE
    andi    k1, k1, 0x7c
    /*
     * One more left shift turns (code << 2) into (code << 3).
     * NOTE(review): presumably because table entries are 8-byte pointers - confirm.
     */
    dsll    k1, k1, 1
    /* k0 = handler address loaded from exception_handlers + k1 */
    PTR_L   k0, exception_handlers(k1)
    /* Jump to the selected handler */
    jr  k0
    .set    pop
    END(except_vec3_generic)

PTR_L k0, exception_handlers(k1)該條指令從exception_handlers陣列中取出目標處理函式的地址，隨後的jr k0便跳轉到該目標地址，在目標地址中進行處理。

Linux核心中斷的處理分析--MIPS體系結構

Linux核心中斷處理流程分析--MIPS體系結構

1. 硬復位、軟復位、NMI

2. Cache錯誤異常

3. TLB/XTLB錯誤異常

4. 其他通用異常處理

Linux核心中斷的處理分析--MIPS體系結構

Linux 核心中斷體系結構（1）

深刻理解linux核心呼叫棧、棧幀結構

Linux核心中斷頂半部和底半部的理解

Akuna Capital初級量化開發人員：強大的C++程式設計背景，線性代數，數值方法，統計學，優化，訊號處理，計算機體系結構，機器學習，異構/高效能運算金融市場和交易

Linux 核心學習筆記（三）——資料結構與中斷

MIPS中斷體系結構

Java異常處理-異常體系結構

Linux核心實現透視---硬中斷

Linux核心實現透視---軟中斷&Tasklet

樸英敏：用crash工具分析Linux核心死鎖的一次實戰【轉】

Linux核心虛擬記憶體管理之匿名對映缺頁異常分析

LINUX核心命令實戰總結四——檔案過濾及內容編輯處理命令一

執行緒池原理分析(一)-執行緒池體系結構

LINUX核心命令實戰總結五——檔案過濾及內容編輯處理命令二

Linux核心原始碼分析之set_arch (一)

Linux核心原始碼分析之setup_arch (二)

linux核心休眠喚醒、流程分析之掛起：cpu（八）

異常--定義、體系結構、處理、自定義異常

Linux核心筆記--軟中斷

Linux核心中斷的處理分析--MIPS體系結構

Linux核心中斷處理流程分析--MIPS體系結構

1. 硬復位、軟復位、NMI

2. Cache錯誤異常

3. TLB/XTLB錯誤異常

4. 其他通用異常處理

相關推薦