task_struct 結構如何檢視及分析
cd /
find -name sched.h
vim usr/src/kernels/3.10.0-862.6.3.el7.x86_64/include/linux/sched.h
https://www.cnblogs.com/zxc2man/p/6649771.html
程序是處於執行期的程式以及它所管理的資源(如開啟的檔案、掛起的訊號、程序狀態、地址空間等等)的總稱。注意,程式並不是程序,實際上兩個或多個程序不僅有可能執行同一程式,而且還有可能共享地址空間等資源。
Linux核心通過一個被稱為程序描述符的task_struct結構體來管理程序,這個結構體包含了一個程序所需的所有資訊。它定義在linux-2.6.38.8/include/linux/sched.h檔案中。 本文將盡力就task_struct結構體所有成員的用法進行簡要說明。
1、程序狀態
[cpp] view plain copy print?
volatile long state;
int exit_state;
volatile long state;
int exit_state;
state成員的可能取值如下:
[cpp] view plain copy print?
#define TASK_RUNNING 0
#define TASK_INTERRUPTIBLE 1
#define TASK_UNINTERRUPTIBLE 2
#define __TASK_STOPPED 4
#define __TASK_TRACED 8
/* in tsk->exit_state */
#define EXIT_ZOMBIE 16
#define EXIT_DEAD 32
/* in tsk->state again */
#define TASK_DEAD 64
#define TASK_WAKEKILL 128
#define TASK_WAKING 256
#define TASK_RUNNING 0
#define TASK_INTERRUPTIBLE 1
#define TASK_UNINTERRUPTIBLE 2
#define __TASK_STOPPED 4
#define __TASK_TRACED 8
/* in tsk->exit_state */
#define EXIT_ZOMBIE 16
#define EXIT_DEAD 32
/* in tsk->state again */
#define TASK_DEAD 64
#define TASK_WAKEKILL 128
#define TASK_WAKING 256
系統中的每個程序都必然處於以上所列程序狀態中的一種。
TASK_RUNNING表示程序要麼正在執行,要麼正要準備執行。
TASK_INTERRUPTIBLE表示程序被阻塞(睡眠),直到某個條件變為真。條件一旦達成,程序的狀態就被設定為TASK_RUNNING。
TASK_UNINTERRUPTIBLE的意義與TASK_INTERRUPTIBLE類似,除了不能通過接受一個訊號來喚醒以外。
__TASK_STOPPED表示程序被停止執行。
__TASK_TRACED表示程序被debugger等程序監視。
EXIT_ZOMBIE表示程序的執行被終止,但是其父程序還沒有使用wait()等系統呼叫來獲知它的終止資訊。
EXIT_DEAD表示程序的最終狀態。
EXIT_ZOMBIE和EXIT_DEAD也可以存放在exit_state成員中。程序狀態的切換過程和原因大致如下圖(圖片來自《Linux Kernel Development》):
2、程序識別符號(PID)
[cpp] view plain copy print?
pid_t pid;
pid_t tgid;
pid_t pid;
pid_t tgid;
在CONFIG_BASE_SMALL配置為0的情況下,PID的取值範圍是0到32767,即系統中的程序數最大為32768個。
[cpp] view plain copy print?
/* linux-2.6.38.8/include/linux/threads.h */
#define PID_MAX_DEFAULT (CONFIG_BASE_SMALL ? 0x1000 : 0x8000)
/* linux-2.6.38.8/include/linux/threads.h */
#define PID_MAX_DEFAULT (CONFIG_BASE_SMALL ? 0x1000 : 0x8000)
在Linux系統中,一個執行緒組中的所有執行緒使用和該執行緒組的領頭執行緒(該組中的第一個輕量級程序)相同的PID,並被存放在tgid成員中。只有執行緒組的領頭執行緒的pid成員才會被設定為與tgid相同的值。注意,getpid()系統呼叫返回的是當前程序的tgid值而不是pid值。
3、程序核心棧
[cpp] view plain copy print?
void *stack;
void *stack;
程序通過alloc_thread_info函式分配它的核心棧,通過free_thread_info函式釋放所分配的核心棧。
[cpp] view plain copy print?
/* linux-2.6.38.8/kernel/fork.c */
static inline struct thread_info *alloc_thread_info(struct task_struct *tsk)
{
#ifdef CONFIG_DEBUG_STACK_USAGE
gfp_t mask = GFP_KERNEL | __GFP_ZERO;
#else
gfp_t mask = GFP_KERNEL;
#endif
return (struct thread_info *)__get_free_pages(mask, THREAD_SIZE_ORDER);
}
static inline void free_thread_info(struct thread_info *ti)
{
free_pages((unsigned long)ti, THREAD_SIZE_ORDER);
}
/* linux-2.6.38.8/kernel/fork.c */
static inline struct thread_info *alloc_thread_info(struct task_struct *tsk)
{
#ifdef CONFIG_DEBUG_STACK_USAGE
gfp_t mask = GFP_KERNEL | __GFP_ZERO;
#else
gfp_t mask = GFP_KERNEL;
#endif
return (struct thread_info *)__get_free_pages(mask, THREAD_SIZE_ORDER);
}
static inline void free_thread_info(struct thread_info *ti)
{
free_pages((unsigned long)ti, THREAD_SIZE_ORDER);
}
其中,THREAD_SIZE_ORDER巨集在linux-2.6.38.8/arch/arm/include/asm/thread_info.h檔案中被定義為1,也就是說alloc_thread_info函式通過呼叫__get_free_pages函式分配2個頁的記憶體(它的首地址是8192位元組對齊的)。
Linux核心通過thread_union聯合體來表示程序的核心棧,其中THREAD_SIZE巨集的大小為8192。
[cpp] view plain copy print?
union thread_union {
struct thread_info thread_info;
unsigned long stack[THREAD_SIZE/sizeof(long)];
};
union thread_union {
struct thread_info thread_info;
unsigned long stack[THREAD_SIZE/sizeof(long)];
};
當程序從使用者態切換到核心態時,程序的核心棧總是空的,所以ARM的sp暫存器指向這個棧的頂端。因此,核心能夠輕易地通過sp暫存器獲得當前正在CPU上執行的程序。
[cpp] view plain copy print?
/* linux-2.6.38.8/arch/arm/include/asm/current.h */
static inline struct task_struct *get_current(void)
{
return current_thread_info()->task;
}
#define current (get_current())
/* linux-2.6.38.8/arch/arm/include/asm/thread_info.h */
static inline struct thread_info *current_thread_info(void)
{
register unsigned long sp asm ("sp");
return (struct thread_info *)(sp & ~(THREAD_SIZE - 1));
}
/* linux-2.6.38.8/arch/arm/include/asm/current.h */
static inline struct task_struct *get_current(void)
{
return current_thread_info()->task;
}
#define current (get_current())
/* linux-2.6.38.8/arch/arm/include/asm/thread_info.h */
static inline struct thread_info *current_thread_info(void)
{
register unsigned long sp asm ("sp");
return (struct thread_info *)(sp & ~(THREAD_SIZE - 1));
}
程序核心棧與程序描述符的關係如下圖:
4、標記
[cpp] view plain copy print?
unsigned int flags; /* per process flags, defined below */
unsigned int flags; /* per process flags, defined below */
flags成員的可能取值如下:
[cpp] view plain copy print?
#define PF_KSOFTIRQD 0x00000001 /* I am ksoftirqd */
#define PF_STARTING 0x00000002 /* being created */
#define PF_EXITING 0x00000004 /* getting shut down */
#define PF_EXITPIDONE 0x00000008 /* pi exit done on shut down */
#define PF_VCPU 0x00000010 /* I'm a virtual CPU */
#define PF_WQ_WORKER 0x00000020 /* I'm a workqueue worker */
#define PF_FORKNOEXEC 0x00000040 /* forked but didn't exec */
#define PF_MCE_PROCESS 0x00000080 /* process policy on mce errors */
#define PF_SUPERPRIV 0x00000100 /* used super-user privileges */
#define PF_DUMPCORE 0x00000200 /* dumped core */
#define PF_SIGNALED 0x00000400 /* killed by a signal */
#define PF_MEMALLOC 0x00000800 /* Allocating memory */
#define PF_USED_MATH 0x00002000 /* if unset the fpu must be initialized before use */
#define PF_FREEZING 0x00004000 /* freeze in progress. do not account to load */
#define PF_NOFREEZE 0x00008000 /* this thread should not be frozen */
#define PF_FROZEN 0x00010000 /* frozen for system suspend */
#define PF_FSTRANS 0x00020000 /* inside a filesystem transaction */
#define PF_KSWAPD 0x00040000 /* I am kswapd */
#define PF_OOM_ORIGIN 0x00080000 /* Allocating much memory to others */
#define PF_LESS_THROTTLE 0x00100000 /* Throttle me less: I clean memory */
#define PF_KTHREAD 0x00200000 /* I am a kernel thread */
#define PF_RANDOMIZE 0x00400000 /* randomize virtual address space */
#define PF_SWAPWRITE 0x00800000 /* Allowed to write to swap */
#define PF_SPREAD_PAGE 0x01000000 /* Spread page cache over cpuset */
#define PF_SPREAD_SLAB 0x02000000 /* Spread some slab caches over cpuset */
#define PF_THREAD_BOUND 0x04000000 /* Thread bound to specific cpu */
#define PF_MCE_EARLY 0x08000000 /* Early kill for mce process policy */
#define PF_MEMPOLICY 0x10000000 /* Non-default NUMA mempolicy */
#define PF_MUTEX_TESTER 0x20000000 /* Thread belongs to the rt mutex tester */
#define PF_FREEZER_SKIP 0x40000000 /* Freezer should not count it as freezable */
#define PF_FREEZER_NOSIG 0x80000000 /* Freezer won't send signals to it */
#define PF_KSOFTIRQD 0x00000001 /* I am ksoftirqd */
#define PF_STARTING 0x00000002 /* being created */
#define PF_EXITING 0x00000004 /* getting shut down */
#define PF_EXITPIDONE 0x00000008 /* pi exit done on shut down */
#define PF_VCPU 0x00000010 /* I'm a virtual CPU */
#define PF_WQ_WORKER 0x00000020 /* I'm a workqueue worker */
#define PF_FORKNOEXEC 0x00000040 /* forked but didn't exec */
#define PF_MCE_PROCESS 0x00000080 /* process policy on mce errors */
#define PF_SUPERPRIV 0x00000100 /* used super-user privileges */
#define PF_DUMPCORE 0x00000200 /* dumped core */
#define PF_SIGNALED 0x00000400 /* killed by a signal */
#define PF_MEMALLOC 0x00000800 /* Allocating memory */
#define PF_USED_MATH 0x00002000 /* if unset the fpu must be initialized before use */
#define PF_FREEZING 0x00004000 /* freeze in progress. do not account to load */
#define PF_NOFREEZE 0x00008000 /* this thread should not be frozen */
#define PF_FROZEN 0x00010000 /* frozen for system suspend */
#define PF_FSTRANS 0x00020000 /* inside a filesystem transaction */
#define PF_KSWAPD 0x00040000 /* I am kswapd */
#define PF_OOM_ORIGIN 0x00080000 /* Allocating much memory to others */
#define PF_LESS_THROTTLE 0x00100000 /* Throttle me less: I clean memory */
#define PF_KTHREAD 0x00200000 /* I am a kernel thread */
#define PF_RANDOMIZE 0x00400000 /* randomize virtual address space */
#define PF_SWAPWRITE 0x00800000 /* Allowed to write to swap */
#define PF_SPREAD_PAGE 0x01000000 /* Spread page cache over cpuset */
#define PF_SPREAD_SLAB 0x02000000 /* Spread some slab caches over cpuset */
#define PF_THREAD_BOUND 0x04000000 /* Thread bound to specific cpu */
#define PF_MCE_EARLY 0x08000000 /* Early kill for mce process policy */
#define PF_MEMPOLICY 0x10000000 /* Non-default NUMA mempolicy */
#define PF_MUTEX_TESTER 0x20000000 /* Thread belongs to the rt mutex tester */
#define PF_FREEZER_SKIP 0x40000000 /* Freezer should not count it as freezable */
#define PF_FREEZER_NOSIG 0x80000000 /* Freezer won't send signals to it */
5、表示程序親屬關係的成員
[cpp] view plain copy print?
struct task_struct *real_parent; /* real parent process */
struct task_struct *parent; /* recipient of SIGCHLD, wait4() reports */
struct list_head children; /* list of my children */
struct list_head sibling; /* linkage in my parent's children list */
struct task_struct *group_leader; /* threadgroup leader */
struct task_struct *real_parent; /* real parent process */
struct task_struct *parent; /* recipient of SIGCHLD, wait4() reports */
struct list_head children; /* list of my children */
struct list_head sibling; /* linkage in my parent's children list */
struct task_struct *group_leader; /* threadgroup leader */
在Linux系統中,所有程序之間都有著直接或間接地聯絡,每個程序都有其父程序,也可能有零個或多個子程序。擁有同一父程序的所有程序具有兄弟關係。
real_parent指向其父程序,如果建立它的父程序不再存在,則指向PID為1的init程序。
parent指向其父程序,當它終止時,必須向它的父程序傳送訊號。它的值通常與real_parent相同。
children表示連結串列的頭部,連結串列中的所有元素都是它的子程序。
sibling用於把當前程序插入到兄弟連結串列中。
group_leader指向其所在執行緒組(thread group)的領頭執行緒。
6、ptrace系統呼叫
[cpp] view plain copy print?
unsigned int ptrace;
struct list_head ptraced;
struct list_head ptrace_entry;
unsigned long ptrace_message;
siginfo_t *last_siginfo; /* For ptrace use. */
#ifdef CONFIG_HAVE_HW_BREAKPOINT
atomic_t ptrace_bp_refcnt;
#endif
unsigned int ptrace;
struct list_head ptraced;
struct list_head ptrace_entry;
unsigned long ptrace_message;
siginfo_t *last_siginfo; /* For ptrace use. */
#ifdef CONFIG_HAVE_HW_BREAKPOINT
atomic_t ptrace_bp_refcnt;
#endif
成員ptrace被設定為0時表示不需要被跟蹤,它的可能取值如下:
[cpp] view plain copy print?
/* linux-2.6.38.8/include/linux/ptrace.h */
#define PT_PTRACED 0x00000001
#define PT_DTRACE 0x00000002 /* delayed trace (used on m68k, i386) */
#define PT_TRACESYSGOOD 0x00000004
#define PT_PTRACE_CAP 0x00000008 /* ptracer can follow suid-exec */
#define PT_TRACE_FORK 0x00000010
#define PT_TRACE_VFORK 0x00000020
#define PT_TRACE_CLONE 0x00000040
#define PT_TRACE_EXEC 0x00000080
#define PT_TRACE_VFORK_DONE 0x00000100
#define PT_TRACE_EXIT 0x00000200
/* linux-2.6.38.8/include/linux/ptrace.h */
#define PT_PTRACED 0x00000001
#define PT_DTRACE 0x00000002 /* delayed trace (used on m68k, i386) */
#define PT_TRACESYSGOOD 0x00000004
#define PT_PTRACE_CAP 0x00000008 /* ptracer can follow suid-exec */
#define PT_TRACE_FORK 0x00000010
#define PT_TRACE_VFORK 0x00000020
#define PT_TRACE_CLONE 0x00000040
#define PT_TRACE_EXEC 0x00000080
#define PT_TRACE_VFORK_DONE 0x00000100
#define PT_TRACE_EXIT 0x00000200
7、Performance Event
[cpp] view plain copy print?
#ifdef CONFIG_PERF_EVENTS
struct perf_event_context *perf_event_ctxp[perf_nr_task_contexts];
struct mutex perf_event_mutex;
struct list_head perf_event_list;
#endif
#ifdef CONFIG_PERF_EVENTS
struct perf_event_context *perf_event_ctxp[perf_nr_task_contexts];
struct mutex perf_event_mutex;
struct list_head perf_event_list;
#endif
Performance Event是一款隨 Linux 核心程式碼一同釋出和維護的效能診斷工具。這些成員用於幫助 Performance Event 分析程序的效能問題。
關於Performance Event工具的介紹可參考文章http://www.ibm.com/developerworks/cn/linux/l-cn-perf1/index.html?ca=drs-#major1和http://www.ibm.com/developerworks/cn/linux/l-cn-perf2/index.html?ca=drs-#major1。
8、程序排程
[cpp] view plain copy print?
int prio, static_prio, normal_prio;
unsigned int rt_priority;
const struct sched_class *sched_class;
struct sched_entity se;
struct sched_rt_entity rt;
unsigned int policy;
cpumask_t cpus_allowed;
int prio, static_prio, normal_prio;
unsigned int rt_priority;
const struct sched_class *sched_class;
struct sched_entity se;
struct sched_rt_entity rt;
unsigned int policy;
cpumask_t cpus_allowed;
實時優先順序範圍是0到MAX_RT_PRIO-1(即99),而普通程序的靜態優先順序範圍是從MAX_RT_PRIO到MAX_PRIO-1(即100到139)。值越大靜態優先順序越低。
[cpp] view plain copy print?
/* linux-2.6.38.8/include/linux/sched.h */
#define MAX_USER_RT_PRIO 100
#define MAX_RT_PRIO MAX_USER_RT_PRIO
#define MAX_PRIO (MAX_RT_PRIO + 40)
#define DEFAULT_PRIO (MAX_RT_PRIO + 20)
/* linux-2.6.38.8/include/linux/sched.h */
#define MAX_USER_RT_PRIO 100
#define MAX_RT_PRIO MAX_USER_RT_PRIO
#define MAX_PRIO (MAX_RT_PRIO + 40)
#define DEFAULT_PRIO (MAX_RT_PRIO + 20)
static_prio用於儲存靜態優先順序,可以通過nice系統呼叫來進行修改。
rt_priority用於儲存實時優先順序。
normal_prio的值取決於靜態優先順序和排程策略。
prio用於儲存動態優先順序。
policy表示程序的排程策略,目前主要有以下五種:
[cpp] view plain copy print?
#define SCHED_NORMAL 0
#define SCHED_FIFO 1
#define SCHED_RR 2
#define SCHED_BATCH 3
/* SCHED_ISO: reserved but not implemented yet */
#define SCHED_IDLE 5
#define SCHED_NORMAL 0
#define SCHED_FIFO 1
#define SCHED_RR 2
#define SCHED_BATCH 3
/* SCHED_ISO: reserved but not implemented yet */
#define SCHED_IDLE 5
SCHED_NORMAL用於普通程序,通過CFS排程器實現。SCHED_BATCH用於非互動的處理器消耗型程序。SCHED_IDLE是在系統負載很低時使用。
SCHED_FIFO(先入先出排程演算法)和SCHED_RR(輪流排程演算法)都是實時排程策略。
sched_class結構體表示排程類,目前核心中有實現以下四種:
[cpp] view plain copy print?
/* linux-2.6.38.8/kernel/sched_fair.c */
static const struct sched_class fair_sched_class;
/* linux-2.6.38.8/kernel/sched_rt.c */
static const struct sched_class rt_sched_class;
/* linux-2.6.38.8/kernel/sched_idletask.c */
static const struct sched_class idle_sched_class;
/* linux-2.6.38.8/kernel/sched_stoptask.c */
static const struct sched_class stop_sched_class;
/* linux-2.6.38.8/kernel/sched_fair.c */
static const struct sched_class fair_sched_class;
/* linux-2.6.38.8/kernel/sched_rt.c */
static const struct sched_class rt_sched_class;
/* linux-2.6.38.8/kernel/sched_idletask.c */
static const struct sched_class idle_sched_class;
/* linux-2.6.38.8/kernel/sched_stoptask.c */
static const struct sched_class stop_sched_class;
se和rt都是排程實體(scheduling entity),一個用於普通程序,一個用於實時程序,每個程序都有其中之一的實體。
cpus_allowed用於控制程序可以在哪些處理器上執行。
9、程序地址空間
[cpp] view plain copy print?
struct mm_struct *mm, *active_mm;
#ifdef CONFIG_COMPAT_BRK
unsigned brk_randomized:1;
#endif
#if defined(SPLIT_RSS_COUNTING)
struct task_rss_stat rss_stat;
#endif
struct mm_struct *mm, *active_mm;
#ifdef CONFIG_COMPAT_BRK
unsigned brk_randomized:1;
#endif
#if defined(SPLIT_RSS_COUNTING)
struct task_rss_stat rss_stat;
#endif
mm指向程序所擁有的記憶體描述符,而active_mm指向程序執行時所使用的記憶體描述符。對於普通程序而言,這兩個指標變數的值相同。但是,核心執行緒不擁有任何記憶體描述符,所以它們的mm成員總是為NULL。當核心執行緒得以執行時,它的active_mm成員被初始化為前一個執行程序的active_mm值。
brk_randomized的用法在http://lkml.indiana.edu/hypermail/Linux/kernel/1104.1/00196.html上有介紹,用來確定對隨機堆記憶體的探測。
rss_stat用來記錄緩衝資訊。
10、判斷標誌
[cpp] view plain copy print?
int exit_code, exit_signal;
int pdeath_signal; /* The signal sent when the parent dies */
/* ??? */
unsigned int personality;
unsigned did_exec:1;
unsigned in_execve:1; /* Tell the LSMs that the process is doing an
* execve */
unsigned in_iowait:1;
/* Revert to default priority/policy when forking */
unsigned sched_reset_on_fork:1;
int exit_code, exit_signal;
int pdeath_signal; /* The signal sent when the parent dies */
/* ??? */
unsigned int personality;
unsigned did_exec:1;
unsigned in_execve:1; /* Tell the LSMs that the process is doing an
* execve */
unsigned in_iowait:1;
/* Revert to default priority/policy when forking */
unsigned sched_reset_on_fork:1;
exit_code用於設定程序的終止代號,這個值要麼是_exit()或exit_group()系統呼叫引數(正常終止),要麼是由核心提供的一個錯誤代號(異常終止)。
exit_signal被置為-1時表示是某個執行緒組中的一員。只有當執行緒組的最後一個成員終止時,才會產生一個訊號,以通知執行緒組的領頭程序的父程序。
pdeath_signal用於判斷父程序終止時傳送訊號。
personality用於處理不同的ABI,它的可能取值如下:
[cpp] view plain copy print?
enum {
PER_LINUX = 0x0000,
PER_LINUX_32BIT = 0x0000 | ADDR_LIMIT_32BIT,
PER_LINUX_FDPIC = 0x0000 | FDPIC_FUNCPTRS,
PER_SVR4 = 0x0001 | STICKY_TIMEOUTS | MMAP_PAGE_ZERO,
PER_SVR3 = 0x0002 | STICKY_TIMEOUTS | SHORT_INODE,
PER_SCOSVR3 = 0x0003 | STICKY_TIMEOUTS |
WHOLE_SECONDS | SHORT_INODE,
PER_OSR5 = 0x0003 | STICKY_TIMEOUTS | WHOLE_SECONDS,
PER_WYSEV386 = 0x0004 | STICKY_TIMEOUTS | SHORT_INODE,
PER_ISCR4 = 0x0005 | STICKY_TIMEOUTS,
PER_BSD = 0x0006,
PER_SUNOS = 0x0006 | STICKY_TIMEOUTS,
PER_XENIX = 0x0007 | STICKY_TIMEOUTS | SHORT_INODE,
PER_LINUX32 = 0x0008,
PER_LINUX32_3GB = 0x0008 | ADDR_LIMIT_3GB,
PER_IRIX32 = 0x0009 | STICKY_TIMEOUTS,/* IRIX5 32-bit */
PER_IRIXN32 = 0x000a | STICKY_TIMEOUTS,/* IRIX6 new 32-bit */
PER_IRIX64 = 0x000b | STICKY_TIMEOUTS,/* IRIX6 64-bit */
PER_RISCOS = 0x000c,
PER_SOLARIS = 0x000d | STICKY_TIMEOUTS,
PER_UW7 = 0x000e | STICKY_TIMEOUTS | MMAP_PAGE_ZERO,
PER_OSF4 = 0x000f, /* OSF/1 v4 */
PER_HPUX = 0x0010,
PER_MASK = 0x00ff,
};
enum {
PER_LINUX = 0x0000,
PER_LINUX_32BIT = 0x0000 | ADDR_LIMIT_32BIT,
PER_LINUX_FDPIC = 0x0000 | FDPIC_FUNCPTRS,
PER_SVR4 = 0x0001 | STICKY_TIMEOUTS | MMAP_PAGE_ZERO,
PER_SVR3 = 0x0002 | STICKY_TIMEOUTS | SHORT_INODE,
PER_SCOSVR3 = 0x0003 | STICKY_TIMEOUTS |
WHOLE_SECONDS | SHORT_INODE,
PER_OSR5 = 0x0003 | STICKY_TIMEOUTS | WHOLE_SECONDS,
PER_WYSEV386 = 0x0004 | STICKY_TIMEOUTS | SHORT_INODE,
PER_ISCR4 = 0x0005 | STICKY_TIMEOUTS,
PER_BSD = 0x0006,
PER_SUNOS = 0x0006 | STICKY_TIMEOUTS,
PER_XENIX = 0x0007 | STICKY_TIMEOUTS | SHORT_INODE,
PER_LINUX32 = 0x0008,
PER_LINUX32_3GB = 0x0008 | ADDR_LIMIT_3GB,
PER_IRIX32 = 0x0009 | STICKY_TIMEOUTS,/* IRIX5 32-bit */
PER_IRIXN32 = 0x000a | STICKY_TIMEOUTS,/* IRIX6 new 32-bit */
PER_IRIX64 = 0x000b | STICKY_TIMEOUTS,/* IRIX6 64-bit */
PER_RISCOS = 0x000c,
PER_SOLARIS = 0x000d | STICKY_TIMEOUTS,
PER_UW7 = 0x000e | STICKY_TIMEOUTS | MMAP_PAGE_ZERO,
PER_OSF4 = 0x000f, /* OSF/1 v4 */
PER_HPUX = 0x0010,
PER_MASK = 0x00ff,
};
did_exec用於記錄程序程式碼是否被execve()函式所執行。
in_execve用於通知LSM是否被do_execve()函式所呼叫。詳見補丁說明:http://lkml.indiana.edu/hypermail/linux/kernel/0901.1/00014.html。
in_iowait用於判斷是否進行iowait計數。
sched_reset_on_fork用於判斷是否恢復預設的優先順序或排程策略。
11、時間
[cpp] view plain copy print?
cputime_t utime, stime, utimescaled, stimescaled;
cputime_t gtime;
#ifndef CONFIG_VIRT_CPU_ACCOUNTING
cputime_t prev_utime, prev_stime;
#endif
unsigned long nvcsw, nivcsw; /* context switch counts */
struct timespec start_time; /* monotonic time */
struct timespec real_start_time; /* boot based time */
struct task_cputime cputime_expires;
struct list_head cpu_timers[3];
#ifdef CONFIG_DETECT_HUNG_TASK
/* hung task detection */
unsigned long last_switch_count;
#endif
cputime_t utime, stime, utimescaled, stimescaled;
cputime_t gtime;
#ifndef CONFIG_VIRT_CPU_ACCOUNTING
cputime_t prev_utime, prev_stime;
#endif
unsigned long nvcsw, nivcsw; /* context switch counts */
struct timespec start_time; /* monotonic time */
struct timespec real_start_time; /* boot based time */
struct task_cputime cputime_expires;
struct list_head cpu_timers[3];
#ifdef CONFIG_DETECT_HUNG_TASK
/* hung task detection */
unsigned long last_switch_count;
#endif
utime/stime用於記錄程序在使用者態/核心態下所經過的節拍數(定時器)。prev_utime/prev_stime是先前的執行時間,請參考補丁說明http://lkml.indiana.edu/hypermail/linux/kernel/1003.3/02431.html。
utimescaled/stimescaled也是用於記錄程序在使用者態/核心態的執行時間,但它們以處理器的頻率為刻度。
gtime是以節拍計數的虛擬機器執行時間(guest time)。
nvcsw/nivcsw是自願(voluntary)/非自願(involuntary)上下文切換計數。last_switch_count是nvcsw和nivcsw的總和。
start_time和real_start_time都是程序建立時間,real_start_time還包含了程序睡眠時間,常用於/proc/pid/stat,補丁說明請參考http://lkml.indiana.edu/hypermail/linux/kernel/0705.0/2094.html。
cputime_expires用來統計程序或程序組被跟蹤的處理器時間,其中的三個成員對應著cpu_timers[3]的三個連結串列。
12、訊號處理
[cpp] view plain copy print?
/* signal handlers */
struct signal_struct *signal;
struct sighand_struct *sighand;
sigset_t blocked, real_blocked;
sigset_t saved_sigmask; /* restored if set_restore_sigmask() was used */
struct sigpending pending;
unsigned long sas_ss_sp;
size_t sas_ss_size;
int (*notifier)(void *priv);
void *notifier_data;
sigset_t *notifier_mask;
/* signal handlers */
struct signal_struct *signal;
struct sighand_struct *sighand;
sigset_t blocked, real_blocked;
sigset_t saved_sigmask; /* restored if set_restore_sigmask() was used */
struct sigpending pending;
unsigned long sas_ss_sp;
size_t sas_ss_size;
int (*notifier)(void *priv);
void *notifier_data;
sigset_t *notifier_mask;
signal指向程序的訊號描述符。
sighand指向程序的訊號處理程式描述符。
blocked表示被阻塞訊號的掩碼,real_blocked表示臨時掩碼。
pending存放私有掛起訊號的資料結構。
sas_ss_sp是訊號處理程式備用堆疊的地址,sas_ss_size表示堆疊的大小。
裝置驅動程式常用notifier指向的函式來阻塞程序的某些訊號(notifier_mask是這些訊號的位掩碼),notifier_data指的是notifier所指向的函式可能使用的資料。
13、其他
(1)、用於保護資源分配或釋放的自旋鎖
[cpp] view plain copy print?
/* Protection of (de-)allocation: mm, files, fs, tty, keyrings, mems_allowed,
 * mempolicy */
spinlock_t alloc_lock;
/* Protection of (de-)allocation: mm, files, fs, tty, keyrings, mems_allowed,
 * mempolicy */
spinlock_t alloc_lock;
(2)、程序描述符使用計數,被置為2時,表示程序描述符正在被使用而且其相應的程序處於活動狀態。
[cpp] view plain copy print?
atomic_t usage;
atomic_t usage;
(3)、用於表示獲取大核心鎖的次數,如果程序未獲得過鎖,則置為-1。
[cpp] view plain copy print?
int lock_depth; /* BKL lock depth */
int lock_depth; /* BKL lock depth */
(4)、在SMP上幫助實現無加鎖的程序切換(unlocked context switches)
[cpp] view plain copy print?
#ifdef CONFIG_SMP
#ifdef __ARCH_WANT_UNLOCKED_CTXSW
int oncpu;
#endif
#endif
#ifdef CONFIG_SMP
#ifdef __ARCH_WANT_UNLOCKED_CTXSW
int oncpu;
#endif
#endif
(5)、preempt_notifier結構體連結串列
[cpp] view plain copy print?
#ifdef CONFIG_PREEMPT_NOTIFIERS
/* list of struct preempt_notifier: */
struct hlist_head preempt_notifiers;
#endif
#ifdef CONFIG_PREEMPT_NOTIFIERS
/* list of struct preempt_notifier: */
struct hlist_head preempt_notifiers;
#endif
(6)、FPU使用計數
[cpp] view plain copy print?
unsigned char fpu_counter;
unsigned char fpu_counter;
(7)、blktrace是一個針對Linux核心中塊裝置I/O層的跟蹤工具。
[cpp] view plain copy print?
#ifdef CONFIG_BLK_DEV_IO_TRACE
unsigned int btrace_seq;
#endif
#ifdef CONFIG_BLK_DEV_IO_TRACE
unsigned int btrace_seq;
#endif
(8)、RCU同步原語
[cpp] view plain copy print?
#ifdef CONFIG_PREEMPT_RCU
int rcu_read_lock_nesting;
char rcu_read_unlock_special;
struct list_head rcu_node_entry;
#endif /* #ifdef CONFIG_PREEMPT_RCU */
#ifdef CONFIG_TREE_PREEMPT_RCU
struct rcu_node *rcu_blocked_node;
#endif /* #ifdef CONFIG_TREE_PREEMPT_RCU */
#ifdef CONFIG_RCU_BOOST
struct rt_mutex *rcu_boost_mutex;
#endif /* #ifdef CONFIG_RCU_BOOST */
#ifdef CONFIG_PREEMPT_RCU
int rcu_read_lock_nesting;
char rcu_read_unlock_special;
struct list_head rcu_node_entry;
#endif /* #ifdef CONFIG_PREEMPT_RCU */
#ifdef CONFIG_TREE_PREEMPT_RCU
struct rcu_node *rcu_blocked_node;
#endif /* #ifdef CONFIG_TREE_PREEMPT_RCU */
#ifdef CONFIG_RCU_BOOST
struct rt_mutex *rcu_boost_mutex;
#endif /* #ifdef CONFIG_RCU_BOOST */
(9)、用於排程器統計程序的執行資訊
[cpp] view plain copy print?
#if defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT)
struct sched_info sched_info;
#endif
#if defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT)
struct sched_info sched_info;
#endif
(10)、用於構建程序連結串列
[cpp] view plain copy print?
struct list_head tasks;
struct list_head tasks;
(11)、to limit pushing to one attempt
[cpp] view plain copy print?
#ifdef CONFIG_SMP
struct plist_node pushable_tasks;
#endif
#ifdef CONFIG_SMP
struct plist_node pushable_tasks;
#endif
補丁說明請參考:http://lkml.indiana.edu/hypermail/linux/kernel/0808.3/0503.html
(12)、防止核心堆疊溢位
[cpp] view plain copy print?
#ifdef CONFIG_CC_STACKPROTECTOR
/* Canary value for the -fstack-protector gcc feature */
unsigned long stack_canary;
#endif
#ifdef CONFIG_CC_STACKPROTECTOR
/* Canary value for the -fstack-protector gcc feature */
unsigned long stack_canary;
#endif
在GCC編譯核心時,需要加上-fstack-protector選項。
(13)、PID散列表和連結串列
[cpp] view plain copy print?
/* PID/PID hash table linkage. */
struct pid_link pids[PIDTYPE_MAX];
struct list_head thread_group; //執行緒組中所有程序的連結串列
/* PID/PID hash table linkage. */
struct pid_link pids[PIDTYPE_MAX];
struct list_head thread_group; //執行緒組中所有程序的連結串列
(14)、do_fork函式
[cpp] view plain copy print?
struct completion *vfork_done; /* for vfork() */
int __user *set_child_tid; /* CLONE_CHILD_SETTID */
int __user *clear_child_tid; /* CLONE_CHILD_CLEARTID */
struct completion *vfork_done; /* for vfork() */
int __user *set_child_tid; /* CLONE_CHILD_SETTID */
int __user *clear_child_tid; /* CLONE_CHILD_CLEARTID */
在執行do_fork()時,如果給定特別標誌,則vfork_done會指向一個特殊地址。
如果copy_process函式的clone_flags引數的值被置為CLONE_CHILD_SETTID或CLONE_CHILD_CLEARTID,則會把child_tidptr引數的值分別複製到set_child_tid和clear_child_tid成員。這些標誌說明必須改變子程序使用者態地址空間的child_tidptr所指向的變數的值。
(15)、缺頁統計
[cpp] view plain copy print?
/* mm fault and swap info: this can arguably be seen as either mm-specific or thread-specific */
unsigned long min_flt, maj_flt;
/* mm fault and swap info: this can arguably be seen as either mm-specific or thread-specific */
unsigned long min_flt, maj_flt;
(16)、程序權能
[cpp] view plain copy print?
const struct cred __rcu *real_cred; /* objective and real subjective task
* credentials (COW) */
const struct cred __rcu *cred; /* effective (overridable) subjective task
* credentials (COW) */
struct cred *replacement_session_keyring; /* for KEYCTL_SESSION_TO_PARENT */
const struct cred __rcu *real_cred; /* objective and real subjective task
* credentials (COW) */
const struct cred __rcu *cred; /* effective (overridable) subjective task
* credentials (COW) */
struct cred *replacement_session_keyring; /* for KEYCTL_SESSION_TO_PARENT */
(17)、相應的程式名
[cpp] view plain copy print?
char comm[TASK_COMM_LEN];
char comm[TASK_COMM_LEN];
(18)、檔案
[cpp] view plain copy print?
/* file system info */
int link_count, total_link_count;
/* filesystem information */
struct fs_struct *fs;
/* open file information */
struct files_struct *files;
/* file system info */
int link_count, total_link_count;
/* filesystem information */
struct fs_struct *fs;
/* open file information */
struct files_struct *files;
fs用來表示程序與檔案系統的聯絡,包括當前目錄和根目錄。
files表示程序當前開啟的檔案。
(19)、程序通訊(SYSVIPC)
[cpp] view plain copy print?
#ifdef CONFIG_SYSVIPC
/* ipc stuff */
struct sysv_sem sysvsem;
#endif
#ifdef CONFIG_SYSVIPC
/* ipc stuff */
struct sysv_sem sysvsem;
#endif
(20)、處理器特有資料
[cpp] view plain copy print?
/* CPU-specific state of this task */
struct thread_struct thread;
/* CPU-specific state of this task */
struct thread_struct thread;
(21)、名稱空間
[cpp] view plain copy print?
/* namespaces */
struct nsproxy *nsproxy;
/* namespaces */
struct nsproxy *nsproxy;
(22)、程序審計
[cpp] view plain copy print?
struct audit_context *audit_context;
#ifdef CONFIG_AUDITSYSCALL
uid_t loginuid;
unsigned int sessionid;
#endif
struct audit_context *audit_context;
#ifdef CONFIG_AUDITSYSCALL
uid_t loginuid;
unsigned int sessionid;
#endif
(23)、secure computing
[cpp] view plain copy print?
seccomp_t seccomp;
seccomp_t seccomp;
(24)、用於copy_process函式使用CLONE_PARENT 標記時
[cpp] view plain copy print?
/* Thread group tracking */
u32 parent_exec_id;
u32 self_exec_id;
/* Thread group tracking */
u32 parent_exec_id;
u32 self_exec_id;
(25)、中斷
[cpp] view plain copy print?
#ifdef CONFIG_GENERIC_HARDIRQS
/* IRQ handler threads */
struct irqaction *irqaction;
#endif
#ifdef CONFIG_TRACE_IRQFLAGS
unsigned int irq_events;
unsigned long hardirq_enable_ip;
unsigned long hardirq_disable_ip;
unsigned int hardirq_enable_event;
unsigned int hardirq_disable_event;
int hardirqs_enabled;
int hardirq_context;
unsigned long softirq_disable_ip;
unsigned long softirq_enable_ip;
unsigned int softirq_disable_event;
unsigned int softirq_enable_event;
int softirqs_enabled;
int softirq_context;
#endif
#ifdef CONFIG_GENERIC_HARDIRQS
/* IRQ handler threads */
struct irqaction *irqaction;
#endif
#ifdef CONFIG_TRACE_IRQFLAGS
unsigned int irq_events;
unsigned long hardirq_enable_ip;
unsigned long hardirq_disable_ip;
unsigned int hardirq_enable_event;
unsigned int hardirq_disable_event;
int hardirqs_enabled;
int hardirq_context;
unsigned long softirq_disable_ip;
unsigned long softirq_enable_ip;
unsigned int softirq_disable_event;
unsigned int softirq_enable_event;
int softirqs_enabled;
int softirq_context;
#endif
(26)、task_rq_lock函式所使用的鎖
[cpp] view plain copy print?
/* Protection of the PI data structures: */
raw_spinlock_t pi_lock;
/* Protection of the PI data structures: */
raw_spinlock_t pi_lock;
(27)、基於PI協議的等待互斥鎖,其中PI指的是priority inheritance(優先順序繼承)
[cpp] view plain copy print?
#ifdef CONFIG_RT_MUTEXES
/* PI waiters blocked on a rt_mutex held by this task */
struct plist_head pi_waiters;
/* Deadlock detection and priority inheritance handling */
struct rt_mutex_waiter *pi_blocked_on;
#endif
#ifdef CONFIG_RT_MUTEXES
/* PI waiters blocked on a rt_mutex held by this task */
struct plist_head pi_waiters;
/* Deadlock detection and priority inheritance handling */
struct rt_mutex_waiter *pi_blocked_on;
#endif
(28)、死鎖檢測
[cpp] view plain copy print?
#ifdef CONFIG_DEBUG_MUTEXES
/* mutex deadlock detection */
struct mutex_waiter *blocked_on;
#endif
#ifdef CONFIG_DEBUG_MUTEXES
/* mutex deadlock detection */
struct mutex_waiter *blocked_on;
#endif
(29)、lockdep,參見核心說明文件linux-2.6.38.8/Documentation/lockdep-design.txt
[cpp] view plain copy print?
#ifdef CONFIG_LOCKDEP
# define MAX_LOCK_DEPTH 48UL
u64 curr_chain_key;
int lockdep_depth;
unsigned int lockdep_recursion;
struct held_lock held_locks[MAX_LOCK_DEPTH];
gfp_t lockdep_reclaim_gfp;
#endif
#ifdef CONFIG_LOCKDEP
# define MAX_LOCK_DEPTH 48UL
u64 curr_chain_key;
int lockdep_depth;
unsigned int lockdep_recursion;
struct held_lock held_locks[MAX_LOCK_DEPTH];
gfp_t lockdep_reclaim_gfp;
#endif
(30)、日誌檔案系統(journalling filesystem)資訊
[cpp] view plain copy print?
/* journalling filesystem info */
void *journal_info;
/* journalling filesystem info */
void *journal_info;
(31)、塊裝置連結串列
[cpp] view plain copy print?
/* stacked block device info */
struct bio_list *bio_list;
/* stacked block device info */
struct bio_list *bio_list;
(32)、記憶體回收
[cpp] view plain copy print?
struct reclaim_state *reclaim_state;
struct reclaim_state *reclaim_state;
(33)、存放塊裝置I/O資料流量資訊
[cpp] view plain copy print?
struct backing_dev_info *backing_dev_info;
struct backing_dev_info *backing_dev_info;
(34)、I/O排程器所使用的資訊
[cpp] view plain copy print?
struct io_context *io_context;
struct io_context *io_context;
(35)、記錄程序的I/O計數
[cpp] view plain copy print?
struct task_io_accounting ioac;
#if defined(CONFIG_TASK_XACCT)
u64 acct_rss_mem1; /* accumulated rss usage */
u64 acct_vm_mem1; /* accumulated virtual memory usage */
cputime_t acct_timexpd; /* stime + utime since last update */
#endif
struct task_io_accounting ioac;
#if defined(CONFIG_TASK_XACCT)
u64 acct_rss_mem1; /* accumulated rss usage */
u64 acct_vm_mem1; /* accumulated virtual memory usage */
cputime_t acct_timexpd; /* stime + utime since last update */
#endif
在Ubuntu 11.04上,執行cat獲得程序1的I/O計數如下:
[cpp] view plain copy print?
$ sudo cat /proc/1/io
$ sudo cat /proc/1/io
[cpp] view plain copy print?
rchar: 164258906
wchar: 455212837
syscr: 388847
syscw: 92563
read_bytes: 439251968
write_bytes: 14143488
cancelled_write_bytes: 2134016
rchar: 164258906
wchar: 455212837
syscr: 388847
syscw: 92563
read_bytes: 439251968
write_bytes: 14143488
cancelled_write_bytes: 2134016
輸出的資料項剛好是task_io_accounting結構體的所有成員。
(36)、CPUSET功能
[cpp] view plain copy print?
#ifdef CONFIG_CPUSETS
nodemask_t mems_allowed; /* Protected by alloc_lock */
int mems_allowed_change_disable;
int cpuset_mem_spread_rotor;
int cpuset_slab_spread_rotor;
#endif
#ifdef CONFIG_CPUSETS
nodemask_t mems_allowed; /* Protected by alloc_lock */
int mems_allowed_change_disable;
int cpuset_mem_spread_rotor;
int cpuset_slab_spread_rotor;
#endif
(37)、Control Groups
[cpp] view plain copy print?
#ifdef CONFIG_CGROUPS
/* Control Group info protected by css_set_lock */
struct css_set __rcu *cgroups;
/* cg_list protected by css_set_lock and tsk->alloc_lock */
struct list_head cg_list;
#endif
#ifdef CONFIG_CGROUP_MEM_RES_CTLR /* memcg uses this to do batch job */
struct memcg_batch_info {
int do_batch; /* incremented when batch uncharge started */
struct mem_cgroup *memcg; /* target memcg of uncharge */
unsigned long bytes; /* uncharged usage */
unsigned long memsw_bytes; /* uncharged mem+swap usage */
} memcg_batch;
#endif
#ifdef CONFIG_CGROUPS
/* Control Group info protected by css_set_lock */
struct css_set __rcu *cgroups;
/* cg_list protected by css_set_lock and tsk->alloc_lock */
struct list_head cg_list;
#endif
#ifdef CONFIG_CGROUP_MEM_RES_CTLR /* memcg uses this to do batch job */
struct memcg_batch_info {
int do_batch; /* incremented when batch uncharge started */
struct mem_cgroup *memcg; /* target memcg of uncharge */
unsigned long bytes; /* uncharged usage */
unsigned long memsw_bytes; /* uncharged mem+swap usage */
} memcg_batch;
#endif
(38)、futex同步機制
[cpp] view plain copy print?
#ifdef CONFIG_FUTEX
struct robust_list_head __user *robust_list;
#ifdef CONFIG_COMPAT
struct compat_robust_list_head __user *compat_robust_list;
#endif
struct list_head pi_state_list;
struct futex_pi_state *pi_state_cache;
#endif
#ifdef CONFIG_FUTEX
struct robust_list_head __user *robust_list;
#ifdef CONFIG_COMPAT
struct compat_robust_list_head __user *compat_robust_list;
#endif
struct list_head pi_state_list;
struct futex_pi_state *pi_state_cache;
#endif
(39)、非一致記憶體訪問(NUMA Non-Uniform Memory Access)
[cpp] view plain copy print?
#ifdef CONFIG_NUMA
struct mempolicy *mempolicy; /* Protected by alloc_lock */
short il_next;
#endif
#ifdef CONFIG_NUMA
struct mempolicy *mempolicy; /* Protected by alloc_lock */
short il_next;
#endif
(40)、檔案系統互斥資源
[cpp] view plain copy print?
atomic_t fs_excl; /* holding fs exclusive resources */
atomic_t fs_excl; /* holding fs exclusive resources */
(41)、RCU連結串列
[cpp] view plain copy print?
struct rcu_head rcu;
struct rcu_head rcu;
(42)、管道
[cpp] view plain copy print?
struct pipe_inode_info *splice_pipe;
struct pipe_inode_info *splice_pipe;
(43)、延遲計數
[cpp] view plain copy print?
#ifdef CONFIG_TASK_DELAY_ACCT
struct task_delay_info *delays;
#endif
#ifdef CONFIG_TASK_DELAY_ACCT
struct task_delay_info *delays;
#endif
(44)、fault injection,參考核心說明檔案linux-2.6.38.8/Documentation/fault-injection/fault-injection.txt
[cpp] view plain copy print?
#ifdef CONFIG_FAULT_INJECTION
int make_it_fail;
#endif
#ifdef CONFIG_FAULT_INJECTION
int make_it_fail;
#endif
(45)、Floating proportions
[cpp] view plain copy print?
struct prop_local_single dirties;
struct prop_local_single dirties;
(46)、Infrastructure for displaying latency
[cpp] view plain copy print?
#ifdef CONFIG_LATENCYTOP
int latency_record_count;
struct latency_record latency_record[LT_SAVECOUNT];
#endif
#ifdef CONFIG_LATENCYTOP
int latency_record_count;
struct latency_record latency_record[LT_SAVECOUNT];
#endif
(47)、time slack values,常用於poll和select函式
[cpp] view plain copy print?
unsigned long timer_slack_ns;
unsigned long default_timer_slack_ns;
unsigned long timer_slack_ns;
unsigned long default_timer_slack_ns;
(48)、socket控制訊息(control message)
[cpp] view plain copy print?
struct list_head *scm_work_list;
struct list_head *scm_work_list;
(49)、ftrace跟蹤器
[cpp] view plain copy print?
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
/* Index of current stored address in ret_stack */
int curr_ret_stack;
/* Stack of return addresses for return function tracing */
struct ftrace_ret_stack *ret_stack;
/* time stamp for last schedule */
unsigned long long ftrace_timestamp;
/*
 * Number of functions that haven't been traced
 * because of depth overrun.
 */
atomic_t trace_overrun;
/* Pause for the tracing */
atomic_t tracing_graph_pause;
#endif
#ifdef CONFIG_TRACING
/* state flags for use by tracers */
unsigned long trace;
/* bitmask of trace recursion */
unsigned long trace_recursion;
#endif /* CONFIG_TRACING */