1. 程式人生 > >task_struct 結構如何檢視及分析

task_struct 結構如何檢視及分析

    cd /
    find -name sched.h
    vim usr/src/kernels/3.10.0-862.6.3.el7.x86_64/include/linux/sched.h

https://www.cnblogs.com/zxc2man/p/6649771.html

程序是處於執行期的程式以及它所管理的資源(如開啟的檔案、掛起的訊號、程序狀態、地址空間等等)的總稱。注意,程式並不是程序,實際上兩個或多個程序不僅有可能執行同一程式,而且還有可能共享地址空間等資源。

Linux核心通過一個被稱為程序描述符的task_struct結構體來管理程序,這個結構體包含了一個程序所需的所有資訊。它定義在linux-2.6.38.8/include/linux/sched.h檔案中。

本文將盡力就task_struct結構體所有成員的用法進行簡要說明。

1、程序狀態 

[cpp] view plain copy print?
volatile long state;
int exit_state;
volatile long state;
int exit_state;
state成員的可能取值如下:

[cpp] view plain copy print?
#define TASK_RUNNING 0
#define TASK_INTERRUPTIBLE 1
#define TASK_UNINTERRUPTIBLE 2
#define __TASK_STOPPED 4
#define __TASK_TRACED 8
/* in tsk->exit_state */
#define EXIT_ZOMBIE 16
#define EXIT_DEAD 32
/* in tsk->state again */
#define TASK_DEAD 64
#define TASK_WAKEKILL 128
#define TASK_WAKING 256
#define TASK_RUNNING 0
#define TASK_INTERRUPTIBLE 1
#define TASK_UNINTERRUPTIBLE 2
#define __TASK_STOPPED 4
#define __TASK_TRACED 8
/* in tsk->exit_state */
#define EXIT_ZOMBIE 16
#define EXIT_DEAD 32
/* in tsk->state again */
#define TASK_DEAD 64
#define TASK_WAKEKILL 128
#define TASK_WAKING 256
系統中的每個程序都必然處於以上所列程序狀態中的一種。

TASK_RUNNING表示程序要麼正在執行,要麼正要準備執行。

TASK_INTERRUPTIBLE表示程序被阻塞(睡眠),直到某個條件變為真。條件一旦達成,程序的狀態就被設定為TASK_RUNNING。

TASK_UNINTERRUPTIBLE的意義與TASK_INTERRUPTIBLE類似,除了不能通過接受一個訊號來喚醒以外。

__TASK_STOPPED表示程序被停止執行。

__TASK_TRACED表示程序被debugger等程序監視。

EXIT_ZOMBIE表示程序的執行被終止,但是其父程序還沒有使用wait()等系統呼叫來獲知它的終止資訊。

EXIT_DEAD表示程序的最終狀態。

EXIT_ZOMBIE和EXIT_DEAD也可以存放在exit_state成員中。程序狀態的切換過程和原因大致如下圖(圖片來自《Linux Kernel Development》):



2、程序識別符號(PID) 

[cpp] view plain copy print?
pid_t pid;
pid_t tgid;

pid_t pid;
pid_t tgid;
在CONFIG_BASE_SMALL配置為0的情況下,PID的取值範圍是0到32767,即系統中的程序數最大為32768個。 

[cpp] view plain copy print?
/* linux-2.6.38.8/include/linux/threads.h */
#define PID_MAX_DEFAULT (CONFIG_BASE_SMALL ? 0x1000 : 0x8000)

/* linux-2.6.38.8/include/linux/threads.h */
#define PID_MAX_DEFAULT (CONFIG_BASE_SMALL ? 0x1000 : 0x8000)
在Linux系統中,一個執行緒組中的所有執行緒使用和該執行緒組的領頭執行緒(該組中的第一個輕量級程序)相同的PID,並被存放在tgid成員中。只有執行緒組的領頭執行緒的pid成員才會被設定為與tgid相同的值。注意,getpid()系統呼叫返回的是當前程序的tgid值而不是pid值。

3、程序核心棧 

[cpp] view plain copy print?
void *stack;

void *stack;
程序通過alloc_thread_info函式分配它的核心棧,通過free_thread_info函式釋放所分配的核心棧。 

[cpp] view plain copy print?
/* linux-2.6.38.8/kernel/fork.c */
static inline struct thread_info *alloc_thread_info(struct task_struct *tsk)
{
#ifdef CONFIG_DEBUG_STACK_USAGE
gfp_t mask = GFP_KERNEL | __GFP_ZERO;
#else
gfp_t mask = GFP_KERNEL;
#endif
return (struct thread_info *)__get_free_pages(mask, THREAD_SIZE_ORDER);
}
static inline void free_thread_info(struct thread_info *ti)
{
free_pages((unsigned long)ti, THREAD_SIZE_ORDER);
}

/* linux-2.6.38.8/kernel/fork.c */
static inline struct thread_info *alloc_thread_info(struct task_struct *tsk)
{
#ifdef CONFIG_DEBUG_STACK_USAGE
gfp_t mask = GFP_KERNEL | __GFP_ZERO;
#else
gfp_t mask = GFP_KERNEL;
#endif
return (struct thread_info *)__get_free_pages(mask, THREAD_SIZE_ORDER);
}
static inline void free_thread_info(struct thread_info *ti)
{
free_pages((unsigned long)ti, THREAD_SIZE_ORDER);
}
其中,THREAD_SIZE_ORDER巨集在linux-2.6.38.8/arch/arm/include/asm/thread_info.h檔案中被定義為1,也就是說alloc_thread_info函式通過呼叫__get_free_pages函式分配2個頁的記憶體(它的首地址是8192位元組對齊的)。

Linux核心通過thread_union聯合體來表示程序的核心棧,其中THREAD_SIZE巨集的大小為8192。 

[cpp] view plain copy print?
union thread_union {
struct thread_info thread_info;
unsigned long stack[THREAD_SIZE/sizeof(long)];
};

union thread_union {
struct thread_info thread_info;
unsigned long stack[THREAD_SIZE/sizeof(long)];
};
當程序從使用者態切換到核心態時,程序的核心棧總是空的,所以ARM的sp暫存器指向這個棧的頂端。因此,核心能夠輕易地通過sp暫存器獲得當前正在CPU上執行的程序。

[cpp] view plain copy print?
/* linux-2.6.38.8/arch/arm/include/asm/current.h */
static inline struct task_struct *get_current(void)
{
return current_thread_info()->task;
}

#define current (get_current())

/* linux-2.6.38.8/arch/arm/include/asm/thread_info.h */
static inline struct thread_info *current_thread_info(void)
{
register unsigned long sp asm ("sp");
return (struct thread_info *)(sp & ~(THREAD_SIZE - 1));
}
/* linux-2.6.38.8/arch/arm/include/asm/current.h */
static inline struct task_struct *get_current(void)
{
return current_thread_info()->task;
}

#define current (get_current())

/* linux-2.6.38.8/arch/arm/include/asm/thread_info.h */
static inline struct thread_info *current_thread_info(void)
{
register unsigned long sp asm ("sp");
return (struct thread_info *)(sp & ~(THREAD_SIZE - 1));
}
程序核心棧與程序描述符的關係如下圖:

4、標記 

[cpp] view plain copy print?
unsigned int flags; /* per process flags, defined below */

unsigned int flags;	/* per process flags, defined below */
flags成員的可能取值如下: 

[cpp] view plain copy print?
#define PF_KSOFTIRQD 0x00000001 /* I am ksoftirqd */
#define PF_STARTING 0x00000002 /* being created */
#define PF_EXITING 0x00000004 /* getting shut down */
#define PF_EXITPIDONE 0x00000008 /* pi exit done on shut down */
#define PF_VCPU 0x00000010 /* I'm a virtual CPU */
#define PF_WQ_WORKER 0x00000020 /* I'm a workqueue worker */
#define PF_FORKNOEXEC 0x00000040 /* forked but didn't exec */
#define PF_MCE_PROCESS 0x00000080 /* process policy on mce errors */
#define PF_SUPERPRIV 0x00000100 /* used super-user privileges */
#define PF_DUMPCORE 0x00000200 /* dumped core */
#define PF_SIGNALED 0x00000400 /* killed by a signal */
#define PF_MEMALLOC 0x00000800 /* Allocating memory */
#define PF_USED_MATH 0x00002000 /* if unset the fpu must be initialized before use */
#define PF_FREEZING 0x00004000 /* freeze in progress. do not account to load */
#define PF_NOFREEZE 0x00008000 /* this thread should not be frozen */
#define PF_FROZEN 0x00010000 /* frozen for system suspend */
#define PF_FSTRANS 0x00020000 /* inside a filesystem transaction */
#define PF_KSWAPD 0x00040000 /* I am kswapd */
#define PF_OOM_ORIGIN 0x00080000 /* Allocating much memory to others */
#define PF_LESS_THROTTLE 0x00100000 /* Throttle me less: I clean memory */
#define PF_KTHREAD 0x00200000 /* I am a kernel thread */
#define PF_RANDOMIZE 0x00400000 /* randomize virtual address space */
#define PF_SWAPWRITE 0x00800000 /* Allowed to write to swap */
#define PF_SPREAD_PAGE 0x01000000 /* Spread page cache over cpuset */
#define PF_SPREAD_SLAB 0x02000000 /* Spread some slab caches over cpuset */
#define PF_THREAD_BOUND 0x04000000 /* Thread bound to specific cpu */
#define PF_MCE_EARLY 0x08000000 /* Early kill for mce process policy */
#define PF_MEMPOLICY 0x10000000 /* Non-default NUMA mempolicy */
#define PF_MUTEX_TESTER 0x20000000 /* Thread belongs to the rt mutex tester */
#define PF_FREEZER_SKIP 0x40000000 /* Freezer should not count it as freezable */
#define PF_FREEZER_NOSIG 0x80000000 /* Freezer won't send signals to it */
#define PF_KSOFTIRQD 0x00000001 /* I am ksoftirqd */
#define PF_STARTING 0x00000002 /* being created */
#define PF_EXITING 0x00000004 /* getting shut down */
#define PF_EXITPIDONE 0x00000008 /* pi exit done on shut down */
#define PF_VCPU 0x00000010 /* I'm a virtual CPU */
#define PF_WQ_WORKER 0x00000020 /* I'm a workqueue worker */
#define PF_FORKNOEXEC 0x00000040 /* forked but didn't exec */
#define PF_MCE_PROCESS 0x00000080 /* process policy on mce errors */
#define PF_SUPERPRIV 0x00000100 /* used super-user privileges */
#define PF_DUMPCORE 0x00000200 /* dumped core */
#define PF_SIGNALED 0x00000400 /* killed by a signal */
#define PF_MEMALLOC 0x00000800 /* Allocating memory */
#define PF_USED_MATH 0x00002000 /* if unset the fpu must be initialized before use */
#define PF_FREEZING 0x00004000 /* freeze in progress. do not account to load */
#define PF_NOFREEZE 0x00008000 /* this thread should not be frozen */
#define PF_FROZEN 0x00010000 /* frozen for system suspend */
#define PF_FSTRANS 0x00020000 /* inside a filesystem transaction */
#define PF_KSWAPD 0x00040000 /* I am kswapd */
#define PF_OOM_ORIGIN 0x00080000 /* Allocating much memory to others */
#define PF_LESS_THROTTLE 0x00100000 /* Throttle me less: I clean memory */
#define PF_KTHREAD 0x00200000 /* I am a kernel thread */
#define PF_RANDOMIZE 0x00400000 /* randomize virtual address space */
#define PF_SWAPWRITE 0x00800000 /* Allowed to write to swap */
#define PF_SPREAD_PAGE 0x01000000 /* Spread page cache over cpuset */
#define PF_SPREAD_SLAB 0x02000000 /* Spread some slab caches over cpuset */
#define PF_THREAD_BOUND 0x04000000 /* Thread bound to specific cpu */
#define PF_MCE_EARLY 0x08000000 /* Early kill for mce process policy */
#define PF_MEMPOLICY 0x10000000 /* Non-default NUMA mempolicy */
#define PF_MUTEX_TESTER 0x20000000 /* Thread belongs to the rt mutex tester */
#define PF_FREEZER_SKIP 0x40000000 /* Freezer should not count it as freezable */
#define PF_FREEZER_NOSIG 0x80000000 /* Freezer won't send signals to it */
5、表示程序親屬關係的成員

[cpp] view plain copy print?
struct task_struct *real_parent; /* real parent process */
struct task_struct *parent; /* recipient of SIGCHLD, wait4() reports */
struct list_head children; /* list of my children */
struct list_head sibling; /* linkage in my parent's children list */
struct task_struct *group_leader; /* threadgroup leader */
struct task_struct *real_parent; /* real parent process */
struct task_struct *parent; /* recipient of SIGCHLD, wait4() reports */
struct list_head children; /* list of my children */
struct list_head sibling; /* linkage in my parent's children list */
struct task_struct *group_leader; /* threadgroup leader */
在Linux系統中,所有程序之間都有著直接或間接地聯絡,每個程序都有其父程序,也可能有零個或多個子程序。擁有同一父程序的所有程序具有兄弟關係。

real_parent指向其父程序,如果建立它的父程序不再存在,則指向PID為1的init程序。

parent指向其父程序,當它終止時,必須向它的父程序傳送訊號。它的值通常與real_parent相同。

children表示連結串列的頭部,連結串列中的所有元素都是它的子程序。

sibling用於把當前程序插入到兄弟連結串列中。

group_leader指向其所在執行緒組的領頭程序(threadgroup leader)。

6、ptrace系統呼叫 

[cpp] view plain copy print?
unsigned int ptrace;
struct list_head ptraced;
struct list_head ptrace_entry;
unsigned long ptrace_message;
siginfo_t *last_siginfo; /* For ptrace use. */
#ifdef CONFIG_HAVE_HW_BREAKPOINT
atomic_t ptrace_bp_refcnt;
#endif

unsigned int ptrace;
struct list_head ptraced;
struct list_head ptrace_entry;
unsigned long ptrace_message;
siginfo_t *last_siginfo; /* For ptrace use.  */

#ifdef CONFIG_HAVE_HW_BREAKPOINT
atomic_t ptrace_bp_refcnt;
#endif
成員ptrace被設定為0時表示不需要被跟蹤,它的可能取值如下:

[cpp] view plain copy print?
/* linux-2.6.38.8/include/linux/ptrace.h */
#define PT_PTRACED 0x00000001
#define PT_DTRACE 0x00000002 /* delayed trace (used on m68k, i386) */
#define PT_TRACESYSGOOD 0x00000004
#define PT_PTRACE_CAP 0x00000008 /* ptracer can follow suid-exec */
#define PT_TRACE_FORK 0x00000010
#define PT_TRACE_VFORK 0x00000020
#define PT_TRACE_CLONE 0x00000040
#define PT_TRACE_EXEC 0x00000080
#define PT_TRACE_VFORK_DONE 0x00000100
#define PT_TRACE_EXIT 0x00000200

/* linux-2.6.38.8/include/linux/ptrace.h */
#define PT_PTRACED 0x00000001
#define PT_DTRACE 0x00000002 /* delayed trace (used on m68k, i386) */
#define PT_TRACESYSGOOD 0x00000004
#define PT_PTRACE_CAP 0x00000008 /* ptracer can follow suid-exec */
#define PT_TRACE_FORK 0x00000010
#define PT_TRACE_VFORK 0x00000020
#define PT_TRACE_CLONE 0x00000040
#define PT_TRACE_EXEC 0x00000080
#define PT_TRACE_VFORK_DONE 0x00000100
#define PT_TRACE_EXIT 0x00000200
7、Performance Event

[cpp] view plain copy print?
#ifdef CONFIG_PERF_EVENTS
struct perf_event_context *perf_event_ctxp[perf_nr_task_contexts];
struct mutex perf_event_mutex;
struct list_head perf_event_list;
#endif

#ifdef CONFIG_PERF_EVENTS
struct perf_event_context *perf_event_ctxp[perf_nr_task_contexts];
struct mutex perf_event_mutex;
struct list_head perf_event_list;
#endif
Performance Event是一款隨Linux核心程式碼一同釋出和維護的效能診斷工具。這些成員用於幫助Performance Event分析程序的效能問題。

關於Performance Event工具的介紹可參考文章http://www.ibm.com/developerworks/cn/linux/l-cn-perf1/index.html?ca=drs-#major1和http://www.ibm.com/developerworks/cn/linux/l-cn-perf2/index.html?ca=drs-#major1。

8、程序排程 

[cpp] view plain copy print?
int prio, static_prio, normal_prio;
unsigned int rt_priority;
const struct sched_class *sched_class;
struct sched_entity se;
struct sched_rt_entity rt;
unsigned int policy;
cpumask_t cpus_allowed;

int prio, static_prio, normal_prio;
unsigned int rt_priority;
const struct sched_class *sched_class;
struct sched_entity se;
struct sched_rt_entity rt;
unsigned int policy;
cpumask_t cpus_allowed;
實時優先順序範圍是0到MAX_RT_PRIO-1(即99),而普通程序的靜態優先順序範圍是從MAX_RT_PRIO到MAX_PRIO-1(即100到139)。值越大靜態優先順序越低。 

[cpp] view plain copy print?
/* linux-2.6.38.8/include/linux/sched.h */
#define MAX_USER_RT_PRIO 100
#define MAX_RT_PRIO MAX_USER_RT_PRIO

#define MAX_PRIO (MAX_RT_PRIO + 40)
#define DEFAULT_PRIO (MAX_RT_PRIO + 20)

/* linux-2.6.38.8/include/linux/sched.h */
#define MAX_USER_RT_PRIO 100
#define MAX_RT_PRIO MAX_USER_RT_PRIO

#define MAX_PRIO (MAX_RT_PRIO + 40)
#define DEFAULT_PRIO (MAX_RT_PRIO + 20)
static_prio用於儲存靜態優先順序,可以通過nice系統呼叫來進行修改。

rt_priority用於儲存實時優先順序。

normal_prio的值取決於靜態優先順序和排程策略。

prio用於儲存動態優先順序。

policy表示程序的排程策略,目前主要有以下五種: 

[cpp] view plain copy print?
#define SCHED_NORMAL 0
#define SCHED_FIFO 1
#define SCHED_RR 2
#define SCHED_BATCH 3
/* SCHED_ISO: reserved but not implemented yet */
#define SCHED_IDLE 5

#define SCHED_NORMAL 0
#define SCHED_FIFO 1
#define SCHED_RR 2
#define SCHED_BATCH 3
/* SCHED_ISO: reserved but not implemented yet */
#define SCHED_IDLE 5
SCHED_NORMAL用於普通程序,通過CFS排程器實現。SCHED_BATCH用於非互動的處理器消耗型程序。SCHED_IDLE是在系統負載很低時使用。

SCHED_FIFO(先入先出排程演算法)和SCHED_RR(輪流排程演算法)都是實時排程策略。

sched_class結構體表示排程類,目前核心中有實現以下四種: 

[cpp] view plain copy print?
/* linux-2.6.38.8/kernel/sched_fair.c */
static const struct sched_class fair_sched_class;
/* linux-2.6.38.8/kernel/sched_rt.c */
static const struct sched_class rt_sched_class;
/* linux-2.6.38.8/kernel/sched_idletask.c */
static const struct sched_class idle_sched_class;
/* linux-2.6.38.8/kernel/sched_stoptask.c */
static const struct sched_class stop_sched_class;

/* linux-2.6.38.8/kernel/sched_fair.c */
static const struct sched_class fair_sched_class;
/* linux-2.6.38.8/kernel/sched_rt.c */
static const struct sched_class rt_sched_class;
/* linux-2.6.38.8/kernel/sched_idletask.c */
static const struct sched_class idle_sched_class;
/* linux-2.6.38.8/kernel/sched_stoptask.c */
static const struct sched_class stop_sched_class;
se和rt都是排程實體(scheduling entity),一個用於普通程序,一個用於實時程序,每個程序都有其中之一的實體。

cpus_allowed用於控制程序可以在哪些處理器上執行。

9、程序地址空間

[cpp] view plain copy print?
struct mm_struct *mm, *active_mm;
#ifdef CONFIG_COMPAT_BRK
unsigned brk_randomized:1;
#endif
#if defined(SPLIT_RSS_COUNTING)
struct task_rss_stat rss_stat;
#endif

struct mm_struct *mm, *active_mm;

#ifdef CONFIG_COMPAT_BRK
unsigned brk_randomized:1;
#endif
#if defined(SPLIT_RSS_COUNTING)
struct task_rss_stat rss_stat;
#endif
mm指向程序所擁有的記憶體描述符,而active_mm指向程序執行時所使用的記憶體描述符。對於普通程序而言,這兩個指標變數的值相同。但是,核心執行緒不擁有任何記憶體描述符,所以它們的mm成員總是為NULL。當核心執行緒得以執行時,它的active_mm成員被初始化為前一個執行程序的active_mm值。

brk_randomized的用法在http://lkml.indiana.edu/hypermail/Linux/kernel/1104.1/00196.html上有介紹,用來確定對隨機堆記憶體的探測。

rss_stat用來記錄緩衝資訊。 

10、判斷標誌 

[cpp] view plain copy print?
int exit_code, exit_signal;
int pdeath_signal; /* The signal sent when the parent dies */
/* ??? */
unsigned int personality;
unsigned did_exec:1;
unsigned in_execve:1; /* Tell the LSMs that the process is doing an
 * execve */
unsigned in_iowait:1;

/* Revert to default priority/policy when forking */
unsigned sched_reset_on_fork:1;
int exit_code, exit_signal;
int pdeath_signal; /* The signal sent when the parent dies */
/* ??? */
unsigned int personality;
unsigned did_exec:1;
unsigned in_execve:1; /* Tell the LSMs that the process is doing an
 * execve */
unsigned in_iowait:1;

/* Revert to default priority/policy when forking */
unsigned sched_reset_on_fork:1;
exit_code用於設定程序的終止代號,這個值要麼是_exit()或exit_group()系統呼叫引數(正常終止),要麼是由核心提供的一個錯誤代號(異常終止)。

exit_signal被置為-1時表示是某個執行緒組中的一員。只有當執行緒組的最後一個成員終止時,才會產生一個訊號,以通知執行緒組的領頭程序的父程序。

pdeath_signal用於判斷父程序終止時傳送訊號。

personality用於處理不同的ABI,它的可能取值如下: 

[cpp] view plain copy print?
enum {
PER_LINUX = 0x0000,
PER_LINUX_32BIT = 0x0000 | ADDR_LIMIT_32BIT,
PER_LINUX_FDPIC = 0x0000 | FDPIC_FUNCPTRS,
PER_SVR4 = 0x0001 | STICKY_TIMEOUTS | MMAP_PAGE_ZERO,
PER_SVR3 = 0x0002 | STICKY_TIMEOUTS | SHORT_INODE,
PER_SCOSVR3 = 0x0003 | STICKY_TIMEOUTS |
WHOLE_SECONDS | SHORT_INODE,
PER_OSR5 = 0x0003 | STICKY_TIMEOUTS | WHOLE_SECONDS,
PER_WYSEV386 = 0x0004 | STICKY_TIMEOUTS | SHORT_INODE,
PER_ISCR4 = 0x0005 | STICKY_TIMEOUTS,
PER_BSD = 0x0006,
PER_SUNOS = 0x0006 | STICKY_TIMEOUTS,
PER_XENIX = 0x0007 | STICKY_TIMEOUTS | SHORT_INODE,
PER_LINUX32 = 0x0008,
PER_LINUX32_3GB = 0x0008 | ADDR_LIMIT_3GB,
PER_IRIX32 = 0x0009 | STICKY_TIMEOUTS,/* IRIX5 32-bit */
PER_IRIXN32 = 0x000a | STICKY_TIMEOUTS,/* IRIX6 new 32-bit */
PER_IRIX64 = 0x000b | STICKY_TIMEOUTS,/* IRIX6 64-bit */
PER_RISCOS = 0x000c,
PER_SOLARIS = 0x000d | STICKY_TIMEOUTS,
PER_UW7 = 0x000e | STICKY_TIMEOUTS | MMAP_PAGE_ZERO,
PER_OSF4 = 0x000f, /* OSF/1 v4 */
PER_HPUX = 0x0010,
PER_MASK = 0x00ff,
};
enum {
PER_LINUX = 0x0000,
PER_LINUX_32BIT = 0x0000 | ADDR_LIMIT_32BIT,
PER_LINUX_FDPIC = 0x0000 | FDPIC_FUNCPTRS,
PER_SVR4 = 0x0001 | STICKY_TIMEOUTS | MMAP_PAGE_ZERO,
PER_SVR3 = 0x0002 | STICKY_TIMEOUTS | SHORT_INODE,
PER_SCOSVR3 = 0x0003 | STICKY_TIMEOUTS |
WHOLE_SECONDS | SHORT_INODE,
PER_OSR5 = 0x0003 | STICKY_TIMEOUTS | WHOLE_SECONDS,
PER_WYSEV386 = 0x0004 | STICKY_TIMEOUTS | SHORT_INODE,
PER_ISCR4 = 0x0005 | STICKY_TIMEOUTS,
PER_BSD = 0x0006,
PER_SUNOS = 0x0006 | STICKY_TIMEOUTS,
PER_XENIX = 0x0007 | STICKY_TIMEOUTS | SHORT_INODE,
PER_LINUX32 = 0x0008,
PER_LINUX32_3GB = 0x0008 | ADDR_LIMIT_3GB,
PER_IRIX32 = 0x0009 | STICKY_TIMEOUTS,/* IRIX5 32-bit */
PER_IRIXN32 = 0x000a | STICKY_TIMEOUTS,/* IRIX6 new 32-bit */
PER_IRIX64 = 0x000b | STICKY_TIMEOUTS,/* IRIX6 64-bit */
PER_RISCOS = 0x000c,
PER_SOLARIS = 0x000d | STICKY_TIMEOUTS,
PER_UW7 = 0x000e | STICKY_TIMEOUTS | MMAP_PAGE_ZERO,
PER_OSF4 = 0x000f, /* OSF/1 v4 */
PER_HPUX = 0x0010,
PER_MASK = 0x00ff,
};
did_exec用於記錄程序程式碼是否被execve()函式所執行。

in_execve用於通知LSM是否被do_execve()函式所呼叫。詳見補丁說明:http://lkml.indiana.edu/hypermail/linux/kernel/0901.1/00014.html。

in_iowait用於判斷是否進行iowait計數。

sched_reset_on_fork用於判斷是否恢復預設的優先順序或排程策略。

11、時間 

[cpp] view plain copy print?
cputime_t utime, stime, utimescaled, stimescaled;
cputime_t gtime;
#ifndef CONFIG_VIRT_CPU_ACCOUNTING
cputime_t prev_utime, prev_stime;
#endif
unsigned long nvcsw, nivcsw; /* context switch counts */
struct timespec start_time; /* monotonic time */
struct timespec real_start_time; /* boot based time */
struct task_cputime cputime_expires;
struct list_head cpu_timers[3];
#ifdef CONFIG_DETECT_HUNG_TASK
/* hung task detection */
unsigned long last_switch_count;
#endif
cputime_t utime, stime, utimescaled, stimescaled;
cputime_t gtime;
#ifndef CONFIG_VIRT_CPU_ACCOUNTING
cputime_t prev_utime, prev_stime;
#endif
unsigned long nvcsw, nivcsw; /* context switch counts */
struct timespec start_time; /* monotonic time */
struct timespec real_start_time; /* boot based time */
struct task_cputime cputime_expires;
struct list_head cpu_timers[3];
#ifdef CONFIG_DETECT_HUNG_TASK
/* hung task detection */
unsigned long last_switch_count;
#endif
utime/stime用於記錄程序在使用者態/核心態下所經過的節拍數(定時器)。prev_utime/prev_stime是先前的執行時間,請參考補丁說明http://lkml.indiana.edu/hypermail/linux/kernel/1003.3/02431.html。

utimescaled/stimescaled也是用於記錄程序在使用者態/核心態的執行時間,但它們以處理器的頻率為刻度。

gtime是以節拍計數的虛擬機器執行時間(guest time)。

nvcsw/nivcsw是自願(voluntary)/非自願(involuntary)上下文切換計數。last_switch_count是nvcsw和nivcsw的總和。

start_time和real_start_time都是程序建立時間,real_start_time還包含了程序睡眠時間,常用於/proc/pid/stat,補丁說明請參考http://lkml.indiana.edu/hypermail/linux/kernel/0705.0/2094.html。

cputime_expires用來統計程序或程序組被跟蹤的處理器時間,其中的三個成員對應著cpu_timers[3]的三個連結串列。

12、訊號處理 

[cpp] view plain copy print?
/* signal handlers */
struct signal_struct *signal;
struct sighand_struct *sighand;

sigset_t blocked, real_blocked;  
sigset_t saved_sigmask; /* restored if set_restore_sigmask() was used */  
struct sigpending pending;  

unsigned long sas_ss_sp;  
size_t sas_ss_size;  
int (*notifier)(void *priv);  
void *notifier_data;  
sigset_t *notifier_mask;  

/* signal handlers */
struct signal_struct *signal;
struct sighand_struct *sighand;

sigset_t blocked, real_blocked;
sigset_t saved_sigmask;	/* restored if set_restore_sigmask() was used */
struct sigpending pending;

unsigned long sas_ss_sp;
size_t sas_ss_size;
int (*notifier)(void *priv);
void *notifier_data;
sigset_t *notifier_mask;
signal指向程序的訊號描述符。

sighand指向程序的訊號處理程式描述符。

blocked表示被阻塞訊號的掩碼,real_blocked表示臨時掩碼。

pending存放私有掛起訊號的資料結構。

sas_ss_sp是訊號處理程式備用堆疊的地址,sas_ss_size表示堆疊的大小。

裝置驅動程式常用notifier指向的函式來阻塞程序的某些訊號(notifier_mask是這些訊號的位掩碼),notifier_data指的是notifier所指向的函式可能使用的資料。

13、其他

(1)、用於保護資源分配或釋放的自旋鎖 

[cpp] view plain copy print?
/* Protection of (de-)allocation: mm, files, fs, tty, keyrings, mems_allowed,
 * mempolicy */
spinlock_t alloc_lock;

/* Protection of (de-)allocation: mm, files, fs, tty, keyrings, mems_allowed,
 * mempolicy */
spinlock_t alloc_lock;
(2)、程序描述符使用計數,被置為2時,表示程序描述符正在被使用而且其相應的程序處於活動狀態。

[cpp] view plain copy print?
atomic_t usage;

atomic_t usage;
(3)、用於表示獲取大核心鎖的次數,如果程序未獲得過鎖,則置為-1。 

[cpp] view plain copy print?
int lock_depth; /* BKL lock depth */

int lock_depth;		/* BKL lock depth */
(4)、在SMP上幫助實現無加鎖的程序切換(unlocked context switches) 

[cpp] view plain copy print?
#ifdef CONFIG_SMP
#ifdef __ARCH_WANT_UNLOCKED_CTXSW
int oncpu;
#endif
#endif
#ifdef CONFIG_SMP
#ifdef __ARCH_WANT_UNLOCKED_CTXSW
int oncpu;
#endif
#endif
(5)、preempt_notifier結構體連結串列

[cpp] view plain copy print?
#ifdef CONFIG_PREEMPT_NOTIFIERS
/* list of struct preempt_notifier: */
struct hlist_head preempt_notifiers;
#endif

#ifdef CONFIG_PREEMPT_NOTIFIERS
/* list of struct preempt_notifier: */
struct hlist_head preempt_notifiers;
#endif
(6)、FPU使用計數

[cpp] view plain copy print?
unsigned char fpu_counter;

unsigned char fpu_counter;
(7)、blktrace是一個針對Linux核心中塊裝置I/O層的跟蹤工具。 

[cpp] view plain copy print?
#ifdef CONFIG_BLK_DEV_IO_TRACE
unsigned int btrace_seq;
#endif
#ifdef CONFIG_BLK_DEV_IO_TRACE
unsigned int btrace_seq;
#endif
(8)、RCU同步原語

[cpp] view plain copy print?
#ifdef CONFIG_PREEMPT_RCU
int rcu_read_lock_nesting;
char rcu_read_unlock_special;
struct list_head rcu_node_entry;
#endif /* #ifdef CONFIG_PREEMPT_RCU */
#ifdef CONFIG_TREE_PREEMPT_RCU
struct rcu_node *rcu_blocked_node;
#endif /* #ifdef CONFIG_TREE_PREEMPT_RCU */
#ifdef CONFIG_RCU_BOOST
struct rt_mutex *rcu_boost_mutex;
#endif /* #ifdef CONFIG_RCU_BOOST */

#ifdef CONFIG_PREEMPT_RCU
int rcu_read_lock_nesting;
char rcu_read_unlock_special;
struct list_head rcu_node_entry;
#endif /* #ifdef CONFIG_PREEMPT_RCU */
#ifdef CONFIG_TREE_PREEMPT_RCU
struct rcu_node *rcu_blocked_node;
#endif /* #ifdef CONFIG_TREE_PREEMPT_RCU */
#ifdef CONFIG_RCU_BOOST
struct rt_mutex *rcu_boost_mutex;
#endif /* #ifdef CONFIG_RCU_BOOST */
(9)、用於排程器統計程序的執行資訊

[cpp] view plain copy print?
#if defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT)
struct sched_info sched_info;
#endif
#if defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT)
struct sched_info sched_info;
#endif
(10)、用於構建程序連結串列

[cpp] view plain copy print?
struct list_head tasks;

struct list_head tasks;
(11)、to limit pushing to one attempt 

[cpp] view plain copy print?
#ifdef CONFIG_SMP
struct plist_node pushable_tasks;
#endif

#ifdef CONFIG_SMP
struct plist_node pushable_tasks;
#endif
補丁說明請參考:http://lkml.indiana.edu/hypermail/linux/kernel/0808.3/0503.html

(12)、防止核心堆疊溢位 

[cpp] view plain copy print?
#ifdef CONFIG_CC_STACKPROTECTOR
/* Canary value for the -fstack-protector gcc feature */
unsigned long stack_canary;
#endif
#ifdef CONFIG_CC_STACKPROTECTOR
/* Canary value for the -fstack-protector gcc feature */
unsigned long stack_canary;
#endif
在GCC編譯核心時,需要加上-fstack-protector選項。

(13)、PID散列表和連結串列 

[cpp] view plain copy print?
/* PID/PID hash table linkage. */
struct pid_link pids[PIDTYPE_MAX];
struct list_head thread_group; //執行緒組中所有程序的連結串列

/* PID/PID hash table linkage. */
struct pid_link pids[PIDTYPE_MAX];
struct list_head thread_group; //執行緒組中所有程序的連結串列
(14)、do_fork函式 

[cpp] view plain copy print?
struct completion *vfork_done; /* for vfork() */
int __user *set_child_tid; /* CLONE_CHILD_SETTID */
int __user *clear_child_tid; /* CLONE_CHILD_CLEARTID */

struct completion *vfork_done;		/* for vfork() */
int __user *set_child_tid;		/* CLONE_CHILD_SETTID */
int __user *clear_child_tid;		/* CLONE_CHILD_CLEARTID */
在執行do_fork()時,如果給定特別標誌,則vfork_done會指向一個特殊地址。

如果copy_process函式的clone_flags引數的值被置為CLONE_CHILD_SETTID或CLONE_CHILD_CLEARTID,則會把child_tidptr引數的值分別複製到set_child_tid和clear_child_tid成員。這些標誌說明必須改變子程序使用者態地址空間的child_tidptr所指向的變數的值。

(15)、缺頁統計 

[cpp] view plain copy print?
/* mm fault and swap info: this can arguably be seen as either mm-specific or thread-specific */
unsigned long min_flt, maj_flt;

/* mm fault and swap info: this can arguably be seen as either mm-specific or thread-specific */
unsigned long min_flt, maj_flt;
(16)、程序權能

[cpp] view plain copy print?
const struct cred __rcu *real_cred; /* objective and real subjective task
 * credentials (COW) */
const struct cred __rcu *cred; /* effective (overridable) subjective task
 * credentials (COW) */
struct cred *replacement_session_keyring; /* for KEYCTL_SESSION_TO_PARENT */

const struct cred __rcu *real_cred; /* objective and real subjective task
				 * credentials (COW) */
const struct cred __rcu *cred;	/* effective (overridable) subjective task
				 * credentials (COW) */
struct cred *replacement_session_keyring; /* for KEYCTL_SESSION_TO_PARENT */
(17)、相應的程式名 

[cpp] view plain copy print?
char comm[TASK_COMM_LEN];
char comm[TASK_COMM_LEN];
(18)、檔案

[cpp] view plain copy print?
/* file system info */
int link_count, total_link_count;
/* filesystem information */
struct fs_struct *fs;
/* open file information */
struct files_struct *files;

/* file system info */
int link_count, total_link_count;
/* filesystem information */
struct fs_struct *fs;
/* open file information */
struct files_struct *files;
fs用來表示程序與檔案系統的聯絡,包括當前目錄和根目錄。

files表示程序當前開啟的檔案。

(19)、程序通訊(SYSVIPC) 

[cpp] view plain copy print?
#ifdef CONFIG_SYSVIPC
/* ipc stuff */
struct sysv_sem sysvsem;
#endif
#ifdef CONFIG_SYSVIPC
/* ipc stuff */
struct sysv_sem sysvsem;
#endif
(20)、處理器特有資料

[cpp] view plain copy print?
/* CPU-specific state of this task */
struct thread_struct thread;

/* CPU-specific state of this task */
struct thread_struct thread;
(21)、名稱空間

[cpp] view plain copy print?
/* namespaces */
struct nsproxy *nsproxy;

/* namespaces */
struct nsproxy *nsproxy;
(22)、程序審計

[cpp] view plain copy print?
struct audit_context *audit_context;
#ifdef CONFIG_AUDITSYSCALL
uid_t loginuid;
unsigned int sessionid;
#endif

struct audit_context *audit_context;

#ifdef CONFIG_AUDITSYSCALL
uid_t loginuid;
unsigned int sessionid;
#endif
(23)、secure computing

[cpp] view plain copy print?
seccomp_t seccomp;

seccomp_t seccomp;
(24)、用於copy_process函式使用CLONE_PARENT 標記時 

[cpp] view plain copy print?
/* Thread group tracking */
u32 parent_exec_id;
u32 self_exec_id;

/* Thread group tracking */
u32 parent_exec_id;
u32 self_exec_id;
(25)、中斷

[cpp] view plain copy print?
#ifdef CONFIG_GENERIC_HARDIRQS
/* IRQ handler threads */
struct irqaction *irqaction;
#endif
#ifdef CONFIG_TRACE_IRQFLAGS
unsigned int irq_events;
unsigned long hardirq_enable_ip;
unsigned long hardirq_disable_ip;
unsigned int hardirq_enable_event;
unsigned int hardirq_disable_event;
int hardirqs_enabled;
int hardirq_context;
unsigned long softirq_disable_ip;
unsigned long softirq_enable_ip;
unsigned int softirq_disable_event;
unsigned int softirq_enable_event;
int softirqs_enabled;
int softirq_context;
#endif
#ifdef CONFIG_GENERIC_HARDIRQS
/* IRQ handler threads */
struct irqaction *irqaction;
#endif
#ifdef CONFIG_TRACE_IRQFLAGS
unsigned int irq_events;
unsigned long hardirq_enable_ip;
unsigned long hardirq_disable_ip;
unsigned int hardirq_enable_event;
unsigned int hardirq_disable_event;
int hardirqs_enabled;
int hardirq_context;
unsigned long softirq_disable_ip;
unsigned long softirq_enable_ip;
unsigned int softirq_disable_event;
unsigned int softirq_enable_event;
int softirqs_enabled;
int softirq_context;
#endif
(26)、task_rq_lock函式所使用的鎖

[cpp] view plain copy print?
/* Protection of the PI data structures: */
raw_spinlock_t pi_lock;

/* Protection of the PI data structures: */
raw_spinlock_t pi_lock;
(27)、基於PI協議的等待互斥鎖,其中PI指的是priority inheritance(優先順序繼承) 

[cpp] view plain copy print?
#ifdef CONFIG_RT_MUTEXES
/* PI waiters blocked on a rt_mutex held by this task */
struct plist_head pi_waiters;
/* Deadlock detection and priority inheritance handling */
struct rt_mutex_waiter *pi_blocked_on;
#endif

#ifdef CONFIG_RT_MUTEXES
/* PI waiters blocked on a rt_mutex held by this task */
struct plist_head pi_waiters;
/* Deadlock detection and priority inheritance handling */
struct rt_mutex_waiter *pi_blocked_on;
#endif
(28)、死鎖檢測

[cpp] view plain copy print?
#ifdef CONFIG_DEBUG_MUTEXES
/* mutex deadlock detection */
struct mutex_waiter *blocked_on;
#endif
#ifdef CONFIG_DEBUG_MUTEXES
/* mutex deadlock detection */
struct mutex_waiter *blocked_on;
#endif
(29)、lockdep,參見核心說明文件linux-2.6.38.8/Documentation/lockdep-design.txt

[cpp] view plain copy print?
#ifdef CONFIG_LOCKDEP

# define MAX_LOCK_DEPTH 48UL

u64 curr_chain_key;  
int lockdep_depth;  
unsigned int lockdep_recursion;  
struct held_lock held_locks[MAX_LOCK_DEPTH];  
gfp_t lockdep_reclaim_gfp;  

#endif

#ifdef CONFIG_LOCKDEP

# define MAX_LOCK_DEPTH 48UL

u64 curr_chain_key;
int lockdep_depth;
unsigned int lockdep_recursion;
struct held_lock held_locks[MAX_LOCK_DEPTH];
gfp_t lockdep_reclaim_gfp;

#endif
(30)、日誌檔案系統(journalling filesystem)

[cpp] view plain copy print?
/* journalling filesystem info */
void *journal_info;
/* journalling filesystem info */
void *journal_info;
(31)、塊裝置連結串列

[cpp] view plain copy print?
/* stacked block device info */
struct bio_list *bio_list;

/* stacked block device info */
struct bio_list *bio_list;
(32)、記憶體回收

[cpp] view plain copy print?
struct reclaim_state *reclaim_state;

struct reclaim_state *reclaim_state;
(33)、存放塊裝置I/O資料流量資訊

[cpp] view plain copy print?
struct backing_dev_info *backing_dev_info;

struct backing_dev_info *backing_dev_info;
(34)、I/O排程器所使用的資訊 

[cpp] view plain copy print?
struct io_context *io_context;
struct io_context *io_context;
(35)、記錄程序的I/O計數

[cpp] view plain copy print?
struct task_io_accounting ioac;
#if defined(CONFIG_TASK_XACCT)
u64 acct_rss_mem1; /* accumulated rss usage */
u64 acct_vm_mem1; /* accumulated virtual memory usage */
cputime_t acct_timexpd; /* stime + utime since last update */
#endif

struct task_io_accounting ioac;

#if defined(CONFIG_TASK_XACCT)
u64 acct_rss_mem1; /* accumulated rss usage */
u64 acct_vm_mem1; /* accumulated virtual memory usage */
cputime_t acct_timexpd; /* stime + utime since last update */
#endif
在Ubuntu 11.04上,執行cat獲得程序1的I/O計數如下:

[cpp] view plain copy print?
$ sudo cat /proc/1/io

$ sudo cat /proc/1/io
[cpp] view plain copy print?
rchar: 164258906
wchar: 455212837
syscr: 388847
syscw: 92563
read_bytes: 439251968
write_bytes: 14143488
cancelled_write_bytes: 2134016
rchar: 164258906
wchar: 455212837
syscr: 388847
syscw: 92563
read_bytes: 439251968
write_bytes: 14143488
cancelled_write_bytes: 2134016
輸出的資料項剛好是task_io_accounting結構體的所有成員。

(36)、CPUSET功能 

[cpp] view plain copy print?
#ifdef CONFIG_CPUSETS
nodemask_t mems_allowed; /* Protected by alloc_lock */
int mems_allowed_change_disable;
int cpuset_mem_spread_rotor;
int cpuset_slab_spread_rotor;
#endif

#ifdef CONFIG_CPUSETS
nodemask_t mems_allowed; /* Protected by alloc_lock */
int mems_allowed_change_disable;
int cpuset_mem_spread_rotor;
int cpuset_slab_spread_rotor;
#endif
(37)、Control Groups

[cpp] view plain copy print?
#ifdef CONFIG_CGROUPS
/* Control Group info protected by css_set_lock */
struct css_set __rcu *cgroups;
/* cg_list protected by css_set_lock and tsk->alloc_lock */
struct list_head cg_list;
#endif
#ifdef CONFIG_CGROUP_MEM_RES_CTLR /* memcg uses this to do batch job */
struct memcg_batch_info {
int do_batch; /* incremented when batch uncharge started */
struct mem_cgroup *memcg; /* target memcg of uncharge */
unsigned long bytes; /* uncharged usage */
unsigned long memsw_bytes; /* uncharged mem+swap usage */
} memcg_batch;
#endif
#ifdef CONFIG_CGROUPS
/* Control Group info protected by css_set_lock */
struct css_set __rcu *cgroups;
/* cg_list protected by css_set_lock and tsk->alloc_lock */
struct list_head cg_list;
#endif
#ifdef CONFIG_CGROUP_MEM_RES_CTLR /* memcg uses this to do batch job */
struct memcg_batch_info {
int do_batch; /* incremented when batch uncharge started */
struct mem_cgroup *memcg; /* target memcg of uncharge */
unsigned long bytes; /* uncharged usage */
unsigned long memsw_bytes; /* uncharged mem+swap usage */
} memcg_batch;
#endif
(38)、futex同步機制

[cpp] view plain copy print?
#ifdef CONFIG_FUTEX
struct robust_list_head __user *robust_list;
#ifdef CONFIG_COMPAT
struct compat_robust_list_head __user *compat_robust_list;
#endif
struct list_head pi_state_list;
struct futex_pi_state *pi_state_cache;
#endif
#ifdef CONFIG_FUTEX
struct robust_list_head __user *robust_list;
#ifdef CONFIG_COMPAT
struct compat_robust_list_head __user *compat_robust_list;
#endif
struct list_head pi_state_list;
struct futex_pi_state *pi_state_cache;
#endif
(39)、非一致記憶體訪問(NUMA Non-Uniform Memory Access)

[cpp] view plain copy print?
#ifdef CONFIG_NUMA
struct mempolicy *mempolicy; /* Protected by alloc_lock */
short il_next;
#endif

#ifdef CONFIG_NUMA
struct mempolicy *mempolicy; /* Protected by alloc_lock */
short il_next;
#endif
(40)、檔案系統互斥資源

[cpp] view plain copy print?
atomic_t fs_excl; /* holding fs exclusive resources */

atomic_t fs_excl;	/* holding fs exclusive resources */
(41)、RCU連結串列 

[cpp] view plain copy print?
struct rcu_head rcu;

struct rcu_head rcu;
(42)、管道 

[cpp] view plain copy print?
struct pipe_inode_info *splice_pipe;

struct pipe_inode_info *splice_pipe;
(43)、延遲計數 

[cpp] view plain copy print?
#ifdef CONFIG_TASK_DELAY_ACCT
struct task_delay_info *delays;
#endif

#ifdef CONFIG_TASK_DELAY_ACCT
struct task_delay_info *delays;
#endif
(44)、fault injection,參考核心說明檔案linux-2.6.38.8/Documentation/fault-injection/fault-injection.txt

[cpp] view plain copy print?
#ifdef CONFIG_FAULT_INJECTION
int make_it_fail;
#endif
#ifdef CONFIG_FAULT_INJECTION
int make_it_fail;
#endif
(45)、Floating proportions

[cpp] view plain copy print?
struct prop_local_single dirties;

struct prop_local_single dirties;
(46)、Infrastructure for displaying latency

[cpp] view plain copy print?
#ifdef CONFIG_LATENCYTOP
int latency_record_count;
struct latency_record latency_record[LT_SAVECOUNT];
#endif

#ifdef CONFIG_LATENCYTOP
int latency_record_count;
struct latency_record latency_record[LT_SAVECOUNT];
#endif
(47)、time slack values,常用於poll和select函式

[cpp] view plain copy print?
unsigned long timer_slack_ns;
unsigned long default_timer_slack_ns;
unsigned long timer_slack_ns;
unsigned long default_timer_slack_ns;
(48)、socket控制訊息(control message)

[cpp] view plain copy print?
struct list_head *scm_work_list;

struct list_head	*scm_work_list;
(49)、ftrace跟蹤器 

[cpp] view plain copy print?
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
/* Index of current stored address in ret_stack */
int curr_ret_stack;
/* Stack of return addresses for return function tracing */
struct ftrace_ret_stack *ret_stack;
/* time stamp for last schedule */
unsigned long long ftrace_timestamp;
/*
 * Number of functions that haven't been traced
 * because of depth overrun.
 */
atomic_t trace_overrun;
/* Pause for the tracing */
atomic_t tracing_graph_pause;
#endif
#ifdef CONFIG_TRACING
/* state flags for use by tracers */
unsigned long trace;
/* bitmask of trace recursion */
unsigned long trace_recursion;
#endif /* CONFIG_TRACING */