死鎖檢測實現

阿新 • • 發佈：2021-10-06

一、背景

　　在工作專案使用多程序、多執行緒過程中，因爭奪資源而造成一種資源競態，所以需加鎖處理。如下圖所示，執行緒A想獲取執行緒B的鎖，執行緒B想獲取執行緒C的鎖，執行緒 C 想獲取執行緒D的鎖，執行緒D想獲取執行緒A的鎖，從而構建了一個資源獲取環，當程序或者執行緒申請的鎖處於相互交叉鎖住的情況，就會出現死鎖，它們將無法繼續執行。

　　死鎖的存在是因為有資源獲取環的存在，所以只要能檢測出資源獲取環，就等同於檢測出死鎖的存在。

二、原理

　　在不改變專案原始碼的情況下，採用圖演算法來檢測環的存在，使用有向圖來儲存；如執行緒A獲取執行緒B已佔用的鎖（表示執行緒B獲取鎖成功），則為執行緒A指向執行緒B；啟動一個執行緒定時對圖進行檢測是否有環的存在。

　　（1）資料結構

// Data record (vertex payload). The same record type is used for the
// entries of the lock table (locklist in struct graph).
struct node{
    // Thread ID.
    uint64 thread_id;
    // Lock ID.
    uint64 lock_id;
    // Counter kept on lock-table entries: lock_before() increments it,
    // lock_after() decrements it, and unlock_after() clears the entry only
    // when it is 0. (Spelling "degress" kept because other code uses it.)
    int degress;
};

// Adjacency-list cell. The payload is kept separate from the list structure:
// the cell only points at a node and at the next cell of the same chain.
struct vertex{
    // Payload this cell refers to (not owned by the cell in the visible code).
    struct node *d;
    // Next cell in the chain, or NULL at the end.
    struct vertex *next;
};

// Detector state: the directed graph of threads plus the lock table.
struct graph{
    // Vertex table. list[i] is the head for one thread; the cells chained
    // behind it via next are the threads that list[i] points to.
    struct vertex list[THREAD_MAX];
    // Number of list[] entries in use.
    int num;

    // Lock table: one record per tracked lock (owner thread, lock id, degress).
    struct node locklist[THREAD_MAX];
    // Lock-table counter; lock_before() scans locklist[0 .. lockidx-1].
    int lockidx;

    // Meant to protect graph modifications (thread-safety), per the author's
    // note. NOTE(review): none of the code shown here initialises or locks it.
    pthread_mutex_t mutex;
};

　　（2）圖的操作

　　　　a.建立圖節點

// Allocate one zero-initialised adjacency-list cell that refers to d.
//
// d:       payload pointer stored in the cell as-is (not copied).
// returns: the new cell with next == NULL, or NULL if allocation fails.
//          The caller owns the cell; remove_edge() releases cells with free().
struct vertex *create_vertex(struct node *d){
    struct vertex *cell = calloc(1, sizeof(struct vertex));

    if (cell != NULL) {
        cell->d = d;
        cell->next = NULL;
    }
    return cell;
}

　　b.查詢節點

// Look up the vertex for d in the global graph tg.
//
// Only thread_id is compared; the other fields of d are ignored.
// returns: the index into tg->list of the first match, or -1 if none.
int search_vertex(struct node *d){
    int pos = 0;

    while (pos < tg->num) {
        if (tg->list[pos].d->thread_id == d->thread_id) {
            return pos;
        }
        pos++;
    }
    return -1;
}

　　c.新增節點

//新增節點，只是把新增的節點放到list中，還沒有確定各節點間的指向，必須通過add_edge新增邊來確定
void add_vertex(struct node *d){

    if (search_vertex(d) == -1)
    {
        tg->list[tg->num].d = d;//新增到list中
        tg->list[tg->num].next = NULL;

        tg->num++;//節點數累加
    }
}

　　d.新增邊，指定方向

// Add a directed edge from -> to, registering both vertices first if needed.
//
// The new edge cell refers to the node already stored in the graph for `to`
// (tg->list[...].d) rather than to the caller's pointer, so an edge never
// keeps the address of a caller's temporary lookup key.
//
// The call is a no-op if either vertex cannot be found after registration
// or if the edge cell cannot be allocated.
void add_edge(struct node *from, struct node *to){

    add_vertex(from);
    add_vertex(to);

    int from_idx = search_vertex(from);
    int to_idx = search_vertex(to);
    if (from_idx == -1 || to_idx == -1) return; // never index list[] with -1

    struct vertex *edge = create_vertex(tg->list[to_idx].d);
    if (edge == NULL) return;

    // Append at the tail of from's chain.
    struct vertex *tail = &tg->list[from_idx];
    while (tail->next != NULL)
    {
        tail = tail->next;
    }
    tail->next = edge;
}

　　e.檢測節點間是否有邊

//檢測節點from和to間是否有邊連線
int verifty_edge(struct node *from, struct node *to){

    if(tg->num == 0) return 0;
    
    int idx = search_vertex(from);
    if(idx == -1) return 0;

    struct vertex *v = &(tg->list[idx]);
    while(v != NULL){
        if(v->d->thread_id == to->thread_id) return 1;
        v = v->next;
    }

    return 0;
}

　　f.刪除邊

// Remove the first edge cell from -> to, if both vertices exist.
//
// Only cells behind the head of `from` are candidates; the head entry itself
// is never removed. The unlinked cell is released with free().
void remove_edge(struct node *from, struct node *to){

    int src = search_vertex(from);
    int dst = search_vertex(to);
    if (src == -1 || dst == -1) return;

    // link always addresses the pointer that leads to the cell under test,
    // so unlinking is a single store.
    struct vertex **link = &tg->list[src].next;
    while (*link != NULL) {
        struct vertex *cell = *link;
        if (cell->d->thread_id == to->thread_id) {
            *link = cell->next;
            free(cell);
            return;
        }
        link = &cell->next;
    }
}

（3）圖遍歷

　　本文採用圖遍歷中最為常用的深度優先搜尋（DFS）進行遍歷，程式碼如下。

//dfs深度遍歷
int dfs(int idx){

    struct vertex *v = &tg->list[idx];
    
    if(visited[idx] == 1){//有環
        path[k++] = idx;
        print_deadlock();
        deadlock = 1;
        return 0;
    }

    visited[idx] =1;//被遍歷到了，賦值為1，保證同一個節點只能遍歷一次
    path[k++] = idx;
    while(v->next !=NULL){
        dfs(search_vertex(v->next->d));
        k--;
        v = v->next;
    }
    return 1;
}

//遍歷圖，任意從圖的一個節點出發，對每一個節點進行dfs遍歷
int search_for_cycle(int idx){

    struct vertex *v = &tg->list[idx];
    visited[idx] = 1;
    k = 0;
    path[k++] = idx;

    while(v->next != NULL){
        int i = 0;
        for (; i < tg->num; i++)
        {
            if(i == idx){
                continue;
            }
            visited[i] = 0;
        }

        for(i = 1; i <= THREAD_MAX; i++){
            path[i] = -1;
        }
        k = 1;
        
        dfs(search_vertex(v->next->d));
        v = v->next;
    }
}

（4）啟動檢測

　　啟動執行緒定時檢測圖是否有環，程式碼如下。

// Run one detection pass: start a cycle search from each vertex in turn,
// beginning at vertex 0, and stop as soon as the deadlock flag is set.
// Prints "no deadlock" when the pass finishes without the flag being set.
void check_dead_lock(){

    deadlock = 0;

    for (int start = 0; start < tg->num && deadlock != 1; start++) {
        search_for_cycle(start);
    }

    if (deadlock != 0) return;
    printf("no deadlock\n");
}

// Entry point of the detector thread: every 5 seconds run one detection
// pass. The loop never terminates, so the function never returns.
static void *thread_func(void *args){

    for (;;) {
        sleep(5);
        check_dead_lock();
    }
}

// Allocate the global detector state and start the detector thread.
//
// The state is allocated zero-filled so that every lock-table entry starts
// with lock_id == 0 and degress == 0; unlock_after() reads degress on entries
// that no code path has initialised otherwise.
//
// Aborts the process if the state cannot be allocated, because the lock
// hooks dereference tg unconditionally. A failure to start the detector
// thread is reported on stderr; the hooks keep working, only the periodic
// check is missing.
void start_check(){

    tg = calloc(1, sizeof *tg);
    if (tg == NULL) {
        fprintf(stderr, "start_check: out of memory\n");
        abort();
    }
    tg->num = 0;
    tg->lockidx = 0;

    pthread_t tid;
    int rc = pthread_create(&tid, NULL, thread_func, NULL);
    if (rc != 0) {
        fprintf(stderr, "start_check: pthread_create failed (%d)\n", rc);
    }
}

　（5）鉤子hook

　　為了不改變專案原始碼，使用hook在應用程式呼叫系統加鎖、解鎖API時進行劫持，使其實際呼叫的是應用程式定義的加鎖、解鎖API；在進行加鎖、解鎖前，我們先去理解3個狀態：加鎖前、加鎖後、解鎖後，即：lock_before、lock_after、unlock_after。通過這三個函式把加鎖、解鎖操作與圖的構建關聯起來，具體實現如下。

// Return the node stored in the graph for thread_id, registering a vertex
// for it first if necessary.
//
// A newly registered vertex gets a heap-allocated, zero-initialised node, so
// the pointer kept by the graph stays valid after the caller returns.
// returns: the stored node, or NULL if it could not be allocated/registered.
static struct node *lock_before_vertex(uint64 thread_id){

    struct node key = {0}; // lookup key only; never stored in the graph
    key.thread_id = thread_id;

    int idx = search_vertex(&key);
    if (idx == -1) {
        struct node *owned = calloc(1, sizeof *owned);
        if (owned == NULL) return NULL;
        owned->thread_id = thread_id;

        add_vertex(owned);
        idx = search_vertex(&key);
        if (idx == -1) { // vertex was not registered; nothing refers to owned
            free(owned);
            return NULL;
        }
    }
    return tg->list[idx].d;
}

// Called before the real lock call.
//
// 1. If no lock-table entry matches lockid, nothing is recorded.
// 2. For each entry that matches, the entry's degress is incremented and an
//    edge thread_id -> recorded owner is added unless verifty_edge() already
//    reports one.
//
// Only nodes stored in the graph are passed on to verifty_edge()/add_edge(),
// never the address of a local variable.
void lock_before(uint64 thread_id, uint64 lockid){

    for (int idx = 0; idx < tg->lockidx; idx++) {
        if (tg->locklist[idx].lock_id != lockid) continue;

        tg->locklist[idx].degress++;

        struct node *from = lock_before_vertex(thread_id);
        struct node *to = lock_before_vertex(tg->locklist[idx].thread_id);
        if (from == NULL || to == NULL) continue; // could not register a vertex

        if (!verifty_edge(from, to)) {
            add_edge(from, to);
        }
    }
}

// Called after the real lock call has returned, i.e. once the calling thread
// has passed the lock.
//
// 1. The lock has no lock-table entry: claim the slot returned by
//    search_empty_lock(), record the owner and the lock, and start its
//    degress at 0.
// 2. The lock already has an entry: decrement its degress, remove the edge
//    threadid -> recorded owner if verifty_edge() reports one, and record
//    threadid as the new owner.
//
// NOTE(review): search_empty_lock() and inc() are defined elsewhere; the slot
// index is used unchecked, exactly as before. lockidx is incremented on every
// new entry and unlock_after() never decrements it - confirm it cannot grow
// past THREAD_MAX.
void lock_after(uint64 threadid, uint64 lockid) {

    int idx = search_lock(lockid);
    if (idx == -1) {
        int eidx = search_empty_lock();
        tg->locklist[eidx].thread_id = threadid;
        tg->locklist[eidx].lock_id = lockid;
        // unlock_after() clears a slot only when degress == 0, so the
        // counter must not start with whatever was in memory.
        tg->locklist[eidx].degress = 0;

        inc(&tg->lockidx, 1);
        return;
    }

    // Lookup keys only; verifty_edge()/remove_edge() do not keep them.
    struct node from = {0};
    from.thread_id = threadid;

    struct node to = {0};
    to.thread_id = tg->locklist[idx].thread_id;
    tg->locklist[idx].degress--;

    if (verifty_edge(&from, &to)) {
        remove_edge(&from, &to); // the thread is no longer waiting, so drop the edge
    }
    tg->locklist[idx].thread_id = threadid;
}

// Called after the real unlock call. If the lock's table entry has a degress
// of 0, the entry is cleared (owner and lock id set to 0).
//
// A lock without a table entry is ignored: search_lock() reports that case
// as -1 (lock_after() relies on the same convention), and -1 must not be
// used as an index.
//
// threadid is currently unused.
void unlock_after(uint64 threadid, uint64 lockid) {

    (void)threadid;

    int idx = search_lock(lockid);
    if (idx == -1) return;

    if (tg->locklist[idx].degress == 0) {
        tg->locklist[idx].thread_id = 0;
        tg->locklist[idx].lock_id = 0;
    }
}

　　hook鉤子主要實現pthread_mutex_lock、pthread_mutex_unlock的劫持，具體實現如下。

// Replacement for pthread_mutex_lock: records the attempt with lock_before(),
// forwards to the real implementation through pthread_mutex_lock_f, then
// records the outcome with lock_after().
//
// The mutex address is used as the lock id.
// returns: the result of the real pthread_mutex_lock call.
//
// NOTE(review): lock_after() is called whatever the real call returns, as
// before; confirm whether a failed lock should be recorded as an acquisition.
int pthread_mutex_lock(pthread_mutex_t *mutex){

    pthread_t selfid = pthread_self();

    lock_before(selfid, (uint64)mutex);
    int rc = pthread_mutex_lock_f(mutex); // real lock entry point
    lock_after(selfid, (uint64)mutex);

    return rc;
}


// Replacement for pthread_mutex_unlock: forwards to the real implementation
// through pthread_mutex_unlock_f, then updates the lock table with
// unlock_after().
//
// The mutex address is used as the lock id.
// returns: the result of the real pthread_mutex_unlock call.
int pthread_mutex_unlock(pthread_mutex_t * mutex){

    pthread_t selfid = pthread_self();

    int rc = pthread_mutex_unlock_f(mutex); // real unlock entry point
    unlock_after(selfid, (uint64)mutex);

    return rc;
}

// Resolve the next definitions of pthread_mutex_lock / pthread_mutex_unlock
// in the symbol search order and store them in pthread_mutex_lock_f /
// pthread_mutex_unlock_f, which the replacement functions call.
//
// returns: 0 if both symbols were resolved, -1 if either lookup returned NULL
//          (calling the replacements would then go through a NULL pointer).
static int init_hook(){

    pthread_mutex_lock_f = dlsym(RTLD_NEXT,"pthread_mutex_lock");
    pthread_mutex_unlock_f = dlsym(RTLD_NEXT,"pthread_mutex_unlock");

    if (pthread_mutex_lock_f == NULL || pthread_mutex_unlock_f == NULL) {
        return -1;
    }
    return 0;
}

（6）Demo

// Demo: statically initialised mutexes shared by the worker threads below.
// th_func1..th_func4 use mtx1, mtx2 and mtx3; mtx4 is not used by them.
pthread_mutex_t mtx1 = PTHREAD_MUTEX_INITIALIZER;
pthread_mutex_t mtx2 = PTHREAD_MUTEX_INITIALIZER;
pthread_mutex_t mtx3 = PTHREAD_MUTEX_INITIALIZER;
pthread_mutex_t mtx4 = PTHREAD_MUTEX_INITIALIZER;

// Demo worker 1: takes mtx1, sleeps one second, then takes mtx2, and
// releases both in reverse order.
//
// arg is unused. Returns NULL: a thread start routine's return value becomes
// the thread's exit status, so it must not be left unspecified.
void *th_func1(void *arg) {

    (void)arg;

    pthread_mutex_lock(&mtx1);
    sleep(1);
    pthread_mutex_lock(&mtx2);

    pthread_mutex_unlock(&mtx2);
    pthread_mutex_unlock(&mtx1);

    return NULL;
}

// Demo worker 2: takes mtx2, sleeps one second, then takes mtx3, and
// releases both in reverse order.
//
// arg is unused. Returns NULL: a thread start routine's return value becomes
// the thread's exit status, so it must not be left unspecified.
void *th_func2(void *arg) {

    (void)arg;

    pthread_mutex_lock(&mtx2);
    sleep(1);
    pthread_mutex_lock(&mtx3);

    pthread_mutex_unlock(&mtx3);
    pthread_mutex_unlock(&mtx2);

    return NULL;
}

// Demo worker 3: takes mtx3, sleeps one second, then takes mtx1, and
// releases both in reverse order. Taking mtx1 last closes the
// mtx1 -> mtx2 -> mtx3 -> mtx1 acquisition order used by workers 1 and 2.
//
// arg is unused. Returns NULL: a thread start routine's return value becomes
// the thread's exit status, so it must not be left unspecified.
void *th_func3(void *arg) {

    (void)arg;

    pthread_mutex_lock(&mtx3);
    sleep(1);
    pthread_mutex_lock(&mtx1);

    pthread_mutex_unlock(&mtx1);
    pthread_mutex_unlock(&mtx3);

    return NULL;
}

// Demo worker 4: same locking sequence as th_func2 - takes mtx2, sleeps one
// second, then takes mtx3, and releases both in reverse order.
//
// arg is unused. Returns NULL: a thread start routine's return value becomes
// the thread's exit status, so it must not be left unspecified.
void *th_func4(void *arg) {

    (void)arg;

    pthread_mutex_lock(&mtx2);
    sleep(1);
    pthread_mutex_lock(&mtx3);

    pthread_mutex_unlock(&mtx3);
    pthread_mutex_unlock(&mtx2);

    return NULL;
}


// Demo driver: installs the hooks, starts the detector thread, then runs the
// four demo workers and waits for each of them in creation order.
int main(){

    void *(*const workers[])(void *) = { th_func1, th_func2, th_func3, th_func4 };
    enum { WORKER_COUNT = sizeof workers / sizeof workers[0] };
    pthread_t tids[WORKER_COUNT];

    init_hook();   // resolve the real lock/unlock entry points
    start_check(); // start the deadlock detector thread

    for (int i = 0; i < WORKER_COUNT; i++) {
        pthread_create(&tids[i], NULL, workers[i], NULL);
    }
    for (int i = 0; i < WORKER_COUNT; i++) {
        pthread_join(tids[i], NULL);
    }

    return 0;
}

死鎖檢測實現

死鎖檢測實現

Java-多執行緒-執行緒死鎖程式碼實現

Linux死鎖檢測-Lockdep

併發程式設計（執行緒）——驗證GIL鎖，GIL與普通互斥鎖的區別，io密集型和計算密集型，死鎖現象（解決方式：遞迴鎖），Semaphore訊號量，Event事件，執行緒queue，多程序實現tcp服務端併發，執行緒池&程序池

Java由於資源競爭實現死鎖

實現一個執行緒死鎖

Java 實現執行緒死鎖

深入剖析ReentrantLock公平鎖與非公平鎖原始碼實現

【重溫mysql】7、死鎖與innodb狀態日誌

分散式鎖的由來、特點、及Redis分散式鎖的實現詳解

聊聊分散式鎖的實現(一)

聊聊分散式鎖的實現(二)

Java併發程式設計入門（九）死鎖和死鎖定位

Redis分散式鎖（三）：支援鎖可重入，避免鎖遞迴呼叫時死鎖

記錄那些詭異的資料庫死鎖

redis分散式鎖的實現（1）- 分散式鎖的設計理論

Java執行緒死鎖例項及解決方法

Java併發程式設計預防死鎖過程詳解

Java模擬死鎖發生之演繹哲學家進餐問題案例詳解

關於MySQL死鎖問題的深入分析

死鎖檢測實現

相關推薦