Linux kernel中ktime_get()方法獲取的當前時間比之前的時間晚的debug code

阿新 • • 發佈：2018-12-21

在Linux kernel的測試過程中，我們發現ktime_get()獲得的當前時間比之前的時間還要晚，因此我們需要在一些debug，下面介紹一下debug過程中遇到的cpu同步的問題。
Linux kernel 中ktime_get()的實現如下：

ktime_t ktime_get(void)
{
        struct timekeeper *tk = &tk_core.timekeeper;
        unsigned int seq;
        ktime_t base;
        u64 nsecs;

        WARN_ON 
(timekeeping_suspended);

        do {
                seq = read_seqcount_begin(&tk_core.seq);
                base = tk->tkr_mono.base;
                nsecs = timekeeping_get_ns(&tk->tkr_mono);

        } while (read_seqcount_retry(&tk_core.seq, seq));

        return ktime_add_ns 
(base, nsecs);
}
EXPORT_SYMBOL_GPL(ktime_get);

為了debug這個問題，我們對ktime_get()做如下修改：

static u64 last_ktime ;
ktime_t ktime_get(void)
{
        struct timekeeper *tk = &tk_core.timekeeper;
        unsigned int seq;
        ktime_t base, cur_ktime;
        s64 nsecs;

        WARN_ON(timekeeping_suspended) 
;

        do {
                seq = read_seqcount_begin(&tk_core.seq);
                base = tk->tkr_mono.base;
                nsecs = timekeeping_get_ns(&tk->tkr_mono);

        } while (read_seqcount_retry(&tk_core.seq, seq));

        cur_ktime = ktime_add_ns(base, nsecs);
        if (unlikely(last_ktime >= ktime_to_ns(cur_ktime))) {
                printk("failed to get ktime, last ktime is %llu, "
                        "current ktime is %llu, nsecs is %lld, "
                        "timekeeping_suspended is %d, tmp is %llu\n", *last_ktime_tmp,
                         ktime_to_ns(cur_ktime), nsecs, timekeeping_suspended,
                        tmp);
                BUG();
        }
        last_ktime = ktime_to_ns(cur_ktime);

        return cur_ktime;
}
EXPORT_SYMBOL_GPL(ktime_get);

則我們可能會引起Linux kernel panic，並得到如下資訊：

[   41.502091] failed to get ktime, last ktime is 41502064202, current ktime is 41502064150

原因：
last_ktime是全域性變數，所有的cpu1都是對同一個last_ktime進行操作。
假如有兩個CPU：cpu0, cpu1，cpu0正在執行：

if (unlikely(last_ktime >= ktime_to_ns(cur_ktime))) {

期望cur_ktime的值與上一次cpu0取到的last_ktime的值進行比較，而cpu1剛好執行完：

last_ktime = ktime_to_ns(cur_ktime);

則全域性變數last_ktime此時被最新的值更新了，因此就出現了last_ktime 大於 cur_ktime的情況。
因此我們又對ktime_get()方法做了如下修改（將last_ktime改為percpu變數）：

static DEFINE_PER_CPU(u64, last_ktime);
ktime_t ktime_get(void)
{
        struct timekeeper *tk = &tk_core.timekeeper;
        unsigned int seq;
        ktime_t base, cur_ktime;
        s64 nsecs;
        u64 *last_ktime_tmp, tmp = 0;

        WARN_ON(timekeeping_suspended);

        last_ktime_tmp = this_cpu_ptr(&last_ktime);
        do {
                seq = read_seqcount_begin(&tk_core.seq);
                base = tk->tkr_mono.base;
                nsecs = timekeeping_get_ns(&tk->tkr_mono);

        } while (read_seqcount_retry(&tk_core.seq, seq));

        tmp = ktime_to_ns(base);
        cur_ktime = ktime_add_ns(base, nsecs);
        if(ktime_to_ns(cur_ktime) > 20000000000)
        if (unlikely((*last_ktime_tmp) > ktime_to_ns(cur_ktime))) {
                printk("failed to get ktime, last ktime is %llu, "
                        "current ktime is %llu, nsecs is %lld, "
                        "timekeeping_suspended is %d, tmp is %llu\n", *last_ktime_tmp,
                         ktime_to_ns(cur_ktime), nsecs, timekeeping_suspended,
                        tmp);
                BUG();
        }
        last_ktime_tmp = &ktime_to_ns(cur_ktime);

        return cur_ktime;
}
EXPORT_SYMBOL_GPL(ktime_get);

將last_ktime改為percpu變數，我們解決了CPU之間的同步問題，但是又遇到其他的問題：
我們依舊假設當前有兩個CPU：cpu0, cpu1，假設當前cpu操作ktime_get()的順序如下：cpu0: ktime_get()—>cpu1:ktime_get()—>cpu0:ktime_get()，假設依次獲得獲得的last_ktime為：cpu0:last_ktime=41502064202—>cpu1:last_ktime=41502064302—>cpu0:last_ktime=41502064276,此時cpu1獲取的最新的last_ktime比cpu0獲取的最新的時間要大，但是因為last_ktime是percpu變數，因此我們並不能及時發現當前時間比之前的時間晚的問題。
最終我們選擇使用如下修改，該修改可以準確判斷當前時間比之前的時間晚的問題，同時又不會引發cpu同步問題：

static atomic64_t last_ktime;
ktime_t ktime_get(void)
{
        struct timekeeper *tk = &tk_core.timekeeper;
        unsigned int seq;
        ktime_t base, cur_ktime;
        s64 nsecs;
        u64 last_ktime_tmp, tmp = 0;

        WARN_ON(timekeeping_suspended);

        last_ktime_tmp = atomic64_read(&last_ktime);
        barrier();
        do {
                seq = read_seqcount_begin(&tk_core.seq);
                base = tk->tkr_mono.base;
                nsecs = timekeeping_get_ns(&tk->tkr_mono);

        } while (read_seqcount_retry(&tk_core.seq, seq));

        tmp = ktime_to_ns(base);
        cur_ktime = ktime_add_ns(base, nsecs);
        if (unlikely(last_ktime_tmp > ktime_to_ns(cur_ktime))) {
                printk("failed to get ktime, last ktime is %llu, "
                        "current ktime is %llu, nsecs is %lld, "
                        "timekeeping_suspended is %d, tmp is %llu\n", last_ktime_tmp,
                         ktime_to_ns(cur_ktime), nsecs, timekeeping_suspended,
                        tmp);
                BUG();
        }
        atomic64_set(&last_ktime, ktime_to_ns(cur_ktime));

        return cur_ktime;
}
EXPORT_SYMBOL_GPL(ktime_get);

Linux kernel中ktime_get()方法獲取的當前時間比之前的時間晚的debug code

Linux kernel中ktime_get()方法獲取的當前時間比之前的時間晚的debug code

Linux下C語言實現獲取當前時間

分享java程式設計中一個小程式：獲取當前系統日期和時間在小程式中顯示出來

Android獲取當前系統日期和時間的三種方法

在linux kernel中列印函式呼叫的堆疊的方法

定義一個方法獲取當前年月

Linux kernel 中的work queue原理

JS實時獲取當前的日期和時間並顯示

python datetime模組來獲取當前的日期和時間

Linux kernel中的IS_ENABLED

python獲取當前的日期和時間

C語言獲取當前的日期及時間

C# Winform中如何實現獲取當前使用者的網路名和登入名【資料庫操作】

《Qt5+QDate和QTime獲取當前系統日期和時間》

HTML獲取當前頁面的系統時間和星期幾

linux kernel中沒有include/linux/version.h檔案

python 通過類中一個方法獲取另一個方法變數

python:如何獲取當前的日期和時間

Js獲取當前的日期和時間

使用@JsonFormat引起的時間比正常時間慢8小時解決方法

Linux kernel中ktime_get()方法獲取的當前時間比之前的時間晚的debug code

相關推薦