Linux kernel中ktime_get()方法獲取的當前時間比之前的時間晚的debug code
在Linux kernel的測試過程中,我們發現ktime_get()獲得的當前時間比之前的時間還要晚,因此我們需要在一些debug,下面介紹一下debug過程中遇到的cpu同步的問題。
Linux kernel 中ktime_get()的實現如下:
ktime_t ktime_get(void)
{
struct timekeeper *tk = &tk_core.timekeeper;
unsigned int seq;
ktime_t base;
u64 nsecs;
WARN_ON (timekeeping_suspended);
do {
seq = read_seqcount_begin(&tk_core.seq);
base = tk->tkr_mono.base;
nsecs = timekeeping_get_ns(&tk->tkr_mono);
} while (read_seqcount_retry(&tk_core.seq, seq));
return ktime_add_ns (base, nsecs);
}
EXPORT_SYMBOL_GPL(ktime_get);
為了debug這個問題,我們對ktime_get()做如下修改:
static u64 last_ktime ;
ktime_t ktime_get(void)
{
struct timekeeper *tk = &tk_core.timekeeper;
unsigned int seq;
ktime_t base, cur_ktime;
s64 nsecs;
WARN_ON(timekeeping_suspended) ;
do {
seq = read_seqcount_begin(&tk_core.seq);
base = tk->tkr_mono.base;
nsecs = timekeeping_get_ns(&tk->tkr_mono);
} while (read_seqcount_retry(&tk_core.seq, seq));
cur_ktime = ktime_add_ns(base, nsecs);
if (unlikely(last_ktime >= ktime_to_ns(cur_ktime))) {
printk("failed to get ktime, last ktime is %llu, "
"current ktime is %llu, nsecs is %lld, "
"timekeeping_suspended is %d, tmp is %llu\n", *last_ktime_tmp,
ktime_to_ns(cur_ktime), nsecs, timekeeping_suspended,
tmp);
BUG();
}
last_ktime = ktime_to_ns(cur_ktime);
return cur_ktime;
}
EXPORT_SYMBOL_GPL(ktime_get);
則我們可能會引起Linux kernel panic,並得到如下資訊:
[ 41.502091] failed to get ktime, last ktime is 41502064202, current ktime is 41502064150
原因:
last_ktime是全域性變數,所有的cpu1都是對同一個last_ktime進行操作。
假如有兩個CPU:cpu0, cpu1,cpu0正在執行:
if (unlikely(last_ktime >= ktime_to_ns(cur_ktime))) {
期望cur_ktime的值與上一次cpu0取到的last_ktime的值進行比較,而cpu1剛好執行完:
last_ktime = ktime_to_ns(cur_ktime);
則全域性變數last_ktime此時被最新的值更新了,因此就出現了last_ktime 大於 cur_ktime的情況。
因此我們又對ktime_get()方法做了如下修改(將last_ktime改為percpu變數):
static DEFINE_PER_CPU(u64, last_ktime);
ktime_t ktime_get(void)
{
struct timekeeper *tk = &tk_core.timekeeper;
unsigned int seq;
ktime_t base, cur_ktime;
s64 nsecs;
u64 *last_ktime_tmp, tmp = 0;
WARN_ON(timekeeping_suspended);
last_ktime_tmp = this_cpu_ptr(&last_ktime);
do {
seq = read_seqcount_begin(&tk_core.seq);
base = tk->tkr_mono.base;
nsecs = timekeeping_get_ns(&tk->tkr_mono);
} while (read_seqcount_retry(&tk_core.seq, seq));
tmp = ktime_to_ns(base);
cur_ktime = ktime_add_ns(base, nsecs);
if(ktime_to_ns(cur_ktime) > 20000000000)
if (unlikely((*last_ktime_tmp) > ktime_to_ns(cur_ktime))) {
printk("failed to get ktime, last ktime is %llu, "
"current ktime is %llu, nsecs is %lld, "
"timekeeping_suspended is %d, tmp is %llu\n", *last_ktime_tmp,
ktime_to_ns(cur_ktime), nsecs, timekeeping_suspended,
tmp);
BUG();
}
last_ktime_tmp = &ktime_to_ns(cur_ktime);
return cur_ktime;
}
EXPORT_SYMBOL_GPL(ktime_get);
將last_ktime改為percpu變數,我們解決了CPU之間的同步問題,但是又遇到其他的問題:
我們依舊假設當前有兩個CPU:cpu0, cpu1,假設當前cpu操作ktime_get()的順序如下:cpu0: ktime_get()—>cpu1:ktime_get()—>cpu0:ktime_get(),假設依次獲得獲得的last_ktime為:cpu0:last_ktime=41502064202—>cpu1:last_ktime=41502064302—>cpu0:last_ktime=41502064276,此時cpu1獲取的最新的last_ktime比cpu0獲取的最新的時間要大,但是因為last_ktime是percpu變數,因此我們並不能及時發現當前時間比之前的時間晚的問題。
最終我們選擇使用如下修改,該修改可以準確判斷當前時間比之前的時間晚的問題,同時又不會引發cpu同步問題:
static atomic64_t last_ktime;
ktime_t ktime_get(void)
{
struct timekeeper *tk = &tk_core.timekeeper;
unsigned int seq;
ktime_t base, cur_ktime;
s64 nsecs;
u64 last_ktime_tmp, tmp = 0;
WARN_ON(timekeeping_suspended);
last_ktime_tmp = atomic64_read(&last_ktime);
barrier();
do {
seq = read_seqcount_begin(&tk_core.seq);
base = tk->tkr_mono.base;
nsecs = timekeeping_get_ns(&tk->tkr_mono);
} while (read_seqcount_retry(&tk_core.seq, seq));
tmp = ktime_to_ns(base);
cur_ktime = ktime_add_ns(base, nsecs);
if (unlikely(last_ktime_tmp > ktime_to_ns(cur_ktime))) {
printk("failed to get ktime, last ktime is %llu, "
"current ktime is %llu, nsecs is %lld, "
"timekeeping_suspended is %d, tmp is %llu\n", last_ktime_tmp,
ktime_to_ns(cur_ktime), nsecs, timekeeping_suspended,
tmp);
BUG();
}
atomic64_set(&last_ktime, ktime_to_ns(cur_ktime));
return cur_ktime;
}
EXPORT_SYMBOL_GPL(ktime_get);