Kernel Scheduler Load Balancing (1)
阿新 · Published: 2019-02-20
/*
 * find_busiest_group(): if the load across this sched domain is out of
 * balance, return the busiest group in it. If the domain is balanced but
 * power-savings balancing is in effect, return the least-loaded group
 * instead, so that all of its load can later be migrated off and its CPUs
 * put to sleep to save power.
 */
group = find_busiest_group(sd, this_cpu, &imbalance, idle, &sd_idle, cpus, balance);
if (*balance == 0)	/* balancing at this domain level is left to another CPU in the group */
	goto out_balanced;
if (!group) {		/* no imbalance: there is no busiest group to pull from */
	schedstat_inc(sd, lb_nobusyg[idle]);
	goto out_balanced;
}
/* Within the busiest group, find the busiest runqueue. */
busiest = find_busiest_queue(group, idle, imbalance, cpus);
if (!busiest) {		/* no busiest runqueue could be found */
	schedstat_inc(sd, lb_nobusyq[idle]);
	goto out_balanced;
}
BUG_ON(busiest == this_rq);	/* we must never pick our own runqueue */
schedstat_add(sd, lb_imbalance[idle], imbalance);	/* record the measured imbalance in the schedstats */
ld_moved = 0;
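/*
 * Moving tasks requires both runqueue locks. double_rq_lock() acquires
 * the two locks in a fixed order (by runqueue address) so that two CPUs
 * balancing against each other cannot deadlock, and interrupts stay
 * disabled for the whole critical section.
 */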
if (busiest->nr_running > 1) {	/* the busiest runqueue has more than one task */
	/*
	 * Attempt to move tasks. If find_busiest_group has found
	 * an imbalance but busiest->nr_running <= 1, the group is
	 * still unbalanced. ld_moved simply stays zero, so it is
	 * correctly treated as an imbalance.
	 */
	local_irq_save(flags);
	double_rq_lock(this_rq, busiest);
	/*
	 * Pull tasks from the busiest runqueue onto this CPU's runqueue;
	 * at most "imbalance" worth of weighted load is moved.
	 */
	ld_moved = move_tasks(this_rq, this_cpu, busiest,
			      imbalance, sd, idle, &all_pinned);
	double_rq_unlock(this_rq, busiest);
	local_irq_restore(flags);

	/*
	 * Some other CPU did the load balance for us: kick this_cpu
	 * with a reschedule, since it now has work to run.
	 */
	if (ld_moved && this_cpu != smp_processor_id())
		resched_cpu(this_cpu);

	/* All tasks on this runqueue were pinned by CPU affinity */
	if (unlikely(all_pinned)) {
		/*
		 * Every task on the busiest runqueue is pinned to its CPU
		 * and cannot be moved. Stop considering that CPU; if other
		 * CPUs remain in the domain, retry and look for the next
		 * busiest one (a userspace analogue of this shrinking-mask
		 * retry is sketched after the listing).
		 */
		cpumask_clear_cpu(cpu_of(busiest), cpus);
		if (!cpumask_empty(cpus))
			goto redo;
		goto out_balanced;
	}
}
if (!ld_moved) {	/* no tasks could be moved over */
	schedstat_inc(sd, lb_failed[idle]);
	/*
	 * Increment the failure counter only on periodic balance.
	 * We do not want newidle balance, which can be very
	 * frequent, pollute the failure counter causing
	 * excessive cache_hot migrations and active balances.
	 */
	if (idle != CPU_NEWLY_IDLE)
		sd->nr_balance_failed++;

	if (unlikely(sd->nr_balance_failed > sd->cache_nice_tries+2)) {
		spin_lock_irqsave(&busiest->lock, flags);

		/* don't kick the migration_thread, if the curr
		 * task on busiest cpu can't be moved to this_cpu
		 */
		if (!cpumask_test_cpu(this_cpu,
				      &busiest->curr->cpus_allowed)) {
			spin_unlock_irqrestore(&busiest->lock, flags);
			all_pinned = 1;
			goto out_one_pinned;
		}

		/*
		 * Arm the migration_thread on the busiest CPU: fall back
		 * to "push" migration, where tasks are pushed to this CPU
		 * (a toy model of this handshake is sketched after the
		 * listing).
		 */
		if (!busiest->active_balance) {
			busiest->active_balance = 1;
			busiest->push_cpu = this_cpu;	/* push tasks to this CPU */
			active_balance = 1;
		}
		spin_unlock_irqrestore(&busiest->lock, flags);
		if (active_balance)
			wake_up_process(busiest->migration_thread);	/* wake the migration_thread */

		/*
		 * We've kicked active balancing, reset the failure
		 * counter.
		 */
		sd->nr_balance_failed = sd->cache_nice_tries+1;
	}
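	/*
	 * Once woken, the migration_thread on the busiest CPU notices
	 * rq->active_balance and runs the active-balance path, which
	 * pushes a task from the busiest runqueue over to push_cpu:
	 * migration driven from the busy side rather than pulled from here.
	 */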
} else
	sd->nr_balance_failed = 0;

if (likely(!active_balance)) {
	/* We were unbalanced, so reset the balancing interval */
	sd->balance_interval = sd->min_interval;
} else {
	/*
	 * If we've begun active balancing, start to back off. This
	 * case may not be covered by the all_pinned logic if there
	 * is only 1 task on the busy runqueue (because we don't call
	 * move_tasks). The doubling is simulated in the back-off
	 * sketch after the listing.
	 */
	if (sd->balance_interval < sd->max_interval)
		sd->balance_interval *= 2;
}

if (!ld_moved && !sd_idle && sd->flags & SD_SHARE_CPUPOWER &&
    !test_sd_parent(sd, SD_POWERSAVINGS_BALANCE))
	ld_moved = -1;

goto out;
out_balanced:
	schedstat_inc(sd, lb_balanced[idle]);
	sd->nr_balance_failed = 0;

out_one_pinned:
	/* tune up the balancing interval */
	if ((all_pinned && sd->balance_interval < MAX_PINNED_INTERVAL) ||
	    (sd->balance_interval < sd->max_interval))
		sd->balance_interval *= 2;

	if (!sd_idle && sd->flags & SD_SHARE_CPUPOWER &&
	    !test_sd_parent(sd, SD_POWERSAVINGS_BALANCE))
		ld_moved = -1;
	else
		ld_moved = 0;

out:
	if (ld_moved)
		update_shares(sd);
	return ld_moved;
}
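The all_pinned path above shrinks the candidate mask and retries. Below is a minimal userspace analogue of that pattern using glibc's cpu_set_t; the try_pull() helper and the four-CPU setup are invented for illustration and are not kernel API:

#define _GNU_SOURCE
#include <sched.h>
#include <stdio.h>
#include <stdbool.h>

/* Hypothetical: pretend only CPU 2 has a task we are allowed to move. */
static bool try_pull(int cpu)
{
	return cpu == 2;
}

int main(void)
{
	cpu_set_t cpus;
	int cpu;

	CPU_ZERO(&cpus);
	for (cpu = 0; cpu < 4; cpu++)
		CPU_SET(cpu, &cpus);	/* all four CPUs start as candidates */

	/* Like the redo loop: drop fully pinned CPUs, retry on the rest. */
	while (CPU_COUNT(&cpus) > 0) {
		for (cpu = 0; cpu < 4; cpu++)
			if (CPU_ISSET(cpu, &cpus))
				break;	/* pick the "busiest" remaining candidate */
		if (try_pull(cpu)) {
			printf("pulled load from cpu %d\n", cpu);
			return 0;
		}
		printf("cpu %d is fully pinned, dropping it\n", cpu);
		CPU_CLR(cpu, &cpus);	/* cpumask_clear_cpu() analogue */
	}
	printf("all candidates pinned: give up (out_balanced)\n");
	return 0;
}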
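The active_balance/push_cpu handshake amounts to: set a flag and a target CPU under the runqueue lock, then wake a per-CPU helper thread that performs the push. A toy pthread model of that pattern follows (compile with -lpthread); struct toy_rq and its fields are stand-ins invented for this sketch, not the kernel's struct rq:

#include <pthread.h>
#include <stdio.h>
#include <stdbool.h>

/* Toy stand-in for the few struct rq fields involved in push migration. */
struct toy_rq {
	pthread_mutex_t lock;		/* plays the role of rq->lock */
	pthread_cond_t wake;		/* plays the role of wake_up_process() */
	bool active_balance;
	int push_cpu;
	bool stop;
};

/* Toy stand-in for the per-CPU migration thread. */
static void *migration_thread(void *arg)
{
	struct toy_rq *rq = arg;

	pthread_mutex_lock(&rq->lock);
	for (;;) {
		while (!rq->active_balance && !rq->stop)
			pthread_cond_wait(&rq->wake, &rq->lock);
		if (rq->active_balance) {
			/* the active-balance work: push a task to push_cpu */
			printf("pushing one task to cpu %d\n", rq->push_cpu);
			rq->active_balance = false;
		}
		if (rq->stop)
			break;
	}
	pthread_mutex_unlock(&rq->lock);
	return NULL;
}

int main(void)
{
	struct toy_rq rq = {
		.lock = PTHREAD_MUTEX_INITIALIZER,
		.wake = PTHREAD_COND_INITIALIZER,
	};
	pthread_t tid;

	pthread_create(&tid, NULL, migration_thread, &rq);

	/* The balancer side: request a push under the lock, then wake. */
	pthread_mutex_lock(&rq.lock);
	if (!rq.active_balance) {
		rq.active_balance = true;
		rq.push_cpu = 3;	/* hypothetical this_cpu */
	}
	pthread_mutex_unlock(&rq.lock);
	pthread_cond_signal(&rq.wake);

	/* Tear the toy thread down. */
	pthread_mutex_lock(&rq.lock);
	rq.stop = true;
	pthread_mutex_unlock(&rq.lock);
	pthread_cond_signal(&rq.wake);

	pthread_join(tid, NULL);
	return 0;
}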
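Finally, the balance_interval handling at the tail: double the interval whenever a pass makes no progress (capped at sd->max_interval), and snap back to sd->min_interval once the domain is balanced again. A minimal simulation of the back-off arithmetic, with made-up millisecond values (the kernel keeps these per sched_domain):

#include <stdio.h>

/* Made-up stand-ins for sd->min_interval and sd->max_interval (ms). */
#define MIN_INTERVAL	8
#define MAX_INTERVAL	512

int main(void)
{
	unsigned int interval = MIN_INTERVAL;
	int pass;

	/* Passes that fail to move load: double the interval, capped. */
	for (pass = 1; pass <= 8; pass++) {
		if (interval < MAX_INTERVAL)
			interval *= 2;
		printf("failed pass %d: next balance in %u ms\n", pass, interval);
	}

	/* A pass that fixed the imbalance: reset to the minimum. */
	interval = MIN_INTERVAL;
	printf("balanced: next balance in %u ms\n", interval);
	return 0;
}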