程式人生 > Kernel排程器負載均衡(一)

Kernel排程器負載均衡(一)

    /*
     * find_busiest_group(): when this sched domain is imbalanced,
     * return its most heavily loaded sched group and compute the
     * amount of weighted load to pull (*imbalance). Per the original
     * author's note: when the domain is NOT imbalanced it may instead
     * identify the least-loaded group, whose load can be migrated off
     * entirely so its CPU can idle and save power.
     */
    group = find_busiest_group(sd, this_cpu, &imbalance, idle, &sd_idle, cpus, balance);

    if (*balance == 0)	/* find_busiest_group decided this CPU should not balance here */
        goto out_balanced;

    if (!group) {	/* no imbalance: there is no busiest group to pull from */
        schedstat_inc(sd, lb_nobusyg[idle]);
        goto out_balanced;
    }

    /* Within the busiest group, pick the busiest runqueue. */
    busiest = find_busiest_queue(group, idle, imbalance, cpus
);
    if (!busiest) {	/* no busiest runqueue found */
        schedstat_inc(sd, lb_nobusyq[idle]);
        goto out_balanced;
    }

    BUG_ON(busiest == this_rq);

    schedstat_add(sd, lb_imbalance[idle], imbalance);

    ld_moved = 0;
    if (busiest->nr_running > 1) {	/* only worth pulling if the busiest rq has more than one task */
        /*
         * Attempt to move tasks. If find_busiest_group has found
         * an imbalance but busiest->nr_running <= 1, the group is
         * still unbalanced. ld_moved simply stays zero, so it is
         * correctly treated as an imbalance.
         */
        local_irq_save(flags);
        double_rq_lock(this_rq, busiest);
        /*
         * Pull tasks from the busiest runqueue onto this CPU's
         * runqueue; at most 'imbalance' worth of weighted load
         * is moved.
         */
        ld_moved = move_tasks(this_rq, this_cpu, busiest,
                      imbalance, sd, idle, &all_pinned);

        double_rq_unlock(this_rq, busiest);
        local_irq_restore(flags);

        /*
         * some other cpu did the load balance for us.
         * If a different CPU pulled tasks onto this_cpu's runqueue,
         * this_cpu must be woken up — it now has work to run.
         */
        if (ld_moved && this_cpu != smp_processor_id())
            resched_cpu(this_cpu);

        /* All tasks on this runqueue were pinned by CPU affinity */
        if (unlikely(all_pinned)) {	/* nothing movable on that rq: drop its CPU from the candidate mask and, if other CPUs remain in the domain, retry with a new busiest */

            cpumask_clear_cpu(cpu_of(busiest), cpus);
            if (!cpumask_empty(cpus))
                goto redo;
            goto out_balanced;
        }
    }

    if (!ld_moved) {	/* failed to move any task */
        schedstat_inc(sd, lb_failed[idle]);
        /*
         * Increment the failure counter only on periodic balance.
         * We do not want newidle balance, which can be very
         * frequent, pollute the failure counter causing
         * excessive cache_hot migrations and active balances.
         */
        if (idle != CPU_NEWLY_IDLE)
            sd->nr_balance_failed++;

        if (unlikely(sd->nr_balance_failed > sd->cache_nice_tries+2)) {

            spin_lock_irqsave(&busiest->lock, flags);

            /* don't kick the migration_thread, if the curr
             * task on busiest cpu can't be moved to this_cpu
             */
            if (!cpumask_test_cpu(this_cpu,
                          &busiest->curr->cpus_allowed)) {
                spin_unlock_irqrestore(&busiest->lock, flags);
                all_pinned = 1;
                goto out_one_pinned;
            }

            if (!busiest->active_balance) {	/* arm active balancing: the busiest CPU's migration_thread will "push" a task to us instead of us pulling */
                busiest->active_balance = 1;
                busiest->push_cpu = this_cpu;	/* push the task to this CPU */
                active_balance = 1;
            }
            spin_unlock_irqrestore(&busiest->lock, flags);
            if (active_balance)
                wake_up_process(busiest->migration_thread);	/* wake migration_thread to perform the push */

            /*
             * We've kicked active balancing, reset the failure
             * counter.
             */
            sd->nr_balance_failed = sd->cache_nice_tries+1;
        }
    } else
        sd->nr_balance_failed = 0;

    if (likely(!active_balance)) {
        /* We were unbalanced, so reset the balancing interval */
        sd->balance_interval = sd->min_interval;
    } else {
        /*
         * If we've begun active balancing, start to back off. This
         * case may not be covered by the all_pinned logic if there
         * is only 1 task on the busy runqueue (because we don't call
         * move_tasks).
         */
        if (sd->balance_interval < sd->max_interval)
            sd->balance_interval *= 2;
    }

    /*
     * NOTE(review): SD_SHARE_CPUPOWER + power-savings check — returning
     * -1 here appears to signal the SMT/power-save special case to the
     * caller; semantics depend on the (unseen) caller, confirm there.
     */
    if (!ld_moved && !sd_idle && sd->flags & SD_SHARE_CPUPOWER &&
        !test_sd_parent(sd, SD_POWERSAVINGS_BALANCE))
        ld_moved = -1;

    goto out;

out_balanced:
    schedstat_inc(sd, lb_balanced[idle]);

    sd->nr_balance_failed = 0;

out_one_pinned:
    /* tune up the balancing interval */
    if ((all_pinned && sd->balance_interval < MAX_PINNED_INTERVAL) ||
            (sd->balance_interval < sd->max_interval))
        sd->balance_interval *= 2;

    if (!sd_idle && sd->flags & SD_SHARE_CPUPOWER &&
        !test_sd_parent(sd, SD_POWERSAVINGS_BALANCE))
        ld_moved = -1;
    else
        ld_moved = 0;
out:
    if (ld_moved)
        update_shares(sd);
    return ld_moved;
}