1. 程式人生 > >C++併發程式設計——在執行時選擇執行緒數量

C++併發程式設計——在執行時選擇執行緒數量

在編寫多執行緒程式時,開多少個執行緒比較合適呢?執行緒並不是越多越好:理論上,硬體支援多少個併發執行緒,就開多少個執行緒比較合適;也有例外,比如完成埠(IOCP)通常建議開CPU核數2倍的執行緒,因為要考慮到部分執行緒可能會被掛起等情況。但無論採用哪種策略,首先都要獲取當前硬體支援的併發執行緒數,通常情況下它等於CPU核數。

std::thread::hardware_concurrency();    //獲取硬體支援的併發執行緒數(僅為提示值,通常等於CPU核心數;無法取得時返回0)

程式碼示例:
以下程式碼為std::accumulate的簡單並行版本實現:將大量的累加操作分配給多個執行緒去計算,最後把各個執行緒的計算結果累加,得出最終結果。真正的平行計算任務分割是很麻煩的,而這裡每個執行緒只寫入results中屬於自己的那個元素,主執行緒在join全部執行緒之後才讀取結果,因此不需要額外考慮執行緒的同步等問題。

// Function object that sums one sub-range of the input.
//
// operator() adds every element of [first, last) onto the value already held
// in `result` and stores the total back into `result`. The caller supplies
// the output slot by reference, so each worker can be given its own slot.
template<typename Iterator,typename T>
struct accumulate_block
{
    void operator()(Iterator first,Iterator last,T& result)
    {
        // The current content of the slot is the starting value of the sum.
        T const seed=result;
        result=std::accumulate(first,last,seed);
    }
};

template<typename Iterator,typename T>
T parallel_accumulate(Iterator first,Iterator last,T init)
{
    unsigned
long const length=std::distance(first,last); if(!length) return init; unsigned long const min_per_thread=25; unsigned long const max_threads=(length+min_per_thread-1)/min_per_thread; //獲取最大執行緒數量 unsigned long const hardware_threads=std::thread::hardware_concurrency(); //獲取當前CPU核心數量
unsigned long const num_threads=std::min(hardware_threads!=0?hardware_threads:2,max_threads);//執行執行緒數量 unsigned long const block_size=length/num_threads; std::vector<T> results(num_threads); Iterator block_start=first; std::vector<std::thread> v_threads(num_threads-1); for(unsigned long i=0;i<num_threads-1;++i) { Iterator block_end=block_start; std::advance(block_end,block_size); v_threads[i]=std::thread(accumulate_block<Iterator,T>(),block_start,block_end,std::ref(results[i])); block_start=block_end; } accumulate_block<Iterator,T>()(block_start,last,results[num_threads-1]); //計算剩下的數,相當於在主執行緒中計算 std::for_each(v_threads.begin(),v_threads.end(),std::mem_fn(&std::thread::join));//等待所有執行緒計算完成 return std::accumulate(results.begin(),results.end(),init); } int _tmain(int argc, _TCHAR* argv[]) { std::vector<int> v(100000); std::iota(v.begin(),v.end(),1); long long sum=parallel_accumulate(v.begin(),v.end(),0); cout<<"sum="<<sum<<endl; return 0; }

相關STL原始碼:

//std::distance原始碼
// Bidirectional-iterator overload of the distance helper.
// Walks from _First to _Last one step at a time and adds one to _Off for
// every step taken. _Off is incremented, not reset, so the caller chooses
// the starting value.
template<class _BidIt, class _Diff>
inline void _Distance2(_BidIt _First, _BidIt _Last, _Diff& _Off,
                       bidirectional_iterator_tag)
{
    while (_First != _Last)
    {
        ++_Off;
        ++_First;
    }
}

// Returns how far _Last is from _First.
// Starts a counter at zero and lets _Distance2 add the number of steps to it;
// the value of _Iter_cat(_First) is passed as the tag argument that selects
// the _Distance2 overload.
template<class _InIt>
inline typename iterator_traits<_InIt>::difference_type
distance(_InIt _First, _InIt _Last)
{
    typedef typename iterator_traits<_InIt>::difference_type _Difference;

    _Difference _Count = 0;
    _Distance2(_First, _Last, _Count, _Iter_cat(_First));
    return _Count;
}
//std::advance原始碼
    // TEMPLATE FUNCTION advance
// Input-iterator overload: moves _Where forward by _Off single increments.
// An offset that is not positive leaves _Where untouched; when
// _ITERATOR_DEBUG_LEVEL is 2 a negative offset is additionally reported
// through _DEBUG_ERROR.
template<class _InIt, class _Diff>
inline void _Advance(_InIt& _Where, _Diff _Off, input_iterator_tag)
{
 #if _ITERATOR_DEBUG_LEVEL == 2
    if (_Off < 0)
        _DEBUG_ERROR("negative offset in advance");
 #endif /* _ITERATOR_DEBUG_LEVEL == 2 */

    while (0 < _Off)
    {
        ++_Where;
        --_Off;
    }
}

// Forward-iterator overload: moves _Where forward by _Off single increments.
// An offset that is not positive leaves _Where untouched; when
// _ITERATOR_DEBUG_LEVEL is 2 a negative offset is additionally reported
// through _DEBUG_ERROR.
template<class _FwdIt, class _Diff>
inline void _Advance(_FwdIt& _Where, _Diff _Off, forward_iterator_tag)
{
 #if _ITERATOR_DEBUG_LEVEL == 2
    if (_Off < 0)
        _DEBUG_ERROR("negative offset in advance");
 #endif /* _ITERATOR_DEBUG_LEVEL == 2 */

    while (0 < _Off)
    {
        ++_Where;
        --_Off;
    }
}

// Bidirectional-iterator overload: a positive _Off steps _Where forward,
// a negative _Off steps it backward, one position per iteration.
template<class _BidIt, class _Diff>
inline void _Advance(_BidIt& _Where, _Diff _Off, bidirectional_iterator_tag)
{
    // Forward part: runs only while the remaining offset is positive.
    while (0 < _Off)
    {
        ++_Where;
        --_Off;
    }

    // Backward part: runs only while the remaining offset is negative.
    while (_Off < 0)
    {
        --_Where;
        ++_Off;
    }
}

// Random-access overload: applies the whole offset with a single compound
// addition instead of stepping one position at a time.
template<class _RanIt, class _Diff>
inline void _Advance(_RanIt& _Where, _Diff _Off, random_access_iterator_tag)
{
    _Where += _Off;
}

// Public entry point: moves _Where by _Off positions.
// Does no stepping itself; it forwards to _Advance and passes the value of
// _Iter_cat(_Where) as the third argument, which is what selects between the
// tag-specific _Advance overloads.
// NOTE(review): _Iter_cat is not defined in this excerpt - presumably it
// yields the iterator's category tag; confirm against the library headers.
template<class _InIt,
    class _Diff> inline
    void advance(_InIt& _Where, _Diff _Off)
    {   // increment iterator by offset, arbitrary iterators
    _Advance(_Where, _Off, _Iter_cat(_Where));
    }
//獲取硬體執行緒數量
    // Static member function (the enclosing class is outside this excerpt).
    // Returns, as unsigned int, whatever ::Concurrency::details::_GetConcurrency()
    // reports; that helper is not shown here.
    // NOTE(review): _NOEXCEPT is a macro defined elsewhere - presumably it
    // expands to a no-throw specification; confirm.
    static unsigned int hardware_concurrency() _NOEXCEPT
        {   // return number of hardware thread contexts
        return (::Concurrency::details::_GetConcurrency());
        }